# desktopkit API — Agent Reference

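Base: http://0.0.0.0:8082 (0.0.0.0 is a wildcard bind address, not a routable destination; connect using the host's reachable address, e.g. http://127.0.0.1:8082 when running locally)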

Auth: POST /auth/login {"password":"..."} -> {"token":"..."}

All endpoints except /health and /auth/login require the request header: Authorization: Bearer <token>

## Auth

POST /auth/login
Body: {"password":"..."}
-> {"token":"abc123...","expires_in_s":28800}
Authenticate with the Unix password; returns a bearer token and its lifetime in seconds (expires_in_s)

POST /auth/logout
-> {"status":"ok"}
Invalidate current token

GET /auth/token-info
-> {"valid":true,"remaining_ttl_s":28750}
Check token validity and remaining TTL

## Screenshot

GET /screenshot ?screen_id=0&cursor_ring=false&wait_stable=false&pixel_delta=30&stable_ratio=0.2&stable_window_ms=500&timeout_ms=5000&poll_interval_ms=200&min_change_ratio=<number>&ignore_region=<string>&watch_region=<string>&upload_url=<string>&upload_method=POST&upload_format=raw&upload_header=<string>&upload_include_inline=true&upload_name=screenshot
-> {"image":"<base64>","width":1920,"height":1080,"stable":true,"timed_out":false,"waited_ms":1240,"polls":7,"change_ratio_last":0.0008}
Capture full screen

GET /screenshot/region ?x=<integer>&y=<integer>&width=<integer>&height=<integer>&wait_stable=false&pixel_delta=30&stable_ratio=0.2&stable_window_ms=500&timeout_ms=5000&poll_interval_ms=200&min_change_ratio=<number>&ignore_region=<string>&watch_region=<string>&upload_url=<string>&upload_method=POST&upload_format=raw&upload_header=<string>&upload_include_inline=true&upload_name=screenshot
-> {"image":"<base64>","width":400,"height":300,"stable":true,"timed_out":false,"waited_ms":1240,"polls":7,"change_ratio_last":0.0008}
Capture screen region

GET /screenshot/cursor-position
-> {"x":512,"y":384}
Get current cursor position

POST /screenshots
Body: {"variants":[{"name":"overview"},{"name":"calc_zoom","crop":{"x":400,"y":200,"width":800,"height":600},"scale_x":2.0,"scale_y":2.0,"format":"jpeg","jpeg_quality":70,"upload":{"url":"https://store.example.com/upload","headers":{"Authorization":"Bearer ..."},"include_inline":false}}]}
-> {"native_width":1920,"native_height":1080,"images":[{"name":"overview","image":"<base64>","format":"png","width":1920,"height":1080,"scale_x":1.0,"scale_y":1.0,"crop":null,"window_id":null,"url":null,"ok":true},{"name":"calc_zoom","image":null,"format":"jpeg","width":1600,"height":1200,"scale_x":2.0,"scale_y":2.0,"crop":{"x":400,"y":200,"width":800,"height":600},"window_id":null,"url":null,"ok":true,"upload":{"ok":true,"status":200,"response":{"status":200,"headers":{"location":"https://store.example.com/files/abc123.jpg"},"body":""}}}]}
Capture multiple variants (crop, scale, format, upload)

GET /screens
-> {"screens":[{"id":0,"width":1920,"height":1080}]}
List available screens

## Mouse

POST /mouse/move
Body: {"x":100,"y":200}
-> {"status":"ok"}
Move cursor to coordinates

POST /mouse/click
Body: {"button":"left"}
-> {"status":"ok"}
Click at current position

POST /mouse/double-click
Body: {"button":"left"}
-> {"status":"ok"}
Double-click at current position

POST /mouse/scroll
Body: {"x":500,"y":300,"delta_x":0,"delta_y":-3}
-> {"status":"ok"}
Scroll at position

POST /mouse/drag
Body: {"from_x":100,"from_y":200,"to_x":300,"to_y":400,"button":"left"}
-> {"status":"ok"}
Drag from one position to another

POST /mouse/down
Body: {"button":"left"}
-> {"status":"ok"}
Press mouse button (no release)

POST /mouse/up
Body: {"button":"left"}
-> {"status":"ok"}
Release a previously-pressed mouse button

## Keyboard

POST /keyboard/type
Body: {"text":"Hello, World!","duration_ms":500}
-> {"status":"ok"}
Type text

POST /keyboard/hotkey
Body: {"keys":["ctrl",{"char":"s"}],"duration_ms":1500}
-> {"status":"ok"}
Press key combination

## Clipboard

GET /clipboard ?max_bytes=1048576
-> {"text":"clipboard content","length":17,"truncated":false}
Read clipboard contents

POST /clipboard
Body: {"text":"new content"}
-> {"status":"ok"}
Write to clipboard

GET /clipboard/slots
-> [{"name":"panel_1","length":482,"stored_at":"2026-05-19T10:15:00Z"}]
List clipboard slots

GET /clipboard/slots/{name}
-> {"name":"panel_1","text":"...","length":482,"stored_at":"2026-05-19T10:15:00Z"}
Read a clipboard slot

PUT /clipboard/slots/{name}
Body: {"text":"collected text"}
-> {"name":"panel_1","length":14}
Store text in a named slot

DELETE /clipboard/slots/{name}
-> (empty response body)
Delete a named slot

DELETE /clipboard/slots
-> (empty response body)
Delete all slots

POST /clipboard/slots/{name}/restore
-> {"name":"panel_1","length":482}
Restore a slot to the OS clipboard

POST /clipboard/capture
Body: {"captures":[{"slot":"panel_1","focus_window_id":"0x4400003","select_all":true,"copy":true,"settle_ms":150}]}
-> {"results":[{"slot":"panel_1","text":"...","length":482,"truncated":false,"ok":true}]}
Batch capture into clipboard slots

## Sequence

POST /sequence ?lock_timeout_ms=10000
Body: {"on_error":"abort","steps":[{"name":"focus-calc","type":"window.focus","window_id":"0x4400003"},{"name":"paste-order-id","type":"clipboard.slot_restore","slot_name":"current_order"},{"name":"hotkey-paste","type":"keyboard.key","keys":"ctrl+v"},{"name":"scroll-down","type":"mouse.scroll","x":600,"y":400,"delta_x":0,"delta_y":5},{"name":"submit","type":"mouse.click","x":500,"y":400},{"name":"settle","type":"sleep","ms":150},{"name":"close-confirm","type":"dialog.close","window_id":"0x4500001","wait_dialog_gone_ms":800},{"name":"snap","type":"screenshot","format":"jpeg","jpeg_quality":80}]}
-> {"results":[{"name":"focus-calc","type":"window.focus","ok":true,"duration_ms":12},{"name":"paste-order-id","type":"clipboard.slot_restore","ok":true,"duration_ms":18,"length":8},{"name":"hotkey-paste","type":"keyboard.key","ok":true,"duration_ms":4},{"name":"scroll-down","type":"mouse.scroll","ok":true,"duration_ms":21},{"name":"submit","type":"mouse.click","ok":true,"duration_ms":7},{"name":"settle","type":"sleep","ok":true,"duration_ms":151},{"name":"close-confirm","type":"dialog.close","ok":true,"duration_ms":62,"closed_via":"escape"},{"name":"snap","type":"screenshot","ok":true,"duration_ms":34,"image":"<base64>","format":"jpeg","width":1920,"height":1080,"bytes":184523}],"aborted_at":null,"total_duration_ms":309}
Execute an ordered batch of input steps under the global lock

## Windows

GET /windows
-> [{"id":12345,"title":"Terminal","process_name":"gnome-terminal","pid":4321,"bounds":{"x":0,"y":0,"width":800,"height":600},"is_focused":true,"is_dialog":false}]
List all windows

GET /windows/focused
-> {"id":12345,"title":"Terminal","process_name":"gnome-terminal","pid":4321,"bounds":{"x":0,"y":0,"width":800,"height":600},"is_focused":true,"is_dialog":false}
Get focused window

POST /windows/{id}/focus
-> {"status":"ok"}
Focus a window by ID

POST /windows/{id}/move ?lock_timeout_ms=10000
Body: {"bounds":{"x":200,"y":100,"width":1280,"height":720}}
-> {"ok":true,"id":"12345","bounds":{"x":200,"y":100,"width":1280,"height":720}}
Move and resize a window

GET /windows/dialog
-> {"dialog":null}
Detect active dialog/modal

## Application

POST /application/open
Body: {"path":"/usr/bin/gedit","args":["file.txt"]}
-> {"status":"ok","pid":12345}
Open an application

GET /application/running ?name=<string>
-> {"running":true,"pids":[12345]}
Check if application is running

POST /application/close
Body: {"window_id":12345}
-> {"status":"ok"}
Close an application window

## Actions

GET /actions ?limit=100
-> [{"timestamp":"...","action":"mouse.click","params":"...","result":"ok","duration_ms":42}]
Get recent action log

## System

GET /health
-> {"status":"ok"}
Health check

