Browser API
Manage browser sessions and execute actions.
Endpoints
| Method | Endpoint | Description |
|---|---|---|
| GET | /api/agents/{id}/browser | List sessions |
| POST | /api/agents/{id}/browser | Create session |
| DELETE | /api/agents/{id}/browser?id={sessionId} | Delete session |
| POST | /api/agents/{id}/browser/{sessionId}/execute | Execute action |
| GET | /api/agents/{id}/browser/{sessionId}/live-view | Get live view |
| POST | /api/agents/{id}/browser/{sessionId}/takeover | Request takeover |
List Sessions
bash
GET /api/agents/{id}/browser
Response
json
{
"sessions": [
{
"id": "session-123",
"status": "active",
"headless": true,
"viewport": {
"width": 1280,
"height": 720
},
"created_at": "2024-12-15T10:00:00Z",
"last_activity": "2024-12-15T10:05:00Z"
}
]
}
Create Session
bash
POST /api/agents/{id}/browser
Content-Type: application/json
Request Body
json
{
"headless": true,
"viewport": {
"width": 1920,
"height": 1080
},
"user_agent": "Mozilla/5.0..."
}
Response
json
{
"id": "session-123",
"status": "active",
"live_view_url": "https://browserbase.com/live/...",
"created_at": "2024-12-15T10:00:00Z"
}
Delete Session
bash
DELETE /api/agents/{id}/browser?id={sessionId}
Response
json
{
"success": true
}
Execute Action
bash
POST /api/agents/{id}/browser/{sessionId}/execute
Content-Type: application/json
Navigate
json
{
"action": "navigate",
"url": "https://example.com"
}
Click
json
{
"action": "click",
"selector": "#submit-button"
}
Type
json
{
"action": "type",
"selector": "#email",
"text": "user@example.com"
}
Press
json
{
"action": "press",
"key": "Enter"
}
Screenshot
json
{
"action": "screenshot",
"full_page": false
}
Response:
json
{
"success": true,
"result": {
"image": "data:image/png;base64,..."
}
}
Extract
json
{
"action": "extract",
"selector": ".product-price",
"attribute": "text"
}
Response:
json
{
"success": true,
"result": {
"value": "$99.99"
}
}
Scroll
json
{
"action": "scroll",
"direction": "down",
"amount": 500
}
Wait
json
{
"action": "wait",
"selector": "#content",
"state": "visible",
"timeout": 5000
}
Multiple Actions
json
{
"actions": [
{ "action": "navigate", "url": "https://example.com" },
{ "action": "wait", "selector": "#login" },
{ "action": "click", "selector": "#login" },
{ "action": "type", "selector": "#email", "text": "user@example.com" },
{ "action": "press", "key": "Enter" }
]
}
Response
json
{
"success": true,
"results": [
{ "action": "navigate", "success": true, "duration_ms": 1500 },
{ "action": "wait", "success": true, "duration_ms": 200 },
{ "action": "click", "success": true, "duration_ms": 50 },
{ "action": "type", "success": true, "duration_ms": 300 },
{ "action": "press", "success": true, "duration_ms": 10 }
],
"total_duration_ms": 2060
}
Live View
bash
GET /api/agents/{id}/browser/{sessionId}/live-view
Response
json
{
"url": "https://browserbase.com/live/session-123",
"expires_at": "2024-12-15T11:00:00Z"
}
Human Takeover
bash
POST /api/agents/{id}/browser/{sessionId}/takeover
Content-Type: application/json
Request Body
json
{
"reason": "CAPTCHA detected",
"timeout": 300
}
Response
json
{
"takeover_id": "takeover-123",
"status": "pending",
"takeover_url": "https://browserbase.com/takeover/...",
"expires_at": "2024-12-15T10:05:00Z"
}
Session Status
| Status | Description |
|---|---|
| active | Session is running |
| paused | Temporarily paused |
| takeover | Human control active |
| closed | Session ended |
| error | Session failed |
Action Types
| Action | Description |
|---|---|
| navigate | Go to URL |
| click | Click element |
| type | Enter text |
| press | Press key |
| screenshot | Capture image |
| extract | Get content |
| scroll | Scroll page |
| wait | Wait for condition |
| hover | Hover element |
| select | Select option |
| upload | Upload file |
| evaluate | Run JavaScript |
Errors
| Code | Description |
|---|---|
| 400 | Invalid action |
| 404 | Session not found |
| 408 | Action timeout |
| 500 | Browser error |
WebSocket API
Real-time browser interaction via WebSocket connections. All endpoints use the base URL wss://your-domain.com.
Endpoints
| Endpoint | Description |
|---|---|
| /v1/sessions/cast | Screen casting & interactive control |
| /v1/sessions/logs | Browser console log streaming |
| /v1/sessions/pageId | Page ID events |
| /v1/sessions/recording | User interaction recording |
Screen Casting
Connect to receive real-time screen frames and send input events.
javascript
const ws = new WebSocket('wss://your-domain.com/v1/sessions/cast?sessionId=session-123')
ws.onmessage = (event) => {
const data = JSON.parse(event.data)
if (data.type === 'tabList') {
// List of open tabs
console.log(data.tabs, data.firstTabId)
} else if (data.data) {
// Screen frame
renderFrame(data.data, data.pageId, data.url, data.title)
}
}Client Events
Mouse Event
json
{
"type": "mouseEvent",
"event": "mousePressed",
"x": 100,
"y": 200,
"button": "left",
"deltaX": 0,
"deltaY": 0
}
Mouse event types: mousePressed, mouseReleased, mouseMoved, mouseWheel
Keyboard Event
json
{
"type": "keyEvent",
"event": "keyDown",
"key": "Enter",
"code": "Enter",
"modifiers": 0
}
Key event types: keyDown, keyUp, char
Navigation
json
{
"type": "navigation",
"action": "goto",
"url": "https://example.com"
}
Navigation actions: goto, back, forward, refresh
Close Tab
json
{
"type": "closeTab",
"pageId": "page-123"
}
Get Selected Text
json
{
"type": "getSelectedText",
"pageId": "page-123"
}
Server Events
Frame Data
json
{
"pageId": "page-123",
"url": "https://example.com",
"title": "Example",
"favicon": "data:image/png;base64,...",
"data": "<base64-encoded-frame>"
}
Tab List
json
{
"type": "tabList",
"tabs": [
{ "pageId": "page-123", "url": "https://example.com", "title": "Example" }
],
"firstTabId": "page-123"
}
Tab Closed
json
{
"type": "tabClosed",
"pageId": "page-123"
}
Selected Text Response
json
{
"type": "selectedTextResponse",
"pageId": "page-123",
"text": "selected content"
}
Console Logs
Stream browser console output in real-time.
javascript
const ws = new WebSocket('wss://your-domain.com/v1/sessions/logs?sessionId=session-123')
ws.onmessage = (event) => {
const logs = JSON.parse(event.data)
logs.forEach(log => {
console.log(`[${log.level}] ${log.text}`)
})
}Log Format
json
[
{
"pageId": "page-123",
"level": "log",
"text": "Hello from console",
"timestamp": 1702648800000
}
]
Recording
Capture user interactions for replay or analysis.
javascript
const ws = new WebSocket('wss://your-domain.com/v1/sessions/recording?sessionId=session-123')
ws.onmessage = (event) => {
const events = JSON.parse(event.data)
// Store events for replay
recordingBuffer.push(...events)
}Connection Parameters
| Parameter | Description |
|---|---|
| sessionId | Browser session ID (required) |
Connection Management
The server sends heartbeat pings every 30 seconds. Dead connections are automatically cleaned up.
javascript
// Reconnection with backoff
class BrowserSocket {
  /**
   * Reconnecting wrapper around one of the session WebSocket endpoints.
   *
   * @param {string} endpoint - Endpoint path, e.g. '/v1/sessions/cast'.
   * @param {string} sessionId - Browser session ID, sent as the `sessionId` query parameter.
   */
  constructor(endpoint, sessionId) {
    // Encode the ID so characters such as '&' or '#' cannot corrupt the query string
    this.url = `wss://your-domain.com${endpoint}?sessionId=${encodeURIComponent(sessionId)}`
    this.reconnectDelay = 1000
    this.reconnectTimer = null
  }

  /** Opens the socket and schedules a retry, with exponential backoff, whenever it closes. */
  connect() {
    this.ws = new WebSocket(this.url)
    this.ws.onclose = () => {
      this.reconnectTimer = setTimeout(() => this.connect(), this.reconnectDelay)
      // Double the wait after every close, capped at 30 seconds
      this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000)
    }
    this.ws.onopen = () => {
      // A successful connection resets the backoff
      this.reconnectDelay = 1000
    }
  }

  /** Closes the socket for good and cancels any pending reconnect attempt. */
  close() {
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = null
    if (this.ws) {
      // Detach the handler first so this deliberate close does not schedule a reconnect
      this.ws.onclose = null
      this.ws.close()
    }
  }
}
Examples
Complete Flow
bash
# Create session and keep its ID for the following calls
SESSION=$(curl -s -X POST ".../browser" \
  -H 'Content-Type: application/json' \
  -d '{"headless": true}' | jq -r '.id')
# Navigate
curl -s -X POST ".../browser/$SESSION/execute" \
  -H 'Content-Type: application/json' \
  -d '{"action": "navigate", "url": "https://example.com"}'
# Fill form
curl -s -X POST ".../browser/$SESSION/execute" \
  -H 'Content-Type: application/json' \
  -d '{
    "actions": [
      {"action": "type", "selector": "#email", "text": "user@example.com"},
      {"action": "type", "selector": "#password", "text": "password"},
      {"action": "click", "selector": "#submit"}
    ]
  }'
# Screenshot: the API returns a data URL ("data:image/png;base64,..."), so strip
# the prefix and decode the base64 payload to get a real PNG file
curl -s -X POST ".../browser/$SESSION/execute" \
  -H 'Content-Type: application/json' \
  -d '{"action": "screenshot"}' \
  | jq -r '.result.image' \
  | sed 's|^data:image/png;base64,||' \
  | base64 --decode > screenshot.png
# Close session
curl -s -X DELETE ".../browser?id=$SESSION"
Interactive Session with WebSocket
javascript
// Create session via REST
const response = await fetch('/api/agents/my-agent/browser', {
  method: 'POST',
  // The body is JSON, so declare it as such
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ headless: false })
})
if (!response.ok) {
  // Fail early instead of opening sockets with an undefined session ID
  throw new Error(`Failed to create browser session: HTTP ${response.status}`)
}
const session = await response.json()
// Connect to screen cast
const cast = new WebSocket(
  `wss://your-domain.com/v1/sessions/cast?sessionId=${session.id}`
)
// Connect to logs
const logs = new WebSocket(
  `wss://your-domain.com/v1/sessions/logs?sessionId=${session.id}`
)
// Render frames to canvas
cast.onmessage = (e) => {
  const frame = JSON.parse(e.data)
  // Messages without a `data` field (e.g. the tab list) are not frames and are skipped
  if (frame.data) {
    const img = new Image()
    img.onload = () => ctx.drawImage(img, 0, 0)
    img.src = `data:image/jpeg;base64,${frame.data}`
  }
}
// Forward mouse events
// A click is a press followed by a release, so send both to give the remote page a complete click
canvas.addEventListener('click', (e) => {
  for (const event of ['mousePressed', 'mouseReleased']) {
    cast.send(JSON.stringify({
      type: 'mouseEvent',
      event,
      x: e.offsetX,
      y: e.offsetY,
      button: 'left'
    }))
  }
})