Skip to content

Browser API

Manage browser sessions and execute actions.

Endpoints

| Method | Endpoint | Description |
| --- | --- | --- |
| GET | /api/agents/{id}/browser | List sessions |
| POST | /api/agents/{id}/browser | Create session |
| DELETE | /api/agents/{id}/browser | Delete session |
| POST | /api/agents/{id}/browser/{sessionId}/execute | Execute action |
| GET | /api/agents/{id}/browser/{sessionId}/live-view | Get live view |
| POST | /api/agents/{id}/browser/{sessionId}/takeover | Request takeover |

List Sessions

bash
GET /api/agents/{id}/browser

Response

json
{
  "sessions": [
    {
      "id": "session-123",
      "status": "active",
      "headless": true,
      "viewport": {
        "width": 1280,
        "height": 720
      },
      "created_at": "2024-12-15T10:00:00Z",
      "last_activity": "2024-12-15T10:05:00Z"
    }
  ]
}

Create Session

bash
POST /api/agents/{id}/browser
Content-Type: application/json

Request Body

json
{
  "headless": true,
  "viewport": {
    "width": 1920,
    "height": 1080
  },
  "user_agent": "Mozilla/5.0..."
}

Response

json
{
  "id": "session-123",
  "status": "active",
  "live_view_url": "https://browserbase.com/live/...",
  "created_at": "2024-12-15T10:00:00Z"
}

Delete Session

bash
DELETE /api/agents/{id}/browser?id={sessionId}

Response

json
{
  "success": true
}

Execute Action

bash
POST /api/agents/{id}/browser/{sessionId}/execute
Content-Type: application/json

Navigate

json
{
  "action": "navigate",
  "url": "https://example.com"
}

Click

json
{
  "action": "click",
  "selector": "#submit-button"
}

Type

json
{
  "action": "type",
  "selector": "#email",
  "text": "user@example.com"
}

Press

json
{
  "action": "press",
  "key": "Enter"
}

Screenshot

json
{
  "action": "screenshot",
  "full_page": false
}

Response:

json
{
  "success": true,
  "result": {
    "image": "data:image/png;base64,..."
  }
}

Extract

json
{
  "action": "extract",
  "selector": ".product-price",
  "attribute": "text"
}

Response:

json
{
  "success": true,
  "result": {
    "value": "$99.99"
  }
}

Scroll

json
{
  "action": "scroll",
  "direction": "down",
  "amount": 500
}

Wait

json
{
  "action": "wait",
  "selector": "#content",
  "state": "visible",
  "timeout": 5000
}

Multiple Actions

json
{
  "actions": [
    { "action": "navigate", "url": "https://example.com" },
    { "action": "wait", "selector": "#login" },
    { "action": "click", "selector": "#login" },
    { "action": "type", "selector": "#email", "text": "user@example.com" },
    { "action": "press", "key": "Enter" }
  ]
}

Response

json
{
  "success": true,
  "results": [
    { "action": "navigate", "success": true, "duration_ms": 1500 },
    { "action": "wait", "success": true, "duration_ms": 200 },
    { "action": "click", "success": true, "duration_ms": 50 },
    { "action": "type", "success": true, "duration_ms": 300 },
    { "action": "press", "success": true, "duration_ms": 10 }
  ],
  "total_duration_ms": 2060
}

Live View

bash
GET /api/agents/{id}/browser/{sessionId}/live-view

Response

json
{
  "url": "https://browserbase.com/live/session-123",
  "expires_at": "2024-12-15T11:00:00Z"
}

Human Takeover

bash
POST /api/agents/{id}/browser/{sessionId}/takeover
Content-Type: application/json

Request Body

json
{
  "reason": "CAPTCHA detected",
  "timeout": 300
}

Response

json
{
  "takeover_id": "takeover-123",
  "status": "pending",
  "takeover_url": "https://browserbase.com/takeover/...",
  "expires_at": "2024-12-15T10:05:00Z"
}

Session Status

| Status | Description |
| --- | --- |
| active | Session is running |
| paused | Temporarily paused |
| takeover | Human control active |
| closed | Session ended |
| error | Session failed |

Action Types

| Action | Description |
| --- | --- |
| navigate | Go to URL |
| click | Click element |
| type | Enter text |
| press | Press key |
| screenshot | Capture image |
| extract | Get content |
| scroll | Scroll page |
| wait | Wait for condition |
| hover | Hover element |
| select | Select option |
| upload | Upload file |
| evaluate | Run JavaScript |

Errors

| Code | Description |
| --- | --- |
| 400 | Invalid action |
| 404 | Session not found |
| 408 | Action timeout |
| 500 | Browser error |

WebSocket API

Real-time browser interaction via WebSocket connections. All endpoints use the base URL wss://your-domain.com.

Endpoints

| Endpoint | Description |
| --- | --- |
| /v1/sessions/cast | Screen casting & interactive control |
| /v1/sessions/logs | Browser console log streaming |
| /v1/sessions/pageId | Page ID events |
| /v1/sessions/recording | User interaction recording |

Screen Casting

Connect to receive real-time screen frames and send input events.

javascript
// Subscribe to the screen-cast stream of a single browser session
const ws = new WebSocket('wss://your-domain.com/v1/sessions/cast?sessionId=session-123')

// Every message is JSON: either a tab list (type === 'tabList') or a
// screen frame that carries its payload in `data`
ws.onmessage = ({ data: raw }) => {
  const message = JSON.parse(raw)

  if (message.type === 'tabList') {
    // List of open tabs
    console.log(message.tabs, message.firstTabId)
    return
  }

  if (message.data) {
    // Screen frame
    const { data, pageId, url, title } = message
    renderFrame(data, pageId, url, title)
  }
}

Client Events

Mouse Event

json
{
  "type": "mouseEvent",
  "event": "mousePressed",
  "x": 100,
  "y": 200,
  "button": "left",
  "deltaX": 0,
  "deltaY": 0
}

Mouse event types: mousePressed, mouseReleased, mouseMoved, mouseWheel

Keyboard Event

json
{
  "type": "keyEvent",
  "event": "keyDown",
  "key": "Enter",
  "code": "Enter",
  "modifiers": 0
}

Key event types: keyDown, keyUp, char

Navigation

json
{
  "type": "navigation",
  "action": "goto",
  "url": "https://example.com"
}

Navigation actions: goto, back, forward, refresh

Close Tab

json
{
  "type": "closeTab",
  "pageId": "page-123"
}

Get Selected Text

json
{
  "type": "getSelectedText",
  "pageId": "page-123"
}

Server Events

Frame Data

json
{
  "pageId": "page-123",
  "url": "https://example.com",
  "title": "Example",
  "favicon": "data:image/png;base64,...",
  "data": "<base64-encoded-frame>"
}

Tab List

json
{
  "type": "tabList",
  "tabs": [
    { "pageId": "page-123", "url": "https://example.com", "title": "Example" }
  ],
  "firstTabId": "page-123"
}

Tab Closed

json
{
  "type": "tabClosed",
  "pageId": "page-123"
}

Selected Text Response

json
{
  "type": "selectedTextResponse",
  "pageId": "page-123",
  "text": "selected content"
}

Console Logs

Stream browser console output in real-time.

javascript
// Subscribe to the console-log stream of a single browser session
const ws = new WebSocket('wss://your-domain.com/v1/sessions/logs?sessionId=session-123')

// Each message is a JSON array of log entries; print them one per line
ws.onmessage = ({ data }) => {
  const entries = JSON.parse(data)
  for (const entry of entries) {
    console.log(`[${entry.level}] ${entry.text}`)
  }
}

Log Format

json
[
  {
    "pageId": "page-123",
    "level": "log",
    "text": "Hello from console",
    "timestamp": 1702648800000
  }
]

Recording

Capture user interactions for replay or analysis.

javascript
// Subscribe to the interaction-recording stream of a single browser session
const ws = new WebSocket('wss://your-domain.com/v1/sessions/recording?sessionId=session-123')

// Each message is a JSON batch of events; append the batch to the buffer
// so the events can be replayed later
ws.onmessage = ({ data }) => {
  const batch = JSON.parse(data)
  recordingBuffer.push(...batch)
}

Connection Parameters

| Parameter | Description |
| --- | --- |
| sessionId | Browser session ID (required) |

Connection Management

The server sends heartbeat pings every 30 seconds. Dead connections are automatically cleaned up.

javascript
// Reconnection with backoff
/**
 * WebSocket client that reconnects automatically with exponential backoff.
 * The retry delay starts at 1 second, doubles after every dropped
 * connection, is capped at 30 seconds, and resets once a connection opens.
 */
class BrowserSocket {
  /**
   * @param {string} endpoint - WebSocket path, e.g. `/v1/sessions/cast`
   * @param {string} sessionId - Browser session ID
   */
  constructor(endpoint, sessionId) {
    // Encode the ID so characters such as `&`, `#` or spaces cannot break the query string
    this.url = `wss://your-domain.com${endpoint}?sessionId=${encodeURIComponent(sessionId)}`
    this.reconnectDelay = 1000
    this.reconnectTimer = null
    this.shouldReconnect = true
  }

  /** Open the socket and keep reconnecting until close() is called. */
  connect() {
    this.shouldReconnect = true
    this.ws = new WebSocket(this.url)

    this.ws.onclose = () => {
      // Skip the retry when the client closed the socket on purpose
      if (!this.shouldReconnect) return
      this.reconnectTimer = setTimeout(() => this.connect(), this.reconnectDelay)
      this.reconnectDelay = Math.min(this.reconnectDelay * 2, 30000)
    }

    this.ws.onopen = () => {
      // A successful connection restarts the backoff sequence
      this.reconnectDelay = 1000
    }
  }

  /** Close the socket for good and cancel any pending reconnect attempt. */
  close() {
    this.shouldReconnect = false
    clearTimeout(this.reconnectTimer)
    this.reconnectTimer = null
    this.ws?.close()
  }
}

Examples

Complete Flow

bash
# Create session (-s hides curl's progress meter; the API expects a JSON body)
SESSION=$(curl -s -X POST ".../browser" \
  -H "Content-Type: application/json" \
  -d '{"headless": true}' | jq -r '.id')

# Navigate
curl -s -X POST ".../browser/$SESSION/execute" \
  -H "Content-Type: application/json" \
  -d '{"action": "navigate", "url": "https://example.com"}'

# Fill form
curl -s -X POST ".../browser/$SESSION/execute" \
  -H "Content-Type: application/json" \
  -d '{
    "actions": [
      {"action": "type", "selector": "#email", "text": "user@example.com"},
      {"action": "type", "selector": "#password", "text": "password"},
      {"action": "click", "selector": "#submit"}
    ]
  }'

# Screenshot: .result.image is a data URL ("data:image/png;base64,..."),
# so strip the prefix and decode the base64 payload to get real PNG bytes
curl -s -X POST ".../browser/$SESSION/execute" \
  -H "Content-Type: application/json" \
  -d '{"action": "screenshot"}' \
  | jq -r '.result.image' \
  | sed 's|^data:image/png;base64,||' \
  | base64 --decode > screenshot.png

# Close session
curl -s -X DELETE ".../browser?id=$SESSION"

Interactive Session with WebSocket

javascript
// Create session via REST
const response = await fetch('/api/agents/my-agent/browser', {
  method: 'POST',
  // Send the body as JSON; fetch would otherwise label a string body as text/plain
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ headless: false })
})

// Fail early instead of opening sockets with `sessionId=undefined`
if (!response.ok) {
  throw new Error(`Failed to create browser session: HTTP ${response.status}`)
}
const session = await response.json()

// Connect to screen cast
const cast = new WebSocket(
  `wss://your-domain.com/v1/sessions/cast?sessionId=${session.id}`
)

// Connect to logs
const logs = new WebSocket(
  `wss://your-domain.com/v1/sessions/logs?sessionId=${session.id}`
)

// Render frames to canvas (`ctx` is the 2D context of `canvas`, created elsewhere)
cast.onmessage = (e) => {
  const frame = JSON.parse(e.data)
  if (frame.data) {
    const img = new Image()
    img.onload = () => ctx.drawImage(img, 0, 0)
    img.src = `data:image/jpeg;base64,${frame.data}`
  }
}

// Forward mouse events
const sendMouseEvent = (event, { offsetX, offsetY }) => {
  // send() throws while the socket is still connecting, so drop early clicks
  if (cast.readyState !== WebSocket.OPEN) return
  cast.send(JSON.stringify({
    type: 'mouseEvent',
    event,
    x: offsetX,
    y: offsetY,
    button: 'left'
  }))
}

canvas.addEventListener('click', (e) => {
  // A click is a press followed by a release; presumably the remote browser
  // keeps the button held down if only mousePressed is sent
  sendMouseEvent('mousePressed', e)
  sendMouseEvent('mouseReleased', e)
})

Released under the MIT License.