WebSocket Protocol

The WebSocket protocol enables real-time bidirectional audio streaming between your telephony provider and the Edesy voice agent.

Connection

Endpoint

wss://ws.edesy.in/v1/stream/{agent_id}

Authentication

Include your API key in the connection URL or headers:

// Option 1: URL parameter — the API key is sent as the `token` query parameter.
// (Options 1 and 2 are alternatives: both declare `ws`, so use only one of them.)
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123?token=YOUR_API_KEY');

// Option 2: Authorization header (where supported).
// NOTE(review): an options object with `headers` is not part of the browser
// WebSocket constructor; presumably this form targets a server-side client
// library — confirm for your runtime.
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123', {
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY'
  }
});

Message Format

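All control messages are JSON-encoded text frames (audio may alternatively be sent as binary frames, as described under `audio` below):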

{
  "type": "message_type",
  "data": { ... },
  "timestamp": 1703779800000
}

Client → Server Messages

start

Initialize the session:

{
  "type": "start",
  "data": {
    "call_id": "call_abc123",
    "caller_number": "+14155551234",
    "called_number": "+14155559876",
    "direction": "inbound",
    "audio_format": {
      "encoding": "mulaw",
      "sample_rate": 8000,
      "channels": 1
    },
    "metadata": {
      "campaign_id": "summer_promo"
    },
    "variables": {
      "customer_name": "John"
    }
  }
}

audio

Send audio data:

{
  "type": "audio",
  "data": {
    "audio": "base64_encoded_audio_data",
    "sequence": 1
  }
}

Or as binary message:

First 4 bytes: sequence number (uint32, big-endian)
Remaining bytes: raw audio data

interrupt

Signal user interruption:

{
  "type": "interrupt",
  "data": {
    "reason": "user_speaking"
  }
}

dtmf

Send DTMF digit:

{
  "type": "dtmf",
  "data": {
    "digit": "5"
  }
}

stop

End the session:

{
  "type": "stop",
  "data": {
    "reason": "user_hangup"
  }
}

Server → Client Messages

ready

Session initialized, ready for audio:

{
  "type": "ready",
  "data": {
    "session_id": "sess_xyz789",
    "greeting_audio": "base64_encoded_greeting"
  }
}

audio

Bot audio to play:

{
  "type": "audio",
  "data": {
    "audio": "base64_encoded_audio",
    "sequence": 1,
    "is_final": false
  }
}

transcript

Real-time transcript:

{
  "type": "transcript",
  "data": {
    "role": "user",
    "text": "What's my order status?",
    "is_final": true,
    "confidence": 0.95
  }
}

function_call

Agent is calling a function:

{
  "type": "function_call",
  "data": {
    "id": "fc_123",
    "name": "get_order_status",
    "arguments": {
      "order_id": "ORD-12345"
    }
  }
}

function_result

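Provide the function result. This message is sent by the client to the server in response to a `function_call`, echoing that call's `id`: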

{
  "type": "function_result",
  "data": {
    "id": "fc_123",
    "result": {
      "status": "shipped",
      "tracking": "1Z999AA10123456784"
    }
  }
}

clear

Clear pending audio (after interruption):

{
  "type": "clear",
  "data": {
    "sequence_after": 5
  }
}

error

Error occurred:

{
  "type": "error",
  "data": {
    "code": "stt_error",
    "message": "Speech recognition failed",
    "recoverable": true
  }
}

end

Session ended:

{
  "type": "end",
  "data": {
    "reason": "completed",
    "duration": 180,
    "summary": "Customer checked order status",
    "recording_url": "https://..."
  }
}

Audio Format

Supported Formats

Encoding	Sample Rate	Bit Depth	Provider
mulaw	8000	8-bit	Twilio
pcm16	8000	16-bit	Plivo, WebRTC
pcm16	16000	16-bit	WebRTC HD

Format Specification

{
  "audio_format": {
    "encoding": "mulaw",
    "sample_rate": 8000,
    "channels": 1
  }
}

Implementation Example

JavaScript/TypeScript

/**
 * Minimal client for the voice agent streaming WebSocket.
 *
 * The connection is opened in the constructor. Control messages are sent as
 * JSON text frames; outgoing audio is sent as binary frames prefixed with a
 * 4-byte big-endian sequence number.
 *
 * The `onError`, `onClose`, `handleAudio`, `handleReady`, `handleAudioMessage`,
 * `handleTranscript`, `handleFunctionCall`, `handleClear` and `handleEnd`
 * hooks referenced below are not part of this excerpt and must be supplied by
 * the implementer.
 */
class VoiceAgentClient {
  private ws: WebSocket;
  // Not used by the code shown here.
  private audioQueue: Uint8Array[] = [];
  // Sequence number written into the next outgoing binary audio frame.
  private sequence = 0;

  /**
   * @param agentId Agent to stream to; inserted into the URL path.
   * @param apiKey API key; sent as the `token` query parameter.
   */
  constructor(agentId: string, apiKey: string) {
    // Escape both values so characters such as `&`, `#` or spaces cannot
    // corrupt the path or the query string.
    this.ws = new WebSocket(
      `wss://ws.edesy.in/v1/stream/${encodeURIComponent(agentId)}?token=${encodeURIComponent(apiKey)}`
    );

    // Browsers deliver binary frames as Blob unless told otherwise; without
    // this the `instanceof ArrayBuffer` check in onMessage never matches.
    this.ws.binaryType = 'arraybuffer';

    this.ws.onopen = () => this.onOpen();
    this.ws.onmessage = (event) => this.onMessage(event);
    this.ws.onerror = (error) => this.onError(error);
    this.ws.onclose = () => this.onClose();
  }

  private onOpen() {
    console.log('Connected to voice agent');
  }

  /** Routes an incoming frame: binary frames are audio, text frames are JSON. */
  private onMessage(event: MessageEvent) {
    if (event.data instanceof ArrayBuffer) {
      // Binary audio data
      this.handleAudio(new Uint8Array(event.data));
      return;
    }

    let message: any;
    try {
      message = JSON.parse(event.data);
    } catch (err) {
      // A malformed frame must not throw out of the event handler.
      console.error('Ignoring malformed message from voice agent', err);
      return;
    }

    switch (message?.type) {
      case 'ready':
        this.handleReady(message.data);
        break;
      case 'audio':
        this.handleAudioMessage(message.data);
        break;
      case 'transcript':
        this.handleTranscript(message.data);
        break;
      case 'function_call':
        this.handleFunctionCall(message.data);
        break;
      case 'clear':
        this.handleClear(message.data);
        break;
      case 'error':
        // Surface server-reported errors instead of silently dropping them.
        console.error('Voice agent error', message.data);
        break;
      case 'end':
        this.handleEnd(message.data);
        break;
    }
  }

  /** Sends the `start` message that initializes the session. */
  start(callInfo: CallInfo) {
    this.ws.send(JSON.stringify({
      type: 'start',
      data: callInfo
    }));
  }

  /**
   * Sends one audio chunk as a binary frame:
   * 4-byte big-endian sequence number followed by the raw audio bytes.
   */
  sendAudio(audio: Uint8Array) {
    const frame = new Uint8Array(4 + audio.length);
    // `false` selects big-endian byte order for the sequence header.
    new DataView(frame.buffer).setUint32(0, this.sequence++, false);
    frame.set(audio, 4);

    this.ws.send(frame);
  }

  /** Signals that the user started speaking over the agent. */
  interrupt() {
    this.ws.send(JSON.stringify({
      type: 'interrupt',
      data: { reason: 'user_speaking' }
    }));
  }

  /**
   * Returns the result of a function the agent asked for.
   * @param id The `id` received in the `function_call` message.
   * @param result JSON-serializable result payload.
   */
  provideFunctionResult(id: string, result: any) {
    this.ws.send(JSON.stringify({
      type: 'function_result',
      data: { id, result }
    }));
  }

  /** Sends the `stop` message with the given reason. */
  stop(reason: string) {
    this.ws.send(JSON.stringify({
      type: 'stop',
      data: { reason }
    }));
  }
}

Go

// VoiceAgentClient holds the state for one streaming connection to a voice agent.
type VoiceAgentClient struct {
    // conn is the WebSocket connection returned by the dialer.
    conn     *websocket.Conn
    // agentID is the agent ID passed to NewVoiceAgentClient.
    agentID  string
    // sequence is the number written into the next outgoing binary audio frame.
    sequence uint32
    // mu is held by SendAudio while it reads and increments sequence.
    mu       sync.Mutex
}

// NewVoiceAgentClient dials the streaming endpoint for agentID, passing apiKey
// as the token query parameter, and starts a goroutine running readLoop.
// It returns the dial error unchanged when the connection cannot be made.
//
// NOTE(review): agentID and apiKey are placed into the URL without escaping.
func NewVoiceAgentClient(agentID, apiKey string) (*VoiceAgentClient, error) {
    endpoint := fmt.Sprintf("wss://ws.edesy.in/v1/stream/%s?token=%s", agentID, apiKey)

    conn, _, dialErr := websocket.DefaultDialer.Dial(endpoint, nil)
    if dialErr != nil {
        return nil, dialErr
    }

    client := &VoiceAgentClient{agentID: agentID, conn: conn}

    // Incoming frames are consumed in the background from here on.
    go client.readLoop()

    return client, nil
}

// Start sends the "start" message carrying callInfo and returns whatever
// error sendJSON reports.
func (c *VoiceAgentClient) Start(callInfo CallInfo) error {
    startMsg := Message{Type: "start", Data: callInfo}
    return c.sendJSON(startMsg)
}

// SendAudio sends one audio chunk as a binary frame: a 4-byte big-endian
// sequence number followed by the raw audio bytes. It returns the error from
// the underlying write, if any.
//
// The mutex is held across both the sequence assignment and the write, so
// concurrent callers put frames on the wire in sequence order and never
// write to the connection at the same time.
//
// NOTE(review): sendJSON is not shown here; it must serialize its writes with
// the same mutex for control messages to be safe alongside SendAudio — confirm.
func (c *VoiceAgentClient) SendAudio(audio []byte) error {
    // Build the frame outside the lock; only the header depends on shared state.
    message := make([]byte, 4+len(audio))
    copy(message[4:], audio)

    c.mu.Lock()
    defer c.mu.Unlock()

    seq := c.sequence
    c.sequence++
    binary.BigEndian.PutUint32(message[:4], seq)

    return c.conn.WriteMessage(websocket.BinaryMessage, message)
}

// readLoop reads frames from the connection until a read fails, then returns.
// Binary frames are passed to handleAudio; all other frames are decoded as a
// JSON Message and passed to handleMessage.
func (c *VoiceAgentClient) readLoop() {
    for {
        msgType, data, err := c.conn.ReadMessage()
        if err != nil {
            return
        }

        if msgType == websocket.BinaryMessage {
            c.handleAudio(data)
            continue
        }

        var msg Message
        if err := json.Unmarshal(data, &msg); err != nil {
            // Skip malformed frames rather than dispatching a zero-value Message.
            continue
        }
        c.handleMessage(msg)
    }
}

Connection Management

Heartbeat

Send periodic pings to keep the connection alive:

// Send an application-level ping every 30 seconds while the socket is open.
// The timer handle is kept so the heartbeat stops once the socket has closed;
// otherwise the interval keeps firing for a connection that no longer exists.
const heartbeatTimer = setInterval(() => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: 'ping' }));
  } else if (ws.readyState === WebSocket.CLOSED) {
    clearInterval(heartbeatTimer);
  }
}, 30000);

Reconnection

Handle disconnections gracefully:

/**
 * Reconnection helper: after the socket closes, schedules another `connect()`
 * with exponential backoff until the attempt limit is reached.
 *
 * `connect()` is not part of this excerpt and must be supplied by the
 * implementer.
 */
class ReconnectingClient {
  private reconnectAttempts = 0;
  private maxReconnectAttempts = 5;
  private reconnectDelay = 1000;

  /** Close handler: schedules the next reconnect, or does nothing once the limit is hit. */
  private onClose() {
    // Retry budget spent: give up.
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      return;
    }

    // The delay doubles with every attempt already made: 1s, 2s, 4s, 8s, 16s.
    const backoffMs = this.reconnectDelay * 2 ** this.reconnectAttempts;

    setTimeout(() => {
      this.reconnectAttempts += 1;
      this.connect();
    }, backoffMs);
  }
}

Error Handling

Error Code	Description	Action
`auth_failed`	Invalid API key	Check credentials
`agent_not_found`	Agent ID invalid	Verify agent ID
`rate_limited`	Too many connections	Implement backoff
`stt_error`	Speech recognition failed	Continue, will recover
`llm_error`	LLM failed	May switch to fallback
`internal_error`	Server error	Retry connection

Best Practices

1. Buffer Audio

// Buffer small audio chunks before sending
const buffer = [];
const BUFFER_SIZE = 320; // 20ms at 8kHz

function addAudio(chunk) {
  buffer.push(...chunk);

  while (buffer.length >= BUFFER_SIZE) {
    const audio = new Uint8Array(buffer.splice(0, BUFFER_SIZE));
    client.sendAudio(audio);
  }
}

2. Handle Backpressure

// Check if WebSocket is ready
function sendAudio(audio) {
  if (ws.bufferedAmount > 64000) {
    // Too much buffered, skip this chunk
    console.warn('Backpressure detected, dropping audio');
    return;
  }
  ws.send(audio);
}

3. Sequence Tracking

// Track sequences for proper ordering
let lastPlayedSequence = -1;
const audioBuffer = new Map();

function handleAudio(sequence, audio) {
  audioBuffer.set(sequence, audio);

  // Play in order
  while (audioBuffer.has(lastPlayedSequence + 1)) {
    const nextAudio = audioBuffer.get(lastPlayedSequence + 1);
    audioBuffer.delete(lastPlayedSequence + 1);
    playAudio(nextAudio);
    lastPlayedSequence++;
  }
}

Next Steps

REST API - API reference
Webhooks - Event notifications
SDKs - Client libraries

WebSocket Protocol

The WebSocket protocol enables real-time bidirectional audio streaming between your telephony provider and the Edesy voice agent.

Connection

Endpoint

wss://ws.edesy.in/v1/stream/{agent_id}

Authentication

Include your API key in the connection URL or headers:

// Option 1: URL parameter — the API key is sent as the `token` query parameter.
// (Options 1 and 2 are alternatives: both declare `ws`, so use only one of them.)
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123?token=YOUR_API_KEY');

// Option 2: Authorization header (where supported).
// NOTE(review): an options object with `headers` is not part of the browser
// WebSocket constructor; presumably this form targets a server-side client
// library — confirm for your runtime.
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123', {
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY'
  }
});

Message Format

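All control messages are JSON-encoded text frames (audio may alternatively be sent as binary frames, as described under `audio` below):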

{
  "type": "message_type",
  "data": { ... },
  "timestamp": 1703779800000
}

Client → Server Messages

start

Initialize the session:

{
  "type": "start",
  "data": {
    "call_id": "call_abc123",
    "caller_number": "+14155551234",
    "called_number": "+14155559876",
    "direction": "inbound",
    "audio_format": {
      "encoding": "mulaw",
      "sample_rate": 8000,
      "channels": 1
    },
    "metadata": {
      "campaign_id": "summer_promo"
    },
    "variables": {
      "customer_name": "John"
    }
  }
}

audio

Send audio data:

{
  "type": "audio",
  "data": {
    "audio": "base64_encoded_audio_data",
    "sequence": 1
  }
}

Or as binary message:

First 4 bytes: sequence number (uint32, big-endian)
Remaining bytes: raw audio data

interrupt

Signal user interruption:

{
  "type": "interrupt",
  "data": {
    "reason": "user_speaking"
  }
}

dtmf

Send DTMF digit:

{
  "type": "dtmf",
  "data": {
    "digit": "5"
  }
}

stop

End the session:

{
  "type": "stop",
  "data": {
    "reason": "user_hangup"
  }
}

Server → Client Messages

ready

Session initialized, ready for audio:

{
  "type": "ready",
  "data": {
    "session_id": "sess_xyz789",
    "greeting_audio": "base64_encoded_greeting"
  }
}

audio

Bot audio to play:

{
  "type": "audio",
  "data": {
    "audio": "base64_encoded_audio",
    "sequence": 1,
    "is_final": false
  }
}

transcript

Real-time transcript:

{
  "type": "transcript",
  "data": {
    "role": "user",
    "text": "What's my order status?",
    "is_final": true,
    "confidence": 0.95
  }
}

function_call

Agent is calling a function:

{
  "type": "function_call",
  "data": {
    "id": "fc_123",
    "name": "get_order_status",
    "arguments": {
      "order_id": "ORD-12345"
    }
  }
}

function_result

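Provide the function result. This message is sent by the client to the server in response to a `function_call`, echoing that call's `id`: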

{
  "type": "function_result",
  "data": {
    "id": "fc_123",
    "result": {
      "status": "shipped",
      "tracking": "1Z999AA10123456784"
    }
  }
}

clear

Clear pending audio (after interruption):

{
  "type": "clear",
  "data": {
    "sequence_after": 5
  }
}

error

Error occurred:

{
  "type": "error",
  "data": {
    "code": "stt_error",
    "message": "Speech recognition failed",
    "recoverable": true
  }
}

end

Session ended:

{
  "type": "end",
  "data": {
    "reason": "completed",
    "duration": 180,
    "summary": "Customer checked order status",
    "recording_url": "https://..."
  }
}

Audio Format

Supported Formats

Encoding	Sample Rate	Bit Depth	Provider
mulaw	8000	8-bit	Twilio
pcm16	8000	16-bit	Plivo, WebRTC
pcm16	16000	16-bit	WebRTC HD

Format Specification

{
  "audio_format": {
    "encoding": "mulaw",
    "sample_rate": 8000,
    "channels": 1
  }
}

Implementation Example

JavaScript/TypeScript

/**
 * Minimal client for the voice agent streaming WebSocket.
 *
 * The connection is opened in the constructor. Control messages are sent as
 * JSON text frames; outgoing audio is sent as binary frames prefixed with a
 * 4-byte big-endian sequence number.
 *
 * The `onError`, `onClose`, `handleAudio`, `handleReady`, `handleAudioMessage`,
 * `handleTranscript`, `handleFunctionCall`, `handleClear` and `handleEnd`
 * hooks referenced below are not part of this excerpt and must be supplied by
 * the implementer.
 */
class VoiceAgentClient {
  private ws: WebSocket;
  // Not used by the code shown here.
  private audioQueue: Uint8Array[] = [];
  // Sequence number written into the next outgoing binary audio frame.
  private sequence = 0;

  /**
   * @param agentId Agent to stream to; inserted into the URL path.
   * @param apiKey API key; sent as the `token` query parameter.
   */
  constructor(agentId: string, apiKey: string) {
    // Escape both values so characters such as `&`, `#` or spaces cannot
    // corrupt the path or the query string.
    this.ws = new WebSocket(
      `wss://ws.edesy.in/v1/stream/${encodeURIComponent(agentId)}?token=${encodeURIComponent(apiKey)}`
    );

    // Browsers deliver binary frames as Blob unless told otherwise; without
    // this the `instanceof ArrayBuffer` check in onMessage never matches.
    this.ws.binaryType = 'arraybuffer';

    this.ws.onopen = () => this.onOpen();
    this.ws.onmessage = (event) => this.onMessage(event);
    this.ws.onerror = (error) => this.onError(error);
    this.ws.onclose = () => this.onClose();
  }

  private onOpen() {
    console.log('Connected to voice agent');
  }

  /** Routes an incoming frame: binary frames are audio, text frames are JSON. */
  private onMessage(event: MessageEvent) {
    if (event.data instanceof ArrayBuffer) {
      // Binary audio data
      this.handleAudio(new Uint8Array(event.data));
      return;
    }

    let message: any;
    try {
      message = JSON.parse(event.data);
    } catch (err) {
      // A malformed frame must not throw out of the event handler.
      console.error('Ignoring malformed message from voice agent', err);
      return;
    }

    switch (message?.type) {
      case 'ready':
        this.handleReady(message.data);
        break;
      case 'audio':
        this.handleAudioMessage(message.data);
        break;
      case 'transcript':
        this.handleTranscript(message.data);
        break;
      case 'function_call':
        this.handleFunctionCall(message.data);
        break;
      case 'clear':
        this.handleClear(message.data);
        break;
      case 'error':
        // Surface server-reported errors instead of silently dropping them.
        console.error('Voice agent error', message.data);
        break;
      case 'end':
        this.handleEnd(message.data);
        break;
    }
  }

  /** Sends the `start` message that initializes the session. */
  start(callInfo: CallInfo) {
    this.ws.send(JSON.stringify({
      type: 'start',
      data: callInfo
    }));
  }

  /**
   * Sends one audio chunk as a binary frame:
   * 4-byte big-endian sequence number followed by the raw audio bytes.
   */
  sendAudio(audio: Uint8Array) {
    const frame = new Uint8Array(4 + audio.length);
    // `false` selects big-endian byte order for the sequence header.
    new DataView(frame.buffer).setUint32(0, this.sequence++, false);
    frame.set(audio, 4);

    this.ws.send(frame);
  }

  /** Signals that the user started speaking over the agent. */
  interrupt() {
    this.ws.send(JSON.stringify({
      type: 'interrupt',
      data: { reason: 'user_speaking' }
    }));
  }

  /**
   * Returns the result of a function the agent asked for.
   * @param id The `id` received in the `function_call` message.
   * @param result JSON-serializable result payload.
   */
  provideFunctionResult(id: string, result: any) {
    this.ws.send(JSON.stringify({
      type: 'function_result',
      data: { id, result }
    }));
  }

  /** Sends the `stop` message with the given reason. */
  stop(reason: string) {
    this.ws.send(JSON.stringify({
      type: 'stop',
      data: { reason }
    }));
  }
}

Go

// VoiceAgentClient holds the state for one streaming connection to a voice agent.
type VoiceAgentClient struct {
    // conn is the WebSocket connection returned by the dialer.
    conn     *websocket.Conn
    // agentID is the agent ID passed to NewVoiceAgentClient.
    agentID  string
    // sequence is the number written into the next outgoing binary audio frame.
    sequence uint32
    // mu is held by SendAudio while it reads and increments sequence.
    mu       sync.Mutex
}

// NewVoiceAgentClient dials the streaming endpoint for agentID, passing apiKey
// as the token query parameter, and starts a goroutine running readLoop.
// It returns the dial error unchanged when the connection cannot be made.
//
// NOTE(review): agentID and apiKey are placed into the URL without escaping.
func NewVoiceAgentClient(agentID, apiKey string) (*VoiceAgentClient, error) {
    endpoint := fmt.Sprintf("wss://ws.edesy.in/v1/stream/%s?token=%s", agentID, apiKey)

    conn, _, dialErr := websocket.DefaultDialer.Dial(endpoint, nil)
    if dialErr != nil {
        return nil, dialErr
    }

    client := &VoiceAgentClient{agentID: agentID, conn: conn}

    // Incoming frames are consumed in the background from here on.
    go client.readLoop()

    return client, nil
}

// Start sends the "start" message carrying callInfo and returns whatever
// error sendJSON reports.
func (c *VoiceAgentClient) Start(callInfo CallInfo) error {
    startMsg := Message{Type: "start", Data: callInfo}
    return c.sendJSON(startMsg)
}

// SendAudio sends one audio chunk as a binary frame: a 4-byte big-endian
// sequence number followed by the raw audio bytes. It returns the error from
// the underlying write, if any.
//
// The mutex is held across both the sequence assignment and the write, so
// concurrent callers put frames on the wire in sequence order and never
// write to the connection at the same time.
//
// NOTE(review): sendJSON is not shown here; it must serialize its writes with
// the same mutex for control messages to be safe alongside SendAudio — confirm.
func (c *VoiceAgentClient) SendAudio(audio []byte) error {
    // Build the frame outside the lock; only the header depends on shared state.
    message := make([]byte, 4+len(audio))
    copy(message[4:], audio)

    c.mu.Lock()
    defer c.mu.Unlock()

    seq := c.sequence
    c.sequence++
    binary.BigEndian.PutUint32(message[:4], seq)

    return c.conn.WriteMessage(websocket.BinaryMessage, message)
}

// readLoop reads frames from the connection until a read fails, then returns.
// Binary frames are passed to handleAudio; all other frames are decoded as a
// JSON Message and passed to handleMessage.
func (c *VoiceAgentClient) readLoop() {
    for {
        msgType, data, err := c.conn.ReadMessage()
        if err != nil {
            return
        }

        if msgType == websocket.BinaryMessage {
            c.handleAudio(data)
            continue
        }

        var msg Message
        if err := json.Unmarshal(data, &msg); err != nil {
            // Skip malformed frames rather than dispatching a zero-value Message.
            continue
        }
        c.handleMessage(msg)
    }
}

Connection Management

Heartbeat

Send periodic pings to keep the connection alive:

// Send an application-level ping every 30 seconds while the socket is open.
// The timer handle is kept so the heartbeat stops once the socket has closed;
// otherwise the interval keeps firing for a connection that no longer exists.
const heartbeatTimer = setInterval(() => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: 'ping' }));
  } else if (ws.readyState === WebSocket.CLOSED) {
    clearInterval(heartbeatTimer);
  }
}, 30000);

Reconnection

Handle disconnections gracefully:

/**
 * Reconnection helper: after the socket closes, schedules another `connect()`
 * with exponential backoff until the attempt limit is reached.
 *
 * `connect()` is not part of this excerpt and must be supplied by the
 * implementer.
 */
class ReconnectingClient {
  private reconnectAttempts = 0;
  private maxReconnectAttempts = 5;
  private reconnectDelay = 1000;

  /** Close handler: schedules the next reconnect, or does nothing once the limit is hit. */
  private onClose() {
    // Retry budget spent: give up.
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      return;
    }

    // The delay doubles with every attempt already made: 1s, 2s, 4s, 8s, 16s.
    const backoffMs = this.reconnectDelay * 2 ** this.reconnectAttempts;

    setTimeout(() => {
      this.reconnectAttempts += 1;
      this.connect();
    }, backoffMs);
  }
}

Error Handling

Error Code	Description	Action
`auth_failed`	Invalid API key	Check credentials
`agent_not_found`	Agent ID invalid	Verify agent ID
`rate_limited`	Too many connections	Implement backoff
`stt_error`	Speech recognition failed	Continue, will recover
`llm_error`	LLM failed	May switch to fallback
`internal_error`	Server error	Retry connection

Best Practices

1. Buffer Audio

// Buffer small audio chunks before sending
const buffer = [];
const BUFFER_SIZE = 320; // 20ms at 8kHz

function addAudio(chunk) {
  buffer.push(...chunk);

  while (buffer.length >= BUFFER_SIZE) {
    const audio = new Uint8Array(buffer.splice(0, BUFFER_SIZE));
    client.sendAudio(audio);
  }
}

2. Handle Backpressure

// Check if WebSocket is ready
function sendAudio(audio) {
  if (ws.bufferedAmount > 64000) {
    // Too much buffered, skip this chunk
    console.warn('Backpressure detected, dropping audio');
    return;
  }
  ws.send(audio);
}

3. Sequence Tracking

// Track sequences for proper ordering
let lastPlayedSequence = -1;
const audioBuffer = new Map();

function handleAudio(sequence, audio) {
  audioBuffer.set(sequence, audio);

  // Play in order
  while (audioBuffer.has(lastPlayedSequence + 1)) {
    const nextAudio = audioBuffer.get(lastPlayedSequence + 1);
    audioBuffer.delete(lastPlayedSequence + 1);
    playAudio(nextAudio);
    lastPlayedSequence++;
  }
}

Next Steps

REST API - API reference
Webhooks - Event notifications
SDKs - Client libraries