WebSocket Protocol
The WebSocket protocol enables real-time bidirectional audio streaming between your telephony provider and the Edesy voice agent.
Connection
Endpoint
wss://ws.edesy.in/v1/stream/{agent_id}
Authentication
Include your API key in the connection URL or headers:
// Option 1: pass the API key as the `token` query parameter of the URL
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123?token=YOUR_API_KEY');
// Option 2: send the API key in an Authorization header (where supported).
// NOTE: the standard browser WebSocket constructor takes (url, protocols) and
// has no headers option; an options object like this is accepted by
// server-side clients such as the Node.js `ws` package.
// Use ONE of the two options: both declare `ws`, so they cannot be pasted
// into the same scope.
const ws = new WebSocket('wss://ws.edesy.in/v1/stream/agent_123', {
headers: {
'Authorization': 'Bearer YOUR_API_KEY'
}
});
Message Format
All messages are JSON encoded:
{
"type": "message_type",
"data": { ... },
"timestamp": 1703779800000
}
Client → Server Messages
start
Initialize the session:
{
"type": "start",
"data": {
"call_id": "call_abc123",
"caller_number": "+14155551234",
"called_number": "+14155559876",
"direction": "inbound",
"audio_format": {
"encoding": "mulaw",
"sample_rate": 8000,
"channels": 1
},
"metadata": {
"campaign_id": "summer_promo"
},
"variables": {
"customer_name": "John"
}
}
}
audio
Send audio data:
{
"type": "audio",
"data": {
"audio": "base64_encoded_audio_data",
"sequence": 1
}
}
Or as binary message:
- First 4 bytes: sequence number (uint32, big-endian)
- Remaining bytes: raw audio data
interrupt
Signal user interruption:
{
"type": "interrupt",
"data": {
"reason": "user_speaking"
}
}
dtmf
Send DTMF digit:
{
"type": "dtmf",
"data": {
"digit": "5"
}
}
stop
End the session:
{
"type": "stop",
"data": {
"reason": "user_hangup"
}
}
Server → Client Messages
ready
Session initialized, ready for audio:
{
"type": "ready",
"data": {
"session_id": "sess_xyz789",
"greeting_audio": "base64_encoded_greeting"
}
}
audio
Bot audio to play:
{
"type": "audio",
"data": {
"audio": "base64_encoded_audio",
"sequence": 1,
"is_final": false
}
}
transcript
Real-time transcript:
{
"type": "transcript",
"data": {
"role": "user",
"text": "What's my order status?",
"is_final": true,
"confidence": 0.95
}
}
function_call
Agent is calling a function:
{
"type": "function_call",
"data": {
"id": "fc_123",
"name": "get_order_status",
"arguments": {
"order_id": "ORD-12345"
}
}
}
function_result
Provide function result (sent by the client in response to a function_call):
{
"type": "function_result",
"data": {
"id": "fc_123",
"result": {
"status": "shipped",
"tracking": "1Z999AA10123456784"
}
}
}
clear
Clear pending audio (after interruption):
{
"type": "clear",
"data": {
"sequence_after": 5
}
}
error
Error occurred:
{
"type": "error",
"data": {
"code": "stt_error",
"message": "Speech recognition failed",
"recoverable": true
}
}
end
Session ended:
{
"type": "end",
"data": {
"reason": "completed",
"duration": 180,
"summary": "Customer checked order status",
"recording_url": "https://..."
}
}
Audio Format
Supported Formats
| Encoding | Sample Rate | Bit Depth | Provider |
|---|---|---|---|
| mulaw | 8000 | 8-bit | Twilio |
| pcm16 | 8000 | 16-bit | Plivo, WebRTC |
| pcm16 | 16000 | 16-bit | WebRTC HD |
Format Specification
{
"audio_format": {
"encoding": "mulaw",
"sample_rate": 8000,
"channels": 1
}
}
Implementation Example
JavaScript/TypeScript
/**
 * Minimal client for the voice-agent streaming WebSocket.
 *
 * The constructor opens the connection; incoming server messages are
 * dispatched by their `type`, and the public methods send the
 * client → server messages (`start`, binary audio, `interrupt`,
 * `function_result`, `stop`).
 *
 * The `onError` / `onClose` callbacks and the `handle*` methods referenced
 * below are not defined in this example and must be supplied by the
 * implementer.
 */
class VoiceAgentClient {
  private ws: WebSocket;
  private audioQueue: Uint8Array[] = [];
  private sequence = 0;

  /**
   * @param agentId Agent to connect to; becomes the last path segment of the URL.
   * @param apiKey  API key, sent as the `token` query parameter.
   */
  constructor(agentId: string, apiKey: string) {
    // Encode both values so reserved characters (&, =, /, spaces, …) cannot
    // corrupt the path or the query string.
    this.ws = new WebSocket(
      `wss://ws.edesy.in/v1/stream/${encodeURIComponent(agentId)}?token=${encodeURIComponent(apiKey)}`
    );
    // Browsers deliver binary frames as Blob by default; without this the
    // `instanceof ArrayBuffer` check in onMessage would never match.
    this.ws.binaryType = 'arraybuffer';
    this.ws.onopen = () => this.onOpen();
    this.ws.onmessage = (event) => this.onMessage(event);
    this.ws.onerror = (error) => this.onError(error);
    this.ws.onclose = () => this.onClose();
  }

  /** Called once the socket is open. */
  private onOpen() {
    console.log('Connected to voice agent');
  }

  /**
   * Routes one incoming frame: binary frames go to `handleAudio`, text frames
   * are parsed as JSON and dispatched on `type`. Malformed JSON is logged and
   * ignored instead of throwing out of the event handler.
   */
  private onMessage(event: MessageEvent) {
    if (event.data instanceof ArrayBuffer) {
      // Binary audio data
      this.handleAudio(new Uint8Array(event.data));
      return;
    }

    let message: { type?: string; data?: unknown } | null;
    try {
      message = JSON.parse(event.data);
    } catch (err: unknown) {
      console.error('Ignoring malformed message from voice agent', err);
      return;
    }

    // `message?.` also covers a literal JSON `null` payload.
    switch (message?.type) {
      case 'ready':
        this.handleReady(message.data);
        break;
      case 'audio':
        this.handleAudioMessage(message.data);
        break;
      case 'transcript':
        this.handleTranscript(message.data);
        break;
      case 'function_call':
        this.handleFunctionCall(message.data);
        break;
      case 'clear':
        this.handleClear(message.data);
        break;
      case 'error':
        // Server-reported errors were previously dropped silently.
        console.error('Voice agent error', message.data);
        break;
      case 'end':
        this.handleEnd(message.data);
        break;
    }
  }

  /** Sends the `start` message that initializes the session. */
  start(callInfo: CallInfo) {
    this.ws.send(JSON.stringify({
      type: 'start',
      data: callInfo
    }));
  }

  /**
   * Sends one audio chunk as a binary frame: a 4-byte big-endian sequence
   * number followed by the raw audio bytes. The sequence starts at 0 and is
   * incremented on every call.
   */
  sendAudio(audio: Uint8Array) {
    const message = new Uint8Array(4 + audio.length);
    // littleEndian = false → big-endian header
    new DataView(message.buffer).setUint32(0, this.sequence++, false);
    message.set(audio, 4);
    this.ws.send(message);
  }

  /** Signals that the user started speaking over the agent. */
  interrupt() {
    this.ws.send(JSON.stringify({
      type: 'interrupt',
      data: { reason: 'user_speaking' }
    }));
  }

  /**
   * Returns the result of a function call to the agent.
   * @param id     The `id` received in the matching `function_call` message.
   * @param result Any JSON-serializable value.
   */
  provideFunctionResult(id: string, result: unknown) {
    this.ws.send(JSON.stringify({
      type: 'function_result',
      data: { id, result }
    }));
  }

  /** Sends the `stop` message with the given reason. */
  stop(reason: string) {
    this.ws.send(JSON.stringify({
      type: 'stop',
      data: { reason }
    }));
  }
}
Go
// VoiceAgentClient is a streaming client for a single voice-agent session.
type VoiceAgentClient struct {
	// agentID is the agent this client was created for.
	agentID string
	// conn is the underlying WebSocket connection.
	conn *websocket.Conn

	// mu guards sequence.
	mu sync.Mutex
	// sequence is the number written into the header of the next outgoing
	// audio frame.
	sequence uint32
}
// NewVoiceAgentClient dials the streaming endpoint for agentID, passing apiKey
// as the token query parameter, and starts the background read loop.
// It returns the dial error unchanged if the connection cannot be established.
func NewVoiceAgentClient(agentID, apiKey string) (*VoiceAgentClient, error) {
	endpoint := fmt.Sprintf("wss://ws.edesy.in/v1/stream/%s?token=%s", agentID, apiKey)

	conn, _, dialErr := websocket.DefaultDialer.Dial(endpoint, nil)
	if dialErr != nil {
		return nil, dialErr
	}

	c := &VoiceAgentClient{agentID: agentID, conn: conn}
	// Incoming messages are consumed on their own goroutine.
	go c.readLoop()
	return c, nil
}
// Start sends the "start" message carrying callInfo to initialize the session.
func (c *VoiceAgentClient) Start(callInfo CallInfo) error {
	startMsg := Message{Type: "start", Data: callInfo}
	return c.sendJSON(startMsg)
}
// SendAudio sends one chunk of raw audio as a binary WebSocket message:
// a 4-byte big-endian sequence number followed by the audio bytes.
//
// c.mu is held for the whole call rather than only around the counter, so
// that concurrent callers cannot write to the connection at the same time and
// frames reach the wire in the order their sequence numbers were assigned.
// NOTE(review): other writers on c.conn (e.g. sendJSON, not shown here) need
// to serialize with these writes as well — confirm.
func (c *VoiceAgentClient) SendAudio(audio []byte) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	// The sequence number is consumed even if the write below fails.
	seq := c.sequence
	c.sequence++

	// Binary message: 4 bytes sequence + audio
	message := make([]byte, 4+len(audio))
	binary.BigEndian.PutUint32(message[:4], seq)
	copy(message[4:], audio)
	return c.conn.WriteMessage(websocket.BinaryMessage, message)
}
// readLoop reads messages from the connection until a read fails, then
// returns. Binary messages are passed to handleAudio; all other messages are
// decoded as a JSON Message and passed to handleMessage.
func (c *VoiceAgentClient) readLoop() {
	for {
		msgType, data, err := c.conn.ReadMessage()
		if err != nil {
			// Read errors (including a closed connection) end the loop.
			return
		}
		if msgType == websocket.BinaryMessage {
			c.handleAudio(data)
			continue
		}
		var msg Message
		if err := json.Unmarshal(data, &msg); err != nil {
			// Skip frames that are not valid JSON instead of dispatching a
			// zero-value Message to handleMessage.
			continue
		}
		c.handleMessage(msg)
	}
}
Connection Management
Heartbeat
Send periodic pings to keep the connection alive:
// Every 30 seconds, send a ping if (and only if) the socket is currently open.
const sendPing = () => {
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }
  ws.send(JSON.stringify({ type: 'ping' }));
};
setInterval(sendPing, 30000);
Reconnection
Handle disconnections gracefully:
/**
 * Reconnect-with-backoff sketch.
 *
 * Each close schedules a reconnect after `reconnectDelay * 2^attempts` ms,
 * up to `maxReconnectAttempts` attempts. `connect()` is not defined in this
 * example, and nothing shown here resets `reconnectAttempts` after a
 * successful reconnect.
 */
class ReconnectingClient {
  private reconnectAttempts = 0;
  private maxReconnectAttempts = 5;
  private reconnectDelay = 1000;

  /** Close handler: schedules the next reconnect, or gives up at the cap. */
  private onClose() {
    if (this.reconnectAttempts >= this.maxReconnectAttempts) {
      return;
    }
    // The delay uses the attempt count as it is now; the counter is only
    // bumped when the timer fires.
    const backoffMs = this.reconnectDelay * 2 ** this.reconnectAttempts;
    setTimeout(() => {
      this.reconnectAttempts += 1;
      this.connect();
    }, backoffMs);
  }
}
Error Handling
| Error Code | Description | Action |
|---|---|---|
| auth_failed | Invalid API key | Check credentials |
| agent_not_found | Agent ID invalid | Verify agent ID |
| rate_limited | Too many connections | Implement backoff |
| stt_error | Speech recognition failed | Continue, will recover |
| llm_error | LLM failed | May switch to fallback |
| internal_error | Server error | Retry connection |
Best Practices
1. Buffer Audio
// Buffer small audio chunks and forward them in fixed-size frames
const buffer = [];
// 320 bytes = 20 ms of 16-bit PCM at 8 kHz (8000 samples/s x 0.02 s x 2 bytes).
// For 8-bit mulaw at 8 kHz, 20 ms is 160 bytes; adjust to your encoding.
const BUFFER_SIZE = 320;

/**
 * Appends `chunk` to the pending buffer and sends every complete
 * BUFFER_SIZE-byte frame via `client.sendAudio`. Leftover bytes stay in
 * `buffer` until a later call completes the frame.
 *
 * @param chunk Iterable of byte values (e.g. a Uint8Array).
 */
function addAudio(chunk) {
  // Append byte by byte: `buffer.push(...chunk)` passes every byte as a call
  // argument and throws RangeError for large chunks.
  for (const byte of chunk) {
    buffer.push(byte);
  }

  // Walk the buffer with an offset and trim it once at the end, instead of
  // shifting the whole array for every frame.
  let consumed = 0;
  try {
    while (buffer.length - consumed >= BUFFER_SIZE) {
      const audio = Uint8Array.from(buffer.slice(consumed, consumed + BUFFER_SIZE));
      // Count the frame as consumed before sending so that a throwing
      // sendAudio does not cause the same frame to be sent again.
      consumed += BUFFER_SIZE;
      client.sendAudio(audio);
    }
  } finally {
    if (consumed > 0) {
      buffer.splice(0, consumed);
    }
  }
}
2. Handle Backpressure
// Drop audio while the socket's outgoing buffer is backed up
/**
 * Sends `audio` over `ws` unless more than 64000 bytes are already queued on
 * the socket, in which case the chunk is dropped and a warning is logged.
 */
function sendAudio(audio) {
  const backedUp = ws.bufferedAmount > 64000;
  if (!backedUp) {
    ws.send(audio);
    return;
  }
  // Too much queued already: skip this chunk rather than add to the backlog.
  console.warn('Backpressure detected, dropping audio');
}
3. Sequence Tracking
// Reorder incoming audio by sequence number before playback.
// Playback starts at sequence 0 (lastPlayedSequence begins at -1).
let lastPlayedSequence = -1;
const audioBuffer = new Map();

/**
 * Stores `audio` under `sequence`, then plays every chunk that is next in
 * line. Chunks that arrive ahead of a missing sequence number stay buffered
 * until the gap is filled.
 */
function handleAudio(sequence, audio) {
  audioBuffer.set(sequence, audio);

  // Drain consecutive sequence numbers, stopping at the first gap.
  for (
    let next = lastPlayedSequence + 1;
    audioBuffer.has(next);
    next = lastPlayedSequence + 1
  ) {
    const pending = audioBuffer.get(next);
    audioBuffer.delete(next);
    playAudio(pending);
    lastPlayedSequence = next;
  }
}