WebRTC Integration
WebRTC enables browser-based voice agents with sub-100 ms latency and high-quality audio, making it well suited to web applications, demos, and testing.
Overview
| Feature | Value |
|---|---|
| Latency | <100ms |
| Audio Format | Opus/PCM |
| Sample Rate | 48000 Hz |
| Channels | Mono/Stereo |
| Best For | Web apps, demos |
Architecture
┌─────────────────────────────────────────────────────────────┐
│ WebRTC Architecture │
│ │
│ Browser Server │
│ ┌─────────────┐ ┌─────────────┐ │
│ │ WebRTC │◄──────────────►│ WebRTC │ │
│ │ Client │ Audio/Data │ Server │ │
│ └──────┬──────┘ └──────┬──────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌─────────────┐ ┌─────────────┐ │
│ │ getUserMedia│ │ Pipeline │ │
│ │ (Mic Input) │ │ (STT→LLM→TTS)│ │
│ └─────────────┘ └─────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
Client Implementation
HTML Setup
<!DOCTYPE html>
<html>
<head>
<title>Voice Agent</title>
</head>
<body>
<div id="controls">
<button id="startBtn">Start Call</button>
<button id="stopBtn" disabled>End Call</button>
<div id="status">Ready</div>
</div>
<audio id="remoteAudio" autoplay></audio>
<script src="voice-agent.js"></script>
</body>
</html>
JavaScript Client
class VoiceAgentClient {
constructor(config) {
this.serverUrl = config.serverUrl;
this.agentId = config.agentId;
this.peerConnection = null;
this.dataChannel = null;
this.localStream = null;
}
async connect() {
// Get microphone access
this.localStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
sampleRate: 48000,
},
video: false,
});
// Create peer connection
this.peerConnection = new RTCPeerConnection({
iceServers: [
{ urls: 'stun:stun.l.google.com:19302' },
],
});
// Add local audio track
this.localStream.getTracks().forEach(track => {
this.peerConnection.addTrack(track, this.localStream);
});
// Handle remote audio
this.peerConnection.ontrack = (event) => {
const remoteAudio = document.getElementById('remoteAudio');
remoteAudio.srcObject = event.streams[0];
};
// Create data channel for control messages
this.dataChannel = this.peerConnection.createDataChannel('control');
this.setupDataChannel();
// Handle ICE candidates
this.peerConnection.onicecandidate = (event) => {
if (event.candidate) {
this.sendSignaling({
type: 'ice-candidate',
candidate: event.candidate,
});
}
};
    // Connect to the signaling server first, so ICE candidates
    // gathered below are never sent before the socket is open
    await this.connectSignaling();

    // Create and send offer
    const offer = await this.peerConnection.createOffer();
    await this.peerConnection.setLocalDescription(offer);

    this.sendSignaling({
      type: 'offer',
      sdp: offer.sdp,
      agentId: this.agentId,
    });
}
  async connectSignaling() {
    this.ws = new WebSocket(`${this.serverUrl}/signaling`);
    this.ws.onmessage = async (event) => {
      const message = JSON.parse(event.data);
      await this.handleSignaling(message);
    };
    return new Promise((resolve, reject) => {
      this.ws.onopen = resolve;
      this.ws.onerror = reject; // fail fast if the socket cannot open
    });
  }
async handleSignaling(message) {
switch (message.type) {
case 'answer':
await this.peerConnection.setRemoteDescription({
type: 'answer',
sdp: message.sdp,
});
break;
case 'ice-candidate':
await this.peerConnection.addIceCandidate(message.candidate);
break;
case 'transcript':
this.onTranscript(message);
break;
}
}
sendSignaling(message) {
this.ws.send(JSON.stringify(message));
}
setupDataChannel() {
this.dataChannel.onopen = () => {
console.log('Data channel opened');
};
this.dataChannel.onmessage = (event) => {
const message = JSON.parse(event.data);
this.handleControlMessage(message);
};
}
handleControlMessage(message) {
switch (message.type) {
case 'transcript':
console.log(`${message.role}: ${message.text}`);
break;
case 'status':
document.getElementById('status').textContent = message.status;
break;
}
}
onTranscript(data) {
console.log(`${data.role}: ${data.text}`);
}
disconnect() {
if (this.localStream) {
this.localStream.getTracks().forEach(track => track.stop());
}
if (this.peerConnection) {
this.peerConnection.close();
}
if (this.ws) {
this.ws.close();
}
}
}
// Usage
const client = new VoiceAgentClient({
  serverUrl: 'wss://api.example.com',
  agentId: 'agent_123',
});

document.getElementById('startBtn').onclick = async () => {
  await client.connect();
  document.getElementById('stopBtn').disabled = false; // enable End Call
};
document.getElementById('stopBtn').onclick = () => client.disconnect();
Server Implementation
Go WebRTC Server
package main
import (
	"time"

	"github.com/gorilla/websocket"
	"github.com/pion/webrtc/v3"
	"github.com/pion/webrtc/v3/pkg/media"
)
type WebRTCServer struct {
	peerConnections map[string]*webrtc.PeerConnection
	pipeline        *Pipeline
	audio           *AudioProcessor // Opus codec wrapper (see Audio Processing below)
}
func (s *WebRTCServer) HandleSignaling(ws *websocket.Conn) {
for {
var msg SignalingMessage
err := ws.ReadJSON(&msg)
if err != nil {
return
}
switch msg.Type {
case "offer":
s.handleOffer(ws, msg)
case "ice-candidate":
s.handleICECandidate(msg)
}
}
}
func (s *WebRTCServer) handleOffer(ws *websocket.Conn, msg SignalingMessage) {
	// Create peer connection
	config := webrtc.Configuration{
		ICEServers: []webrtc.ICEServer{
			{URLs: []string{"stun:stun.l.google.com:19302"}},
		},
	}
	peerConnection, err := webrtc.NewPeerConnection(config)
	if err != nil {
		return
	}
	// Handle incoming audio
	peerConnection.OnTrack(func(track *webrtc.TrackRemote, receiver *webrtc.RTPReceiver) {
		go s.handleAudioTrack(track, peerConnection)
	})
	// Trickle local ICE candidates back to the client.
	// NOTE: concurrent writers on one gorilla/websocket conn need a
	// mutex in production; it is omitted here for brevity.
	peerConnection.OnICECandidate(func(c *webrtc.ICECandidate) {
		if c != nil {
			init := c.ToJSON()
			ws.WriteJSON(SignalingMessage{Type: "ice-candidate", Candidate: &init})
		}
	})
	// Add outgoing audio track
	audioTrack, err := webrtc.NewTrackLocalStaticSample(
		webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeOpus},
		"audio",
		"voice-agent",
	)
	if err != nil {
		return
	}
	if _, err = peerConnection.AddTrack(audioTrack); err != nil {
		return
	}
	// Set remote description
	offer := webrtc.SessionDescription{
		Type: webrtc.SDPTypeOffer,
		SDP:  msg.SDP,
	}
	if err = peerConnection.SetRemoteDescription(offer); err != nil {
		return
	}
	// Create answer
	answer, err := peerConnection.CreateAnswer(nil)
	if err != nil {
		return
	}
	if err = peerConnection.SetLocalDescription(answer); err != nil {
		return
	}
	// Send answer
	ws.WriteJSON(SignalingMessage{
		Type: "answer",
		SDP:  answer.SDP,
	})
	// Store connection
	sessionID := generateSessionID()
	s.peerConnections[sessionID] = peerConnection
	// Start audio output loop
	go s.audioOutputLoop(sessionID, audioTrack)
}
func (s *WebRTCServer) handleAudioTrack(track *webrtc.TrackRemote, pc *webrtc.PeerConnection) {
	for {
		// Read one RTP packet; the Opus frame is the packet payload
		pkt, _, err := track.ReadRTP()
		if err != nil {
			return
		}
		// Decode Opus to PCM
		pcm, err := s.audio.Decode(pkt.Payload)
		if err != nil {
			continue // skip undecodable packets
		}
		// Send to pipeline
		s.pipeline.ProcessAudio(pcm)
	}
}
func (s *WebRTCServer) audioOutputLoop(sessionID string, track *webrtc.TrackLocalStaticSample) {
	for {
		// Get audio from pipeline
		audio := s.pipeline.GetOutputAudio()
		if audio == nil {
			time.Sleep(5 * time.Millisecond) // avoid busy-spinning while idle
			continue
		}
		// Encode to Opus
		opusData, err := s.audio.Encode(audio)
		if err != nil {
			continue
		}
		// Send to client, one 20 ms frame per sample
		track.WriteSample(media.Sample{
			Data:     opusData,
			Duration: 20 * time.Millisecond,
		})
	}
}
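The listing above never shows how HandleSignaling is mounted on an HTTP endpoint or how generateSessionID is implemented. Below is a minimal wiring sketch; the /signaling path matches the client, while the port and the permissive CheckOrigin are assumptions to tighten in production.

// Additional imports: "crypto/rand", "encoding/hex", "log", "net/http"

var upgrader = websocket.Upgrader{
	// Allow browser clients from any origin; restrict this in production
	CheckOrigin: func(r *http.Request) bool { return true },
}

// generateSessionID returns a random 16-byte hex string.
func generateSessionID() string {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		panic(err) // crypto/rand failing is unrecoverable
	}
	return hex.EncodeToString(b)
}

func main() {
	server := &WebRTCServer{
		peerConnections: make(map[string]*webrtc.PeerConnection),
		// pipeline and audio would be constructed here
	}
	http.HandleFunc("/signaling", func(w http.ResponseWriter, r *http.Request) {
		ws, err := upgrader.Upgrade(w, r, nil)
		if err != nil {
			return
		}
		defer ws.Close()
		server.HandleSignaling(ws)
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}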
Signaling Protocol
type SignalingMessage struct {
	Type      string                   `json:"type"`
	SDP       string                   `json:"sdp,omitempty"`
	Candidate *webrtc.ICECandidateInit `json:"candidate,omitempty"` // matches the browser's candidate JSON
	AgentID   string                   `json:"agentId,omitempty"`
}
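On the wire, every signaling message is a single JSON object. For illustration, marshaling an answer produces the following (SDP abbreviated):

msg := SignalingMessage{Type: "answer", SDP: "v=0\r\no=- ..."}
data, _ := json.Marshal(msg) // requires "encoding/json"
// string(data) == `{"type":"answer","sdp":"v=0\r\no=- ..."}`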
Audio Processing
Opus Codec
import "gopkg.in/hraban/opus.v2"
type AudioProcessor struct {
encoder *opus.Encoder
decoder *opus.Decoder
}
func NewAudioProcessor() (*AudioProcessor, error) {
encoder, err := opus.NewEncoder(48000, 1, opus.AppVoIP)
if err != nil {
return nil, err
}
decoder, err := opus.NewDecoder(48000, 1)
if err != nil {
return nil, err
}
return &AudioProcessor{
encoder: encoder,
decoder: decoder,
}, nil
}
func (p *AudioProcessor) Encode(pcm []int16) ([]byte, error) {
	buf := make([]byte, 1000) // generous upper bound for one Opus packet
	n, err := p.encoder.Encode(pcm, buf)
	if err != nil {
		return nil, err
	}
	return buf[:n], nil
}
func (p *AudioProcessor) Decode(data []byte) ([]int16, error) {
	pcm := make([]int16, 960) // 20 ms at 48 kHz, mono
	n, err := p.decoder.Decode(data, pcm)
	if err != nil {
		return nil, err
	}
	return pcm[:n], nil
}
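A quick round trip of a single frame shows the expected buffer sizes. Note that hraban/opus requires the PCM slice to be exactly one Opus frame (2.5 to 60 ms), so 960 samples is the 20 ms mono frame used throughout this page:

// requires "fmt" and "log"
proc, err := NewAudioProcessor()
if err != nil {
	log.Fatal(err)
}
frame := make([]int16, 960) // one 20 ms mono frame at 48 kHz (silence)
packet, err := proc.Encode(frame)
if err != nil {
	log.Fatal(err)
}
pcm, err := proc.Decode(packet)
if err != nil {
	log.Fatal(err)
}
fmt.Printf("encoded %d bytes, decoded %d samples\n", len(packet), len(pcm))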
Resampling for STT
func (p *AudioProcessor) PrepareForSTT(audio []int16) []int16 {
// Resample from 48kHz to 16kHz for STT
return resample(audio, 48000, 16000)
}
func (p *AudioProcessor) PrepareFromTTS(audio []int16, sampleRate int) []int16 {
// Resample TTS output to 48kHz for WebRTC
return resample(audio, sampleRate, 48000)
}
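resample itself is not defined anywhere on this page. A minimal linear-interpolation sketch follows; it is adequate for speech, but a polyphase or windowed-sinc resampler (or a dedicated library) gives better fidelity:

// resample converts PCM audio from one sample rate to another using
// linear interpolation between neighboring input samples.
func resample(in []int16, fromRate, toRate int) []int16 {
	if fromRate == toRate || len(in) == 0 {
		return in
	}
	ratio := float64(fromRate) / float64(toRate)
	outLen := int(float64(len(in)) / ratio)
	out := make([]int16, outLen)
	for i := range out {
		pos := float64(i) * ratio
		idx := int(pos)
		if idx >= len(in)-1 {
			out[i] = in[len(in)-1]
			continue
		}
		frac := pos - float64(idx)
		sample := float64(in[idx])*(1-frac) + float64(in[idx+1])*frac
		out[i] = int16(sample)
	}
	return out
}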
Data Channel
Control Messages
type ControlMessage struct {
Type string `json:"type"`
Data interface{} `json:"data"`
}
func (s *WebRTCServer) sendTranscript(dc *webrtc.DataChannel, role, text string) {
msg := ControlMessage{
Type: "transcript",
Data: map[string]string{
"role": role,
"text": text,
},
}
jsonMsg, _ := json.Marshal(msg)
dc.SendText(string(jsonMsg))
}
func (s *WebRTCServer) sendStatus(dc *webrtc.DataChannel, status string) {
msg := ControlMessage{
Type: "status",
Data: map[string]string{
"status": status,
},
}
jsonMsg, _ := json.Marshal(msg)
dc.SendText(string(jsonMsg))
}
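The two helpers above assume the server already holds a *webrtc.DataChannel. Since the browser client creates the channel, it arrives on the server through OnDataChannel. A sketch of registering it inside handleOffer (the dataChannels map is an assumption, not shown earlier):

// Inside handleOffer: capture the "control" channel the browser created
peerConnection.OnDataChannel(func(dc *webrtc.DataChannel) {
	if dc.Label() != "control" {
		return
	}
	dc.OnOpen(func() {
		s.sendStatus(dc, "connected")
	})
	dc.OnMessage(func(msg webrtc.DataChannelMessage) {
		// Handle client -> server control messages here
	})
	// Keep a reference for later transcript/status pushes, e.g.:
	// s.dataChannels[sessionID] = dc
})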
React Component
import React, { useState, useRef, useEffect } from 'react';
import { VoiceAgentClient } from './voice-agent'; // the client class shown above

interface VoiceAgentProps {
  serverUrl: string;
  agentId: string;
  onTranscript?: (role: string, text: string) => void;
}
export function VoiceAgent({ serverUrl, agentId, onTranscript }: VoiceAgentProps) {
const [status, setStatus] = useState<'idle' | 'connecting' | 'connected'>('idle');
const [transcripts, setTranscripts] = useState<Array<{ role: string; text: string }>>([]);
  const clientRef = useRef<VoiceAgentClient | null>(null);

  // Disconnect when the component unmounts
  useEffect(() => () => clientRef.current?.disconnect(), []);
  const connect = async () => {
    setStatus('connecting');
    const client = new VoiceAgentClient({
      serverUrl,
      agentId,
    });
    client.onTranscript = (data) => {
      setTranscripts(prev => [...prev, { role: data.role, text: data.text }]);
      onTranscript?.(data.role, data.text);
    };
    try {
      await client.connect();
      clientRef.current = client;
      setStatus('connected');
    } catch {
      setStatus('idle'); // e.g. mic permission denied or signaling failure
    }
  };
const disconnect = () => {
clientRef.current?.disconnect();
clientRef.current = null;
setStatus('idle');
};
return (
<div className="voice-agent">
<div className="status">Status: {status}</div>
<div className="controls">
{status === 'idle' && (
<button onClick={connect}>Start Call</button>
)}
{status === 'connected' && (
<button onClick={disconnect}>End Call</button>
)}
</div>
<div className="transcripts">
{transcripts.map((t, i) => (
<div key={i} className={`transcript ${t.role}`}>
<strong>{t.role}:</strong> {t.text}
</div>
))}
</div>
</div>
);
}
TURN Server
Coturn Configuration
# /etc/turnserver.conf
listening-port=3478
tls-listening-port=5349
listening-ip=0.0.0.0
relay-ip=YOUR_PUBLIC_IP
external-ip=YOUR_PUBLIC_IP
realm=your-domain.com
server-name=turn.your-domain.com
fingerprint
lt-cred-mech
user=username:password
ICE Configuration
const config = {
iceServers: [
{ urls: 'stun:stun.l.google.com:19302' },
{
urls: 'turn:turn.your-domain.com:3478',
username: 'username',
credential: 'password',
},
],
};
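If the Go server should also relay through TURN (for example, when it runs behind strict NAT), the same credentials go into pion's webrtc.Configuration. The host and credentials below are the placeholders from the coturn example:

config := webrtc.Configuration{
	ICEServers: []webrtc.ICEServer{
		{URLs: []string{"stun:stun.l.google.com:19302"}},
		{
			URLs:       []string{"turn:turn.your-domain.com:3478"},
			Username:   "username",
			Credential: "password",
		},
	},
}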
Best Practices
1. Handle Connection States
peerConnection.onconnectionstatechange = () => {
switch (peerConnection.connectionState) {
case 'connected':
console.log('Connected');
break;
case 'disconnected':
console.log('Disconnected, attempting reconnect...');
reconnect();
break;
case 'failed':
console.log('Connection failed');
disconnect();
break;
}
};
2. Enable Echo Cancellation
const constraints = {
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
};
3. Monitor Audio Levels
function monitorAudioLevel(stream) {
const audioContext = new AudioContext();
const source = audioContext.createMediaStreamSource(stream);
const analyser = audioContext.createAnalyser();
source.connect(analyser);
const dataArray = new Uint8Array(analyser.frequencyBinCount);
function checkLevel() {
analyser.getByteFrequencyData(dataArray);
const average = dataArray.reduce((a, b) => a + b) / dataArray.length;
// Use average for VU meter or VAD
requestAnimationFrame(checkLevel);
}
checkLevel();
}
Next Steps
- Twilio - Phone integration
- Audio Processing - Audio handling
- Latency Guide - Optimization