Azure OpenAI
Azure OpenAI provides enterprise-grade access to OpenAI models with enhanced security, compliance, data privacy, and regional deployment options.
Overview
| Feature | Value |
|---|---|
| Latency | ~300-600ms |
| Models | GPT-4o, GPT-4, GPT-3.5 |
| Context | Up to 128K tokens |
| Best For | Enterprise, compliance |
Configuration
Basic Setup
{
"agent": {
"llmProvider": "openai-azure",
"llmModel": "gpt-4o",
"llmConfig": {
"deploymentName": "gpt-4o-deployment",
"temperature": 0.7,
"maxTokens": 500
}
}
}
Environment Variables
AZURE_OPENAI_API_KEY=your-api-key
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_API_VERSION=2024-02-15-preview
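A minimal sketch of reading these variables into the AzureOpenAIConfig consumed by the constructor below (the field names are inferred from NewAzureOpenAILLM in the Implementation section):

import "os"

// ConfigFromEnv builds an AzureOpenAIConfig from the environment. The
// deployment name typically comes from application config (llmConfig above).
func ConfigFromEnv(deploymentName string) AzureOpenAIConfig {
	return AzureOpenAIConfig{
		Endpoint:       os.Getenv("AZURE_OPENAI_ENDPOINT"),
		APIKey:         os.Getenv("AZURE_OPENAI_API_KEY"),
		APIVersion:     os.Getenv("AZURE_OPENAI_API_VERSION"),
		DeploymentName: deploymentName,
	}
}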
Implementation
Client Setup
type AzureOpenAILLM struct {
	endpoint       string
	apiKey         string
	deploymentName string
	apiVersion     string
	temperature    float64
	maxTokens      int
	client         *http.Client
}

func NewAzureOpenAILLM(config AzureOpenAIConfig) *AzureOpenAILLM {
	return &AzureOpenAILLM{
		endpoint:       config.Endpoint,
		apiKey:         config.APIKey,
		deploymentName: config.DeploymentName,
		apiVersion:     config.APIVersion,
		temperature:    config.Temperature,
		maxTokens:      config.MaxTokens,
		client:         &http.Client{Timeout: 30 * time.Second},
	}
}
func (a *AzureOpenAILLM) buildURL(path string) string {
return fmt.Sprintf(
"%s/openai/deployments/%s/%s?api-version=%s",
a.endpoint, a.deploymentName, path, a.apiVersion,
)
}
Chat Completion
func (a *AzureOpenAILLM) Generate(ctx context.Context, messages []Message) (string, error) {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return "", err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return "", a.handleError(resp)
}
var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return "", err
}
if len(result.Choices) == 0 {
return "", fmt.Errorf("no response generated")
}
return result.Choices[0].Message.Content, nil
}
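A usage sketch (the Message role/content fields are assumed to follow the OpenAI chat format used throughout this section; ConfigFromEnv is the helper sketched under Environment Variables):

llm := NewAzureOpenAILLM(ConfigFromEnv("gpt-4o-deployment"))

reply, err := llm.Generate(ctx, []Message{
	{Role: "system", Content: "You are a concise voice assistant."},
	{Role: "user", Content: "What are your opening hours?"},
})
if err != nil {
	log.Printf("generate failed: %v", err)
	return
}
fmt.Println(reply)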
Streaming
func (a *AzureOpenAILLM) GenerateStreaming(ctx context.Context, messages []Message, callback func(string)) error {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
Stream: true,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
	return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
	return a.handleError(resp)
}
reader := bufio.NewReader(resp.Body)
for {
line, err := reader.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return err
}
line = strings.TrimPrefix(line, "data: ")
line = strings.TrimSpace(line)
if line == "" {
	continue
}
if line == "[DONE]" {
	break
}
var chunk StreamChunk
if err := json.Unmarshal([]byte(line), &chunk); err != nil {
continue
}
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
callback(chunk.Choices[0].Delta.Content)
}
}
return nil
}
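In a voice agent, the callback is typically where tokens are handed to the TTS pipeline; a sketch that simply prints them:

err := llm.GenerateStreaming(ctx, messages, func(token string) {
	fmt.Print(token) // push to the TTS buffer here instead of printing
})
if err != nil {
	log.Printf("stream failed: %v", err)
}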
Function Calling
func (a *AzureOpenAILLM) GenerateWithTools(ctx context.Context, messages []Message, tools []Tool) (*ToolCallResponse, error) {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
Tools: tools,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return nil, err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
	return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
	return nil, a.handleError(resp)
}
var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
	return nil, err
}
if len(result.Choices) == 0 {
	return nil, fmt.Errorf("no response generated")
}
choice := result.Choices[0]
if choice.FinishReason == "tool_calls" {
return &ToolCallResponse{
ToolCalls: choice.Message.ToolCalls,
}, nil
}
return &ToolCallResponse{
Content: choice.Message.Content,
}, nil
}
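A sketch of handling the response; the ToolCall field names assume the OpenAI tool_calls wire format, and dispatch is a hypothetical router into your own tool registry:

func runTools(ctx context.Context, llm *AzureOpenAILLM, messages []Message, tools []Tool) error {
	resp, err := llm.GenerateWithTools(ctx, messages, tools)
	if err != nil {
		return err
	}
	if len(resp.ToolCalls) == 0 {
		fmt.Println(resp.Content) // model answered directly, no tool needed
		return nil
	}
	for _, call := range resp.ToolCalls {
		// Field names assume the OpenAI tool_calls wire format; dispatch is
		// a hypothetical router into your own tool registry.
		out, err := dispatch(ctx, call.Function.Name, call.Function.Arguments)
		if err != nil {
			return err
		}
		// Append the tool result and call the model again for the final reply.
		messages = append(messages, Message{Role: "tool", Content: out})
	}
	return nil
}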
Deployments
Creating Deployments
Azure OpenAI requires a named deployment of each model before it can be called; the deployment name, not the underlying model name, is what appears in the request URL:
# Via Azure CLI
az cognitiveservices account deployment create \
--resource-group myResourceGroup \
--name myOpenAIResource \
--deployment-name gpt-4o-deployment \
--model-name gpt-4o \
--model-version "2024-08-06" \
--model-format OpenAI \
--sku-capacity 10 \
--sku-name Standard
Deployment Configuration
type DeploymentConfig struct {
Name string
ModelName string
ModelVersion string
Capacity int // Tokens per minute (thousands)
SKU string // Standard or Provisioned
}
var RecommendedDeployments = []DeploymentConfig{
{
Name: "gpt-4o-voice",
ModelName: "gpt-4o",
ModelVersion: "2024-08-06",
Capacity: 50, // 50K TPM
SKU: "Standard",
},
{
Name: "gpt-4o-mini-voice",
ModelName: "gpt-4o-mini",
ModelVersion: "2024-07-18",
Capacity: 100, // 100K TPM
SKU: "Standard",
},
}
API Versions
var APIVersions = map[string]string{
	"stable":  "2024-06-01", // alias for the GA version
	"ga":      "2024-06-01",
	"preview": "2024-08-01-preview",
}
func (a *AzureOpenAILLM) SetAPIVersion(version string) {
if v, ok := APIVersions[version]; ok {
a.apiVersion = v
} else {
a.apiVersion = version
}
}
Regional Deployment
Available Regions
var AzureOpenAIRegions = map[string]string{
"us_east": "eastus",
"us_east_2": "eastus2",
"us_south": "southcentralus",
"us_west": "westus",
"us_west_3": "westus3",
"europe_west": "westeurope",
"europe_north": "northeurope",
"uk_south": "uksouth",
"asia_east": "eastasia",
"japan_east": "japaneast",
"australia_east": "australiaeast",
"canada_east": "canadaeast",
"india_central": "centralindia", // For India-based workloads
}
// Note: Azure OpenAI endpoints are per-resource, not per-region. This helper
// assumes one resource per region whose name matches the regional identifier;
// substitute your own resource endpoints in practice.
func (a *AzureOpenAILLM) SetRegion(region string) {
	if name, ok := AzureOpenAIRegions[region]; ok {
		a.endpoint = fmt.Sprintf("https://%s.openai.azure.com", name)
	}
}
Multi-Region Failover
type AzureOpenAIMultiRegion struct {
primary *AzureOpenAILLM
secondary *AzureOpenAILLM
failover bool
}
func (m *AzureOpenAIMultiRegion) Generate(ctx context.Context, messages []Message) (string, error) {
// Try primary
result, err := m.primary.Generate(ctx, messages)
if err == nil {
return result, nil
}
// Failover to secondary
if m.failover {
log.Printf("Failing over to secondary region: %v", err)
return m.secondary.Generate(ctx, messages)
}
return "", err
}
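Wiring it up; each region requires its own Azure OpenAI resource and deployment, so the two configs below (hypothetical names) must point at separate resources:

multi := &AzureOpenAIMultiRegion{
	primary:   NewAzureOpenAILLM(eastUSConfig),     // hypothetical per-region configs
	secondary: NewAzureOpenAILLM(westEuropeConfig),
	failover:  true,
}
reply, err := multi.Generate(ctx, messages)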
Authentication
API Key
func (a *AzureOpenAILLM) setAPIKeyAuth(req *http.Request) {
req.Header.Set("api-key", a.apiKey)
}
Azure AD (Managed Identity)
import "github.com/Azure/azure-sdk-for-go/sdk/azidentity"
type AzureOpenAIWithAAD struct {
endpoint string
credential *azidentity.DefaultAzureCredential
// ...
}
func NewAzureOpenAIWithAAD(endpoint string) (*AzureOpenAIWithAAD, error) {
cred, err := azidentity.NewDefaultAzureCredential(nil)
if err != nil {
return nil, err
}
return &AzureOpenAIWithAAD{
endpoint: endpoint,
credential: cred,
}, nil
}
func (a *AzureOpenAIWithAAD) getToken(ctx context.Context) (string, error) {
token, err := a.credential.GetToken(ctx, policy.TokenRequestOptions{
Scopes: []string{"https://cognitiveservices.azure.com/.default"},
})
if err != nil {
return "", err
}
return token.Token, nil
}
func (a *AzureOpenAIWithAAD) setAADAuth(ctx context.Context, req *http.Request) error {
token, err := a.getToken(ctx)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+token)
return nil
}
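A sketch of using it inside a request-building function: construct the request exactly as in the API-key client, then attach the bearer token instead of the api-key header (httpClient stands in for any *http.Client):

// Inside a request-building function:
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
	return err
}
req.Header.Set("Content-Type", "application/json")
if err := a.setAADAuth(ctx, req); err != nil {
	return err
}
resp, err := httpClient.Do(req) // then decode as in Generate above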
Content Filtering
Azure OpenAI includes built-in content filtering:
type ContentFilterResult struct {
Hate FilterCategory `json:"hate"`
SelfHarm FilterCategory `json:"self_harm"`
Sexual FilterCategory `json:"sexual"`
Violence FilterCategory `json:"violence"`
}
type FilterCategory struct {
Filtered bool `json:"filtered"`
Severity string `json:"severity"` // safe, low, medium, high
}
func (a *AzureOpenAILLM) handleContentFilter(result *ChatCompletionResponse) error {
	if len(result.Choices) == 0 || result.Choices[0].ContentFilterResults == nil {
		return nil
	}
	cf := result.Choices[0].ContentFilterResults
	if cf.Hate.Filtered || cf.SelfHarm.Filtered || cf.Sexual.Filtered || cf.Violence.Filtered {
		return fmt.Errorf("content filtered: policy violation")
	}
	return nil
}
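For example, the Generate path can consult the verdict after decoding the response and before returning the text:

var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
	return "", err
}
if err := a.handleContentFilter(&result); err != nil {
	return "", err // or substitute a safe fallback reply for the caller
}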
Custom Content Filters
// Configure via Azure Portal or API
type ContentFilterConfig struct {
Name string
Blocking bool
Severity string // low, medium, high
Action string // warn, block
}
var VoiceAgentContentFilter = ContentFilterConfig{
Name: "voice-agent-filter",
Blocking: true,
Severity: "medium",
Action: "block",
}
Rate Limiting
Token Quotas
type RateLimiter struct {
tokensPerMinute int
tokens int
lastReset time.Time
mu sync.Mutex
}
func (r *RateLimiter) Acquire(tokens int) bool {
r.mu.Lock()
defer r.mu.Unlock()
// Reset counter every minute
if time.Since(r.lastReset) >= time.Minute {
r.tokens = 0
r.lastReset = time.Now()
}
if r.tokens+tokens > r.tokensPerMinute {
return false
}
r.tokens += tokens
return true
}
func (a *AzureOpenAILLM) GenerateWithRateLimit(ctx context.Context, messages []Message) (string, error) {
estimatedTokens := estimateTokens(messages)
if !a.rateLimiter.Acquire(estimatedTokens) {
return "", fmt.Errorf("rate limit exceeded")
}
return a.Generate(ctx, messages)
}
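GenerateWithRateLimit relies on an estimateTokens helper not defined above; a rough sketch using the common ~4-characters-per-token heuristic (use a real tokenizer for exact counts):

// estimateTokens returns a rough prompt-token estimate. This heuristic
// over- or under-shoots by design; swap in a real tokenizer for exact counts.
func estimateTokens(messages []Message) int {
	chars := 0
	for _, m := range messages {
		chars += len(m.Content)
	}
	return chars/4 + 4*len(messages) // ~4 chars per token plus per-message overhead
}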
Handling 429 Errors
func (a *AzureOpenAILLM) GenerateWithRetry(ctx context.Context, messages []Message) (string, error) {
maxRetries := 5
baseDelay := 1 * time.Second
for attempt := 0; attempt < maxRetries; attempt++ {
result, err := a.Generate(ctx, messages)
if err == nil {
return result, nil
}
var azureErr *AzureError
if errors.As(err, &azureErr) && azureErr.StatusCode == 429 {
// Use retry-after header if available
delay := baseDelay * time.Duration(1<<attempt)
if azureErr.RetryAfter > 0 {
delay = time.Duration(azureErr.RetryAfter) * time.Second
}
select {
case <-ctx.Done():
return "", ctx.Err()
case <-time.After(delay):
continue
}
}
return "", err
}
return "", fmt.Errorf("max retries exceeded")
}
Error Handling
type AzureError struct {
StatusCode int
Code string
Message string
RetryAfter int
}
func (a *AzureOpenAILLM) handleError(resp *http.Response) error {
body, _ := io.ReadAll(resp.Body)
var errResp struct {
Error struct {
Code string `json:"code"`
Message string `json:"message"`
} `json:"error"`
}
json.Unmarshal(body, &errResp)
retryAfter := 0
if ra := resp.Header.Get("Retry-After"); ra != "" {
retryAfter, _ = strconv.Atoi(ra)
}
return &AzureError{
StatusCode: resp.StatusCode,
Code: errResp.Error.Code,
Message: errResp.Error.Message,
RetryAfter: retryAfter,
}
}
Monitoring
Azure Monitor Integration
import "github.com/microsoft/ApplicationInsights-Go/appinsights"

type AzureMetrics struct {
	appInsights appinsights.TelemetryClient // interface type, not a pointer
}

func (m *AzureMetrics) TrackRequest(operation string, duration time.Duration, success bool) {
	req := appinsights.NewRequestTelemetry("POST", operation, duration, "200")
	req.Success = success
	m.appInsights.Track(req)
}

func (m *AzureMetrics) TrackTokenUsage(prompt, completion int) {
	m.appInsights.TrackMetric("PromptTokens", float64(prompt))
	m.appInsights.TrackMetric("CompletionTokens", float64(completion))
}
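Instrumenting a call then looks like this (a sketch combining the client and metrics types above):

start := time.Now()
reply, err := llm.Generate(ctx, messages)
metrics.TrackRequest("chat/completions", time.Since(start), err == nil)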
Best Practices
1. Use Provisioned Throughput for Production
// For predictable, high-volume workloads
config := AzureOpenAIConfig{
DeploymentName: "gpt-4o-provisioned",
SKU: "ProvisionedManaged",
Capacity: 100, // PTUs
}
2. Implement Circuit Breaker
type CircuitBreaker struct {
failures int
threshold int
state string
lastFailure time.Time
timeout time.Duration
}
func (a *AzureOpenAILLM) GenerateWithCircuitBreaker(ctx context.Context, messages []Message) (string, error) {
if a.circuitBreaker.IsOpen() {
return "", fmt.Errorf("circuit breaker open")
}
result, err := a.Generate(ctx, messages)
if err != nil {
a.circuitBreaker.RecordFailure()
return "", err
}
a.circuitBreaker.RecordSuccess()
return result, nil
}
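The IsOpen, RecordFailure, and RecordSuccess methods are not shown above; a minimal single-goroutine sketch (a production breaker would add locking and a half-open probe state, and the circuitBreaker field is assumed to be added to AzureOpenAILLM):

func (cb *CircuitBreaker) IsOpen() bool {
	// Allow traffic again once the cool-down period has elapsed.
	if cb.state == "open" && time.Since(cb.lastFailure) > cb.timeout {
		cb.state = "closed"
		cb.failures = 0
	}
	return cb.state == "open"
}

func (cb *CircuitBreaker) RecordFailure() {
	cb.failures++
	cb.lastFailure = time.Now()
	if cb.failures >= cb.threshold {
		cb.state = "open"
	}
}

func (cb *CircuitBreaker) RecordSuccess() {
	cb.failures = 0
	cb.state = "closed"
}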
3. Use Azure Private Endpoints
// For enhanced security, use private endpoints
config := AzureOpenAIConfig{
Endpoint: "https://myopenai.privatelink.openai.azure.com",
// Configure via Azure networking
}
Next Steps
- OpenAI - Direct OpenAI integration
- Function Calling - Tool use
- Security - Enterprise security