Azure OpenAI
Azure OpenAI provides enterprise-grade access to OpenAI models with enhanced security, compliance, data privacy, and regional deployment options.
Overview
| Feature | Value |
|---|---|
| Latency | ~300-600ms |
| Models | GPT-4o, GPT-4, GPT-3.5 |
| Context | Up to 128K tokens |
| Best For | Enterprise, compliance |
Configuration
Basic Setup
{
"agent": {
"llmProvider": "openai-azure",
"llmModel": "gpt-4o",
"llmConfig": {
"deploymentName": "gpt-4o-deployment",
"temperature": 0.7,
"maxTokens": 500
}
}
}
Environment Variables
AZURE_OPENAI_API_KEY=your-api-key
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_API_VERSION=2024-02-15-preview
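A minimal sketch of reading these variables into the AzureOpenAIConfig consumed by the constructor below (the field names are inferred from NewAzureOpenAILLM in the Implementation section):

import "os"

// ConfigFromEnv builds an AzureOpenAIConfig from the environment. The
// deployment name typically comes from application config (llmConfig above).
func ConfigFromEnv(deploymentName string) AzureOpenAIConfig {
	return AzureOpenAIConfig{
		Endpoint:       os.Getenv("AZURE_OPENAI_ENDPOINT"),
		APIKey:         os.Getenv("AZURE_OPENAI_API_KEY"),
		APIVersion:     os.Getenv("AZURE_OPENAI_API_VERSION"),
		DeploymentName: deploymentName,
	}
}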
Implementation
Client Setup
type AzureOpenAILLM struct {
	endpoint       string
	apiKey         string
	deploymentName string
	apiVersion     string
	temperature    float64
	maxTokens      int
	client         *http.Client
}

func NewAzureOpenAILLM(config AzureOpenAIConfig) *AzureOpenAILLM {
	return &AzureOpenAILLM{
		endpoint:       config.Endpoint,
		apiKey:         config.APIKey,
		deploymentName: config.DeploymentName,
		apiVersion:     config.APIVersion,
		temperature:    config.Temperature,
		maxTokens:      config.MaxTokens,
		client:         &http.Client{Timeout: 30 * time.Second},
	}
}
func (a *AzureOpenAILLM) buildURL(path string) string {
return fmt.Sprintf(
"%s/openai/deployments/%s/%s?api-version=%s",
a.endpoint, a.deploymentName, path, a.apiVersion,
)
}
Chat Completion
func (a *AzureOpenAILLM) Generate(ctx context.Context, messages []Message) (string, error) {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return "", err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return "", err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return "", a.handleError(resp)
}
var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return "", err
}
if len(result.Choices) == 0 {
return "", fmt.Errorf("no response generated")
}
return result.Choices[0].Message.Content, nil
}
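A usage sketch (the Message role/content fields are assumed to follow the OpenAI chat format used throughout this section; ConfigFromEnv is the helper sketched under Environment Variables):

llm := NewAzureOpenAILLM(ConfigFromEnv("gpt-4o-deployment"))

reply, err := llm.Generate(ctx, []Message{
	{Role: "system", Content: "You are a concise voice assistant."},
	{Role: "user", Content: "What are your opening hours?"},
})
if err != nil {
	log.Printf("generate failed: %v", err)
	return
}
fmt.Println(reply)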
Streaming
func (a *AzureOpenAILLM) GenerateStreaming(ctx context.Context, messages []Message, callback func(string)) error {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
Stream: true,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
	return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
	return a.handleError(resp)
}
reader := bufio.NewReader(resp.Body)
for {
line, err := reader.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return err
}
line = strings.TrimPrefix(line, "data: ")
line = strings.TrimSpace(line)
if line == "" {
	continue
}
if line == "[DONE]" {
	break
}
var chunk StreamChunk
if err := json.Unmarshal([]byte(line), &chunk); err != nil {
continue
}
if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
callback(chunk.Choices[0].Delta.Content)
}
}
return nil
}
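In a voice agent, the callback is typically where tokens are handed to the TTS pipeline; a sketch that simply prints them:

err := llm.GenerateStreaming(ctx, messages, func(token string) {
	fmt.Print(token) // push to the TTS buffer here instead of printing
})
if err != nil {
	log.Printf("stream failed: %v", err)
}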
Function Calling
func (a *AzureOpenAILLM) GenerateWithTools(ctx context.Context, messages []Message, tools []Tool) (*ToolCallResponse, error) {
url := a.buildURL("chat/completions")
body := ChatCompletionRequest{
Messages: messages,
Temperature: a.temperature,
MaxTokens: a.maxTokens,
Tools: tools,
}
jsonBody, err := json.Marshal(body)
if err != nil {
	return nil, err
}
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("api-key", a.apiKey)
resp, err := a.client.Do(req)
if err != nil {
	return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
	return nil, a.handleError(resp)
}
var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
	return nil, err
}
if len(result.Choices) == 0 {
	return nil, fmt.Errorf("no response generated")
}
choice := result.Choices[0]
if choice.FinishReason == "tool_calls" {
return &ToolCallResponse{
ToolCalls: choice.Message.ToolCalls,
}, nil
}
return &ToolCallResponse{
Content: choice.Message.Content,
}, nil
}
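A sketch of handling the response; the ToolCall field names assume the OpenAI tool_calls wire format, and dispatch is a hypothetical router into your own tool registry:

func runTools(ctx context.Context, llm *AzureOpenAILLM, messages []Message, tools []Tool) error {
	resp, err := llm.GenerateWithTools(ctx, messages, tools)
	if err != nil {
		return err
	}
	if len(resp.ToolCalls) == 0 {
		fmt.Println(resp.Content) // model answered directly, no tool needed
		return nil
	}
	for _, call := range resp.ToolCalls {
		// Field names assume the OpenAI tool_calls wire format; dispatch is
		// a hypothetical router into your own tool registry.
		out, err := dispatch(ctx, call.Function.Name, call.Function.Arguments)
		if err != nil {
			return err
		}
		// Append the tool result and call the model again for the final reply.
		messages = append(messages, Message{Role: "tool", Content: out})
	}
	return nil
}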
Deployments
Creating Deployments
Azure OpenAI requires a named deployment of each model before it can be called; the deployment name, not the underlying model name, is what appears in the request URL:
# Via Azure CLI
az cognitiveservices account deployment create \
--resource-group myResourceGroup \
--name myOpenAIResource \
--deployment-name gpt-4o-deployment \
--model-name gpt-4o \
--model-version "2024-08-06" \
--model-format OpenAI \
--sku-capacity 10 \
--sku-name Standard
Deployment Configuration
type DeploymentConfig struct {
Name string
ModelName string
ModelVersion string
Capacity int // Tokens per minute (thousands)
SKU string // Standard or Provisioned
}
var RecommendedDeployments = []DeploymentConfig{
{
Name: "gpt-4o-voice",
ModelName: "gpt-4o",
ModelVersion: "2024-08-06",
Capacity: 50, // 50K TPM
SKU: "Standard",
},
{
Name: "gpt-4o-mini-voice",
ModelName: "gpt-4o-mini",
ModelVersion: "2024-07-18",
Capacity: 100, // 100K TPM
SKU: "Standard",
},
}
API Versions
var APIVersions = map[string]string{
	"stable":  "2024-06-01", // alias for the GA version
	"ga":      "2024-06-01",
	"preview": "2024-08-01-preview",
}
func (a *AzureOpenAILLM) SetAPIVersion(version string) {
if v, ok := APIVersions[version]; ok {
a.apiVersion = v
} else {
a.apiVersion = version
}
}
Regional Deployment
Available Regions
var AzureOpenAIRegions = map[string]string{
"us_east": "eastus",
"us_east_2": "eastus2",
"us_south": "southcentralus",
"us_west": "westus",
"us_west_3": "westus3",
"europe_west": "westeurope",
"europe_north": "northeurope",
"uk_south": "uksouth",
"asia_east": "eastasia",
"japan_east": "japaneast",
"australia_east": "australiaeast",
"canada_east": "canadaeast",
"india_central": "centralindia", // For India-based workloads
}
// Note: Azure OpenAI endpoints are per-resource, not per-region. This helper
// assumes one resource per region whose name matches the regional identifier;
// substitute your own resource endpoints in practice.
func (a *AzureOpenAILLM) SetRegion(region string) {
	if name, ok := AzureOpenAIRegions[region]; ok {
		a.endpoint = fmt.Sprintf("https://%s.openai.azure.com", name)
	}
}
Multi-Region Failover
type AzureOpenAIMultiRegion struct {
primary *AzureOpenAILLM
secondary *AzureOpenAILLM
failover bool
}
func (m *AzureOpenAIMultiRegion) Generate(ctx context.Context, messages []Message) (string, error) {
// Try primary
result, err := m.primary.Generate(ctx, messages)
if err == nil {
return result, nil
}
// Failover to secondary
if m.failover {
log.Printf("Failing over to secondary region: %v", err)
return m.secondary.Generate(ctx, messages)
}
return "", err
}
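Wiring it up; each region requires its own Azure OpenAI resource and deployment, so the two configs below (hypothetical names) must point at separate resources:

multi := &AzureOpenAIMultiRegion{
	primary:   NewAzureOpenAILLM(eastUSConfig),     // hypothetical per-region configs
	secondary: NewAzureOpenAILLM(westEuropeConfig),
	failover:  true,
}
reply, err := multi.Generate(ctx, messages)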
Authentication
API Key
func (a *AzureOpenAILLM) setAPIKeyAuth(req *http.Request) {
req.Header.Set("api-key", a.apiKey)
}
Azure AD (Managed Identity)
import "github.com/Azure/azure-sdk-for-go/sdk/azidentity"
type AzureOpenAIWithAAD struct {
endpoint string
credential *azidentity.DefaultAzureCredential
// ...
}
func NewAzureOpenAIWithAAD(endpoint string) (*AzureOpenAIWithAAD, error) {
cred, err := azidentity.NewDefaultAzureCredential(nil)
if err != nil {
return nil, err
}
return &AzureOpenAIWithAAD{
endpoint: endpoint,
credential: cred,
}, nil
}
func (a *AzureOpenAIWithAAD) getToken(ctx context.Context) (string, error) {
token, err := a.credential.GetToken(ctx, policy.TokenRequestOptions{
Scopes: []string{"https://cognitiveservices.azure.com/.default"},
})
if err != nil {
return "", err
}
return token.Token, nil
}
func (a *AzureOpenAIWithAAD) setAADAuth(ctx context.Context, req *http.Request) error {
token, err := a.getToken(ctx)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+token)
return nil
}
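A sketch of using it inside a request-building function: construct the request exactly as in the API-key client, then attach the bearer token instead of the api-key header (httpClient stands in for any *http.Client):

// Inside a request-building function:
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
if err != nil {
	return err
}
req.Header.Set("Content-Type", "application/json")
if err := a.setAADAuth(ctx, req); err != nil {
	return err
}
resp, err := httpClient.Do(req) // then decode as in Generate above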
Content Filtering
Azure OpenAI includes built-in content filtering:
type ContentFilterResult struct {
Hate FilterCategory `json:"hate"`
SelfHarm FilterCategory `json:"self_harm"`
Sexual FilterCategory `json:"sexual"`
Violence FilterCategory `json:"violence"`
}
type FilterCategory struct {
Filtered bool `json:"filtered"`
Severity string `json:"severity"` // safe, low, medium, high
}
func (a *AzureOpenAILLM) handleContentFilter(result *ChatCompletionResponse) error {
	if len(result.Choices) == 0 || result.Choices[0].ContentFilterResults == nil {
		return nil
	}
	cf := result.Choices[0].ContentFilterResults
	if cf.Hate.Filtered || cf.SelfHarm.Filtered || cf.Sexual.Filtered || cf.Violence.Filtered {
		return fmt.Errorf("content filtered: policy violation")
	}
	return nil
}
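For example, the Generate path can consult the verdict after decoding the response and before returning the text:

var result ChatCompletionResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
	return "", err
}
if err := a.handleContentFilter(&result); err != nil {
	return "", err // or substitute a safe fallback reply for the caller
}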
Custom Content Filters
// Configure via Azure Portal or API
type ContentFilterConfig struct {
Name string
Blocking bool
Severity string // low, medium, high
Action string // warn, block
}
var VoiceAgentContentFilter = ContentFilterConfig{
Name: "voice-agent-filter",
Blocking: true,
Severity: "medium",
Action: "block",
}
Rate Limiting
Token Quotas
type RateLimiter struct {
tokensPerMinute int
tokens int
lastReset time.Time
mu sync.Mutex
}
func (r *RateLimiter) Acquire(tokens int) bool {
r.mu.Lock()
defer r.mu.Unlock()
// Reset counter every minute
if time.Since(r.lastReset) >= time.Minute {
r.tokens = 0
r.lastReset = time.Now()
}
if r.tokens+tokens > r.tokensPerMinute {
return false
}
r.tokens += tokens
return true
}
func (a *AzureOpenAILLM) GenerateWithRateLimit(ctx context.Context, messages []Message) (string, error) {
estimatedTokens := estimateTokens(messages)
if !a.rateLimiter.Acquire(estimatedTokens) {
return "", fmt.Errorf("rate limit exceeded")
}
return a.Generate(ctx, messages)
}
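GenerateWithRateLimit relies on an estimateTokens helper not defined above; a rough sketch using the common ~4-characters-per-token heuristic (use a real tokenizer for exact counts):

// estimateTokens returns a rough prompt-token estimate. This heuristic
// over- or under-shoots by design; swap in a real tokenizer for exact counts.
func estimateTokens(messages []Message) int {
	chars := 0
	for _, m := range messages {
		chars += len(m.Content)
	}
	return chars/4 + 4*len(messages) // ~4 chars per token plus per-message overhead
}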
Handling 429 Errors
func (a *AzureOpenAILLM) GenerateWithRetry(ctx context.Context, messages []Message) (string, error) {
maxRetries := 5
baseDelay := 1 * time.Second
for attempt := 0; attempt < maxRetries; attempt++ {
result, err := a.Generate(ctx, messages)
if err == nil {
return result, nil
}
var azureErr *AzureError
if errors.As(err, &azureErr) && azureErr.StatusCode == 429 {
// Use retry-after header if available
delay := baseDelay * time.Duration(1<<attempt)
if azureErr.RetryAfter > 0 {
delay = time.Duration(azureErr.RetryAfter) * time.Second
}
select {
case <-ctx.Done():
return "", ctx.Err()
case <-time.After(delay):
continue
}
}
return "", err
}
return "", fmt.Errorf("max retries exceeded")
}
Error Handling
type AzureError struct {
StatusCode int
Code string
Message string
RetryAfter int
}
func (a *AzureOpenAILLM) handleError(resp *http.Response) error {
body, _ := io.ReadAll(resp.Body)
var errResp struct {
Error struct {
Code string `json:"code"`
Message string `json:"message"`
} `json:"error"`
}
json.Unmarshal(body, &errResp)
retryAfter := 0
if ra := resp.Header.Get("Retry-After"); ra != "" {
retryAfter, _ = strconv.Atoi(ra)
}
return &AzureError{
StatusCode: resp.StatusCode,
Code: errResp.Error.Code,
Message: errResp.Error.Message,
RetryAfter: retryAfter,
}
}
Monitoring
Azure Monitor Integration
import "github.com/microsoft/ApplicationInsights-Go/appinsights"

type AzureMetrics struct {
	appInsights appinsights.TelemetryClient // interface type, not a pointer
}

func (m *AzureMetrics) TrackRequest(operation string, duration time.Duration, success bool) {
	req := appinsights.NewRequestTelemetry("POST", operation, duration, "200")
	req.Success = success
	m.appInsights.Track(req)
}

func (m *AzureMetrics) TrackTokenUsage(prompt, completion int) {
	m.appInsights.TrackMetric("PromptTokens", float64(prompt))
	m.appInsights.TrackMetric("CompletionTokens", float64(completion))
}
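Instrumenting a call then looks like this (a sketch combining the client and metrics types above):

start := time.Now()
reply, err := llm.Generate(ctx, messages)
metrics.TrackRequest("chat/completions", time.Since(start), err == nil)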
Best Practices
1. Use Provisioned Throughput for Production
// For predictable, high-volume workloads
config := AzureOpenAIConfig{
DeploymentName: "gpt-4o-provisioned",
SKU: "ProvisionedManaged",
Capacity: 100, // PTUs
}
2. Implement Circuit Breaker
type CircuitBreaker struct {
failures int
threshold int
state string
lastFailure time.Time
timeout time.Duration
}
func (a *AzureOpenAILLM) GenerateWithCircuitBreaker(ctx context.Context, messages []Message) (string, error) {
if a.circuitBreaker.IsOpen() {
return "", fmt.Errorf("circuit breaker open")
}
result, err := a.Generate(ctx, messages)
if err != nil {
a.circuitBreaker.RecordFailure()
return "", err
}
a.circuitBreaker.RecordSuccess()
return result, nil
}
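The IsOpen, RecordFailure, and RecordSuccess methods are not shown above; a minimal single-goroutine sketch (a production breaker would add locking and a half-open probe state, and the circuitBreaker field is assumed to be added to AzureOpenAILLM):

func (cb *CircuitBreaker) IsOpen() bool {
	// Allow traffic again once the cool-down period has elapsed.
	if cb.state == "open" && time.Since(cb.lastFailure) > cb.timeout {
		cb.state = "closed"
		cb.failures = 0
	}
	return cb.state == "open"
}

func (cb *CircuitBreaker) RecordFailure() {
	cb.failures++
	cb.lastFailure = time.Now()
	if cb.failures >= cb.threshold {
		cb.state = "open"
	}
}

func (cb *CircuitBreaker) RecordSuccess() {
	cb.failures = 0
	cb.state = "closed"
}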
3. Use Azure Private Endpoints
// For enhanced security, use private endpoints
config := AzureOpenAIConfig{
Endpoint: "https://myopenai.privatelink.openai.azure.com",
// Configure via Azure networking
}
Next Steps
- OpenAI - Direct OpenAI integration
- Function Calling - Tool use
- Security - Enterprise security