Metrics Collection

Learn how to collect and export application metrics with the Rivaas metrics package

The Rivaas Metrics package provides OpenTelemetry-based metrics collection. It supports multiple exporters, including Prometheus, OTLP, and stdout, and enables observability best practices with minimal configuration.

Features

  • Multiple Providers: Prometheus, OTLP, and stdout exporters
  • Built-in HTTP Metrics: Request duration, count, active requests, and more
  • Custom Metrics: Support for counters, histograms, and gauges with error handling
  • Thread-Safe: All methods are safe for concurrent use
  • Context Support: All metrics methods accept context for cancellation
  • Structured Logging: Pluggable logger interface for error and warning messages
  • HTTP Middleware: Integration with any HTTP framework
  • Security: Automatic filtering of sensitive headers

Quick Start

With Prometheus:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"

    "rivaas.dev/metrics"
)

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
        metrics.WithServiceVersion("v1.0.0"),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    // Record custom metrics
    _ = recorder.IncrementCounter(ctx, "requests_total")
    
    // Prometheus metrics available at http://localhost:9090/metrics
}

With OTLP:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"

    "rivaas.dev/metrics"
)

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    recorder, err := metrics.New(
        metrics.WithOTLP("http://localhost:4318"),
        metrics.WithServiceName("my-api"),
        metrics.WithServiceVersion("v1.0.0"),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    // Metrics pushed to OTLP collector
    _ = recorder.IncrementCounter(ctx, "requests_total")
}

With stdout:

package main

import (
    "context"

    "rivaas.dev/metrics"
)

func main() {
    recorder := metrics.MustNew(
        metrics.WithStdout(),
        metrics.WithServiceName("my-api"),
    )

    ctx := context.Background()
    
    // Metrics printed to stdout
    _ = recorder.IncrementCounter(ctx, "requests_total")
}

How It Works

  • Providers determine where metrics are exported (Prometheus, OTLP, stdout)
  • Lifecycle management ensures proper initialization and graceful shutdown
  • HTTP middleware automatically collects request metrics
  • Custom metrics can be recorded with type-safe methods
  • Context support enables cancellation and request tracing

Learning Path

Follow these guides to master metrics collection with Rivaas:

  1. Installation - Get started with the metrics package
  2. Basic Usage - Learn the fundamentals of metrics collection
  3. Providers - Understand Prometheus, OTLP, and stdout exporters
  4. Configuration - Configure service metadata, histograms, and advanced options
  5. Custom Metrics - Create counters, histograms, and gauges
  6. Middleware - Integrate HTTP metrics with your application
  7. Testing - Test your metrics with provided utilities
  8. Examples - See real-world usage patterns

Next Steps

1 - Installation

How to install and set up the Rivaas metrics package

This guide covers installing the metrics package and verifying your setup.

Requirements

  • Go 1.25 or later
  • OpenTelemetry dependencies (automatically installed)

Installation

Install the metrics package using go get:

go get rivaas.dev/metrics

The package will automatically install its dependencies, including:

  • go.opentelemetry.io/otel - OpenTelemetry SDK
  • go.opentelemetry.io/otel/exporters/prometheus - Prometheus exporter
  • go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp - OTLP exporter
  • go.opentelemetry.io/otel/exporters/stdout/stdoutmetric - Stdout exporter

Verify Installation

Create a simple test file to verify the installation:

package main

import (
    "context"
    "fmt"
    "log"
    
    "rivaas.dev/metrics"
)

func main() {
    // Create a basic metrics recorder
    recorder, err := metrics.New(
        metrics.WithStdout(),
        metrics.WithServiceName("test-service"),
    )
    if err != nil {
        log.Fatalf("Failed to create recorder: %v", err)
    }
    
    // Start the recorder (optional for stdout, but good practice)
    if err := recorder.Start(context.Background()); err != nil {
        log.Fatalf("Failed to start recorder: %v", err)
    }
    defer recorder.Shutdown(context.Background())
    
    fmt.Println("Metrics package installed successfully!")
}

Run the test:

go run main.go

You should see output confirming the installation was successful.

Import Path

Import the metrics package in your code:

import "rivaas.dev/metrics"

Module Setup

If you’re starting a new project, initialize a Go module first:

go mod init your-project-name
go get rivaas.dev/metrics

Dependency Management

The metrics package uses Go modules for dependency management. After installation, your go.mod file will include:

require (
    rivaas.dev/metrics v0.1.0
    // OpenTelemetry dependencies added automatically
)

Run go mod tidy to clean up dependencies:

go mod tidy

Version Compatibility

The metrics package follows semantic versioning:

  • Stable API: The public API is stable and follows semantic versioning
  • Breaking Changes: Only introduced in major version updates
  • Go Version: Requires Go 1.25 or later

Check the releases page for the latest version.

Next Steps

Troubleshooting

Import Errors

If you see import errors:

go mod tidy
go mod download

Version Conflicts

If you have dependency conflicts with OpenTelemetry:

# Update to latest versions
go get -u rivaas.dev/metrics
go get -u go.opentelemetry.io/otel
go mod tidy

Build Errors

Ensure you’re using Go 1.25 or later:

go version

If you need to upgrade Go, visit golang.org/dl.

2 - Basic Usage

Learn the fundamentals of metrics collection with Rivaas

This guide covers the basic patterns for using the metrics package in your Go applications.

Creating a Metrics Recorder

The core of the metrics package is the Recorder type. Create a recorder by choosing a provider and configuring it:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"

    "rivaas.dev/metrics"
)

func main() {
    // Create context for application lifecycle
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create recorder with error handling
    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
        metrics.WithServiceVersion("v1.0.0"),
    )
    if err != nil {
        log.Fatalf("Failed to create recorder: %v", err)
    }
    
    // Start metrics server
    if err := recorder.Start(ctx); err != nil {
        log.Fatalf("Failed to start metrics: %v", err)
    }
    
    // Your application code here...
}

Using MustNew

For applications that should fail fast on configuration errors:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-api"),
)
// Panics if configuration is invalid

Lifecycle Management

Proper lifecycle management ensures metrics are properly initialized and flushed on shutdown.

Start and Shutdown

func main() {
    // Create lifecycle context
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
    )
    
    // Start with lifecycle context
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    // Ensure graceful shutdown
    defer func() {
        shutdownCtx, shutdownCancel := context.WithTimeout(
            context.Background(),
            5*time.Second,
        )
        defer shutdownCancel()
        
        if err := recorder.Shutdown(shutdownCtx); err != nil {
            log.Printf("Metrics shutdown error: %v", err)
        }
    }()
    
    // Your application code...
}

Why Start() is Important

Different providers require Start() for different reasons:

  • OTLP: Requires lifecycle context for network connections and graceful shutdown
  • Prometheus: Starts the HTTP metrics server
  • Stdout: Works without Start(), but calling it is harmless

Best Practice: Always call Start(ctx) with a lifecycle context, regardless of provider.

Force Flush

For push-based providers (OTLP, stdout), you can force immediate export of pending metrics:

// Before critical operation or deployment
if err := recorder.ForceFlush(ctx); err != nil {
    log.Printf("Failed to flush metrics: %v", err)
}

This is useful for:

  • Ensuring metrics are exported before deployment
  • Checkpointing during long-running operations
  • Guaranteeing metrics visibility before shutdown

Note: For Prometheus (pull-based), this is typically a no-op as metrics are collected on-demand.

Standalone Usage

Use the recorder directly without HTTP middleware:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"

    "go.opentelemetry.io/otel/attribute"
    "rivaas.dev/metrics"
)

func main() {
    // Create context for application lifecycle
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create metrics recorder
    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-service"),
    )
    
    // Start metrics server
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    defer recorder.Shutdown(context.Background())

    // Record custom metrics with error handling
    if err := recorder.RecordHistogram(ctx, "processing_duration", 1.5,
        attribute.String("operation", "create_user"),
    ); err != nil {
        log.Printf("metrics error: %v", err)
    }
    
    // Or fire-and-forget (ignore errors)
    _ = recorder.IncrementCounter(ctx, "requests_total",
        attribute.String("status", "success"),
    )
    
    _ = recorder.SetGauge(ctx, "active_connections", 42)
}

HTTP Integration

Integrate metrics with your HTTP server using middleware:

package main

import (
    "context"
    "log"
    "net/http"
    "os"
    "os/signal"
    "time"

    "rivaas.dev/metrics"
)

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create metrics recorder
    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    defer func() {
        shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer shutdownCancel()
        recorder.Shutdown(shutdownCtx)
    }()

    // Create HTTP handlers
    mux := http.NewServeMux()
    mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        w.Write([]byte(`{"message": "Hello"}`))
    })
    mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
    })

    // Wrap with metrics middleware
    handler := metrics.Middleware(recorder,
        metrics.WithExcludePaths("/health", "/metrics"),
    )(mux)

    // Start HTTP server
    server := &http.Server{
        Addr:    ":8080",
        Handler: handler,
    }
    
    go func() {
        if err := server.ListenAndServe(); err != http.ErrServerClosed {
            log.Fatal(err)
        }
    }()
    
    // Wait for interrupt
    <-ctx.Done()
    
    shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer shutdownCancel()
    server.Shutdown(shutdownCtx)
}

Built-in Metrics

When using the HTTP middleware, the following metrics are automatically collected:

  • http_request_duration_seconds (Histogram): Request duration distribution
  • http_requests_total (Counter): Total request count by status, method, and path
  • http_requests_active (Gauge): Current active requests
  • http_request_size_bytes (Histogram): Request body size distribution
  • http_response_size_bytes (Histogram): Response body size distribution
  • http_errors_total (Counter): HTTP errors by status code

Viewing Metrics

With Prometheus provider, metrics are available at the configured endpoint:

curl http://localhost:9090/metrics

Example output:

# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/",status="200"} 42

# HELP http_request_duration_seconds HTTP request duration
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{method="GET",path="/",le="0.005"} 10
http_request_duration_seconds_bucket{method="GET",path="/",le="0.01"} 25
...

Error Handling

The metrics package provides two patterns for error handling:

Check Errors

For critical metrics where errors matter:

if err := recorder.IncrementCounter(ctx, "critical_operations",
    attribute.String("type", "payment"),
); err != nil {
    log.Printf("Failed to record metric: %v", err)
    // Handle error appropriately
}

Fire-and-Forget

For best-effort metrics where errors can be ignored:

// Ignore errors - metrics are best-effort
_ = recorder.IncrementCounter(ctx, "page_views")
_ = recorder.RecordHistogram(ctx, "query_duration", duration)

Best Practice: Use fire-and-forget for most metrics to avoid impacting application performance.

Thread Safety

All Recorder methods are thread-safe and can be called concurrently:

// Safe to call from multiple goroutines
go func() {
    _ = recorder.IncrementCounter(ctx, "worker_1")
}()

go func() {
    _ = recorder.IncrementCounter(ctx, "worker_2")
}()

Context Usage

All metrics methods accept a context for cancellation and tracing:

// Use request context for tracing
func handleRequest(w http.ResponseWriter, r *http.Request) {
    // Metrics will inherit trace context from request
    _ = recorder.IncrementCounter(r.Context(), "requests_processed")
}

// Use timeout context
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = recorder.RecordHistogram(ctx, "operation_duration", 1.5)

Common Patterns

Service Initialization

type Service struct {
    recorder *metrics.Recorder
}

func NewService() (*Service, error) {
    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-service"),
    )
    if err != nil {
        return nil, err
    }
    
    return &Service{recorder: recorder}, nil
}

func (s *Service) Start(ctx context.Context) error {
    return s.recorder.Start(ctx)
}

func (s *Service) Shutdown(ctx context.Context) error {
    return s.recorder.Shutdown(ctx)
}

Dependency Injection

type Handler struct {
    recorder *metrics.Recorder
}

func NewHandler(recorder *metrics.Recorder) *Handler {
    return &Handler{recorder: recorder}
}

func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    _ = h.recorder.IncrementCounter(r.Context(), "handler_calls")
    // Handle request...
}

Next Steps

3 - Metrics Providers

Understand Prometheus, OTLP, and stdout metrics exporters

The metrics package supports three provider types for exporting metrics. Each provider has different characteristics and use cases.

Provider Overview

  • Prometheus (pull-based, HTTP server): production monitoring
  • OTLP (push-based, HTTP client): OpenTelemetry collectors
  • Stdout (push-based, console output): development and debugging

Important: Only one provider can be used per Recorder instance. Using multiple provider options will result in a validation error.

Basic Configuration

Prometheus:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-service"),
)

OTLP:

recorder := metrics.MustNew(
    metrics.WithOTLP("http://localhost:4318"),
    metrics.WithServiceName("my-service"),
    metrics.WithServiceVersion("v1.0.0"),
)

Stdout:

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("my-service"),
)

Prometheus Provider

Initialization Behavior

The Prometheus provider:

  1. Initializes immediately in New()
  2. Starts the HTTP server when Start(ctx) is called
  3. Metrics are available immediately after Start() returns

recorder, err := metrics.New(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-api"),
)
if err != nil {
    log.Fatal(err)
}

// HTTP server starts here
if err := recorder.Start(ctx); err != nil {
    log.Fatal(err)
}

// Metrics endpoint is now available at http://localhost:9090/metrics

Port Configuration

By default, if the requested port is unavailable, the server automatically finds the next available port (up to 100 ports searched).

Strict Port Mode

For production, use WithStrictPort() to ensure the exact port is used:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithStrictPort(),  // Fail if port 9090 is unavailable
    metrics.WithServiceName("my-service"),
)

Production Best Practice: Always use WithStrictPort() to avoid port conflicts.

Finding the Actual Port

If not using strict mode, check which port was actually used:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-service"),
)

if err := recorder.Start(ctx); err != nil {
    log.Fatal(err)
}

// Get the actual listen address (e.g., ":9090")
address := recorder.ServerAddress()
log.Printf("Metrics available at: http://localhost%s/metrics", address)

Manual Server Management

Disable automatic server startup and serve metrics on your own HTTP server:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServerDisabled(),
    metrics.WithServiceName("my-service"),
)

// Get the metrics handler
handler, err := recorder.Handler()
if err != nil {
    log.Fatalf("Failed to get metrics handler: %v", err)
}

// Serve on your own server
mux := http.NewServeMux()
mux.Handle("/metrics", handler)
mux.HandleFunc("/health", healthHandler)

http.ListenAndServe(":8080", mux)

Use Case: Serve metrics on the same port as your application server.

Viewing Metrics

Access metrics via HTTP:

curl http://localhost:9090/metrics

Example output:

# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/api/users",status="200"} 1543

# HELP http_request_duration_seconds HTTP request duration
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{method="GET",path="/api/users",le="0.005"} 245
http_request_duration_seconds_bucket{method="GET",path="/api/users",le="0.01"} 892
http_request_duration_seconds_sum{method="GET",path="/api/users"} 15.432
http_request_duration_seconds_count{method="GET",path="/api/users"} 1543

Prometheus Scrape Configuration

Configure Prometheus to scrape your service:

# prometheus.yml
scrape_configs:
  - job_name: 'my-service'
    static_configs:
      - targets: ['localhost:9090']
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics

OTLP Provider

The OTLP (OpenTelemetry Protocol) provider pushes metrics to an OpenTelemetry collector.

Basic Configuration

recorder := metrics.MustNew(
    metrics.WithOTLP("http://localhost:4318"),
    metrics.WithServiceName("my-service"),
    metrics.WithServiceVersion("v1.0.0"),
)

Parameter:

  • Endpoint: OTLP collector HTTP endpoint (e.g., http://localhost:4318)

Initialization Behavior

The OTLP provider:

  1. Defers initialization until Start(ctx) is called
  2. Uses the lifecycle context for network connections
  3. Enables graceful shutdown of connections

Critical: You must call Start(ctx) before recording metrics, or metrics will be silently dropped.

recorder, err := metrics.New(
    metrics.WithOTLP("http://localhost:4318"),
    metrics.WithServiceName("my-service"),
)
if err != nil {
    log.Fatal(err)
}

// OTLP connection established here
if err := recorder.Start(ctx); err != nil {
    log.Fatal(err)
}

// Metrics are now exported to collector
_ = recorder.IncrementCounter(ctx, "requests_total")

Why Deferred Initialization?

OTLP initialization is deferred to:

  • Use the application lifecycle context for network connections
  • Enable proper graceful shutdown
  • Avoid establishing connections during configuration

Export Interval

OTLP exports metrics periodically (default: 30 seconds):

recorder := metrics.MustNew(
    metrics.WithOTLP("http://localhost:4318"),
    metrics.WithExportInterval(10 * time.Second),  // Export every 10s
    metrics.WithServiceName("my-service"),
)

Force Flush

Force immediate export before the next interval:

// Ensure all metrics are sent immediately
if err := recorder.ForceFlush(ctx); err != nil {
    log.Printf("Failed to flush metrics: %v", err)
}

Use cases:

  • Before deployment or shutdown
  • Checkpointing during long operations
  • Guaranteeing metric visibility

OpenTelemetry Collector Setup

Example collector configuration:

# otel-collector-config.yaml
receivers:
  otlp:
    protocols:
      http:
        endpoint: 0.0.0.0:4318

exporters:
  prometheus:
    endpoint: "0.0.0.0:8889"
  logging:
    loglevel: debug

service:
  pipelines:
    metrics:
      receivers: [otlp]
      exporters: [prometheus, logging]

Run the collector:

otel-collector --config=otel-collector-config.yaml

Stdout Provider

The stdout provider prints metrics to the console. Ideal for development and debugging.

Basic Configuration

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("my-service"),
)

Initialization Behavior

The stdout provider:

  1. Initializes immediately in New()
  2. Works without calling Start() (but calling it is harmless)
  3. Prints metrics to stdout periodically

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("my-service"),
)

ctx := context.Background()

// Optional: Start() is a no-op for stdout, but calling it is harmless
_ = recorder.Start(ctx)

// Metrics are printed to stdout
_ = recorder.IncrementCounter(ctx, "requests_total")

Export Interval

Configure how often metrics are printed (default: 30 seconds):

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithExportInterval(5 * time.Second),  // Print every 5s
    metrics.WithServiceName("my-service"),
)

Example Output

{
  "Resource": {
    "service.name": "my-service",
    "service.version": "v1.0.0"
  },
  "ScopeMetrics": [
    {
      "Scope": {
        "Name": "rivaas.dev/metrics"
      },
      "Metrics": [
        {
          "Name": "http_requests_total",
          "Data": {
            "DataPoints": [
              {
                "Attributes": {
                  "method": "GET",
                  "path": "/api/users",
                  "status": "200"
                },
                "Value": 42
              }
            ]
          }
        }
      ]
    }
  ]
}

Use Cases

  • Local development
  • Debugging metric collection
  • CI/CD pipeline validation
  • Unit tests (with TestingRecorder)

Provider Comparison

Prometheus

Pros:

  • Industry standard for metrics
  • Rich ecosystem (dashboards, alerting)
  • Simple pull-based model
  • No external dependencies

Cons:

  • Requires network port
  • Pull-based (can’t push on-demand)
  • Requires Prometheus server setup

Best For: Production services, microservices, containerized applications

OTLP

Pros:

  • Vendor-neutral standard
  • Flexible routing via collector
  • Push-based (immediate export)
  • Integrates with OpenTelemetry tracing

Cons:

  • Requires collector setup
  • More complex infrastructure
  • Network dependency

Best For: OpenTelemetry-native applications, multi-vendor observability, cloud environments

Stdout

Pros:

  • No external dependencies
  • Immediate visibility
  • Simple setup
  • Works everywhere

Cons:

  • Not for production
  • No aggregation or visualization
  • High output volume
  • No persistence

Best For: Development, debugging, testing, CI/CD pipelines

Choosing a Provider

Development

Use stdout for quick feedback:

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("dev-service"),
)

Production (Simple)

Use Prometheus for straightforward monitoring:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithStrictPort(),
    metrics.WithServiceName("my-service"),
    metrics.WithServiceVersion("v1.2.3"),
)

Production (OpenTelemetry)

Use OTLP for OpenTelemetry-native environments:

recorder := metrics.MustNew(
    metrics.WithOTLP(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")),
    metrics.WithServiceName("my-service"),
    metrics.WithServiceVersion(version),
)

Testing

Use testing utilities (based on stdout):

func TestHandler(t *testing.T) {
    recorder := metrics.TestingRecorder(t, "test-service")
    // Test code...
}

Multiple Recorder Instances

You can create multiple recorder instances with different providers:

// Development recorder (stdout)
devRecorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("dev-metrics"),
)

// Production recorder (Prometheus)
prodRecorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("prod-metrics"),
)

// Both work independently without conflicts

Note: By default, recorders do NOT set the global OpenTelemetry meter provider. See Configuration for details.

Next Steps

4 - Configuration

Configure service metadata, histograms, and advanced options

This guide covers all configuration options for the metrics package beyond basic provider setup.

Service Configuration

Service metadata helps identify your application in metrics dashboards and monitoring systems.

Service Name

Required metadata that identifies your service:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-api"),
)

The service name appears as a resource attribute in all metrics:

# Service name in Prometheus labels
http_requests_total{service_name="my-api",method="GET"} 42

Best Practices:

  • Use lowercase with hyphens: user-service, payment-api.
  • Be consistent across services.
  • Avoid changing names in production.

Service Version

Optional version metadata for tracking deployments:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-api"),
    metrics.WithServiceVersion("v1.2.3"),
)

Use cases:

  • Track metrics across deployments.
  • Compare performance between versions.
  • Debug version-specific issues.

Best Practices:

  • Use semantic versioning: v1.2.3.
  • Include in all production deployments.
  • Automate from CI/CD pipelines:

var Version = "dev" // Set by build flags

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-api"),
    metrics.WithServiceVersion(Version),
)

Prometheus-Specific Options

Strict Port Mode

Fail immediately if the configured port is unavailable:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithStrictPort(),  // Production recommendation
    metrics.WithServiceName("my-api"),
)

Default Behavior: If port is unavailable, automatically searches up to 100 ports.

With Strict Mode: Fails with error if exact port is unavailable.

Production Best Practice: Always use WithStrictPort() to ensure predictable port allocation.

Server Disabled

Disable automatic metrics server and manage it yourself:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServerDisabled(),
    metrics.WithServiceName("my-api"),
)

// Get the metrics handler
handler, err := recorder.Handler()
if err != nil {
    log.Fatalf("Failed to get handler: %v", err)
}

// Serve on your own HTTP server
http.Handle("/metrics", handler)
http.ListenAndServe(":8080", nil)

Use Cases:

  • Serve metrics on same port as application
  • Custom server configuration
  • Integration with existing HTTP servers

Note: Handler() only works with the Prometheus provider.

Histogram Bucket Configuration

Customize histogram bucket boundaries for better resolution in specific ranges.

Duration Buckets

Configure buckets for duration metrics (in seconds):

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithDurationBuckets(0.001, 0.01, 0.1, 0.5, 1, 5, 10),
    metrics.WithServiceName("my-api"),
)

Default Buckets: 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10 seconds

When to Customize:

  • Most requests < 100ms: Use finer buckets at low end
  • Slow operations (seconds): Use coarser buckets
  • Specific SLA requirements

Examples:

// Fast API (most requests < 100ms)
metrics.WithDurationBuckets(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1)

// Slow batch operations (seconds to minutes)
metrics.WithDurationBuckets(1, 5, 10, 30, 60, 120, 300, 600)

// Mixed workload
metrics.WithDurationBuckets(0.01, 0.1, 0.5, 1, 5, 10, 30, 60)

Size Buckets

Configure buckets for size metrics (in bytes):

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithSizeBuckets(100, 1000, 10000, 100000, 1000000),
    metrics.WithServiceName("my-api"),
)

Default Buckets: 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576 bytes

When to Customize:

  • Small payloads (< 10KB): Use finer buckets
  • Large payloads (MB+): Use coarser buckets
  • Specific size requirements

Examples:

// Small JSON API (< 10KB)
metrics.WithSizeBuckets(100, 500, 1000, 5000, 10000, 50000)

// File uploads (KB to MB)
metrics.WithSizeBuckets(1024, 10240, 102400, 1048576, 10485760, 104857600)

// Mixed sizes
metrics.WithSizeBuckets(100, 1000, 10000, 100000, 1000000, 10000000)

Impact on Cardinality

Important: More buckets = higher metric cardinality = more storage.

// 6 bucket boundaries (lower cardinality)
metrics.WithDurationBuckets(0.01, 0.1, 0.5, 1, 5, 10)

// 15 bucket boundaries (higher cardinality, better resolution)
metrics.WithDurationBuckets(
    0.001, 0.005, 0.01, 0.025, 0.05,
    0.1, 0.25, 0.5, 1, 2.5,
    5, 10, 30, 60, 120,
)

Best Practice: Use the minimum number of buckets that provide sufficient resolution for your use case.

Advanced Options

Logging

Configure how internal events are logged:

import "log/slog"

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithLogger(slog.Default()),
    metrics.WithServiceName("my-api"),
)

The logger receives:

  • Initialization events
  • Error messages (metric creation failures, etc.)
  • Warning messages (port conflicts, etc.)

Example Output:

INFO metrics server started on :9090
WARN custom metric limit reached (1000/1000)
ERROR failed to create metric: invalid name "__reserved"

Event Handler

For advanced use cases, handle events programmatically:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithEventHandler(func(e metrics.Event) {
        switch e.Type {
        case metrics.EventError:
            // Send to error tracking
            sentry.CaptureMessage(e.Message)
        case metrics.EventWarning:
            // Log warnings
            log.Printf("WARN: %s", e.Message)
        case metrics.EventInfo:
            // Log info
            log.Printf("INFO: %s", e.Message)
        }
    }),
    metrics.WithServiceName("my-api"),
)

Event Types:

  • EventInfo - Informational messages
  • EventWarning - Non-critical warnings
  • EventError - Error conditions

Use Cases:

  • Send errors to external monitoring
  • Custom logging formats
  • Metric collection about metric collection

Custom Metrics Limit

Set maximum number of custom metrics that can be created:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithMaxCustomMetrics(5000),  // Default: 1000
    metrics.WithServiceName("my-api"),
)

Why Limit Metrics?

  • Prevent unbounded cardinality
  • Protect against memory exhaustion
  • Enforce metric discipline

Built-in Metrics Don’t Count: HTTP metrics are always available.

Monitor Usage:

count := recorder.CustomMetricCount()
log.Printf("Custom metrics: %d/%d", count, maxLimit)

What Happens at Limit?

  • New metric creation returns an error
  • Existing metrics continue to work
  • Error is logged via logger/event handler
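
A minimal sketch of reacting to this, assuming only the behavior described above (the recording call returns an error, while previously created metric names keep working); the metric names here are illustrative:

if err := recorder.IncrementCounter(ctx, "new_feature_events_total"); err != nil {
    // Creating a new metric name failed, e.g. because the custom-metric limit was reached.
    log.Printf("could not record metric: %v", err)

    // Fall back to a metric name that was registered earlier.
    _ = recorder.IncrementCounter(ctx, "errors_total",
        attribute.String("type", "metric_limit"),
    )
}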

Export Interval

Configure how often metrics are exported (OTLP and stdout only):

recorder := metrics.MustNew(
    metrics.WithOTLP("http://localhost:4318"),
    metrics.WithExportInterval(10 * time.Second),  // Default: 30s
    metrics.WithServiceName("my-api"),
)

Applies To: OTLP (push), Stdout (push)

Does NOT Apply To: Prometheus (pull-based, scraped on-demand)

Trade-offs:

  • Shorter interval: More timely data, higher overhead
  • Longer interval: Lower overhead, delayed visibility

Best Practices:

  • Development: 5-10 seconds
  • Production: 15-30 seconds
  • High-volume: 30-60 seconds

Global Meter Provider

By default, the metrics package does NOT set the global OpenTelemetry meter provider.

Multiple independent recorder instances work without conflicts:

// Create independent recorders (no global state!)
recorder1 := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("service-1"),
)

recorder2 := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("service-2"),
)

// Both work independently without conflicts

Opt-in to Global Registration

Explicitly set the global meter provider:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServiceName("my-service"),
    metrics.WithGlobalMeterProvider(),  // Explicit opt-in
)

When to Use:

  • OpenTelemetry instrumentation libraries need global provider
  • Third-party libraries expect otel.GetMeterProvider()
  • Centralized metrics collection across libraries

When NOT to Use:

  • Multiple services in same process (e.g., tests)
  • Avoid global state
  • Custom meter provider management
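
For example, once the recorder registers itself globally, code that only knows the OpenTelemetry API can create instruments through otel.Meter. A minimal sketch using the standard OpenTelemetry Go API; the instrumentation scope and metric name are illustrative:

package main

import (
    "context"
    "log"

    "go.opentelemetry.io/otel"

    "rivaas.dev/metrics"
)

func main() {
    ctx := context.Background()

    // Register the recorder as the global OpenTelemetry meter provider.
    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-service"),
        metrics.WithGlobalMeterProvider(),
    )
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(ctx)

    // Library code that never imports rivaas.dev/metrics can still emit
    // metrics through the global provider.
    meter := otel.Meter("github.com/example/somelib") // hypothetical scope name
    counter, err := meter.Int64Counter("somelib_operations_total")
    if err != nil {
        log.Fatal(err)
    }
    counter.Add(ctx, 1)
}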

Configuration Examples

Production API

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithStrictPort(),
    metrics.WithServiceName("payment-api"),
    metrics.WithServiceVersion(version),
    metrics.WithLogger(slog.Default()),
    metrics.WithDurationBuckets(0.01, 0.1, 0.5, 1, 5, 10),
    metrics.WithMaxCustomMetrics(2000),
)

Development

recorder := metrics.MustNew(
    metrics.WithStdout(),
    metrics.WithServiceName("dev-api"),
    metrics.WithExportInterval(5 * time.Second),
)

OpenTelemetry Native

recorder := metrics.MustNew(
    metrics.WithOTLP(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")),
    metrics.WithServiceName(os.Getenv("SERVICE_NAME")),
    metrics.WithServiceVersion(os.Getenv("SERVICE_VERSION")),
    metrics.WithExportInterval(15 * time.Second),
    metrics.WithLogger(slog.Default()),
)

Embedded Metrics Server

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithServerDisabled(),
    metrics.WithServiceName("api"),
)

handler, _ := recorder.Handler()

// Serve on application port
mux := http.NewServeMux()
mux.Handle("/metrics", handler)
mux.HandleFunc("/", appHandler)
http.ListenAndServe(":8080", mux)

Configuration from Environment

Load configuration from environment variables:

func configFromEnv() []metrics.Option {
    opts := []metrics.Option{
        metrics.WithServiceName(os.Getenv("SERVICE_NAME")),
    }
    
    if version := os.Getenv("SERVICE_VERSION"); version != "" {
        opts = append(opts, metrics.WithServiceVersion(version))
    }
    
    switch os.Getenv("METRICS_PROVIDER") {
    case "prometheus":
        addr := os.Getenv("METRICS_ADDR")
        if addr == "" {
            addr = ":9090"
        }
        opts = append(opts, 
            metrics.WithPrometheus(addr, "/metrics"),
            metrics.WithStrictPort(),
        )
    case "otlp":
        endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
        opts = append(opts, metrics.WithOTLP(endpoint))
    default:
        opts = append(opts, metrics.WithStdout())
    }
    
    return opts
}

recorder := metrics.MustNew(configFromEnv()...)

Next Steps

5 - Custom Metrics

Create counters, histograms, and gauges with proper naming conventions

This guide covers recording custom metrics beyond the built-in HTTP metrics.

Metric Types

The metrics package supports three metric types from OpenTelemetry:

  • Counter: Monotonically increasing value. Use for event counts (e.g., requests processed, errors occurred).
  • Histogram: Distribution of values. Use for durations and sizes (e.g., query time, response size).
  • Gauge: Point-in-time value. Use for current state (e.g., active connections, queue depth).

Counters

Counters track cumulative totals that only increase.

Increment Counter

Add 1 to a counter:

// With error handling
if err := recorder.IncrementCounter(ctx, "orders_processed_total",
    attribute.String("status", "success"),
    attribute.String("payment_method", "card"),
); err != nil {
    log.Printf("Failed to record metric: %v", err)
}

// Fire-and-forget (ignore errors)
_ = recorder.IncrementCounter(ctx, "page_views_total")

Add to Counter

Add a specific value to a counter:

// Add multiple items (value is int64)
_ = recorder.AddCounter(ctx, "bytes_processed_total", 1024,
    attribute.String("direction", "inbound"),
)

// Batch processing
itemsProcessed := int64(50)
_ = recorder.AddCounter(ctx, "items_processed_total", itemsProcessed,
    attribute.String("batch_id", batchID),
)

Important: Counter values must be non-negative integers (int64).

Counter Examples

// Simple event counting
_ = recorder.IncrementCounter(ctx, "user_registrations_total")

// With attributes
_ = recorder.IncrementCounter(ctx, "api_calls_total",
    attribute.String("endpoint", "/api/users"),
    attribute.String("method", "POST"),
    attribute.Int("status_code", 201),
)

// Tracking errors
_ = recorder.IncrementCounter(ctx, "errors_total",
    attribute.String("type", "validation"),
    attribute.String("field", "email"),
)

// Data volume
_ = recorder.AddCounter(ctx, "data_transferred_bytes", int64(len(data)),
    attribute.String("protocol", "https"),
    attribute.String("direction", "upload"),
)

Histograms

Histograms record distributions of values, useful for durations and sizes.

Record Histogram

startTime := time.Now()
// ... perform operation ...
duration := time.Since(startTime).Seconds()

_ = recorder.RecordHistogram(ctx, "operation_duration_seconds", duration,
    attribute.String("operation", "create_user"),
    attribute.String("status", "success"),
)

Histogram Examples

// Request duration
start := time.Now()
result, err := processRequest(ctx, req)
duration := time.Since(start).Seconds()

_ = recorder.RecordHistogram(ctx, "request_processing_duration_seconds", duration,
    attribute.String("operation", "process_request"),
    attribute.Bool("cache_hit", result.FromCache),
)

// Database query time
start = time.Now()
rows, err := db.QueryContext(ctx, query)
duration = time.Since(start).Seconds()

_ = recorder.RecordHistogram(ctx, "db_query_duration_seconds", duration,
    attribute.String("query_type", "select"),
    attribute.String("table", "users"),
)

// Response size
responseSize := len(responseData)
_ = recorder.RecordHistogram(ctx, "response_size_bytes", float64(responseSize),
    attribute.String("endpoint", "/api/users"),
    attribute.String("format", "json"),
)

// Payment amount
_ = recorder.RecordHistogram(ctx, "payment_amount_usd", amount,
    attribute.String("currency", "USD"),
    attribute.String("payment_method", "credit_card"),
)

Histogram Bucket Configuration

Customize bucket boundaries for better resolution (see Configuration):

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    // Fine-grained buckets for fast operations
    metrics.WithDurationBuckets(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5),
    metrics.WithServiceName("my-api"),
)

Gauges

Gauges represent point-in-time values that can increase or decrease.

Set Gauge

// Current connections
activeConnections := connectionPool.Active()
_ = recorder.SetGauge(ctx, "active_connections", float64(activeConnections),
    attribute.String("pool", "database"),
)

// Queue depth
queueSize := queue.Len()
_ = recorder.SetGauge(ctx, "queue_depth", float64(queueSize),
    attribute.String("queue", "tasks"),
)

Gauge Examples

// Memory usage
var m runtime.MemStats
runtime.ReadMemStats(&m)
_ = recorder.SetGauge(ctx, "memory_allocated_bytes", float64(m.Alloc))

// Goroutine count
_ = recorder.SetGauge(ctx, "goroutines_active", float64(runtime.NumGoroutine()))

// Cache size
cacheSize := cache.Len()
_ = recorder.SetGauge(ctx, "cache_entries", float64(cacheSize),
    attribute.String("cache", "users"),
)

// Connection pool
_ = recorder.SetGauge(ctx, "db_connections_active", float64(pool.Stats().InUse),
    attribute.String("database", "postgres"),
)

// Worker pool
_ = recorder.SetGauge(ctx, "worker_pool_idle", float64(workerPool.IdleCount()),
    attribute.String("pool", "background_jobs"),
)

// Temperature (example from IoT)
_ = recorder.SetGauge(ctx, "sensor_temperature_celsius", temperature,
    attribute.String("sensor_id", sensorID),
    attribute.String("location", "datacenter-1"),
)

Gauge Best Practices

DO:

  • Record current state: active connections, queue depth
  • Update regularly with latest values
  • Use for resource utilization metrics

DON’T:

  • Use for cumulative counts (use Counter instead)
  • Forget to update when value changes
  • Use for values that only increase (use Counter)
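
Because a gauge only reflects the last value written, a common pattern is to refresh it on a fixed interval from a background goroutine, as sketched below (the gauge names are illustrative; the pool statistics come from database/sql):

// reportPoolStats refreshes connection-pool gauges until ctx is cancelled.
func reportPoolStats(ctx context.Context, recorder *metrics.Recorder, db *sql.DB) {
    ticker := time.NewTicker(15 * time.Second)
    defer ticker.Stop()

    for {
        select {
        case <-ctx.Done():
            return
        case <-ticker.C:
            stats := db.Stats()
            _ = recorder.SetGauge(ctx, "db_connections_in_use", float64(stats.InUse),
                attribute.String("database", "postgres"),
            )
            _ = recorder.SetGauge(ctx, "db_connections_idle", float64(stats.Idle),
                attribute.String("database", "postgres"),
            )
        }
    }
}

Start it alongside your application, for example with go reportPoolStats(ctx, recorder, db).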

Metric Naming Conventions

Follow OpenTelemetry and Prometheus naming conventions for consistent metrics.

Valid Metric Names

Metric names must:

  • Start with a letter (a-z, A-Z)
  • Contain only alphanumeric, underscores, dots, hyphens
  • Maximum 255 characters
  • Not use reserved prefixes

Valid Examples:

_ = recorder.IncrementCounter(ctx, "orders_total")
_ = recorder.RecordHistogram(ctx, "processing_duration_seconds", 1.5)
_ = recorder.SetGauge(ctx, "active_users", 42)
_ = recorder.IncrementCounter(ctx, "api.v1.requests_total")
_ = recorder.RecordHistogram(ctx, "payment-processing-time", 2.0)

Invalid Metric Names

These will return an error:

// Reserved prefix: __
recorder.IncrementCounter(ctx, "__internal_metric")

// Reserved prefix: http_
recorder.RecordHistogram(ctx, "http_custom_duration", 1.0)

// Reserved prefix: router_
recorder.SetGauge(ctx, "router_custom_gauge", 10)

// Starts with number
recorder.IncrementCounter(ctx, "1st_metric")

// Invalid characters
recorder.IncrementCounter(ctx, "my metric!")  // Space and !
recorder.IncrementCounter(ctx, "metric@count")  // @ symbol

Reserved Prefixes

These prefixes are reserved for built-in metrics:

  • __ - Prometheus internal metrics
  • http_ - Built-in HTTP metrics
  • router_ - Built-in router metrics

Naming Best Practices

Units in Name:

// Good - includes unit
_ = recorder.RecordHistogram(ctx, "processing_duration_seconds", 1.5)
_ = recorder.RecordHistogram(ctx, "response_size_bytes", 1024)
_ = recorder.SetGauge(ctx, "temperature_celsius", 25.5)

// Bad - no unit
_ = recorder.RecordHistogram(ctx, "processing_duration", 1.5)
_ = recorder.RecordHistogram(ctx, "response_size", 1024)

Counter Suffix:

// Good - ends with _total
_ = recorder.IncrementCounter(ctx, "requests_total")
_ = recorder.IncrementCounter(ctx, "errors_total")
_ = recorder.AddCounter(ctx, "bytes_processed_total", 1024)

// Acceptable - clear it's a count
_ = recorder.IncrementCounter(ctx, "request_count")

// Bad - unclear
_ = recorder.IncrementCounter(ctx, "requests")

Descriptive Names:

// Good - clear and specific
_ = recorder.RecordHistogram(ctx, "db_query_duration_seconds", 0.15)
_ = recorder.IncrementCounter(ctx, "payment_failures_total")
_ = recorder.SetGauge(ctx, "redis_connections_active", 10)

// Bad - too generic
_ = recorder.RecordHistogram(ctx, "duration", 0.15)
_ = recorder.IncrementCounter(ctx, "failures")
_ = recorder.SetGauge(ctx, "connections", 10)

Consistent Style:

// Good - consistent snake_case
_ = recorder.IncrementCounter(ctx, "user_registrations_total")
_ = recorder.IncrementCounter(ctx, "order_completions_total")

// Avoid mixing styles
_ = recorder.IncrementCounter(ctx, "userRegistrations")  // camelCase
_ = recorder.IncrementCounter(ctx, "order-completions")  // kebab-case

Attributes (Labels)

Attributes add dimensions to metrics for filtering and grouping.

Using Attributes

import "go.opentelemetry.io/otel/attribute"

_ = recorder.IncrementCounter(ctx, "requests_total",
    attribute.String("method", "GET"),
    attribute.String("path", "/api/users"),
    attribute.Int("status_code", 200),
)

Attribute Types

// String
attribute.String("status", "success")
attribute.String("region", "us-east-1")

// Integer
attribute.Int("status_code", 200)
attribute.Int("retry_count", 3)

// Boolean
attribute.Bool("cache_hit", true)
attribute.Bool("authenticated", false)

// Float
attribute.Float64("error_rate", 0.05)

Attribute Best Practices

Keep Cardinality Low:

// Good - low cardinality
attribute.String("status", "success")  // success, error, timeout
attribute.String("method", "GET")      // GET, POST, PUT, DELETE

// Bad - high cardinality (unbounded)
attribute.String("user_id", userID)         // Millions of unique values
attribute.String("request_id", requestID)   // Unique per request
attribute.String("timestamp", time.Now().String())  // Always unique

Use Consistent Names:

// Good - consistent across metrics
attribute.String("status", "success")
attribute.String("method", "GET")
attribute.String("region", "us-east-1")

// Bad - inconsistent
attribute.String("status", "success")
attribute.String("http_method", "GET")  // Should be "method"
attribute.String("aws_region", "us-east-1")  // Should be "region"

Limit Attribute Count:

// Good - focused attributes
_ = recorder.IncrementCounter(ctx, "requests_total",
    attribute.String("method", "GET"),
    attribute.String("status", "success"),
)

// Bad - too many attributes
_ = recorder.IncrementCounter(ctx, "requests_total",
    attribute.String("method", "GET"),
    attribute.String("status", "success"),
    attribute.String("user_agent", ua),
    attribute.String("ip_address", ip),
    attribute.String("country", country),
    attribute.String("device", device),
    // ... creates explosion of metric combinations
)

Monitoring Custom Metrics

Track how many custom metrics have been created:

count := recorder.CustomMetricCount()
log.Printf("Custom metrics created: %d/%d", count, maxLimit)

// Expose as a metric
_ = recorder.SetGauge(ctx, "custom_metrics_count", float64(count))

Custom Metric Limit

Default limit: 1000 custom metrics

Increase the limit:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    metrics.WithMaxCustomMetrics(5000),
    metrics.WithServiceName("my-api"),
)

What Counts as Custom Metric?

Counts toward limit:

  • Each unique metric name created with IncrementCounter, AddCounter, RecordHistogram, or SetGauge

Does NOT count:

  • Built-in HTTP metrics (http_requests_total, etc.)
  • Different attribute combinations of same metric name
  • Re-recording same metric name

Example:

// Creates 1 custom metric
_ = recorder.IncrementCounter(ctx, "orders_total")
_ = recorder.IncrementCounter(ctx, "orders_total", attribute.String("status", "success"))
_ = recorder.IncrementCounter(ctx, "orders_total", attribute.String("status", "failed"))

// Creates 2 more custom metrics (total: 3)
_ = recorder.IncrementCounter(ctx, "payments_total")
_ = recorder.RecordHistogram(ctx, "order_duration_seconds", 1.5)

Error Handling

All metric methods return an error. Choose your handling strategy:

Check Errors (Critical Metrics)

if err := recorder.IncrementCounter(ctx, "payment_processed_total",
    attribute.String("method", "credit_card"),
); err != nil {
    log.Printf("Failed to record payment metric: %v", err)
    // Alert or handle appropriately
}

Fire-and-Forget (Best Effort)

// Most metrics - don't impact application performance
_ = recorder.IncrementCounter(ctx, "page_views_total")
_ = recorder.RecordHistogram(ctx, "render_time_seconds", duration)

Common Errors

  • Invalid name: Violates naming rules
  • Reserved prefix: Uses __, http_, or router_
  • Limit reached: Custom metric limit exceeded
  • Provider not started: OTLP provider not initialized

Built-in Metrics

The package automatically collects these HTTP metrics (when using middleware):

  • http_request_duration_seconds (Histogram): Request duration distribution
  • http_requests_total (Counter): Total requests by method, path, status
  • http_requests_active (Gauge): Currently active requests
  • http_request_size_bytes (Histogram): Request body size distribution
  • http_response_size_bytes (Histogram): Response body size distribution
  • http_errors_total (Counter): HTTP errors by status code
  • custom_metric_failures_total (Counter): Failed custom metric creations

Note: Built-in metrics don’t count toward the custom metrics limit.

Next Steps

6 - HTTP Middleware

Integrate automatic HTTP metrics collection with middleware

This guide covers using the metrics middleware to automatically collect HTTP metrics.

Overview

The metrics middleware automatically records metrics for HTTP requests:

  • Request duration as histogram.
  • Request count as counter.
  • Active requests as gauge.
  • Request and response sizes as histograms.
  • Error counts as counter.

Basic Usage

Wrap your HTTP handler with the metrics middleware:

package main

import (
    "context"
    "log"
    "net/http"

    "rivaas.dev/metrics"
)

func main() {
    // Create recorder
    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
    )

    // Start the Prometheus metrics server and ensure a clean shutdown
    if err := recorder.Start(context.Background()); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    // Create your HTTP handlers (homeHandler, usersHandler, healthHandler are defined elsewhere)
    mux := http.NewServeMux()
    mux.HandleFunc("/", homeHandler)
    mux.HandleFunc("/api/users", usersHandler)
    mux.HandleFunc("/health", healthHandler)

    // Wrap with metrics middleware
    handler := metrics.Middleware(recorder)(mux)

    http.ListenAndServe(":8080", handler)
}

Collected Metrics

The middleware automatically collects:

  • http_request_duration_seconds (Histogram; labels: method, path, status): Request duration distribution
  • http_requests_total (Counter; labels: method, path, status): Total request count
  • http_requests_active (Gauge; no labels): Currently active requests
  • http_request_size_bytes (Histogram; labels: method, path): Request body size
  • http_response_size_bytes (Histogram; labels: method, path, status): Response body size
  • http_errors_total (Counter; labels: method, path, status): HTTP error count

Metric Labels

Each metric includes relevant labels:

  • method: HTTP method like GET, POST, PUT, DELETE.
  • path: Request path like /api/users, /health.
  • status: HTTP status code like 200, 404, 500.

Path Exclusion

Exclude specific paths from metrics collection to reduce noise and cardinality.

Exact Path Exclusion

Exclude specific paths:

handler := metrics.Middleware(recorder,
    metrics.WithExcludePaths("/health", "/metrics", "/ready"),
)(mux)

Use Case: Health checks, metrics endpoints, readiness probes

Prefix Exclusion

Exclude all paths with specific prefixes:

handler := metrics.Middleware(recorder,
    metrics.WithExcludePrefixes("/debug/", "/internal/", "/_/"),
)(mux)

Use Case: Debug endpoints, internal APIs, administrative paths

Pattern Exclusion

Exclude paths matching regex patterns:

handler := metrics.Middleware(recorder,
    metrics.WithExcludePatterns(
        `^/v[0-9]+/internal/.*`,  // /v1/internal/*, /v2/internal/*
        `^/api/[0-9]+$`,           // /api/123, /api/456 (avoid high cardinality)
    ),
)(mux)

Use Case: Version-specific internal paths, high-cardinality routes

Combining Exclusions

Use multiple exclusion strategies together:

handler := metrics.Middleware(recorder,
    // Exact paths
    metrics.WithExcludePaths("/health", "/metrics"),
    
    // Prefixes
    metrics.WithExcludePrefixes("/debug/", "/internal/"),
    
    // Patterns
    metrics.WithExcludePatterns(`^/admin/.*`),
)(mux)

Header Recording

Record specific HTTP headers as metric attributes.

Basic Header Recording

handler := metrics.Middleware(recorder,
    metrics.WithHeaders("X-Request-ID", "X-Correlation-ID"),
)(mux)

Headers are recorded as metric attributes:

http_requests_total{
    method="GET",
    path="/api/users",
    status="200",
    x_request_id="abc123",
    x_correlation_id="def456"
} 1

Header Name Normalization

Header names are automatically normalized:

  • Converted to lowercase
  • Hyphens replaced with underscores

Examples:

  • X-Request-ID → x_request_id
  • Content-Type → content_type
  • User-Agent → user_agent
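
Illustratively, the resulting attribute key is equivalent to lowercasing the header name and replacing hyphens with underscores (this snippet mirrors the rule above and is not the package's internal implementation):

func normalizedHeaderKey(name string) string {
    return strings.ReplaceAll(strings.ToLower(name), "-", "_")
}

// normalizedHeaderKey("X-Request-ID") == "x_request_id"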

Multiple Headers

Record multiple headers:

handler := metrics.Middleware(recorder,
    metrics.WithHeaders(
        "X-Request-ID",
        "X-Correlation-ID", 
        "X-Client-Version",
        "X-API-Key",  // This will be filtered out (sensitive)
    ),
)(mux)

Security

The middleware automatically protects sensitive headers.

Automatic Header Filtering

These headers are always filtered and never recorded as metrics, even if explicitly requested:

  • Authorization
  • Cookie
  • Set-Cookie
  • X-API-Key
  • X-Auth-Token
  • Proxy-Authorization
  • WWW-Authenticate

Example

// Only X-Request-ID will be recorded
// Authorization and Cookie are automatically filtered
handler := metrics.Middleware(recorder,
    metrics.WithHeaders(
        "Authorization",      // Filtered
        "X-Request-ID",       // Recorded
        "Cookie",             // Filtered
        "X-Correlation-ID",   // Recorded
    ),
)(mux)

Why Filter Sensitive Headers?

Recording sensitive headers in metrics can:

  • Leak authentication credentials
  • Expose API keys in monitoring systems
  • Violate security policies
  • Create compliance issues

Best Practice: Only record non-sensitive, low-cardinality headers.

Complete Example

package main

import (
    "context"
    "log"
    "net/http"
    "os"
    "os/signal"
    "time"

    "rivaas.dev/metrics"
)

func main() {
    // Create lifecycle context
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create metrics recorder
    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("my-api"),
        metrics.WithServiceVersion("v1.0.0"),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    defer func() {
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()
        recorder.Shutdown(shutdownCtx)
    }()

    // Create HTTP handlers
    mux := http.NewServeMux()
    
    mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte("Hello, World!"))
    })
    
    mux.HandleFunc("/api/users", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        w.Write([]byte(`{"users": []}`))
    })
    
    mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
    })

    // Configure middleware with all options
    handler := metrics.Middleware(recorder,
        // Exclude health and metrics endpoints
        metrics.WithExcludePaths("/health", "/metrics"),
        
        // Exclude debug and internal paths
        metrics.WithExcludePrefixes("/debug/", "/internal/"),
        
        // Exclude admin paths
        metrics.WithExcludePatterns(`^/admin/.*`),
        
        // Record tracing headers
        metrics.WithHeaders("X-Request-ID", "X-Correlation-ID"),
    )(mux)

    // Start HTTP server
    server := &http.Server{
        Addr:    ":8080",
        Handler: handler,
    }
    
    go func() {
        log.Printf("Server listening on :8080")
        if err := server.ListenAndServe(); err != http.ErrServerClosed {
            log.Fatal(err)
        }
    }()
    
    // Wait for interrupt
    <-ctx.Done()
    log.Println("Shutting down...")
    
    // Graceful shutdown
    shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()
    server.Shutdown(shutdownCtx)
}

Integration Patterns

Standalone HTTP Server

mux := http.NewServeMux()
mux.HandleFunc("/", handler)

wrappedHandler := metrics.Middleware(recorder)(mux)

http.ListenAndServe(":8080", wrappedHandler)

With Router Middleware Chain

// Apply metrics middleware first in chain
handler := metrics.Middleware(recorder)(
    loggingMiddleware(
        authMiddleware(mux),
    ),
)

Gorilla Mux

import "github.com/gorilla/mux"

r := mux.NewRouter()
r.HandleFunc("/", homeHandler)
r.HandleFunc("/api/users", usersHandler)

// Wrap the router
handler := metrics.Middleware(recorder)(r)

http.ListenAndServe(":8080", handler)

Chi Router

import "github.com/go-chi/chi/v5"

r := chi.NewRouter()
r.Get("/", homeHandler)
r.Get("/api/users", usersHandler)

// Chi router is already http.Handler
handler := metrics.Middleware(recorder)(r)

http.ListenAndServe(":8080", handler)

Path Cardinality

Warning: High-cardinality paths can create excessive metrics.

Problematic Paths

// DON'T: These create unique paths for each request
/api/users/12345       // User ID in path
/api/orders/abc-123    // Order ID in path
/files/document-xyz    // Document ID in path

Each unique path becomes a separate metric series for every method and status combination. For example, 10,000 user IDs across 4 HTTP methods and 5 status codes can produce 200,000 series for a single metric, leading to:

  • Excessive memory usage
  • Slow query performance
  • Storage bloat

Solutions

1. Exclude High-Cardinality Paths

handler := metrics.Middleware(recorder,
    // Exclude paths with IDs
    metrics.WithExcludePatterns(
        `^/api/users/[^/]+$`,      // /api/users/{id}
        `^/api/orders/[^/]+$`,     // /api/orders/{id}
        `^/files/[^/]+$`,          // /files/{id}
    ),
)(mux)

2. Use Path Normalization

Some routers support path normalization:

// Router provides normalized path
// /api/users/123 → /api/users/{id}

Check your router documentation for normalization support.

3. Record Fewer Labels

// Instead of recording full path, use endpoint name
// This requires custom instrumentation
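
A minimal sketch of such custom instrumentation, using the recorder's custom counter API; the handler name, the metric name endpoint_requests_total, and the label value "get_user" are hypothetical:

import (
    "net/http"

    "rivaas.dev/metrics"
    "go.opentelemetry.io/otel/attribute"
)

// getUserHandler records a fixed, low-cardinality "endpoint" label instead of
// letting the raw request path (which contains user IDs) drive label values.
func getUserHandler(recorder *metrics.Recorder) http.HandlerFunc {
    return func(w http.ResponseWriter, r *http.Request) {
        _ = recorder.IncrementCounter(r.Context(), "endpoint_requests_total",
            attribute.String("endpoint", "get_user"),
        )
        w.WriteHeader(http.StatusOK)
    }
}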

Performance Considerations

Middleware Overhead

The middleware adds minimal overhead:

  • ~1-2 microseconds per request
  • Safe for production use
  • Thread-safe for concurrent requests

Memory Usage

Memory usage scales with:

  • Number of unique paths
  • Number of unique label combinations
  • Histogram bucket count

Best Practice: Exclude high-cardinality paths.

CPU Impact

Histogram recording is the most CPU-intensive operation. If needed, adjust bucket count:

recorder := metrics.MustNew(
    metrics.WithPrometheus(":9090", "/metrics"),
    // Fewer buckets = lower CPU overhead
    metrics.WithDurationBuckets(0.01, 0.1, 1, 10),
    metrics.WithServiceName("my-api"),
)

Viewing Metrics

Access metrics via the Prometheus endpoint:

curl http://localhost:9090/metrics

Example output:

# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/",status="200"} 42
http_requests_total{method="GET",path="/api/users",status="200"} 128
http_requests_total{method="POST",path="/api/users",status="201"} 15

# HELP http_request_duration_seconds HTTP request duration
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{method="GET",path="/",le="0.005"} 10
http_request_duration_seconds_bucket{method="GET",path="/",le="0.01"} 35
http_request_duration_seconds_bucket{method="GET",path="/",le="0.025"} 42
http_request_duration_seconds_sum{method="GET",path="/"} 0.523
http_request_duration_seconds_count{method="GET",path="/"} 42

# HELP http_requests_active Currently active HTTP requests
# TYPE http_requests_active gauge
http_requests_active 3
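
If a Prometheus server scrapes this endpoint, the same data can be explored with standard PromQL, for example:

# Per-path request rate over the last 5 minutes
rate(http_requests_total[5m])

# 95th percentile request duration per path
histogram_quantile(0.95, sum by (path, le) (rate(http_request_duration_seconds_bucket[5m])))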

Middleware Options Reference

  • WithExcludePaths(paths...) - Exclude exact paths from metrics
  • WithExcludePrefixes(prefixes...) - Exclude path prefixes from metrics
  • WithExcludePatterns(patterns...) - Exclude paths matching regex patterns
  • WithHeaders(headers...) - Record specific headers as metric attributes

See Middleware Options Reference for complete details.

7 - Testing

Test utilities for metrics collection

This guide covers testing utilities provided by the metrics package.

Testing Utilities

The metrics package provides utilities for testing without port conflicts or complex setup.

TestingRecorder

Create a test recorder that uses the stdout provider (no network required):

package myapp_test

import (
    "net/http/httptest"
    "testing"

    "rivaas.dev/metrics"
)

func TestHandler(t *testing.T) {
    t.Parallel()
    
    // Create test recorder (uses stdout, avoids port conflicts)
    recorder := metrics.TestingRecorder(t, "test-service")
    
    // Use recorder in tests...
    handler := NewHandler(recorder)
    
    // Test your handler
    req := httptest.NewRequest("GET", "/", nil)
    w := httptest.NewRecorder()
    handler.ServeHTTP(w, req)
    
    // Assertions...
    // Cleanup is automatic via t.Cleanup()
}

// With additional options
func TestWithOptions(t *testing.T) {
    recorder := metrics.TestingRecorder(t, "test-service",
        metrics.WithMaxCustomMetrics(100),
    )
    // ...
}

Signature

func TestingRecorder(tb testing.TB, serviceName string, opts ...Option) *Recorder

Parameters:

  • tb testing.TB - Test or benchmark instance.
  • serviceName string - Service name for metrics.
  • opts ...Option - Optional additional configuration options.

Features

  • No port conflicts: Uses stdout provider, no network required.
  • Automatic cleanup: Registers cleanup via t.Cleanup().
  • Parallel safe: Safe to use in parallel tests.
  • Simple setup: One-line initialization.
  • Works with benchmarks: Accepts testing.TB (both *testing.T and *testing.B).

Example

func TestMetricsCollection(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorder(t, "test-service")
    
    // Record some metrics
    ctx := context.Background()
    err := recorder.IncrementCounter(ctx, "test_counter")
    if err != nil {
        t.Errorf("Failed to record counter: %v", err)
    }
    
    err = recorder.RecordHistogram(ctx, "test_duration", 1.5)
    if err != nil {
        t.Errorf("Failed to record histogram: %v", err)
    }
    
    // Test passes if no errors
}

TestingRecorderWithPrometheus

Create a test recorder with Prometheus provider (for endpoint testing):

func TestPrometheusEndpoint(t *testing.T) {
    t.Parallel()
    
    // Create test recorder with Prometheus (dynamic port)
    recorder := metrics.TestingRecorderWithPrometheus(t, "test-service")
    
    // Wait for server to be ready
    err := metrics.WaitForMetricsServer(t, recorder.ServerAddress(), 5*time.Second)
    if err != nil {
        t.Fatal(err)
    }
    
    // Test metrics endpoint (note: ServerAddress returns port like ":9090")
    resp, err := http.Get("http://localhost" + recorder.ServerAddress() + "/metrics")
    if err != nil {
        t.Fatal(err)
    }
    defer resp.Body.Close()
    
    if resp.StatusCode != http.StatusOK {
        t.Errorf("Expected status 200, got %d", resp.StatusCode)
    }
}

Signature

func TestingRecorderWithPrometheus(tb testing.TB, serviceName string, opts ...Option) *Recorder

Parameters:

  • tb testing.TB - Test or benchmark instance
  • serviceName string - Service name for metrics
  • opts ...Option - Optional additional configuration options

Features

  • Dynamic port allocation: Automatically finds available port
  • Real Prometheus endpoint: Test actual HTTP metrics endpoint
  • Server readiness check: Use WaitForMetricsServer to wait for startup
  • Automatic cleanup: Shuts down server via t.Cleanup()
  • Works with benchmarks: Accepts testing.TB (both *testing.T and *testing.B)

WaitForMetricsServer

Wait for Prometheus metrics server to be ready:

func TestMetricsEndpoint(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorderWithPrometheus(t, "test-service")
    
    // Wait up to 5 seconds for server to start
    err := metrics.WaitForMetricsServer(t, recorder.ServerAddress(), 5*time.Second)
    if err != nil {
        t.Fatalf("Metrics server not ready: %v", err)
    }
    
    // Server is ready, make requests (note: ServerAddress returns port like ":9090")
    resp, err := http.Get("http://localhost" + recorder.ServerAddress() + "/metrics")
    // ... test response
}

Signature

func WaitForMetricsServer(tb testing.TB, address string, timeout time.Duration) error

Parameters

  • tb testing.TB: Test or benchmark instance for logging
  • address string: Server address (e.g., :9090)
  • timeout time.Duration: Maximum wait time

Returns

  • error: Returns error if server doesn’t become ready within timeout

Testing Middleware

Test HTTP middleware with metrics collection:

func TestMiddleware(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorder(t, "test-service")
    
    // Create test handler
    handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
        w.Write([]byte("OK"))
    })
    
    // Wrap with metrics middleware
    wrappedHandler := metrics.Middleware(recorder)(handler)
    
    // Make test request
    req := httptest.NewRequest("GET", "/test", nil)
    w := httptest.NewRecorder()
    
    wrappedHandler.ServeHTTP(w, req)
    
    // Assert response
    if w.Code != http.StatusOK {
        t.Errorf("Expected status 200, got %d", w.Code)
    }
    
    if w.Body.String() != "OK" {
        t.Errorf("Expected body 'OK', got %s", w.Body.String())
    }
    
    // Metrics are recorded (visible in test logs if verbose)
}

Testing Custom Metrics

Test custom metric recording:

func TestCustomMetrics(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorder(t, "test-service")
    ctx := context.Background()
    
    tests := []struct {
        name    string
        record  func() error
        wantErr bool
    }{
        {
            name: "valid counter",
            record: func() error {
                return recorder.IncrementCounter(ctx, "test_counter")
            },
            wantErr: false,
        },
        {
            name: "invalid counter name",
            record: func() error {
                return recorder.IncrementCounter(ctx, "__reserved")
            },
            wantErr: true,
        },
        {
            name: "valid histogram",
            record: func() error {
                return recorder.RecordHistogram(ctx, "test_duration", 1.5)
            },
            wantErr: false,
        },
        {
            name: "valid gauge",
            record: func() error {
                return recorder.SetGauge(ctx, "test_gauge", 42)
            },
            wantErr: false,
        },
    }
    
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            err := tt.record()
            if (err != nil) != tt.wantErr {
                t.Errorf("wantErr=%v, got err=%v", tt.wantErr, err)
            }
        })
    }
}

Testing Error Handling

Test metric recording error handling:

func TestMetricErrors(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorder(t, "test-service")
    ctx := context.Background()
    
    // Test reserved http_ prefix (used by built-in HTTP metrics)
    err := recorder.IncrementCounter(ctx, "http_invalid")
    if err == nil {
        t.Error("Expected error for reserved prefix, got nil")
    }
    
    // Test reserved __ prefix
    err = recorder.IncrementCounter(ctx, "__internal")
    if err == nil {
        t.Error("Expected error for reserved prefix, got nil")
    }
    
    // Test valid metric
    err = recorder.IncrementCounter(ctx, "valid_metric")
    if err != nil {
        t.Errorf("Expected no error, got %v", err)
    }
}

Integration Testing

Test complete HTTP server with metrics:

func TestServerWithMetrics(t *testing.T) {
    recorder := metrics.TestingRecorderWithPrometheus(t, "test-api")
    
    // Wait for metrics server
    err := metrics.WaitForMetricsServer(t, recorder.ServerAddress(), 5*time.Second)
    if err != nil {
        t.Fatal(err)
    }
    
    // Create test HTTP server
    mux := http.NewServeMux()
    mux.HandleFunc("/api", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
        w.Write([]byte(`{"status": "ok"}`))
    })
    
    handler := metrics.Middleware(recorder)(mux)
    
    server := httptest.NewServer(handler)
    defer server.Close()
    
    // Make requests
    resp, err := http.Get(server.URL + "/api")
    if err != nil {
        t.Fatal(err)
    }
    defer resp.Body.Close()
    
    if resp.StatusCode != http.StatusOK {
        t.Errorf("Expected status 200, got %d", resp.StatusCode)
    }
    
    // Check metrics endpoint (note: ServerAddress returns port like ":9090")
    metricsResp, err := http.Get("http://localhost" + recorder.ServerAddress() + "/metrics")
    if err != nil {
        t.Fatal(err)
    }
    defer metricsResp.Body.Close()
    
    body, _ := io.ReadAll(metricsResp.Body)
    bodyStr := string(body)
    
    // Verify metrics exist
    if !strings.Contains(bodyStr, "http_requests_total") {
        t.Error("Expected http_requests_total metric")
    }
}

Parallel Tests

The testing utilities support parallel test execution:

func TestMetricsParallel(t *testing.T) {
    tests := []struct {
        name string
        path string
    }{
        {"endpoint1", "/api/users"},
        {"endpoint2", "/api/orders"},
        {"endpoint3", "/api/products"},
    }
    
    for _, tt := range tests {
        tt := tt // Capture range variable
        t.Run(tt.name, func(t *testing.T) {
            t.Parallel()
            
            // Each test gets its own recorder
            recorder := metrics.TestingRecorder(t, "test-"+tt.name)
            
            // Test handler
            handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                w.WriteHeader(http.StatusOK)
            })
            
            wrapped := metrics.Middleware(recorder)(handler)
            
            req := httptest.NewRequest("GET", tt.path, nil)
            w := httptest.NewRecorder()
            wrapped.ServeHTTP(w, req)
            
            if w.Code != http.StatusOK {
                t.Errorf("Expected 200, got %d", w.Code)
            }
        })
    }
}

Benchmarking

Benchmark metrics collection performance:

func BenchmarkMetricsMiddleware(b *testing.B) {
    // Create the recorder directly with metrics.New
    // (TestingRecorder also works in benchmarks, since it accepts testing.TB)
    recorder, err := metrics.New(
        metrics.WithStdout(),
        metrics.WithServiceName("bench-service"),
    )
    if err != nil {
        b.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())
    
    handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
    })
    
    wrapped := metrics.Middleware(recorder)(handler)
    
    req := httptest.NewRequest("GET", "/test", nil)
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        w := httptest.NewRecorder()
        wrapped.ServeHTTP(w, req)
    }
}

func BenchmarkCustomMetrics(b *testing.B) {
    recorder, err := metrics.New(
        metrics.WithStdout(),
        metrics.WithServiceName("bench-service"),
    )
    if err != nil {
        b.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())
    
    ctx := context.Background()
    
    b.Run("Counter", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _ = recorder.IncrementCounter(ctx, "bench_counter")
        }
    })
    
    b.Run("Histogram", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _ = recorder.RecordHistogram(ctx, "bench_duration", 1.5)
        }
    })
    
    b.Run("Gauge", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _ = recorder.SetGauge(ctx, "bench_gauge", 42)
        }
    })
}

Testing Best Practices

Use Parallel Tests

Enable parallel execution to run tests faster:

func TestSomething(t *testing.T) {
    t.Parallel() // Always use t.Parallel() when safe
    
    recorder := metrics.TestingRecorder(t, "test-service")
    // ... test code
}

Prefer TestingRecorder

Use TestingRecorder (stdout) unless you specifically need to test the HTTP endpoint:

// Good - fast, no port allocation
recorder := metrics.TestingRecorder(t, "test-service")

// Only when needed - tests HTTP endpoint
recorder := metrics.TestingRecorderWithPrometheus(t, "test-service")

Wait for Server Ready

Always wait for Prometheus server before making requests:

recorder := metrics.TestingRecorderWithPrometheus(t, "test-service")
err := metrics.WaitForMetricsServer(t, recorder.ServerAddress(), 5*time.Second)
if err != nil {
    t.Fatal(err)
}
// Now safe to make requests

Don’t Forget Context

Always pass context to metric methods:

ctx := context.Background()
err := recorder.IncrementCounter(ctx, "test_counter")

Test Error Cases

Test both success and error cases:

// Test valid metric
err := recorder.IncrementCounter(ctx, "valid_metric")
if err != nil {
    t.Errorf("Unexpected error: %v", err)
}

// Test invalid metric
err = recorder.IncrementCounter(ctx, "__reserved")
if err == nil {
    t.Error("Expected error for reserved prefix")
}

Example Test Suite

Complete example test suite:

package api_test

import (
    "net/http"
    "net/http/httptest"
    "testing"
    "time"
    
    "rivaas.dev/metrics"
    "myapp/api"
)

func TestAPI(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorder(t, "test-api")
    
    server := api.NewServer(recorder)
    
    tests := []struct {
        name       string
        method     string
        path       string
        wantStatus int
    }{
        {"home", "GET", "/", 200},
        {"users", "GET", "/api/users", 200},
        {"not found", "GET", "/invalid", 404},
    }
    
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            req := httptest.NewRequest(tt.method, tt.path, nil)
            w := httptest.NewRecorder()
            
            server.ServeHTTP(w, req)
            
            if w.Code != tt.wantStatus {
                t.Errorf("Expected status %d, got %d", tt.wantStatus, w.Code)
            }
        })
    }
}

func TestMetricsEndpoint(t *testing.T) {
    t.Parallel()
    
    recorder := metrics.TestingRecorderWithPrometheus(t, "test-api")
    
    err := metrics.WaitForMetricsServer(t, recorder.ServerAddress(), 5*time.Second)
    if err != nil {
        t.Fatal(err)
    }
    
    resp, err := http.Get("http://localhost" + recorder.ServerAddress() + "/metrics")
    if err != nil {
        t.Fatal(err)
    }
    defer resp.Body.Close()
    
    if resp.StatusCode != http.StatusOK {
        t.Errorf("Expected status 200, got %d", resp.StatusCode)
    }
}

8 - Examples

Real-world examples of metrics collection patterns

This guide provides complete, real-world examples of using the metrics package.

Simple HTTP Server

Basic HTTP server with Prometheus metrics.

package main

import (
    "context"
    "log"
    "net/http"
    "os"
    "os/signal"
    "time"
    
    "rivaas.dev/metrics"
)

func main() {
    // Create lifecycle context
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create metrics recorder
    recorder, err := metrics.New(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("simple-api"),
        metrics.WithServiceVersion("v1.0.0"),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    // Start metrics server
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    defer func() {
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()
        if err := recorder.Shutdown(shutdownCtx); err != nil {
            log.Printf("Metrics shutdown error: %v", err)
        }
    }()

    // Create HTTP handlers
    mux := http.NewServeMux()
    
    mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        w.Write([]byte(`{"message": "Hello, World!"}`))
    })
    
    mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
    })

    // Wrap with metrics middleware
    handler := metrics.Middleware(recorder,
        metrics.WithExcludePaths("/health", "/metrics"),
    )(mux)

    // Start HTTP server
    server := &http.Server{
        Addr:    ":8080",
        Handler: handler,
    }
    
    go func() {
        log.Printf("Server listening on :8080")
        log.Printf("Metrics available at http://localhost:9090/metrics")
        if err := server.ListenAndServe(); err != http.ErrServerClosed {
            log.Fatal(err)
        }
    }()
    
    // Wait for interrupt
    <-ctx.Done()
    log.Println("Shutting down gracefully...")
    
    shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()
    server.Shutdown(shutdownCtx)
}

Run and test:

# Start server
go run main.go

# Make requests
curl http://localhost:8080/

# View metrics
curl http://localhost:9090/metrics

Custom Metrics Example

Application with custom business metrics:

package main

import (
    "context"
    "fmt"
    "log"
    "math/rand"
    "os"
    "os/signal"
    "time"
    
    "rivaas.dev/metrics"
    "go.opentelemetry.io/otel/attribute"
)

type OrderProcessor struct {
    recorder *metrics.Recorder
}

func NewOrderProcessor(recorder *metrics.Recorder) *OrderProcessor {
    return &OrderProcessor{recorder: recorder}
}

func (p *OrderProcessor) ProcessOrder(ctx context.Context, orderID string, amount float64) error {
    start := time.Now()
    
    // Simulate processing
    time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
    
    // Record processing duration
    duration := time.Since(start).Seconds()
    _ = p.recorder.RecordHistogram(ctx, "order_processing_duration_seconds", duration,
        attribute.String("order_id", orderID),
    )
    
    // Record order amount
    _ = p.recorder.RecordHistogram(ctx, "order_amount_usd", amount,
        attribute.String("currency", "USD"),
    )
    
    // Increment orders processed counter
    _ = p.recorder.IncrementCounter(ctx, "orders_processed_total",
        attribute.String("status", "success"),
    )
    
    log.Printf("Processed order %s: $%.2f in %.3fs", orderID, amount, duration)
    return nil
}

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Create metrics recorder
    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("order-processor"),
        metrics.WithDurationBuckets(0.01, 0.05, 0.1, 0.5, 1, 5),
    )
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    processor := NewOrderProcessor(recorder)
    
    log.Println("Processing orders... (metrics at http://localhost:9090/metrics)")
    
    // Simulate order processing
    ticker := time.NewTicker(1 * time.Second)
    defer ticker.Stop()
    
    orderNum := 0
    for {
        select {
        case <-ctx.Done():
            return
        case <-ticker.C:
            orderNum++
            orderID := fmt.Sprintf("ORD-%d", orderNum)
            amount := 10.0 + rand.Float64()*990.0
            
            if err := processor.ProcessOrder(ctx, orderID, amount); err != nil {
                log.Printf("Error processing order: %v", err)
            }
        }
    }
}

OTLP with OpenTelemetry Collector

Send metrics to OpenTelemetry collector:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"
    "time"
    
    "rivaas.dev/metrics"
)

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    // Get OTLP endpoint from environment
    endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
    if endpoint == "" {
        endpoint = "http://localhost:4318"
    }

    // Create recorder with OTLP
    recorder, err := metrics.New(
        metrics.WithOTLP(endpoint),
        metrics.WithServiceName(os.Getenv("SERVICE_NAME")),
        metrics.WithServiceVersion(os.Getenv("SERVICE_VERSION")),
        metrics.WithExportInterval(10 * time.Second),
    )
    if err != nil {
        log.Fatal(err)
    }
    
    // Important: Start before recording metrics
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    
    defer func() {
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()
        recorder.Shutdown(shutdownCtx)
    }()

    log.Printf("Sending metrics to OTLP endpoint: %s", endpoint)
    
    // Record metrics periodically
    ticker := time.NewTicker(2 * time.Second)
    defer ticker.Stop()
    
    count := 0
    for {
        select {
        case <-ctx.Done():
            return
        case <-ticker.C:
            count++
            _ = recorder.IncrementCounter(ctx, "app_ticks_total")
            _ = recorder.SetGauge(ctx, "app_counter", float64(count))
            log.Printf("Tick %d", count)
        }
    }
}

OpenTelemetry collector configuration:

# otel-collector-config.yaml
receivers:
  otlp:
    protocols:
      http:
        endpoint: 0.0.0.0:4318

exporters:
  prometheus:
    endpoint: "0.0.0.0:8889"
  logging:
    loglevel: debug

service:
  pipelines:
    metrics:
      receivers: [otlp]
      exporters: [prometheus, logging]

Run collector:

otel-collector --config=otel-collector-config.yaml
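
Alternatively, the collector can run in Docker; one possible invocation, assuming the otel/opentelemetry-collector image and the ports from the config above:

docker run --rm \
  -p 4318:4318 -p 8889:8889 \
  -v "$(pwd)/otel-collector-config.yaml:/etc/otelcol/config.yaml" \
  otel/opentelemetry-collector --config=/etc/otelcol/config.yaml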

Worker Pool with Gauges

Track worker pool metrics:

package main

import (
    "context"
    "fmt"
    "log"
    "math/rand"
    "os"
    "os/signal"
    "sync"
    "time"
    
    "rivaas.dev/metrics"
    "go.opentelemetry.io/otel/attribute"
)

type WorkerPool struct {
    workers  int
    active   int
    idle     int
    mu       sync.Mutex
    recorder *metrics.Recorder
}

func NewWorkerPool(size int, recorder *metrics.Recorder) *WorkerPool {
    return &WorkerPool{
        workers:  size,
        idle:     size,
        recorder: recorder,
    }
}

func (p *WorkerPool) updateMetrics(ctx context.Context) {
    p.mu.Lock()
    active := p.active
    idle := p.idle
    p.mu.Unlock()
    
    _ = p.recorder.SetGauge(ctx, "worker_pool_active", float64(active))
    _ = p.recorder.SetGauge(ctx, "worker_pool_idle", float64(idle))
    _ = p.recorder.SetGauge(ctx, "worker_pool_total", float64(p.workers))
}

func (p *WorkerPool) DoWork(ctx context.Context, jobID string) {
    p.mu.Lock()
    p.active++
    p.idle--
    p.mu.Unlock()
    
    p.updateMetrics(ctx)
    
    start := time.Now()
    
    // Simulate work
    time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
    
    duration := time.Since(start).Seconds()
    _ = p.recorder.RecordHistogram(ctx, "job_duration_seconds", duration,
        attribute.String("job_id", jobID),
    )
    _ = p.recorder.IncrementCounter(ctx, "jobs_completed_total")
    
    p.mu.Lock()
    p.active--
    p.idle++
    p.mu.Unlock()
    
    p.updateMetrics(ctx)
}

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    recorder := metrics.MustNew(
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithServiceName("worker-pool"),
    )
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    pool := NewWorkerPool(10, recorder)
    
    log.Println("Worker pool started (metrics at http://localhost:9090/metrics)")
    
    // Submit jobs
    var wg sync.WaitGroup
    for i := 0; i < 50; i++ {
        wg.Add(1)
        jobID := fmt.Sprintf("job-%d", i)
        
        go func(id string) {
            defer wg.Done()
            pool.DoWork(ctx, id)
        }(jobID)
        
        time.Sleep(100 * time.Millisecond)
    }
    
    wg.Wait()
    log.Println("All jobs completed")
}

Environment-Based Configuration

Load metrics configuration from environment:

package main

import (
    "context"
    "log"
    "os"
    "os/signal"
    "strconv"
    "time"
    
    "rivaas.dev/metrics"
)

func createRecorder() (*metrics.Recorder, error) {
    var opts []metrics.Option
    
    // Service metadata
    opts = append(opts, metrics.WithServiceName(getEnv("SERVICE_NAME", "my-service")))
    
    if version := os.Getenv("SERVICE_VERSION"); version != "" {
        opts = append(opts, metrics.WithServiceVersion(version))
    }
    
    // Provider selection
    provider := getEnv("METRICS_PROVIDER", "prometheus")
    switch provider {
    case "prometheus":
        addr := getEnv("METRICS_ADDR", ":9090")
        path := getEnv("METRICS_PATH", "/metrics")
        opts = append(opts, metrics.WithPrometheus(addr, path))
        
        if getBoolEnv("METRICS_STRICT_PORT", true) {
            opts = append(opts, metrics.WithStrictPort())
        }
        
    case "otlp":
        endpoint := getEnv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
        opts = append(opts, metrics.WithOTLP(endpoint))
        
        if interval := getDurationEnv("METRICS_EXPORT_INTERVAL", 30*time.Second); interval > 0 {
            opts = append(opts, metrics.WithExportInterval(interval))
        }
        
    case "stdout":
        opts = append(opts, metrics.WithStdout())
        
    default:
        log.Printf("Unknown provider %s, using stdout", provider)
        opts = append(opts, metrics.WithStdout())
    }
    
    // Custom metrics limit
    if limit := getIntEnv("METRICS_MAX_CUSTOM", 1000); limit > 0 {
        opts = append(opts, metrics.WithMaxCustomMetrics(limit))
    }
    
    return metrics.New(opts...)
}

func getEnv(key, defaultValue string) string {
    if value := os.Getenv(key); value != "" {
        return value
    }
    return defaultValue
}

func getBoolEnv(key string, defaultValue bool) bool {
    if value := os.Getenv(key); value != "" {
        b, err := strconv.ParseBool(value)
        if err == nil {
            return b
        }
    }
    return defaultValue
}

func getIntEnv(key string, defaultValue int) int {
    if value := os.Getenv(key); value != "" {
        i, err := strconv.Atoi(value)
        if err == nil {
            return i
        }
    }
    return defaultValue
}

func getDurationEnv(key string, defaultValue time.Duration) time.Duration {
    if value := os.Getenv(key); value != "" {
        d, err := time.ParseDuration(value)
        if err == nil {
            return d
        }
    }
    return defaultValue
}

func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
    defer cancel()

    recorder, err := createRecorder()
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(ctx); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())

    log.Println("Service started with metrics")
    
    // Your application code...
    <-ctx.Done()
}

Example .env file:

SERVICE_NAME=my-api
SERVICE_VERSION=v1.2.3
METRICS_PROVIDER=prometheus
METRICS_ADDR=:9090
METRICS_PATH=/metrics
METRICS_STRICT_PORT=true
METRICS_MAX_CUSTOM=2000
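
The example reads plain environment variables; one way to export the values from a .env file in a bash shell before running:

set -a
source .env
set +a
go run main.go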

Microservices Pattern

Shared metrics setup for microservices:

// pkg/telemetry/metrics.go
package telemetry

import (
    "context"
    
    "rivaas.dev/metrics"
    "go.opentelemetry.io/otel/attribute"
)

type Config struct {
    ServiceName    string
    ServiceVersion string
    MetricsAddr    string
}

func NewMetricsRecorder(cfg Config) (*metrics.Recorder, error) {
    opts := []metrics.Option{
        metrics.WithPrometheus(cfg.MetricsAddr, "/metrics"),
        metrics.WithStrictPort(),
        metrics.WithServiceName(cfg.ServiceName),
    }
    
    if cfg.ServiceVersion != "" {
        opts = append(opts, metrics.WithServiceVersion(cfg.ServiceVersion))
    }
    
    return metrics.New(opts...)
}

// Service-specific metrics helpers
type ServiceMetrics struct {
    recorder *metrics.Recorder
}

func NewServiceMetrics(recorder *metrics.Recorder) *ServiceMetrics {
    return &ServiceMetrics{recorder: recorder}
}

func (m *ServiceMetrics) RecordAPICall(ctx context.Context, endpoint string, duration float64, err error) {
    status := "success"
    if err != nil {
        status = "error"
    }
    
    _ = m.recorder.RecordHistogram(ctx, "api_call_duration_seconds", duration,
        attribute.String("endpoint", endpoint),
        attribute.String("status", status),
    )
    
    _ = m.recorder.IncrementCounter(ctx, "api_calls_total",
        attribute.String("endpoint", endpoint),
        attribute.String("status", status),
    )
}

Use in service:

// cmd/user-service/main.go
package main

import (
    "context"
    "log"
    "os"
    
    "myapp/pkg/telemetry"
)

func main() {
    cfg := telemetry.Config{
        ServiceName:    "user-service",
        ServiceVersion: os.Getenv("VERSION"),
        MetricsAddr:    ":9090",
    }
    
    recorder, err := telemetry.NewMetricsRecorder(cfg)
    if err != nil {
        log.Fatal(err)
    }
    
    if err := recorder.Start(context.Background()); err != nil {
        log.Fatal(err)
    }
    defer recorder.Shutdown(context.Background())
    
    metrics := telemetry.NewServiceMetrics(recorder)
    
    // Use metrics in your service
    // ...
}

Complete Production Example

Full production-ready setup:

package main

import (
    "context"
    "log/slog"
    "net/http"
    "os"
    "os/signal"
    "syscall"
    "time"
    
    "rivaas.dev/metrics"
)

func main() {
    // Setup structured logging
    logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
    slog.SetDefault(logger)
    
    // Create application context
    ctx, cancel := signal.NotifyContext(
        context.Background(),
        os.Interrupt,
        syscall.SIGTERM,
    )
    defer cancel()

    // Create metrics recorder with production settings
    recorder, err := metrics.New(
        // Provider
        metrics.WithPrometheus(":9090", "/metrics"),
        metrics.WithStrictPort(),
        
        // Service metadata
        metrics.WithServiceName("production-api"),
        metrics.WithServiceVersion(os.Getenv("VERSION")),
        
        // Configuration
        metrics.WithDurationBuckets(0.01, 0.1, 0.5, 1, 5, 10, 30),
        metrics.WithSizeBuckets(100, 1000, 10000, 100000, 1000000),
        metrics.WithMaxCustomMetrics(2000),
        
        // Observability
        metrics.WithLogger(slog.Default()),
    )
    if err != nil {
        slog.Error("Failed to create metrics recorder", "error", err)
        os.Exit(1)
    }
    
    // Start metrics server
    if err := recorder.Start(ctx); err != nil {
        slog.Error("Failed to start metrics", "error", err)
        os.Exit(1)
    }
    
    slog.Info("Metrics server started", "address", recorder.ServerAddress())
    
    // Ensure graceful shutdown
    defer func() {
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
        defer cancel()
        
        if err := recorder.Shutdown(shutdownCtx); err != nil {
            slog.Error("Metrics shutdown error", "error", err)
        } else {
            slog.Info("Metrics shut down successfully")
        }
    }()

    // Create HTTP server
    mux := http.NewServeMux()
    mux.HandleFunc("/", homeHandler)
    mux.HandleFunc("/api/v1/users", usersHandler)
    mux.HandleFunc("/health", healthHandler)
    mux.HandleFunc("/ready", readyHandler)

    // Configure middleware
    handler := metrics.Middleware(recorder,
        metrics.WithExcludePaths("/health", "/ready", "/metrics"),
        metrics.WithExcludePrefixes("/debug/", "/_/"),
        metrics.WithHeaders("X-Request-ID", "X-Correlation-ID"),
    )(mux)

    server := &http.Server{
        Addr:              ":8080",
        Handler:           handler,
        ReadHeaderTimeout: 5 * time.Second,
        ReadTimeout:       10 * time.Second,
        WriteTimeout:      10 * time.Second,
        IdleTimeout:       60 * time.Second,
    }
    
    // Start HTTP server
    go func() {
        slog.Info("HTTP server starting", "address", server.Addr)
        if err := server.ListenAndServe(); err != http.ErrServerClosed {
            slog.Error("HTTP server error", "error", err)
            cancel()
        }
    }()
    
    // Wait for shutdown signal
    <-ctx.Done()
    slog.Info("Shutdown signal received")
    
    // Graceful shutdown
    shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
    defer shutdownCancel()
    
    if err := server.Shutdown(shutdownCtx); err != nil {
        slog.Error("Server shutdown error", "error", err)
    } else {
        slog.Info("Server shut down successfully")
    }
}

func homeHandler(w http.ResponseWriter, r *http.Request) {
    w.Header().Set("Content-Type", "application/json")
    w.Write([]byte(`{"status": "ok"}`))
}

func usersHandler(w http.ResponseWriter, r *http.Request) {
    w.Header().Set("Content-Type", "application/json")
    w.Write([]byte(`{"users": []}`))
}

func healthHandler(w http.ResponseWriter, r *http.Request) {
    w.WriteHeader(http.StatusOK)
}

func readyHandler(w http.ResponseWriter, r *http.Request) {
    w.WriteHeader(http.StatusOK)
}
