Implementing Observability with OpenTelemetry in Node.js

Introduction

OpenTelemetry provides unified observability through distributed tracing, metrics, and logging. This guide shows how to instrument a Node.js (Express) application with traces, Prometheus metrics, and trace-correlated logs.

Prerequisites

Node.js >=14
Basic understanding of observability concepts
Jaeger and Prometheus (optional)

Step 1: Install OpenTelemetry SDK

# OpenTelemetry API, SDK and auto-instrumentation
npm install @opentelemetry/api @opentelemetry/sdk-node @opentelemetry/auto-instrumentations-node
# Resource and semantic-convention helpers required directly by tracing.js and metrics.js
npm install @opentelemetry/resources @opentelemetry/semantic-conventions
# Exporters
npm install @opentelemetry/exporter-jaeger @opentelemetry/exporter-prometheus
# Individual instrumentations
npm install @opentelemetry/instrumentation-express @opentelemetry/instrumentation-http
# Web framework and logger used by app.js and logger.js
npm install express winston

Step 2: Basic Tracing Setup

Create tracing.js:

const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');

// Exporter that sends finished spans to the Jaeger collector.
// JAEGER_ENDPOINT overrides the localhost default below.
const jaegerExporter = new JaegerExporter({
  endpoint: process.env.JAEGER_ENDPOINT || 'http://localhost:14268/api/traces',
});

// Resource attributes describing this service; the environment comes from
// NODE_ENV and falls back to 'development'.
const serviceResource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
  [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
  [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'development',
});

// Auto-instrumentation bundle; the fs instrumentation is switched off because it is noisy.
const autoInstrumentations = getNodeAutoInstrumentations({
  '@opentelemetry/instrumentation-fs': { enabled: false },
});

// Assemble the SDK from the pieces above and start it at module load, so that
// requiring this file is enough to enable tracing.
const sdk = new NodeSDK({
  resource: serviceResource,
  traceExporter: jaegerExporter,
  instrumentations: [autoInstrumentations],
});

sdk.start();
console.log('OpenTelemetry initialized');

// On SIGTERM, shut the SDK down, log the outcome, and exit.
// The process exits with code 0 whether or not the shutdown succeeded.
process.on('SIGTERM', async () => {
  try {
    await sdk.shutdown();
    console.log('OpenTelemetry terminated');
  } catch (shutdownError) {
    console.error('Error terminating OpenTelemetry', shutdownError);
  } finally {
    process.exit(0);
  }
});

Step 3: Express.js Application with Tracing

Create app.js:

// Import tracing BEFORE other imports
require('./tracing');

const express = require('express');
const { trace, context, SpanStatusCode } = require('@opentelemetry/api');

const app = express();
const tracer = trace.getTracer('user-service', '1.0.0');

app.use(express.json());

// Custom middleware for request tracing.
//
// Starts one span per request, exposes it as `req.span`, and runs the rest of
// the middleware chain with that span as the ACTIVE span so that spans created
// downstream (e.g. via tracer.startActiveSpan) become its children.
app.use((req, res, next) => {
  const span = tracer.startSpan(`${req.method} ${req.path}`);

  // Add custom attributes
  span.setAttributes({
    'http.method': req.method,
    'http.path': req.path,
    'http.user_agent': req.get('user-agent') || '',
    'user.id': req.headers['x-user-id'] || 'anonymous',
  });

  // Store span on the request so route handlers can add attributes to it
  req.span = span;

  // Finalize the span exactly once. 'finish' fires when the response has been
  // fully written; 'close' also fires when the client disconnects early, in
  // which case 'finish' never happens and the span would otherwise never end.
  let spanEnded = false;
  const endSpan = () => {
    if (spanEnded) {
      return;
    }
    spanEnded = true;

    span.setAttribute('http.status_code', res.statusCode);

    if (res.statusCode >= 400) {
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: `HTTP ${res.statusCode}`,
      });
    }

    span.end();
  };

  res.once('finish', endSpan);
  res.once('close', endSpan);

  // startSpan() alone does not make the span current; bind it to the context
  // in which the remaining handlers run.
  context.with(trace.setSpan(context.active(), span), () => next());
});

/**
 * Simulated database lookup, wrapped in a 'db.users.findById' span.
 *
 * Waits a random delay of up to 100 ms, then resolves with a fabricated user
 * record for the given id. The special id 'error' makes the lookup fail.
 *
 * @param {string} userId - Identifier of the user to look up.
 * @returns {Promise<{id: string, name: string, email: string}>} The user record.
 * @throws {Error} 'User not found' when userId is 'error'; the error is also
 *   recorded on the span before being rethrown.
 */
async function getUserFromDB(userId) {
  // The fake query itself, kept separate from the span bookkeeping below.
  const runFakeQuery = async () => {
    const latencyMs = Math.random() * 100;
    await new Promise((done) => setTimeout(done, latencyMs));

    if (userId === 'error') {
      throw new Error('User not found');
    }

    return {
      id: userId,
      name: `User ${userId}`,
      email: `user${userId}@example.com`,
    };
  };

  return tracer.startActiveSpan('db.users.findById', async (dbSpan) => {
    dbSpan.setAttributes({
      'db.operation': 'findById',
      'db.collection.name': 'users',
      'user.id': userId,
    });

    try {
      const record = await runFakeQuery();
      dbSpan.setStatus({ code: SpanStatusCode.OK });
      return record;
    } catch (lookupError) {
      dbSpan.setStatus({
        code: SpanStatusCode.ERROR,
        message: lookupError.message,
      });
      dbSpan.recordException(lookupError);
      throw lookupError;
    } finally {
      // The span is always ended, on success and on failure.
      dbSpan.end();
    }
  });
}

// API Routes

// GET /users/:id - looks the user up and annotates the request span with the
// outcome. Any failure inside the try block is answered with a 404.
async function getUserRoute(req, res) {
  const { span: requestSpan, params } = req;
  const { id: userId } = params;

  try {
    const user = await getUserFromDB(userId);

    requestSpan.setAttributes({
      'user.found': true,
      'response.user.name': user.name,
    });

    res.json(user);
  } catch (lookupError) {
    requestSpan.setAttributes({
      'user.found': false,
      'error.message': lookupError.message,
    });

    res.status(404).json({ error: 'User not found' });
  }
}

app.get('/users/:id', getUserRoute);

// GET /health - tags the request span as a health check and reports a
// healthy status together with the current ISO timestamp.
function healthRoute(req, res) {
  req.span.setAttributes({
    'health.check': true,
    'health.status': 'healthy',
  });

  const timestamp = new Date().toISOString();
  res.json({ status: 'healthy', timestamp });
}

app.get('/health', healthRoute);

// Error handling middleware.
//
// Records the error on the request span (when one exists), logs it, and
// answers with a generic 500. If the response has already started, a new
// status/body can no longer be sent, so the error is handed to Express's
// default handler, which closes the connection.
app.use((error, req, res, next) => {
  if (req.span) {
    req.span.recordException(error);
    req.span.setStatus({
      code: SpanStatusCode.ERROR,
      message: error.message,
    });
  }

  console.error('Unhandled error:', error);

  if (res.headersSent) {
    next(error);
    return;
  }

  res.status(500).json({ error: 'Internal server error' });
});

// Listening port: the PORT environment variable, or 3000 when it is not set.
const PORT = process.env.PORT || 3000;

// Logs the port once the server is accepting connections.
function onListening() {
  console.log(`Server running on port ${PORT}`);
}

app.listen(PORT, onListening);

Step 4: Custom Metrics with Prometheus

Create metrics.js:

const { NodeSDK } = require('@opentelemetry/sdk-node');
const { PrometheusExporter } = require('@opentelemetry/exporter-prometheus');
const { metrics } = require('@opentelemetry/api');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');

// Port and path of the Prometheus scrape endpoint. The port can be overridden
// with the METRICS_PORT environment variable; an unset or non-numeric value
// falls back to 9090.
const METRICS_PORT = Number.parseInt(process.env.METRICS_PORT ?? '', 10) || 9090;
const METRICS_ENDPOINT = '/metrics';

// Initialize metrics
const metricExporter = new PrometheusExporter({
  port: METRICS_PORT,
  endpoint: METRICS_ENDPOINT,
}, () => {
  console.log(`Prometheus metrics available at http://localhost:${METRICS_PORT}${METRICS_ENDPOINT}`);
});

// Configure SDK with metrics
// NOTE(review): tracing.js also creates and starts its own NodeSDK. When both
// files are loaded in one process there are two SDK instances; depending on
// the SDK version the second one may try to register global providers again.
// Consider passing this reader as `metricReader` to a single shared NodeSDK
// instead - TODO confirm against the installed @opentelemetry/sdk-node version.
const sdk = new NodeSDK({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
  }),
  metricReader: metricExporter,
});

sdk.start();

// Instruments for this service, all created from one meter.
const serviceMeter = metrics.getMeter('user-service', '1.0.0');

const requestsTotal = serviceMeter.createCounter('http_requests_total', {
  description: 'Total number of HTTP requests',
});

const requestDurationSeconds = serviceMeter.createHistogram('http_request_duration_seconds', {
  description: 'Duration of HTTP requests in seconds',
});

const openConnections = serviceMeter.createUpDownCounter('active_connections', {
  description: 'Number of active connections',
});

/**
 * Counts one HTTP request and records its duration.
 *
 * @param {string} method - HTTP method, used as a label on both instruments.
 * @param {string} path - Request path, used as a label on both instruments.
 * @param {number} statusCode - Response status; stringified into the counter's
 *   status_code label only.
 * @param {number} duration - Value recorded on the duration histogram (the
 *   instrument is described as seconds).
 */
function recordHttpRequest(method, path, statusCode, duration) {
  const sharedLabels = { method, path };

  requestsTotal.add(1, {
    ...sharedLabels,
    status_code: statusCode.toString(),
  });

  requestDurationSeconds.record(duration, sharedLabels);
}

/** Adds one to the active-connections counter. */
function incrementConnections() {
  return openConnections.add(1);
}

/** Subtracts one from the active-connections counter. */
function decrementConnections() {
  return openConnections.add(-1);
}

const customMetrics = {
  recordHttpRequest,
  incrementConnections,
  decrementConnections,
};

module.exports = customMetrics;

Step 5: Structured Logging with Correlation

Create logger.js:

const winston = require('winston');
const { trace, context } = require('@opentelemetry/api');

/**
 * Renders one log record as a JSON line enriched with trace correlation IDs.
 *
 * traceId/spanId come from the currently active span; when there is none they
 * are 'no-trace' / 'no-span'. Extra metadata is spread after the fixed fields,
 * and the stack (if present) is appended last.
 *
 * @param {object} info - Log record passed in by winston's printf format.
 * @returns {string} The serialized log entry.
 */
function formatWithTraceContext({ timestamp, level, message, stack, ...meta }) {
  const spanContext = trace.getActiveSpan()?.spanContext();

  const entry = {
    timestamp,
    level,
    message,
    traceId: spanContext?.traceId || 'no-trace',
    spanId: spanContext?.spanId || 'no-span',
    ...meta,
  };

  if (stack) {
    entry.stack = stack;
  }

  return JSON.stringify(entry);
}

// Log level comes from LOG_LEVEL and defaults to 'info'.
const logLevel = process.env.LOG_LEVEL || 'info';

// Timestamp and error stacks are resolved first, then the record is serialized.
const logFormat = winston.format.combine(
  winston.format.timestamp(),
  winston.format.errors({ stack: true }),
  winston.format.printf(formatWithTraceContext)
);

// Every record goes both to the console and to app.log.
const logger = winston.createLogger({
  level: logLevel,
  format: logFormat,
  transports: [
    new winston.transports.Console(),
    new winston.transports.File({ filename: 'app.log' }),
  ],
});

module.exports = logger;

Step 6: Enhanced Application with Metrics and Logging

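Update app.js to include metrics and logging. The listing below shows only the metrics and logging pieces; keep the tracing middleware from Step 3 alongside them if you still want the custom request spans: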

require('./tracing');
const express = require('express');
const logger = require('./logger');
const metrics = require('./metrics');

const app = express();

// Metrics middleware.
//
// Tracks in-flight requests and, once per request, records the request
// counter/duration and writes an access-log line.
app.use((req, res, next) => {
  const startTime = Date.now();
  metrics.incrementConnections();

  // Runs exactly once per request. 'finish' fires when the response has been
  // fully written; 'close' also fires when the client disconnects early, in
  // which case 'finish' never happens and the active-connections gauge would
  // otherwise never be decremented.
  let recorded = false;
  const recordRequest = () => {
    if (recorded) {
      return;
    }
    recorded = true;

    const duration = (Date.now() - startTime) / 1000;

    // Label metrics with the matched route template (e.g. '/users/:id') rather
    // than the concrete URL, so each distinct id does not create a new time
    // series. Requests that matched no route still fall back to the raw path.
    const routeLabel = req.route ? `${req.baseUrl}${req.route.path}` : req.path;

    metrics.recordHttpRequest(req.method, routeLabel, res.statusCode, duration);
    metrics.decrementConnections();

    logger.info('HTTP Request', {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode,
      duration,
      userAgent: req.get('user-agent'),
    });
  };

  res.once('finish', recordRequest);
  res.once('close', recordRequest);

  next();
});

// GET /users/:id - logs the lookup, builds a placeholder user for the
// requested id, and answers 500 if anything in the try block throws.
async function handleGetUser(req, res) {
  const { id: userId } = req.params;

  logger.info('Fetching user', { userId });

  try {
    // Your user fetching logic here
    const user = { id: userId, name: `User ${userId}` };

    logger.info('User found', { userId, userName: user.name });
    res.json(user);
  } catch (fetchError) {
    logger.error('Error fetching user', { userId, error: fetchError.message });
    res.status(500).json({ error: 'Internal server error' });
  }
}

app.get('/users/:id', handleGetUser);

// Health check endpoint: logs at debug level and reports status, the current
// ISO timestamp, and the process uptime.
function handleHealthCheck(req, res) {
  logger.debug('Health check requested');

  const timestamp = new Date().toISOString();
  const uptime = process.uptime();

  res.json({ status: 'healthy', timestamp, uptime });
}

app.get('/health', handleHealthCheck);

// Listening port: the PORT environment variable, or 3000 when it is not set.
const PORT = process.env.PORT || 3000;

// Emits a structured log entry once the server is accepting connections.
function onServerStarted() {
  logger.info('Server started', { port: PORT });
}

app.listen(PORT, onServerStarted);

Step 7: Docker Compose for Local Development

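Create docker-compose.yml (it builds the app from a Dockerfile in the current directory and mounts a prometheus.yml scrape configuration from the same directory, so both files must exist):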

# Local development stack: the instrumented app plus Jaeger, Prometheus and Grafana.
version: '3.8'

services:
  # The Node.js application, built from the current directory.
  # NOTE(review): `build: .` needs a Dockerfile, which this guide does not show.
  app:
    build: .
    ports:
      - "3000:3000"
      - "9090:9090"  # Prometheus metrics
    environment:
      # Read by tracing.js; points the exporter at the jaeger service below.
      - JAEGER_ENDPOINT=http://jaeger:14268/api/traces
    depends_on:
      - jaeger
      - prometheus

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686"  # Jaeger UI
      - "14268:14268"  # Jaeger collector
    environment:
      # Presumably turns on Jaeger's OTLP receiver; no OTLP port is published
      # here and the app sends to 14268 instead - TODO confirm it is needed.
      - COLLECTOR_OTLP_ENABLED=true

  prometheus:
    image: prom/prometheus:latest
    ports:
      # Host port 9091 so it does not clash with the app's 9090 mapping above.
      - "9091:9090"  # Prometheus UI
    volumes:
      # Mounted from the current directory; this guide does not show prometheus.yml.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001 because the app already uses 3000.
      - "3001:3000"  # Grafana UI
    environment:
      # Fixed admin password; suitable for local development only.
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana-storage:/var/lib/grafana

# Named volume that backs /var/lib/grafana in the grafana service.
volumes:
  grafana-storage:

Summary

In this guide, OpenTelemetry provides distributed tracing and metrics collection for a Node.js application, while winston adds structured logs that carry the active trace and span IDs. Combined with Jaeger, Prometheus, and Grafana, this forms a comprehensive observability stack for monitoring application performance and troubleshooting issues.