Skip to content
Go back

Implementing Observability with OpenTelemetry in Node.js

Implementing Observability with OpenTelemetry in Node.js

Introduction

OpenTelemetry provides unified observability through distributed tracing, metrics, and logging. This guide instruments Node.js applications for comprehensive monitoring.

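Prerequisites

- Node.js and npm (used for the `npm install` commands in Step 1)
- Docker and Docker Compose (used in Step 7 to run Jaeger, Prometheus, and Grafana locally)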

Step 1: Install OpenTelemetry SDK

npm install @opentelemetry/sdk-node @opentelemetry/auto-instrumentations-node
npm install @opentelemetry/exporter-jaeger @opentelemetry/exporter-prometheus
npm install @opentelemetry/instrumentation-express @opentelemetry/instrumentation-http
npm install @opentelemetry/api @opentelemetry/resources @opentelemetry/semantic-conventions
npm install express winston

Step 2: Basic Tracing Setup

Create tracing.js:

const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');

// Trace exporter: the collector URL comes from JAEGER_ENDPOINT when it is set
// (and non-empty), otherwise a collector on localhost is assumed.
const jaegerOptions = {
  endpoint: process.env.JAEGER_ENDPOINT || 'http://localhost:14268/api/traces',
};
const traceExporter = new JaegerExporter(jaegerOptions);

// Resource attributes describing this service: name, version, and the
// deployment environment (NODE_ENV, defaulting to 'development').
const serviceResource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
  [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
  [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'development',
});

// Auto-instrumentation bundle with the noisy fs instrumentation turned off.
const autoInstrumentations = getNodeAutoInstrumentations({
  '@opentelemetry/instrumentation-fs': { enabled: false },
});

// Assemble and start the SDK from the pieces above.
const sdk = new NodeSDK({
  resource: serviceResource,
  traceExporter,
  instrumentations: [autoInstrumentations],
});

sdk.start();
console.log('OpenTelemetry initialized');

// Graceful shutdown.
//
// On SIGTERM, shut the SDK down and then exit. The exit code reflects the
// outcome: 0 when shutdown succeeded, 1 when it rejected, so a supervisor can
// tell a clean stop from one where shutdown failed.
process.on('SIGTERM', () => {
  let exitCode = 0;

  sdk.shutdown()
    .then(() => console.log('OpenTelemetry terminated'))
    .catch((error) => {
      exitCode = 1;
      console.error('Error terminating OpenTelemetry', error);
    })
    .finally(() => process.exit(exitCode));
});

Step 3: Express.js Application with Tracing

Create app.js:

// Import tracing BEFORE other imports
require('./tracing');

const express = require('express');
const { trace, context, SpanStatusCode } = require('@opentelemetry/api');

// Express application that the middleware and routes below are attached to.
const app = express();
// Tracer used for the manually created spans in this file, obtained under the
// name 'user-service' and version '1.0.0'.
const tracer = trace.getTracer('user-service', '1.0.0');

// Register Express's JSON body-parsing middleware for all requests.
app.use(express.json());

// Custom middleware for request tracing.
//
// Starts one span per incoming request, exposes it to later handlers as
// `req.span`, and runs the rest of the middleware chain with that span set as
// the active span. The span is ended exactly once, when the response finishes
// or the underlying connection closes, whichever happens first.
app.use((req, res, next) => {
  const span = tracer.startSpan(`${req.method} ${req.path}`);

  // Add custom attributes
  span.setAttributes({
    'http.method': req.method,
    'http.path': req.path,
    'http.user_agent': req.get('user-agent') || '',
    'user.id': req.headers['x-user-id'] || 'anonymous',
  });

  // Store span on the request so route handlers can annotate it.
  req.span = span;

  // 'finish' is not emitted when the client disconnects before the response
  // is fully sent, which would leave the span open forever. Listen for 'close'
  // as well and guard against ending the span twice.
  let spanEnded = false;
  const endSpan = (responseCompleted) => {
    if (spanEnded) {
      return;
    }
    spanEnded = true;

    // Only a completed response has a status code that was actually sent.
    if (responseCompleted) {
      span.setAttribute('http.status_code', res.statusCode);

      // NOTE(review): 4xx responses are marked as span errors too; confirm
      // that this is the intended policy.
      if (res.statusCode >= 400) {
        span.setStatus({
          code: SpanStatusCode.ERROR,
          message: `HTTP ${res.statusCode}`,
        });
      }
    }

    span.end();
  };

  res.once('finish', () => endSpan(true));
  res.once('close', () => endSpan(false));

  // Continue the chain with this span active, so spans started downstream
  // (for example via tracer.startActiveSpan) are parented to it.
  context.with(trace.setSpan(context.active(), span), next);
});

// Simulated database service

/**
 * Pretends to load a user record, wrapping the lookup in a
 * 'db.users.findById' span.
 *
 * @param {string} userId - Identifier to look up; the literal 'error'
 *   triggers the simulated failure path.
 * @returns {Promise<{id: string, name: string, email: string}>} The fake user.
 * @throws {Error} 'User not found' when userId is 'error'. The error is
 *   recorded on the span before being rethrown.
 */
async function getUserFromDB(userId) {
  const lookup = async (dbSpan) => {
    dbSpan.setAttributes({
      'db.operation': 'findById',
      'db.collection.name': 'users',
      'user.id': userId,
    });

    try {
      // Random delay of up to 100 ms standing in for database latency.
      const latencyMs = Math.random() * 100;
      await new Promise((done) => {
        setTimeout(done, latencyMs);
      });

      if (userId === 'error') {
        throw new Error('User not found');
      }

      const record = {
        id: userId,
        name: `User ${userId}`,
        email: `user${userId}@example.com`,
      };

      dbSpan.setStatus({ code: SpanStatusCode.OK });
      return record;
    } catch (failure) {
      dbSpan.setStatus({
        code: SpanStatusCode.ERROR,
        message: failure.message,
      });
      dbSpan.recordException(failure);
      throw failure;
    } finally {
      // The span is closed on both the success and the failure path.
      dbSpan.end();
    }
  };

  return tracer.startActiveSpan('db.users.findById', lookup);
}

// API Routes

/**
 * GET /users/:id handler.
 *
 * Looks the user up via getUserFromDB and annotates the request span
 * (req.span) with the outcome. Any failure inside the try block is answered
 * with a 404 and a fixed 'User not found' body.
 */
async function handleGetUser(req, res) {
  const { span } = req;
  const { id: userId } = req.params;

  try {
    const found = await getUserFromDB(userId);

    span.setAttributes({
      'user.found': true,
      'response.user.name': found.name,
    });

    res.json(found);
  } catch (err) {
    span.setAttributes({
      'user.found': false,
      'error.message': err.message,
    });

    res.status(404).json({ error: 'User not found' });
  }
}

app.get('/users/:id', handleGetUser);

/**
 * GET /health handler: tags the request span as a health check and replies
 * with a static 'healthy' status plus the current time in ISO-8601 form.
 */
function handleHealth(req, res) {
  req.span.setAttributes({
    'health.check': true,
    'health.status': 'healthy',
  });

  const body = { status: 'healthy', timestamp: new Date().toISOString() };
  res.json(body);
}

app.get('/health', handleHealth);

// Error handling middleware.
//
// Records the error on the request span (when one exists), logs it, and
// answers with a generic 500. The four-parameter signature is what marks this
// function as an error handler for Express, so `next` must stay in the list.
app.use((error, req, res, next) => {
  if (req.span) {
    req.span.recordException(error);
    req.span.setStatus({
      code: SpanStatusCode.ERROR,
      message: error.message,
    });
  }

  console.error('Unhandled error:', error);

  // If the response has already started, a new status line and body cannot be
  // written; delegate to Express's default error handler instead.
  if (res.headersSent) {
    return next(error);
  }

  res.status(500).json({ error: 'Internal server error' });
});

// Port to listen on: PORT from the environment when set, otherwise 3000.
const PORT = process.env.PORT || 3000;

// Logged once the HTTP server is accepting connections.
const announceListening = () => {
  console.log(`Server running on port ${PORT}`);
};

app.listen(PORT, announceListening);

Step 4: Custom Metrics with Prometheus

Create metrics.js:

const { NodeSDK } = require('@opentelemetry/sdk-node');
const { PrometheusExporter } = require('@opentelemetry/exporter-prometheus');
const { metrics } = require('@opentelemetry/api');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');

// Initialize metrics: a Prometheus exporter configured for port 9090 and the
// /metrics path, with a callback that logs the resulting URL.
const prometheusOptions = {
  port: 9090,
  endpoint: '/metrics',
};
const announceMetricsEndpoint = () => {
  console.log('Prometheus metrics available at http://localhost:9090/metrics');
};
const metricExporter = new PrometheusExporter(prometheusOptions, announceMetricsEndpoint);

// Configure SDK with metrics, using the exporter above as the metric reader.
// NOTE(review): tracing.js also constructs and starts a NodeSDK; when both
// modules are loaded in one process, two SDK instances are started. Confirm
// this is intended.
const metricsResource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
});
const sdk = new NodeSDK({
  resource: metricsResource,
  metricReader: metricExporter,
});

sdk.start();

// Create custom metrics
const meter = metrics.getMeter('user-service', '1.0.0');

// Monotonic count of handled HTTP requests.
const httpRequestCounter = meter.createCounter('http_requests_total', {
  description: 'Total number of HTTP requests',
});

// Request latency, recorded in seconds. The SDK's default histogram bucket
// boundaries (0, 5, 10, 25, ... 10000) are scaled for milliseconds, so
// second-valued samples would nearly all fall into a single bucket. Advise
// boundaries that are meaningful for seconds instead.
const httpRequestDuration = meter.createHistogram('http_request_duration_seconds', {
  description: 'Duration of HTTP requests in seconds',
  advice: {
    explicitBucketBoundaries: [
      0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1, 2.5, 5, 7.5, 10,
    ],
  },
});

// Connections currently in flight; an up/down counter because the value both
// rises and falls.
const activeConnectionsGauge = meter.createUpDownCounter('active_connections', {
  description: 'Number of active connections',
});

/**
 * Small facade over the instruments defined in this module.
 *
 * - recordHttpRequest(method, path, statusCode, duration): adds 1 to the
 *   request counter (labelled with method, path and the stringified status
 *   code) and records `duration` on the histogram (labelled with method and
 *   path).
 * - incrementConnections() / decrementConnections(): add +1 / -1 to the
 *   active-connections up/down counter.
 */
const customMetrics = {
  recordHttpRequest(method, path, statusCode, duration) {
    const routeLabels = { method, path };

    httpRequestCounter.add(1, {
      ...routeLabels,
      status_code: statusCode.toString(),
    });

    httpRequestDuration.record(duration, routeLabels);
  },

  incrementConnections() {
    return activeConnectionsGauge.add(1);
  },

  decrementConnections() {
    return activeConnectionsGauge.add(-1);
  },
};

module.exports = customMetrics;

Step 5: Structured Logging with Correlation

Create logger.js:

const winston = require('winston');
const { trace, context } = require('@opentelemetry/api');

// Create logger with correlation IDs

/**
 * Renders one log record as a single-line JSON string.
 *
 * The trace and span IDs of the currently active span are added so log lines
 * can be correlated with traces; 'no-trace' / 'no-span' are used when no span
 * is active. Remaining metadata is spread in after the IDs, and `stack` is
 * appended last when present.
 */
const toCorrelatedJson = ({ timestamp, level, message, stack, ...meta }) => {
  const activeSpan = trace.getActiveSpan();
  const ids = activeSpan ? activeSpan.spanContext() : {};

  const logEntry = {
    timestamp,
    level,
    message,
    traceId: ids.traceId || 'no-trace',
    spanId: ids.spanId || 'no-span',
    ...meta,
    ...(stack ? { stack } : {}),
  };

  return JSON.stringify(logEntry);
};

// Level comes from LOG_LEVEL (default 'info'); output goes to the console and
// to app.log.
const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.errors({ stack: true }),
    winston.format.printf(toCorrelatedJson),
  ),
  transports: [
    new winston.transports.Console(),
    new winston.transports.File({ filename: 'app.log' }),
  ],
});

module.exports = logger;

Step 6: Enhanced Application with Metrics and Logging

Update app.js to include metrics and logging:

require('./tracing');
const express = require('express');
const logger = require('./logger');
const metrics = require('./metrics');

// Express application that the middleware and routes below are attached to.
const app = express();

// Metrics middleware.
//
// Tracks in-flight requests and, for every completed response, records the
// request counter/duration metrics and writes a structured access-log line.
app.use((req, res, next) => {
  // Monotonic clock: unlike Date.now(), it cannot jump when the wall clock is
  // adjusted, so durations are never negative or inflated.
  const startedAt = process.hrtime.bigint();
  metrics.incrementConnections();

  // 'finish' is not emitted when the client disconnects before the response
  // is fully sent. Handling 'close' as well (guarded so the bookkeeping runs
  // once) keeps the active-connections count from drifting upward.
  let settled = false;
  const settle = (completed) => {
    if (settled) {
      return;
    }
    settled = true;

    // Decrement first so the gauge is corrected even if recording throws.
    metrics.decrementConnections();

    const duration = Number(process.hrtime.bigint() - startedAt) / 1e9;

    if (!completed) {
      // No status code was delivered to the client, so none is reported.
      logger.warn('HTTP Request aborted', {
        method: req.method,
        path: req.path,
        duration,
        userAgent: req.get('user-agent'),
      });
      return;
    }

    metrics.recordHttpRequest(req.method, req.path, res.statusCode, duration);

    logger.info('HTTP Request', {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode,
      duration,
      userAgent: req.get('user-agent'),
    });
  };

  res.once('finish', () => settle(true));
  res.once('close', () => settle(false));

  next();
});

// GET /users/:id — builds a placeholder user for the requested id, logging
// before and after; a failure inside the try block yields a 500 response.
app.get('/users/:id', async (req, res) => {
  const { id: userId } = req.params;

  logger.info('Fetching user', { userId });

  try {
    // Placeholder: replace with the real user lookup.
    const profile = { id: userId, name: `User ${userId}` };
    const { name: userName } = profile;

    logger.info('User found', { userId, userName });
    res.json(profile);
  } catch (err) {
    logger.error('Error fetching user', { userId, error: err.message });
    res.status(500).json({ error: 'Internal server error' });
  }
});

// Health check endpoint: reports a static 'healthy' status together with the
// current time (ISO-8601) and the process uptime.
app.get('/health', (_req, res) => {
  logger.debug('Health check requested');

  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
  };

  res.json(report);
});

// Port to listen on: PORT from the environment when set, otherwise 3000.
const PORT = process.env.PORT || 3000;

// Logged through the structured logger once the server is listening.
const onListening = () => {
  logger.info('Server started', { port: PORT });
};

app.listen(PORT, onListening);

Step 7: Docker Compose for Local Development

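Create docker-compose.yml. The prometheus service mounts ./prometheus.yml, so that file must exist next to docker-compose.yml and should scrape the app's metrics endpoint (app:9090/metrics):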

# Local development stack: the application plus Jaeger, Prometheus and Grafana.
version: '3.8'

services:
  # The application, built from the current directory (build context `.`).
  app:
    build: .
    ports:
      - "3000:3000"
      - "9090:9090"  # Prometheus metrics
    environment:
      # Points the trace exporter at the jaeger service defined below.
      - JAEGER_ENDPOINT=http://jaeger:14268/api/traces
    depends_on:
      - jaeger
      - prometheus

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686"  # Jaeger UI
      - "14268:14268"  # Jaeger collector
    environment:
      - COLLECTOR_OTLP_ENABLED=true

  prometheus:
    image: prom/prometheus:latest
    ports:
      # Host port 9091 is used because host port 9090 is already mapped to the
      # app service above.
      - "9091:9090"  # Prometheus UI
    volumes:
      # Requires a prometheus.yml file next to this compose file.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001 is used because host port 3000 is already mapped to the
      # app service above.
      - "3001:3000"  # Grafana UI
    environment:
      # NOTE(review): fixed admin password; presumably acceptable only for
      # local development.
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      # Named volume (declared below) mounted at Grafana's data directory.
      - grafana-storage:/var/lib/grafana

volumes:
  grafana-storage:

Summary

OpenTelemetry provides distributed tracing, metrics collection, and structured logging for Node.js applications. Integration with Jaeger, Prometheus, and Grafana creates a comprehensive observability stack for monitoring application performance and troubleshooting issues.


Share this post on:

Previous Post
Implementing Service Mesh with Istio in Kubernetes
Next Post
Deploying Node.js Apps to Kubernetes with Helm Charts