Implementing Observability with OpenTelemetry in Node.js
Introduction
OpenTelemetry provides unified observability through distributed tracing, metrics, and logging. This guide instruments Node.js applications for comprehensive monitoring.
Prerequisites
- Node.js >=14
- Basic understanding of observability concepts
- Jaeger and Prometheus (optional)
Step 1: Install OpenTelemetry SDK
npm install @opentelemetry/sdk-node @opentelemetry/auto-instrumentations-node
npm install @opentelemetry/exporter-jaeger @opentelemetry/exporter-prometheus
npm install @opentelemetry/instrumentation-express @opentelemetry/instrumentation-http
Step 2: Basic Tracing Setup
Create tracing.js:
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');
// Trace exporter: JAEGER_ENDPOINT overrides the local default endpoint.
const jaegerEndpoint = process.env.JAEGER_ENDPOINT || 'http://localhost:14268/api/traces';
const traceExporter = new JaegerExporter({ endpoint: jaegerEndpoint });

// Resource attributes identifying this service (name, version, environment).
const serviceResource = new Resource({
  [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
  [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
  [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'development',
});

// Auto-instrumentations, with the noisy fs instrumentation switched off.
const instrumentations = [
  getNodeAutoInstrumentations({
    '@opentelemetry/instrumentation-fs': { enabled: false },
  }),
];

// Build and start the SDK.
const sdk = new NodeSDK({
  resource: serviceResource,
  traceExporter,
  instrumentations,
});

sdk.start();
console.log('OpenTelemetry initialized');

/**
 * SIGTERM handler: shuts the SDK down, logs the outcome, and then exits
 * the process with code 0 whether or not the shutdown succeeded.
 */
async function shutdownTelemetry() {
  try {
    await sdk.shutdown();
    console.log('OpenTelemetry terminated');
  } catch (error) {
    console.error('Error terminating OpenTelemetry', error);
  } finally {
    process.exit(0);
  }
}

// Graceful shutdown
process.on('SIGTERM', shutdownTelemetry);
Step 3: Express.js Application with Tracing
Create app.js:
// Import tracing BEFORE other imports
require('./tracing');
const express = require('express');
const { trace, context, SpanStatusCode } = require('@opentelemetry/api');
const app = express();
const tracer = trace.getTracer('user-service', '1.0.0');

app.use(express.json());

/**
 * Request tracing middleware.
 *
 * Starts one span per request, exposes it as `req.span`, and runs the rest
 * of the middleware chain with that span as the active span, so spans
 * started downstream (e.g. with `tracer.startActiveSpan`) become its
 * children and `trace.getActiveSpan()` resolves to it.
 */
app.use((req, res, next) => {
  const span = tracer.startSpan(`${req.method} ${req.path}`);

  // Add custom attributes
  span.setAttributes({
    'http.method': req.method,
    'http.path': req.path,
    'http.user_agent': req.get('user-agent') || '',
    'user.id': req.headers['x-user-id'] || 'anonymous',
  });

  // Store span on the request so route handlers can annotate it
  req.span = span;

  // End the span exactly once. 'finish' is not emitted when the client
  // disconnects before the response completes, so 'close' is handled too;
  // otherwise the span for an aborted request would never be ended.
  let ended = false;
  const endSpan = () => {
    if (ended) {
      return;
    }
    ended = true;

    span.setAttribute('http.status_code', res.statusCode);
    if (res.statusCode >= 400) {
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: `HTTP ${res.statusCode}`,
      });
    }
    span.end();
  };
  res.once('finish', endSpan);
  res.once('close', endSpan);

  // Make the span active for everything that runs downstream of next().
  context.with(trace.setSpan(context.active(), span), next);
});
/**
 * Simulated database service: looks up a user inside a
 * 'db.users.findById' span.
 *
 * @param {string} userId - Identifier to look up; the literal value
 *   'error' triggers the simulated failure path.
 * @returns {Promise<{id: string, name: string, email: string}>} The fake user record.
 * @throws {Error} 'User not found' when userId is 'error'.
 */
async function getUserFromDB(userId) {
  // Random pause of up to 100 ms standing in for a real query.
  const simulateLatency = () =>
    new Promise((resolve) => {
      setTimeout(resolve, Math.random() * 100);
    });

  const runLookup = async (span) => {
    span.setAttributes({
      'db.operation': 'findById',
      'db.collection.name': 'users',
      'user.id': userId,
    });

    try {
      await simulateLatency();

      if (userId === 'error') {
        throw new Error('User not found');
      }

      const record = {
        id: userId,
        name: `User ${userId}`,
        email: `user${userId}@example.com`,
      };
      span.setStatus({ code: SpanStatusCode.OK });
      return record;
    } catch (lookupError) {
      // Mark the span as failed and attach the exception before rethrowing.
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: lookupError.message,
      });
      span.recordException(lookupError);
      throw lookupError;
    } finally {
      // The span is ended on both the success and the failure path.
      span.end();
    }
  };

  return tracer.startActiveSpan('db.users.findById', runLookup);
}
// API Routes

/**
 * GET /users/:id handler. Looks the user up via getUserFromDB and records
 * the outcome on the request span (req.span). Any error raised inside the
 * try block is answered with a 404 JSON body.
 */
async function handleGetUser(req, res) {
  const requestSpan = req.span;
  const { id: userId } = req.params;

  try {
    const user = await getUserFromDB(userId);
    requestSpan.setAttributes({
      'user.found': true,
      'response.user.name': user.name,
    });
    res.json(user);
  } catch (lookupError) {
    requestSpan.setAttributes({
      'user.found': false,
      'error.message': lookupError.message,
    });
    res.status(404).json({ error: 'User not found' });
  }
}

app.get('/users/:id', handleGetUser);
/**
 * GET /health handler. Tags the request span (req.span) as a health check
 * and replies with a static 'healthy' status plus the current ISO timestamp.
 */
function handleHealthCheck(req, res) {
  req.span.setAttributes({
    'health.check': true,
    'health.status': 'healthy',
  });

  const timestamp = new Date().toISOString();
  res.json({ status: 'healthy', timestamp });
}

app.get('/health', handleHealthCheck);
/**
 * Error handling middleware.
 *
 * Records the error on the request span (when one exists), logs it, and
 * answers with a generic 500 JSON body. Express recognises error handlers
 * by their four-parameter signature, so `next` must stay in the list.
 */
app.use((error, req, res, next) => {
  if (req.span) {
    req.span.recordException(error);
    req.span.setStatus({
      code: SpanStatusCode.ERROR,
      message: error.message,
    });
  }

  console.error('Unhandled error:', error);

  // Once the response has started, status and body can no longer be set;
  // hand the error to Express's default handler instead of writing again.
  if (res.headersSent) {
    next(error);
    return;
  }

  res.status(500).json({ error: 'Internal server error' });
});

// Listen on PORT from the environment, falling back to 3000.
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});
Step 4: Custom Metrics with Prometheus
Create metrics.js:
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { PrometheusExporter } = require('@opentelemetry/exporter-prometheus');
const { metrics } = require('@opentelemetry/api');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');
// Initialize metrics: the Prometheus exporter is configured for port 9090
// at /metrics. The completion callback receives an error when the
// endpoint could not be started, so only report success when there is none.
const metricExporter = new PrometheusExporter({
  port: 9090,
  endpoint: '/metrics',
}, (error) => {
  if (error) {
    console.error('Failed to start Prometheus metrics endpoint', error);
    return;
  }
  console.log('Prometheus metrics available at http://localhost:9090/metrics');
});

// Configure SDK with metrics
// NOTE(review): tracing.js also constructs and starts its own NodeSDK, and the
// Step 6 app loads both modules; confirm running two SDK instances is intended.
const sdk = new NodeSDK({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'nodejs-app',
  }),
  metricReader: metricExporter,
});

sdk.start();
// Create custom metrics
const meter = metrics.getMeter('user-service', '1.0.0');

// Counter of handled HTTP requests.
const httpRequestCounter = meter.createCounter('http_requests_total', {
  description: 'Total number of HTTP requests',
});

// Histogram of request durations; callers pass values in seconds.
// NOTE(review): no bucket boundaries are configured here; confirm the SDK
// defaults are suitable for second-scale values.
const httpRequestDuration = meter.createHistogram('http_request_duration_seconds', {
  description: 'Duration of HTTP requests in seconds',
});

// Up/down counter tracking in-flight connections (incremented and decremented below).
const activeConnectionsGauge = meter.createUpDownCounter('active_connections', {
  description: 'Number of active connections',
});

/**
 * Helper functions exported to the application for recording metrics.
 */
const customMetrics = {
  /**
   * Counts one HTTP request and records its duration.
   *
   * @param {string} method - HTTP method, used as a metric attribute.
   * @param {string} path - Request path, used as a metric attribute.
   * @param {number|string} statusCode - Response status; stored as a string attribute.
   * @param {number} duration - Request duration in seconds.
   */
  recordHttpRequest: (method, path, statusCode, duration) => {
    httpRequestCounter.add(1, {
      method,
      path,
      // String() instead of .toString(): a missing status must not throw
      // from inside a response event handler.
      status_code: String(statusCode),
    });
    httpRequestDuration.record(duration, {
      method,
      path,
    });
  },

  /** Adds one to the active connection count. */
  incrementConnections: () => activeConnectionsGauge.add(1),

  /** Subtracts one from the active connection count. */
  decrementConnections: () => activeConnectionsGauge.add(-1),
};

module.exports = customMetrics;
Step 5: Structured Logging with Correlation
Create logger.js:
const winston = require('winston');
const { trace, context } = require('@opentelemetry/api');
/**
 * Renders one log record as a single JSON line, stamped with the trace and
 * span IDs of the span that is active when the record is formatted
 * ('no-trace' / 'no-span' when no span is active). Extra metadata is merged
 * in after the IDs; a stack trace, when present, is appended last.
 */
const formatWithTraceContext = ({ timestamp, level, message, stack, ...meta }) => {
  const spanContext = trace.getActiveSpan()?.spanContext();

  const logEntry = {
    timestamp,
    level,
    message,
    traceId: spanContext?.traceId || 'no-trace',
    spanId: spanContext?.spanId || 'no-span',
    ...meta,
  };

  if (stack) {
    logEntry.stack = stack;
  }

  return JSON.stringify(logEntry);
};

// Logger with trace correlation IDs; writes to the console and to app.log.
// The level comes from LOG_LEVEL, defaulting to 'info'.
const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.errors({ stack: true }),
    winston.format.printf(formatWithTraceContext)
  ),
  transports: [
    new winston.transports.Console(),
    new winston.transports.File({ filename: 'app.log' }),
  ],
});

module.exports = logger;
Step 6: Enhanced Application with Metrics and Logging
Update app.js to include metrics and logging:
require('./tracing');
const express = require('express');
const logger = require('./logger');
const metrics = require('./metrics');
const app = express();

/**
 * Metrics middleware.
 *
 * Tracks in-flight requests and, for every completed response, records the
 * request counter/duration metrics and writes an access log entry.
 */
app.use((req, res, next) => {
  // Monotonic clock: wall-clock adjustments cannot produce negative durations.
  const startedAt = process.hrtime.bigint();
  metrics.incrementConnections();

  // Settle each request exactly once. 'finish' is not emitted when the
  // client disconnects before the response completes, so 'close' is also
  // handled; otherwise the active connection count would never be
  // decremented for aborted requests.
  let settled = false;
  const settle = (completed) => {
    if (settled) {
      return;
    }
    settled = true;

    metrics.decrementConnections();
    if (!completed) {
      // Aborted request: no final status to report, only release the slot.
      return;
    }

    const duration = Number(process.hrtime.bigint() - startedAt) / 1e9;
    metrics.recordHttpRequest(req.method, req.path, res.statusCode, duration);
    logger.info('HTTP Request', {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode,
      duration,
      userAgent: req.get('user-agent'),
    });
  };

  res.once('finish', () => settle(true));
  res.once('close', () => settle(false));

  next();
});
// GET /users/:id — logs the lookup, builds a placeholder user from the id,
// and returns it as JSON; errors inside the try block yield a 500 response.
app.get('/users/:id', async (req, res) => {
  const { id: userId } = req.params;
  logger.info('Fetching user', { userId });

  try {
    // Your user fetching logic here
    const userName = `User ${userId}`;
    const user = { id: userId, name: userName };

    logger.info('User found', { userId, userName });
    res.json(user);
  } catch (fetchError) {
    logger.error('Error fetching user', { userId, error: fetchError.message });
    res.status(500).json({ error: 'Internal server error' });
  }
});
// Health check endpoint: replies with a static status, the current ISO
// timestamp, and the process uptime.
app.get('/health', (req, res) => {
  logger.debug('Health check requested');

  const timestamp = new Date().toISOString();
  const uptime = process.uptime();
  res.json({ status: 'healthy', timestamp, uptime });
});

// Listen on PORT from the environment, falling back to 3000, and log once
// the server is up.
const PORT = process.env.PORT || 3000;
const announceStart = () => {
  logger.info('Server started', { port: PORT });
};
app.listen(PORT, announceStart);
Step 7: Docker Compose for Local Development
Create docker-compose.yml:
version: '3.8'

services:
  # The Node.js application, built from the Dockerfile in this directory.
  app:
    build: .
    ports:
      - "3000:3000"
      - "9090:9090" # Prometheus metrics
    environment:
      - JAEGER_ENDPOINT=http://jaeger:14268/api/traces
    depends_on:
      - jaeger
      - prometheus

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686" # Jaeger UI
      - "14268:14268" # Jaeger collector
    environment:
      - COLLECTOR_OTLP_ENABLED=true

  # Expects a prometheus.yml next to this file; it is bind-mounted below.
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9091:9090" # Prometheus UI
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000" # Grafana UI
    environment:
      # Local-development credential; change it for any shared environment.
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana-storage:/var/lib/grafana

volumes:
  grafana-storage:
Summary
OpenTelemetry provides distributed tracing, metrics collection, and structured logging for Node.js applications. Integration with Jaeger, Prometheus, and Grafana creates a comprehensive observability stack for monitoring application performance and troubleshooting issues.