Skip to content
Go back

Microservices Design Patterns and Resilience

Microservices Design Patterns and Resilience

Introduction

Microservices resilience patterns ensure system stability in distributed environments. This guide covers circuit breakers, bulkheads, saga patterns, and fault tolerance strategies.

Prerequisites

Step 1: Circuit Breaker Pattern

Implement a circuit breaker to prevent cascading failures:

/**
 * Wraps a service object and stops calling it once it keeps failing.
 *
 * States:
 *  - CLOSED: calls pass through; consecutive failures are counted.
 *  - OPEN: calls are rejected immediately until `nextAttempt` is reached.
 *  - HALF_OPEN: trial calls pass through; `successThreshold` successes close
 *    the circuit again, any single failure re-opens it.
 */
class CircuitBreaker {
  /**
   * @param {object} service - Object whose methods are invoked through `call`.
   * @param {object} [options]
   * @param {number} [options.failureThreshold=5] - Consecutive failures that open the circuit.
   * @param {number} [options.timeout=10000] - Per-call timeout in milliseconds.
   * @param {number} [options.resetTimeout=60000] - Milliseconds to stay OPEN before a trial call.
   * @param {number} [options.successThreshold=3] - HALF_OPEN successes required to close.
   */
  constructor(service, options = {}) {
    this.service = service;
    this.failureThreshold = options.failureThreshold || 5;
    this.timeout = options.timeout || 10000;
    this.resetTimeout = options.resetTimeout || 60000;
    this.successThreshold = options.successThreshold || 3;

    this.state = 'CLOSED'; // CLOSED, OPEN, HALF_OPEN
    this.failureCount = 0;
    this.nextAttempt = Date.now();
    this.successCount = 0;
  }

  /**
   * Invokes `service[method](...args)` under the breaker.
   *
   * @param {string} method - Name of the service method to call.
   * @param {...*} args - Arguments forwarded to the method.
   * @returns {Promise<*>} The service result.
   * @throws {Error} 'Circuit breaker is OPEN' while the circuit is open,
   *   'Request timeout' when the call exceeds `timeout`, or the service error.
   */
  async call(method, ...args) {
    if (this.state === 'OPEN') {
      if (Date.now() < this.nextAttempt) {
        throw new Error('Circuit breaker is OPEN');
      }
      this.state = 'HALF_OPEN';
      this.successCount = 0;
    }

    let timer;
    let result;
    try {
      result = await Promise.race([
        this.service[method](...args),
        new Promise((_, reject) => {
          timer = setTimeout(() => reject(new Error('Request timeout')), this.timeout);
        })
      ]);
    } catch (error) {
      return this.onFailure(error);
    } finally {
      // Always release the timer; otherwise every call leaves a pending
      // timeout behind that keeps the event loop alive.
      clearTimeout(timer);
    }

    return this.onSuccess(result);
  }

  /**
   * Returns a promise that rejects with 'Request timeout' after `timeout` ms.
   * Kept for callers that use it directly; `call` manages its own timer so
   * that it can be cancelled.
   *
   * @returns {Promise<never>}
   */
  timeoutPromise() {
    return new Promise((_, reject) => {
      setTimeout(() => reject(new Error('Request timeout')), this.timeout);
    });
  }

  /**
   * Records a successful call and closes the circuit after enough
   * HALF_OPEN successes.
   *
   * @param {*} result - Value to hand back to the caller.
   * @returns {*} `result`, unchanged.
   */
  onSuccess(result) {
    this.failureCount = 0;

    if (this.state === 'HALF_OPEN') {
      this.successCount++;
      if (this.successCount >= this.successThreshold) {
        this.state = 'CLOSED';
        this.successCount = 0;
      }
    }

    return result;
  }

  /**
   * Records a failed call, opening the circuit when the threshold is reached
   * or when a HALF_OPEN trial call fails.
   *
   * @param {*} error - The failure to rethrow.
   * @throws Always rethrows `error`.
   */
  onFailure(error) {
    this.failureCount++;

    // A trial call failing means the service has not recovered: re-open at
    // once, even if an earlier trial success reset the failure counter.
    if (this.state === 'HALF_OPEN' || this.failureCount >= this.failureThreshold) {
      this.state = 'OPEN';
      this.nextAttempt = Date.now() + this.resetTimeout;
    }

    throw error;
  }

  /**
   * @returns {{state: string, failureCount: number, nextAttempt: number}}
   *   Current breaker state for monitoring.
   */
  getState() {
    return {
      state: this.state,
      failureCount: this.failureCount,
      nextAttempt: this.nextAttempt
    };
  }
}

// Usage example
/**
 * Demo payment backend that fails at random so the breaker has something
 * to react to.
 */
class PaymentService {
  /**
   * Pretends to charge a card.
   *
   * @param {number} amount - Amount echoed back in the receipt.
   * @param {string} cardToken - Accepted but not used by the simulation.
   * @returns {Promise<{transactionId: number, amount: number}>}
   * @throws {Error} 'Payment failed' when the random draw is not above 0.3.
   */
  async processPayment(amount, cardToken) {
    const roll = Math.random();
    const approved = roll > 0.3; // 70% success rate

    if (approved) {
      return { transactionId: Date.now(), amount };
    }

    throw new Error('Payment failed');
  }
}

// Module-wide instances: a single breaker guarding a single payment service.
const paymentService = new PaymentService();
const circuitBreaker = new CircuitBreaker(paymentService, {
  failureThreshold: 3,
  timeout: 5000,
  resetTimeout: 30000
});

/**
 * Charges a card through the circuit breaker, logging the outcome.
 *
 * @param {number} amount - Forwarded to `processPayment`.
 * @param {string} cardToken - Forwarded to `processPayment`.
 * @returns {Promise<*>} Whatever the breaker-protected call resolves to.
 * @throws Rethrows any error raised by the breaker or the service.
 */
async function makePayment(amount, cardToken) {
  let receipt;

  try {
    receipt = await circuitBreaker.call('processPayment', amount, cardToken);
    console.log('Payment successful:', receipt);
  } catch (failure) {
    console.error('Payment failed:', failure.message);
    throw failure;
  }

  return receipt;
}

module.exports = { CircuitBreaker, makePayment };

Step 2: Retry Pattern with Exponential Backoff

/**
 * Re-runs an async function on retryable failures, waiting with exponential
 * backoff (optionally jittered) between attempts.
 */
class RetryPattern {
  /**
   * @param {object} [options]
   * @param {number} [options.maxRetries=3] - Retries after the first attempt; 0 disables retrying.
   * @param {number} [options.baseDelay=1000] - Delay before the first retry, in milliseconds.
   * @param {number} [options.maxDelay=30000] - Upper bound for the un-jittered delay.
   * @param {boolean} [options.jitter=true] - Whether to randomise each delay by up to 25%.
   */
  constructor(options = {}) {
    // Fall back only when the option is missing, so explicit falsy values
    // (jitter: false, maxRetries: 0, baseDelay: 0) are honoured.
    const optionOr = (value, fallback) =>
      (value === undefined || value === null ? fallback : value);

    this.maxRetries = optionOr(options.maxRetries, 3);
    this.baseDelay = optionOr(options.baseDelay, 1000);
    this.maxDelay = optionOr(options.maxDelay, 30000);
    this.jitter = optionOr(options.jitter, true);
  }

  /**
   * Calls `fn(...args)` until it succeeds, a non-retryable error occurs, or
   * the retry budget is exhausted.
   *
   * @param {Function} fn - Async function to run.
   * @param {...*} args - Arguments forwarded to `fn` on every attempt.
   * @returns {Promise<*>} The first successful result.
   * @throws The last error when retries run out, or the first non-retryable error.
   */
  async execute(fn, ...args) {
    let lastError;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      try {
        return await fn(...args);
      } catch (error) {
        lastError = error;

        if (attempt === this.maxRetries) {
          break;
        }

        if (!this.shouldRetry(error)) {
          throw error;
        }

        const delay = this.calculateDelay(attempt);
        console.log(`Retry attempt ${attempt + 1} after ${delay}ms`);
        await this.sleep(delay);
      }
    }

    throw lastError;
  }

  /**
   * Decides whether a failure is worth retrying: network errors, timeouts
   * and 5xx status codes.
   *
   * @param {*} error - The thrown value; may lack `message`, `code` or `status`.
   * @returns {boolean}
   */
  shouldRetry(error) {
    if (error === undefined || error === null) {
      return false;
    }

    const retryableErrors = [
      'ECONNRESET',
      'ECONNREFUSED',
      'ETIMEDOUT',
      'Request timeout'
    ];

    // Thrown values are not guaranteed to carry a string message; reading it
    // blindly would replace the real failure with a TypeError.
    const message = typeof error.message === 'string' ? error.message : '';

    const matchesKnownError = retryableErrors.some(errorType =>
      message.includes(errorType) ||
      error.code === errorType
    );

    return matchesKnownError || (error.status >= 500 && error.status < 600);
  }

  /**
   * Computes the wait before the next retry.
   *
   * @param {number} attempt - Zero-based index of the attempt that just failed.
   * @returns {number} Delay in milliseconds, never negative.
   */
  calculateDelay(attempt) {
    const exponentialDelay = this.baseDelay * Math.pow(2, attempt);
    const cappedDelay = Math.min(exponentialDelay, this.maxDelay);

    if (this.jitter) {
      // Add random jitter (±25%)
      const jitterRange = cappedDelay * 0.25;
      const jitter = (Math.random() - 0.5) * 2 * jitterRange;
      return Math.max(0, cappedDelay + jitter);
    }

    return cappedDelay;
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>} Resolves after `ms` milliseconds.
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// Usage with service calls
/**
 * Demo downstream service that fails at random.
 */
class ExternalService {
  /**
   * Pretends to look up a user.
   *
   * @param {*} userId - Identifier echoed into the returned record.
   * @returns {Promise<{userId: *, name: string, email: string}>}
   * @throws {Error} 'Service temporarily unavailable' when the random draw is below 0.6.
   */
  async fetchUserData(userId) {
    const unavailable = Math.random() < 0.6; // simulate an unreliable service
    if (unavailable) {
      throw new Error('Service temporarily unavailable');
    }

    const name = `User ${userId}`;
    const email = `user${userId}@example.com`;
    return { userId, name, email };
  }
}

// Module-wide retry policy: up to 3 retries, 1s base delay, capped at 10s.
const retryPattern = new RetryPattern({
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 10000
});

const externalService = new ExternalService();

/**
 * Fetches a user through the shared retry policy.
 *
 * @param {*} userId - Forwarded to `externalService.fetchUserData`.
 * @returns {Promise<*>} The value `retryPattern.execute` resolves to.
 */
async function getUserWithRetry(userId) {
  // Bind so the method keeps its receiver when invoked by the retry helper.
  const boundFetch = externalService.fetchUserData.bind(externalService);
  const user = await retryPattern.execute(boundFetch, userId);
  return user;
}

module.exports = { RetryPattern, getUserWithRetry };

Step 3: Bulkhead Pattern

Isolate resources to prevent cascading failures:

/**
 * Caps concurrency per named pool so that one overloaded dependency cannot
 * consume every in-flight slot. Work beyond the cap waits in a FIFO queue
 * and is rejected if it waits longer than the pool timeout.
 */
class BulkheadPattern {
  constructor() {
    this.pools = new Map();
  }

  /**
   * Registers (or replaces) a pool.
   *
   * @param {string} name - Pool identifier used by `execute`.
   * @param {object} [options]
   * @param {number} [options.maxConcurrent=10] - Maximum simultaneous executions.
   * @param {number} [options.timeout=5000] - Milliseconds allowed both for waiting
   *   in the queue and for a single execution.
   * @returns {object} The pool record, including its counters.
   */
  createPool(name, options = {}) {
    const pool = {
      name,
      maxConcurrent: options.maxConcurrent || 10,
      timeout: options.timeout || 5000,
      queue: [],
      active: 0,
      rejected: 0,
      completed: 0
    };

    this.pools.set(name, pool);
    return pool;
  }

  /**
   * Runs `fn(...args)` inside the named pool, queueing when the pool is full.
   *
   * @param {string} poolName - Name given to `createPool`.
   * @param {Function} fn - Async function to run.
   * @param {...*} args - Arguments forwarded to `fn`.
   * @returns {Promise<*>} Result of `fn`.
   * @throws {Error} When the pool does not exist, the queue wait times out,
   *   the execution times out, or `fn` itself fails.
   */
  async execute(poolName, fn, ...args) {
    const pool = this.pools.get(poolName);

    if (!pool) {
      throw new Error(`Pool ${poolName} not found`);
    }

    if (pool.active < pool.maxConcurrent) {
      return this.executeWithPool(pool, fn, ...args);
    }

    return new Promise((resolve, reject) => {
      const queueItem = {
        fn,
        args,
        resolve,
        reject,
        timestamp: Date.now(),
        timer: null
      };

      // Reject the item if it is still waiting when the timeout elapses.
      // The handle is kept so processQueue can cancel it on dequeue.
      queueItem.timer = setTimeout(() => {
        const index = pool.queue.indexOf(queueItem);
        if (index !== -1) {
          pool.queue.splice(index, 1);
          pool.rejected++;
          reject(new Error(`Request timeout in ${poolName} queue`));
        }
      }, pool.timeout);

      pool.queue.push(queueItem);
    });
  }

  /**
   * Occupies one slot of `pool` while `fn` runs, racing it against the pool
   * timeout, then frees the slot and starts the next queued item.
   *
   * @param {object} pool - Pool record created by `createPool`.
   * @param {Function} fn - Async function to run.
   * @param {...*} args - Arguments forwarded to `fn`.
   * @returns {Promise<*>} Result of `fn`.
   * @throws {Error} 'Execution timeout' or whatever `fn` throws.
   */
  async executeWithPool(pool, fn, ...args) {
    pool.active++;
    let timer;

    try {
      const result = await Promise.race([
        fn(...args),
        new Promise((_, reject) => {
          timer = setTimeout(() => reject(new Error('Execution timeout')), pool.timeout);
        })
      ]);

      pool.completed++;
      return result;
    } catch (error) {
      pool.rejected++;
      throw error;
    } finally {
      // Cancel the watchdog so finished work does not leave timers behind.
      clearTimeout(timer);
      pool.active--;
      this.processQueue(pool);
    }
  }

  /**
   * Starts the oldest queued item if the pool has a free slot.
   *
   * @param {object} pool - Pool record created by `createPool`.
   */
  processQueue(pool) {
    if (pool.queue.length > 0 && pool.active < pool.maxConcurrent) {
      const queueItem = pool.queue.shift();
      clearTimeout(queueItem.timer);

      this.executeWithPool(pool, queueItem.fn, ...queueItem.args)
        .then(queueItem.resolve, queueItem.reject);
    }
  }

  /**
   * @param {string} poolName - Name given to `createPool`.
   * @returns {?{name: string, active: number, queued: number, completed: number,
   *   rejected: number, utilization: number}} Counters for the pool, with
   *   `utilization` as a percentage, or null when the pool does not exist.
   */
  getPoolStats(poolName) {
    const pool = this.pools.get(poolName);
    return pool ? {
      name: pool.name,
      active: pool.active,
      queued: pool.queue.length,
      completed: pool.completed,
      rejected: pool.rejected,
      utilization: (pool.active / pool.maxConcurrent) * 100
    } : null;
  }
}

// Usage example
const bulkhead = new BulkheadPattern();

// One isolated pool per downstream dependency, each with its own limits.
[
  ['user-service', { maxConcurrent: 5, timeout: 3000 }],
  ['payment-service', { maxConcurrent: 3, timeout: 10000 }],
  ['notification-service', { maxConcurrent: 10, timeout: 2000 }]
].forEach(([poolName, limits]) => {
  bulkhead.createPool(poolName, limits);
});

/**
 * Looks up a user inside the 'user-service' pool.
 *
 * @param {*} userId - Identifier echoed into the returned record.
 * @returns {Promise<*>} Whatever `bulkhead.execute` resolves to for the lookup.
 */
async function fetchUser(userId) {
  const lookup = async () => {
    // Simulate user service call
    await new Promise(done => setTimeout(done, 1000));
    return { userId, name: `User ${userId}` };
  };

  return bulkhead.execute('user-service', lookup);
}

/**
 * Runs a simulated payment inside the 'payment-service' pool.
 *
 * @param {number} amount - Amount echoed into the returned record.
 * @returns {Promise<*>} Whatever `bulkhead.execute` resolves to for the charge.
 */
async function processPayment(amount) {
  const charge = async () => {
    // Simulate payment processing
    await new Promise(done => setTimeout(done, 2000));
    return { transactionId: Date.now(), amount };
  };

  return bulkhead.execute('payment-service', charge);
}

/**
 * Runs a simulated notification send inside the 'notification-service' pool.
 *
 * @param {*} message - Accepted but not used by the simulation.
 * @returns {Promise<*>} Whatever `bulkhead.execute` resolves to for the send.
 */
async function sendNotification(message) {
  const deliver = async () => {
    // Simulate notification sending
    await new Promise(done => setTimeout(done, 500));
    return { messageId: Date.now(), status: 'sent' };
  };

  return bulkhead.execute('notification-service', deliver);
}

module.exports = { BulkheadPattern, bulkhead };

Step 4: Saga Pattern for Distributed Transactions

/**
 * Runs a list of steps in order and, if one fails, undoes the steps that
 * already succeeded by running their compensations in reverse order.
 */
class SagaOrchestrator {
  constructor() {
    this.sagas = new Map();
  }

  /**
   * Executes every step of a saga sequentially.
   *
   * @param {string} sagaId - Key under which the saga state is tracked.
   * @param {Array<{name: string, action: Function, compensation: (Function|undefined)}>} steps
   *   Each `action` is awaited with no arguments; `compensation`, when present,
   *   later receives that action's result.
   * @returns {Promise<Array>} Results of all actions, in step order.
   * @throws The error of the failing step, after compensations have run.
   */
  async executeSaga(sagaId, steps) {
    const saga = {
      id: sagaId,
      steps,
      currentStep: 0,
      compensations: [],
      status: 'RUNNING',
      results: []
    };

    // Register the live object itself. Storing per-step copies would leave
    // getSagaStatus reporting a stale 'RUNNING' after the saga has finished.
    this.sagas.set(sagaId, saga);

    try {
      for (let i = 0; i < steps.length; i++) {
        saga.currentStep = i;
        const step = steps[i];

        console.log(`Executing step ${i + 1}: ${step.name}`);
        const result = await step.action();

        saga.results.push(result);
        saga.compensations.push(step.compensation);
      }

      saga.status = 'COMPLETED';
      console.log(`Saga ${sagaId} completed successfully`);
      return saga.results;

    } catch (error) {
      console.error(`Saga ${sagaId} failed at step ${saga.currentStep + 1}:`, error.message);
      await this.compensate(saga);
      saga.status = 'FAILED';
      throw error;
    }
  }

  /**
   * Runs the compensations of all completed steps, newest first. A failing
   * compensation is logged and does not stop the remaining ones.
   *
   * @param {object} saga - Saga state created by `executeSaga`.
   * @returns {Promise<void>}
   */
  async compensate(saga) {
    console.log(`Starting compensation for saga ${saga.id}`);

    // Execute compensations in reverse order
    for (let i = saga.compensations.length - 1; i >= 0; i--) {
      try {
        const compensation = saga.compensations[i];
        if (compensation) {
          console.log(`Executing compensation ${i + 1}`);
          await compensation(saga.results[i]);
        }
      } catch (error) {
        console.error(`Compensation ${i + 1} failed:`, error.message);
        // Log compensation failure but continue with other compensations
      }
    }

    console.log(`Compensation completed for saga ${saga.id}`);
  }

  /**
   * @param {string} sagaId - Key passed to `executeSaga`.
   * @returns {object|undefined} The tracked saga state, or undefined if unknown.
   */
  getSagaStatus(sagaId) {
    return this.sagas.get(sagaId);
  }
}

// Example: E-commerce order processing saga
/**
 * Demo e-commerce saga: reserve stock, charge, ship, confirm. Each step is
 * paired with the operation that undoes it. All backends are simulated with
 * timers and random failures.
 */
class OrderSaga {
  constructor() {
    this.orchestrator = new SagaOrchestrator();
  }

  /**
   * Builds the four order steps and hands them to the orchestrator.
   *
   * @param {object} orderData - Read lazily by the steps: `items`, `payment`,
   *   `shipping` and `customer`.
   * @returns {Promise<*>} Whatever `executeSaga` resolves to.
   */
  async processOrder(orderData) {
    const sagaId = `order-${Date.now()}`;
    const makeStep = (name, action, compensation) => ({ name, action, compensation });

    const steps = [
      makeStep(
        'Reserve Inventory',
        () => this.reserveInventory(orderData.items),
        (result) => this.releaseInventory(result.reservationId)
      ),
      makeStep(
        'Process Payment',
        () => this.processPayment(orderData.payment),
        (result) => this.refundPayment(result.transactionId)
      ),
      makeStep(
        'Create Shipment',
        () => this.createShipment(orderData.shipping),
        (result) => this.cancelShipment(result.shipmentId)
      ),
      makeStep(
        'Send Confirmation',
        () => this.sendConfirmation(orderData.customer),
        () => this.sendCancellationNotice(orderData.customer)
      )
    ];

    const outcome = await this.orchestrator.executeSaga(sagaId, steps);
    return outcome;
  }

  /**
   * Simulated stock reservation; fails when the random draw is below 0.1.
   *
   * @param {*} items - Echoed into the reservation record.
   * @returns {Promise<{reservationId: number, items: *}>}
   * @throws {Error} 'Insufficient inventory'
   */
  async reserveInventory(items) {
    await new Promise(done => setTimeout(done, 1000));

    // Randomly fail for demo
    const outOfStock = Math.random() < 0.1;
    if (outOfStock) {
      throw new Error('Insufficient inventory');
    }

    return { reservationId: Date.now(), items };
  }

  /**
   * Simulated undo of `reserveInventory`.
   *
   * @param {*} reservationId - Identifier written to the log.
   * @returns {Promise<void>}
   */
  async releaseInventory(reservationId) {
    console.log(`Releasing inventory reservation: ${reservationId}`);
    await new Promise(done => setTimeout(done, 500));
  }

  /**
   * Simulated charge; fails when the random draw is below 0.1.
   *
   * @param {{amount: *}} paymentData - Its `amount` is echoed into the record.
   * @returns {Promise<{transactionId: number, amount: *}>}
   * @throws {Error} 'Payment processing failed'
   */
  async processPayment(paymentData) {
    await new Promise(done => setTimeout(done, 1500));

    const declined = Math.random() < 0.1;
    if (declined) {
      throw new Error('Payment processing failed');
    }

    return { transactionId: Date.now(), amount: paymentData.amount };
  }

  /**
   * Simulated undo of `processPayment`.
   *
   * @param {*} transactionId - Identifier written to the log.
   * @returns {Promise<void>}
   */
  async refundPayment(transactionId) {
    console.log(`Processing refund for transaction: ${transactionId}`);
    await new Promise(done => setTimeout(done, 1000));
  }

  /**
   * Simulated shipment creation; fails when the random draw is below 0.05.
   *
   * @param {*} shippingData - Accepted but not used by the simulation.
   * @returns {Promise<{shipmentId: number, trackingNumber: string}>}
   * @throws {Error} 'Shipment creation failed'
   */
  async createShipment(shippingData) {
    await new Promise(done => setTimeout(done, 800));

    const carrierDown = Math.random() < 0.05;
    if (carrierDown) {
      throw new Error('Shipment creation failed');
    }

    const shipmentId = Date.now();
    const trackingNumber = `TRK${Date.now()}`;
    return { shipmentId, trackingNumber };
  }

  /**
   * Simulated undo of `createShipment`.
   *
   * @param {*} shipmentId - Identifier written to the log.
   * @returns {Promise<void>}
   */
  async cancelShipment(shipmentId) {
    console.log(`Cancelling shipment: ${shipmentId}`);
    await new Promise(done => setTimeout(done, 600));
  }

  /**
   * Simulated confirmation message.
   *
   * @param {{email: *}} customer - Its `email` becomes the recipient.
   * @returns {Promise<{messageId: number, recipient: *}>}
   */
  async sendConfirmation(customer) {
    await new Promise(done => setTimeout(done, 300));
    const messageId = Date.now();
    return { messageId, recipient: customer.email };
  }

  /**
   * Simulated undo of `sendConfirmation`.
   *
   * @param {{email: *}} customer - Its `email` is written to the log.
   * @returns {Promise<void>}
   */
  async sendCancellationNotice(customer) {
    console.log(`Sending cancellation notice to: ${customer.email}`);
    await new Promise(done => setTimeout(done, 300));
  }
}

module.exports = { SagaOrchestrator, OrderSaga };

Step 5: Event Sourcing Pattern

/**
 * In-memory event store: one flat list of events tagged with a stream id
 * and per-stream version, plus a map of snapshots.
 */
class EventStore {
  constructor() {
    this.events = [];
    this.snapshots = new Map();
  }

  /**
   * Appends events to a stream if the stream is at the expected version.
   *
   * @param {string} streamId - Stream to append to.
   * @param {number} expectedVersion - Number of events the caller believes the stream holds.
   * @param {Array<{type: string, data: *}>} events - Events to store, in order.
   * @returns {Promise<Array<object>>} The stored event records.
   * @throws {Error} On a version mismatch (optimistic concurrency conflict).
   */
  async appendEvents(streamId, expectedVersion, events) {
    const currentVersion = this.getStreamEvents(streamId).length;

    if (currentVersion !== expectedVersion) {
      throw new Error(`Optimistic concurrency conflict. Expected version ${expectedVersion}, got ${currentVersion}`);
    }

    const stored = events.map((event, offset) => {
      const eventId = this.generateEventId();
      return {
        streamId,
        eventId,
        eventType: event.type,
        eventData: event.data,
        eventVersion: expectedVersion + offset + 1,
        timestamp: new Date().toISOString()
      };
    });

    this.events.push(...stored);
    return stored;
  }

  /**
   * @param {string} streamId - Stream to read.
   * @param {number} [fromVersion=0] - Only events with a higher version are returned.
   * @returns {Array<object>} Matching event records in ascending version order.
   */
  getStreamEvents(streamId, fromVersion = 0) {
    const matching = [];

    for (const event of this.events) {
      if (event.streamId === streamId && event.eventVersion > fromVersion) {
        matching.push(event);
      }
    }

    matching.sort((left, right) => left.eventVersion - right.eventVersion);
    return matching;
  }

  /**
   * Stores a snapshot under the key `<streamId>-<version>`.
   *
   * @param {string} streamId - Stream the snapshot belongs to.
   * @param {number} version - Stream version the snapshot reflects.
   * @param {*} data - Snapshot payload, stored as given.
   */
  saveSnapshot(streamId, version, data) {
    const key = `${streamId}-${version}`;
    const timestamp = new Date().toISOString();
    this.snapshots.set(key, { streamId, version, data, timestamp });
  }

  /**
   * @param {string} streamId - Stream to look up.
   * @returns {?object} The snapshot with the highest version for the stream, or null.
   */
  getSnapshot(streamId) {
    let latest = null;

    for (const snapshot of this.snapshots.values()) {
      if (snapshot.streamId !== streamId) {
        continue;
      }
      if (latest === null || snapshot.version > latest.version) {
        latest = snapshot;
      }
    }

    return latest;
  }

  /**
   * @returns {string} `<epoch millis>-<random base-36 suffix>`.
   */
  generateEventId() {
    const stamp = Date.now();
    const suffix = Math.random().toString(36).substring(2, 9);
    return `${stamp}-${suffix}`;
  }
}

/**
 * Base class for event-sourced aggregates. State changes are expressed as
 * events; `version` counts the events applied so far and
 * `uncommittedEvents` holds the ones not yet persisted.
 */
class AggregateRoot {
  /**
   * @param {*} id - Aggregate identifier.
   */
  constructor(id) {
    this.id = id;
    this.version = 0;
    this.uncommittedEvents = [];
  }

  /**
   * Applies an event to the aggregate state and bumps the version.
   *
   * @param {{type: string, data: *}} event
   */
  applyEvent(event) {
    this.handle(event);
    this.version++;
  }

  /**
   * Applies a new event and records it as uncommitted.
   *
   * @param {string} eventType - Used to find the `on<eventType>` handler.
   * @param {*} eventData - Payload passed to the handler.
   * @throws Whatever the handler throws; in that case nothing is recorded.
   */
  raiseEvent(eventType, eventData) {
    const event = { type: eventType, data: eventData };
    // Apply before recording: if the handler throws, the event must not stay
    // in the uncommitted list, or the list and `version` would disagree.
    this.applyEvent(event);
    this.uncommittedEvents.push(event);
  }

  /** Clears the list of uncommitted events. */
  markEventsAsCommitted() {
    this.uncommittedEvents = [];
  }

  /**
   * @returns {Array<{type: string, data: *}>} Events raised since the last commit.
   */
  getUncommittedEvents() {
    return this.uncommittedEvents;
  }

  /**
   * Dispatches an event to the method named `on<event.type>`, if one exists.
   * Events without a handler are ignored. Override in subclasses for custom
   * dispatch.
   *
   * @param {{type: string, data: *}} event
   */
  handle(event) {
    const handlerName = `on${event.type}`;
    if (typeof this[handlerName] === 'function') {
      this[handlerName](event.data);
    }
  }
}

// Example: Order Aggregate
/**
 * Event-sourced order aggregate. Commands (`createOrder`, `addItem`,
 * `confirmOrder`) validate and raise events; the `on*` handlers are the only
 * place where state is changed.
 */
class Order extends AggregateRoot {
  /**
   * @param {*} id - Order identifier.
   */
  constructor(id) {
    super(id);
    this.customerId = null;
    this.items = [];
    this.status = 'DRAFT';
    this.totalAmount = 0;
  }

  /**
   * Rebuilds an order by replaying stored events.
   *
   * @param {*} id - Order identifier.
   * @param {Array<{eventType: string, eventData: *}>} events - Stored event records, oldest first.
   * @returns {Order}
   */
  static fromHistory(id, events) {
    const order = new Order(id);

    events.forEach(event => {
      order.applyEvent({
        type: event.eventType,
        data: event.eventData
      });
    });

    return order;
  }

  /**
   * Raises `OrderCreated` with the customer, the initial items and their total.
   *
   * @param {*} customerId - Owner of the order.
   * @param {Array<{price: number, quantity: number}>} items - Initial line items.
   * @throws {Error} 'Order already created' when the order is no longer a
   *   draft or has already been created.
   */
  createOrder(customerId, items) {
    // Status stays 'DRAFT' after creation, so the customer id is what tells
    // a fresh order from one that was already created.
    if (this.status !== 'DRAFT' || this.customerId !== null) {
      throw new Error('Order already created');
    }

    this.raiseEvent('OrderCreated', {
      orderId: this.id,
      customerId,
      items,
      totalAmount: this.calculateTotal(items)
    });
  }

  /**
   * Raises `ItemAdded` for a draft order.
   *
   * @param {*} productId
   * @param {number} quantity
   * @param {number} price - Unit price.
   * @throws {Error} 'Cannot modify confirmed order' when the order is not a draft.
   */
  addItem(productId, quantity, price) {
    if (this.status !== 'DRAFT') {
      throw new Error('Cannot modify confirmed order');
    }

    this.raiseEvent('ItemAdded', {
      productId,
      quantity,
      price
    });
  }

  /**
   * Raises `OrderConfirmed` for a non-empty draft order.
   *
   * @throws {Error} 'Order already confirmed' or 'Cannot confirm empty order'.
   */
  confirmOrder() {
    if (this.status !== 'DRAFT') {
      throw new Error('Order already confirmed');
    }

    if (this.items.length === 0) {
      throw new Error('Cannot confirm empty order');
    }

    this.raiseEvent('OrderConfirmed', {
      orderId: this.id,
      totalAmount: this.totalAmount
    });
  }

  // Event handlers

  /** @param {{customerId: *, items: Array, totalAmount: number}} data */
  onOrderCreated(data) {
    this.customerId = data.customerId;
    // Copy the list: later ItemAdded events push into `this.items`, and doing
    // that on the event's own array would rewrite history (and the caller's
    // array), duplicating items on every replay.
    this.items = [...data.items];
    this.totalAmount = data.totalAmount;
    this.status = 'DRAFT';
  }

  /** @param {{productId: *, quantity: number, price: number}} data */
  onItemAdded(data) {
    this.items.push(data);
    this.totalAmount += data.price * data.quantity;
  }

  /** @param {*} data - Unused; confirmation only changes the status. */
  onOrderConfirmed(data) {
    this.status = 'CONFIRMED';
  }

  /**
   * @param {Array<{price: number, quantity: number}>} items
   * @returns {number} Sum of `price * quantity` over all items.
   */
  calculateTotal(items) {
    return items.reduce((total, item) => total + (item.price * item.quantity), 0);
  }
}

// Repository using Event Sourcing
/**
 * Persists and loads `Order` aggregates through an event store, using
 * snapshots to shorten replay.
 */
class OrderRepository {
  /**
   * @param {object} eventStore - Must provide `appendEvents`, `saveSnapshot`,
   *   `getSnapshot` and `getStreamEvents`.
   */
  constructor(eventStore) {
    this.eventStore = eventStore;
  }

  /**
   * Appends the order's uncommitted events and, whenever the save reaches or
   * crosses a multiple of 10 events, stores a snapshot.
   *
   * @param {Order} order
   * @returns {Promise<void>}
   * @throws Whatever `appendEvents` throws (e.g. a concurrency conflict); the
   *   events then stay uncommitted.
   */
  async save(order) {
    const events = order.getUncommittedEvents();

    if (events.length === 0) {
      return;
    }

    const expectedVersion = order.version - events.length;
    await this.eventStore.appendEvents(order.id, expectedVersion, events);

    order.markEventsAsCommitted();

    // Save snapshot every 10 events. Compare the 10-event bucket before and
    // after the save so a batch that jumps over a multiple of 10 (9 -> 12)
    // still produces one.
    const bucketBefore = Math.floor(expectedVersion / 10);
    const bucketAfter = Math.floor(order.version / 10);
    if (bucketAfter > bucketBefore) {
      this.eventStore.saveSnapshot(order.id, order.version, {
        customerId: order.customerId,
        // Copy the list so later changes to the live order cannot alter the
        // snapshot after the fact.
        items: [...order.items],
        status: order.status,
        totalAmount: order.totalAmount
      });
    }
  }

  /**
   * Loads an order from its latest snapshot plus any newer events, or from
   * the full event history when no snapshot exists.
   *
   * @param {*} id - Order identifier.
   * @returns {Promise<?Order>} The order, or null when nothing is stored for `id`.
   */
  async getById(id) {
    // Try to load from snapshot first
    const snapshot = this.eventStore.getSnapshot(id);

    let order;
    let fromVersion = 0;

    if (snapshot) {
      order = new Order(id);
      order.customerId = snapshot.data.customerId;
      // Copy the list: replaying later events pushes into it, which must not
      // write through to the stored snapshot.
      order.items = [...snapshot.data.items];
      order.status = snapshot.data.status;
      order.totalAmount = snapshot.data.totalAmount;
      order.version = snapshot.version;
      fromVersion = snapshot.version;
    }

    // Load events after snapshot
    const events = this.eventStore.getStreamEvents(id, fromVersion);

    if (!order && events.length === 0) {
      return null;
    }

    if (!order) {
      order = Order.fromHistory(id, events);
    } else {
      events.forEach(event => {
        order.applyEvent({
          type: event.eventType,
          data: event.eventData
        });
      });
    }

    return order;
  }
}

module.exports = { EventStore, AggregateRoot, Order, OrderRepository };

Summary

Microservices resilience patterns include circuit breakers for fault isolation, retry with backoff for transient failures, bulkhead for resource isolation, saga for distributed transactions, and event sourcing for audit trails and eventual consistency.


Share this post on:

Previous Post
Advanced Database Optimization and Query Performance
Next Post
OAuth2 and OpenID Connect Authentication in Node.js