Advanced Database Optimization and Query Performance
Introduction
Database optimization is crucial for application performance. This guide covers advanced indexing strategies, query optimization, partitioning, and performance monitoring techniques.
Prerequisites
- Understanding of SQL and database concepts
- PostgreSQL, MySQL, or similar RDBMS
- Basic performance monitoring knowledge
Step 1: Advanced Indexing Strategies
Create optimized indexes for different query patterns:
-- Composite index for multi-column WHERE clauses
CREATE INDEX idx_users_active_created
ON users (is_active, created_at);
-- Partial index for specific conditions
CREATE INDEX idx_orders_pending
ON orders (customer_id, created_at)
WHERE status = 'PENDING';
-- Expression index for computed values
CREATE INDEX idx_users_email_lower
ON users (LOWER(email));
-- Covering index (includes additional columns)
CREATE INDEX idx_products_category_covering
ON products (category_id)
INCLUDE (name, price, stock_quantity);
-- Hash index for equality lookups (PostgreSQL)
CREATE INDEX idx_sessions_token
ON user_sessions USING HASH (session_token);
-- GIN index for full-text search
CREATE INDEX idx_articles_content_gin
ON articles USING GIN (to_tsvector('english', content));
-- B-tree index with custom sort order
CREATE INDEX idx_products_price_desc
ON products (price DESC, created_at ASC);
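After creating indexes, verify that the planner actually uses them. A minimal check with EXPLAIN ANALYZE, using the tables and index names from the examples above (the literal customer_id value is only for illustration):
-- Confirm the partial index is used for pending-order lookups
EXPLAIN (ANALYZE, BUFFERS)
SELECT customer_id, created_at
FROM orders
WHERE status = 'PENDING' AND customer_id = 42;
-- Look for "Index Scan using idx_orders_pending" (or a Bitmap Index Scan) in the plan;
-- a sequential scan suggests the index is not helping this query.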
Step 2: Query Optimization Techniques
-- Use EXISTS instead of IN for large subqueries
-- Slower:
SELECT * FROM customers
WHERE customer_id IN (
SELECT customer_id FROM orders WHERE total_amount > 1000
);
-- Faster:
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.total_amount > 1000
);
-- Use JOIN instead of correlated subqueries
-- Slower:
SELECT
p.name,
p.price,
(SELECT COUNT(*) FROM order_items oi WHERE oi.product_id = p.product_id) as order_count
FROM products p;
-- Faster:
SELECT
p.name,
p.price,
COALESCE(oi.order_count, 0) as order_count
FROM products p
LEFT JOIN (
SELECT product_id, COUNT(*) as order_count
FROM order_items
GROUP BY product_id
) oi ON p.product_id = oi.product_id;
-- Use LIMIT with ORDER BY efficiently
-- Add index on (created_at DESC) for pagination
SELECT * FROM posts
WHERE created_at < '2023-12-01'
ORDER BY created_at DESC
LIMIT 20;
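The supporting index mentioned in the comment above might look like this (assuming posts has a created_at column):
-- Matches the descending sort and the created_at range filter used for keyset pagination
CREATE INDEX idx_posts_created_at_desc
ON posts (created_at DESC);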
-- Window functions for ranking without correlated subqueries
-- (a window alias cannot be referenced in WHERE, so filter in an outer query)
SELECT customer_id, order_date, total_amount
FROM (
  SELECT
    customer_id,
    order_date,
    total_amount,
    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS order_rank
  FROM orders
) ranked
WHERE order_rank <= 3; -- Get last 3 orders per customer
Step 3: Table Partitioning
Implement table partitioning for large datasets:
-- PostgreSQL: Create partitioned table by date
CREATE TABLE orders_partitioned (
order_id SERIAL,
customer_id INTEGER NOT NULL,
order_date DATE NOT NULL,
total_amount DECIMAL(10,2),
status VARCHAR(20),
created_at TIMESTAMP DEFAULT NOW()
) PARTITION BY RANGE (order_date);
-- Create monthly partitions
CREATE TABLE orders_2024_01 PARTITION OF orders_partitioned
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE orders_2024_02 PARTITION OF orders_partitioned
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Create index on each partition
CREATE INDEX idx_orders_2024_01_customer
ON orders_2024_01 (customer_id);
CREATE INDEX idx_orders_2024_02_customer
ON orders_2024_02 (customer_id);
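On PostgreSQL 11 and later, creating the index once on the partitioned parent propagates it to every existing and future partition, which replaces the per-partition statements above:
-- Cascades to orders_2024_01, orders_2024_02, and any partitions added later
CREATE INDEX idx_orders_partitioned_customer
ON orders_partitioned (customer_id);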
-- Hash partitioning for even distribution
CREATE TABLE user_activities_partitioned (
activity_id SERIAL,
user_id INTEGER NOT NULL,
activity_type VARCHAR(50),
timestamp TIMESTAMP DEFAULT NOW(),
data JSONB
) PARTITION BY HASH (user_id);
-- Create hash partitions
CREATE TABLE user_activities_p0 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 0);
CREATE TABLE user_activities_p1 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 1);
CREATE TABLE user_activities_p2 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 2);
CREATE TABLE user_activities_p3 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 3);
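Two quick sanity checks for the range-partitioned orders_partitioned table: a DEFAULT partition to catch rows outside the declared ranges (PostgreSQL 11+), and EXPLAIN to confirm that partition pruning limits a query to the relevant partition:
-- Rows whose order_date falls outside the monthly ranges land here instead of erroring
CREATE TABLE orders_default PARTITION OF orders_partitioned DEFAULT;
-- Partition pruning: the plan should scan only orders_2024_01
EXPLAIN
SELECT * FROM orders_partitioned
WHERE order_date >= '2024-01-01' AND order_date < '2024-02-01';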
Step 4: Database Connection Pooling
Implement efficient connection pooling with separate pools for reads and writes, plus optional Redis-backed query caching:
const { Pool } = require('pg');
const redis = require('redis');
const crypto = require('crypto');

class DatabasePool {
  constructor(config) {
    // Larger pool for reads (replicas), smaller pool for writes (primary)
    this.readPool = new Pool({
      ...config.read,
      max: 20,
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000,
      maxUses: 7500, // Close connection after 7500 uses
    });
    this.writePool = new Pool({
      ...config.write,
      max: 10,
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000,
    });
    // Assumes node-redis v4, whose clients must be connected explicitly
    this.redisClient = redis.createClient(config.redis);
    this.redisClient.connect().catch((err) => {
      console.error('Redis connection error:', err);
    });
    // Connection monitoring
    this.setupMonitoring();
  }

  setupMonitoring() {
    this.readPool.on('connect', (client) => {
      console.log('Read pool: New client connected');
    });
    this.readPool.on('error', (err) => {
      console.error('Read pool error:', err);
    });
    this.writePool.on('error', (err) => {
      console.error('Write pool error:', err);
    });
    // Log pool statistics every minute; keep the handle so close() can stop it
    this.statsInterval = setInterval(() => {
      console.log('Pool stats:', {
        read: {
          total: this.readPool.totalCount,
          idle: this.readPool.idleCount,
          waiting: this.readPool.waitingCount,
        },
        write: {
          total: this.writePool.totalCount,
          idle: this.writePool.idleCount,
          waiting: this.writePool.waitingCount,
        }
      });
    }, 60000);
  }

  // Read operations (can use read replicas)
  async query(text, params, useCache = false) {
    const cacheKey = useCache ? this.generateCacheKey(text, params) : null;
    if (cacheKey) {
      const cached = await this.redisClient.get(cacheKey);
      if (cached) {
        return JSON.parse(cached);
      }
    }
    const client = await this.readPool.connect();
    try {
      const result = await client.query(text, params);
      if (cacheKey && result.rows.length > 0) {
        // Cache only the serializable parts of the result for 5 minutes
        await this.redisClient.set(
          cacheKey,
          JSON.stringify({ rows: result.rows, rowCount: result.rowCount }),
          { EX: 300 }
        );
      }
      return result;
    } finally {
      client.release();
    }
  }

  // Write operations (must use master)
  async execute(text, params) {
    const client = await this.writePool.connect();
    try {
      return await client.query(text, params);
    } finally {
      client.release();
    }
  }

  // Transaction support
  async transaction(callback) {
    const client = await this.writePool.connect();
    try {
      await client.query('BEGIN');
      const result = await callback(client);
      await client.query('COMMIT');
      return result;
    } catch (error) {
      await client.query('ROLLBACK');
      throw error;
    } finally {
      client.release();
    }
  }

  generateCacheKey(query, params) {
    const key = query + JSON.stringify(params || []);
    return 'query:' + crypto.createHash('md5').update(key).digest('hex');
  }

  async close() {
    clearInterval(this.statsInterval);
    await this.readPool.end();
    await this.writePool.end();
    await this.redisClient.quit();
  }
}
module.exports = DatabasePool;
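A minimal sketch of wiring the pool into an application; the hostnames, credentials, table columns, and the './database-pool' module path are placeholders, not part of the original setup:
const DatabasePool = require('./database-pool'); // placeholder path

const db = new DatabasePool({
  read: { host: 'replica.db.internal', database: 'app', user: 'app', password: 'secret' },
  write: { host: 'primary.db.internal', database: 'app', user: 'app', password: 'secret' },
  redis: { url: 'redis://localhost:6379' }
});

async function getActiveUsers() {
  // Cached read against the replica pool (5-minute TTL set inside query())
  const result = await db.query(
    'SELECT user_id, email FROM users WHERE is_active = true',
    [],
    true
  );
  return result.rows;
}

async function placeOrder(customerId, total) {
  // Writes and multi-statement work go through the primary via transaction()
  return db.transaction(async (client) => {
    const { rows } = await client.query(
      'INSERT INTO orders (customer_id, total_amount, status) VALUES ($1, $2, $3) RETURNING order_id',
      [customerId, total, 'PENDING']
    );
    return rows[0].order_id;
  });
}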
Step 5: Query Performance Monitoring
class QueryMonitor {
  constructor(pool) {
    this.pool = pool;
    this.slowQueries = [];
    this.queryStats = new Map();
  }

  async monitoredQuery(query, params, threshold = 1000) {
    const startTime = process.hrtime.bigint();
    const queryId = this.generateQueryId(query);
    try {
      const result = await this.pool.query(query, params);
      const duration = Number(process.hrtime.bigint() - startTime) / 1000000;
      this.recordQuery(queryId, query, duration, true);
      if (duration > threshold) {
        this.recordSlowQuery(query, params, duration);
      }
      return result;
    } catch (error) {
      const duration = Number(process.hrtime.bigint() - startTime) / 1000000;
      this.recordQuery(queryId, query, duration, false);
      throw error;
    }
  }

  recordQuery(queryId, query, duration, success) {
    if (!this.queryStats.has(queryId)) {
      this.queryStats.set(queryId, {
        query: this.normalizeQuery(query),
        count: 0,
        totalDuration: 0,
        avgDuration: 0,
        minDuration: Infinity,
        maxDuration: 0,
        errors: 0
      });
    }
    const stats = this.queryStats.get(queryId);
    stats.count++;
    stats.totalDuration += duration;
    stats.avgDuration = stats.totalDuration / stats.count;
    stats.minDuration = Math.min(stats.minDuration, duration);
    stats.maxDuration = Math.max(stats.maxDuration, duration);
    if (!success) {
      stats.errors++;
    }
  }

  recordSlowQuery(query, params, duration) {
    this.slowQueries.push({
      query: this.normalizeQuery(query),
      params,
      duration,
      timestamp: new Date().toISOString()
    });
    // Keep only last 100 slow queries
    if (this.slowQueries.length > 100) {
      this.slowQueries.shift();
    }
  }

  normalizeQuery(query) {
    // Replace parameter placeholders with ? for grouping
    return query.replace(/\$\d+/g, '?').replace(/\s+/g, ' ').trim();
  }

  generateQueryId(query) {
    const crypto = require('crypto');
    return crypto.createHash('md5').update(this.normalizeQuery(query)).digest('hex');
  }

  getTopSlowQueries(limit = 10) {
    return Array.from(this.queryStats.values())
      .sort((a, b) => b.avgDuration - a.avgDuration)
      .slice(0, limit);
  }

  getMostFrequentQueries(limit = 10) {
    return Array.from(this.queryStats.values())
      .sort((a, b) => b.count - a.count)
      .slice(0, limit);
  }

  generateReport() {
    return {
      totalQueries: Array.from(this.queryStats.values()).reduce((sum, stat) => sum + stat.count, 0),
      slowQueries: this.slowQueries,
      topSlow: this.getTopSlowQueries(),
      mostFrequent: this.getMostFrequentQueries(),
      errorRate: this.calculateErrorRate()
    };
  }

  calculateErrorRate() {
    const stats = Array.from(this.queryStats.values());
    const totalQueries = stats.reduce((sum, stat) => sum + stat.count, 0);
    const totalErrors = stats.reduce((sum, stat) => sum + stat.errors, 0);
    return totalQueries > 0 ? (totalErrors / totalQueries) * 100 : 0;
  }
}
module.exports = QueryMonitor;
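A short usage sketch; the connection string, the reporting interval, and the './query-monitor' path are placeholders to adapt to your environment:
const { Pool } = require('pg');
const QueryMonitor = require('./query-monitor'); // placeholder path

const pool = new Pool({ connectionString: 'postgres://app:secret@localhost:5432/app' }); // placeholder DSN
const monitor = new QueryMonitor(pool);

async function recentOrders(customerId) {
  // Anything slower than 500 ms is recorded as a slow query
  return monitor.monitoredQuery(
    'SELECT * FROM orders WHERE customer_id = $1 ORDER BY order_date DESC LIMIT 10',
    [customerId],
    500
  );
}

// Emit a summary report every five minutes
setInterval(() => {
  console.log(JSON.stringify(monitor.generateReport(), null, 2));
}, 5 * 60 * 1000);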
Step 6: Database Schema Optimization
-- Use appropriate data types
-- Instead of VARCHAR(255) for everything
CREATE TABLE optimized_users (
user_id INTEGER PRIMARY KEY,
email VARCHAR(320), -- Maximum email length
username VARCHAR(30), -- Reasonable username limit
is_active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
last_login TIMESTAMP,
settings JSONB -- Use JSONB for flexible data
);
-- Normalize repeated data
CREATE TABLE user_preferences (
preference_id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL
);
CREATE TABLE user_preference_values (
user_id INTEGER REFERENCES users(user_id),
preference_id INTEGER REFERENCES user_preferences(preference_id),
value TEXT,
PRIMARY KEY (user_id, preference_id)
);
-- Use constraints for data integrity
ALTER TABLE products ADD CONSTRAINT check_positive_price
CHECK (price > 0);
ALTER TABLE orders ADD CONSTRAINT check_valid_status
CHECK (status IN ('PENDING', 'CONFIRMED', 'SHIPPED', 'DELIVERED', 'CANCELLED'));
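With the check constraints in place, invalid rows are rejected at write time (assuming products has name and price columns, as in the earlier query examples):
-- Rejected with a check constraint violation on check_positive_price
INSERT INTO products (name, price) VALUES ('Sample item', -5.00);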
-- Use triggers for automatic updates
CREATE OR REPLACE FUNCTION update_modified_time()
RETURNS TRIGGER AS $$
BEGIN
NEW.modified_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_update_modified_time
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_modified_time();
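The trigger assumes the users table has a modified_at column; if it does not, add one first (a minimal sketch):
ALTER TABLE users ADD COLUMN modified_at TIMESTAMP NOT NULL DEFAULT NOW();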
Step 7: Caching Strategies
class MultiLevelCache {
  constructor(redisClient, memoryCache) {
    this.redis = redisClient; // assumes a node-redis v4 (promise-based) client
    this.memory = memoryCache; // any in-process cache exposing get/set/del/keys (e.g. an LRU cache)
    this.metrics = {
      hits: { l1: 0, l2: 0 },
      misses: 0,
      sets: 0
    };
  }

  async get(key) {
    // Level 1: Memory cache
    let value = this.memory.get(key);
    if (value !== undefined) {
      this.metrics.hits.l1++;
      return value;
    }
    // Level 2: Redis cache
    try {
      const serialized = await this.redis.get(key);
      if (serialized) {
        value = JSON.parse(serialized);
        this.memory.set(key, value); // Promote to L1
        this.metrics.hits.l2++;
        return value;
      }
    } catch (error) {
      console.error('Redis get error:', error);
    }
    this.metrics.misses++;
    return null;
  }

  async set(key, value, ttl = 3600) {
    this.metrics.sets++;
    // Set in memory cache
    this.memory.set(key, value);
    // Set in Redis cache with an expiry (node-redis v4 syntax)
    try {
      await this.redis.set(key, JSON.stringify(value), { EX: ttl });
    } catch (error) {
      console.error('Redis set error:', error);
    }
  }

  async invalidate(key) {
    this.memory.del(key);
    try {
      await this.redis.del(key);
    } catch (error) {
      console.error('Redis delete error:', error);
    }
  }

  async invalidatePattern(pattern) {
    // Invalidate memory cache entries matching the pattern
    for (const key of this.memory.keys()) {
      if (this.matchesPattern(key, pattern)) {
        this.memory.del(key);
      }
    }
    // Invalidate Redis entries (KEYS blocks the server; prefer SCAN on large keyspaces)
    try {
      const keys = await this.redis.keys(pattern);
      if (keys.length > 0) {
        await this.redis.del(keys);
      }
    } catch (error) {
      console.error('Redis pattern delete error:', error);
    }
  }

  matchesPattern(str, pattern) {
    // Turn a simple glob (* wildcard) into an anchored regex, escaping other metacharacters
    const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*');
    return new RegExp(`^${escaped}$`).test(str);
  }

  getMetrics() {
    const total = this.metrics.hits.l1 + this.metrics.hits.l2 + this.metrics.misses;
    return {
      ...this.metrics,
      hitRate: total > 0 ? ((this.metrics.hits.l1 + this.metrics.hits.l2) / total) * 100 : 0,
      l1HitRate: total > 0 ? (this.metrics.hits.l1 / total) * 100 : 0,
      l2HitRate: total > 0 ? (this.metrics.hits.l2 / total) * 100 : 0
    };
  }
}
module.exports = MultiLevelCache;
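A usage sketch with node-redis v4 and a simple Map-based stand-in for the in-memory level (a real LRU cache with eviction is preferable in production; the require path and Redis URL are placeholders):
const { createClient } = require('redis');
const MultiLevelCache = require('./multi-level-cache'); // placeholder path

// Map-backed stand-in exposing the get/set/del/keys interface the class expects
const memory = {
  store: new Map(),
  get(key) { return this.store.get(key); },
  set(key, value) { this.store.set(key, value); },
  del(key) { this.store.delete(key); },
  keys() { return this.store.keys(); }
};

async function main() {
  const redisClient = createClient({ url: 'redis://localhost:6379' }); // assumed local Redis
  await redisClient.connect();
  const cache = new MultiLevelCache(redisClient, memory);

  await cache.set('user:42', { name: 'Ada' }, 600);
  console.log(await cache.get('user:42')); // second read is served from L1
  await cache.invalidatePattern('user:*');
  console.log(cache.getMetrics());

  await redisClient.quit();
}

main().catch(console.error);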
Summary
Database optimization involves strategic indexing, query optimization, connection pooling, partitioning, and multi-level caching. Monitor query performance, use appropriate data types, and implement caching strategies for optimal application performance.