Advanced Database Optimization and Query Performance
Introduction
Database optimization is crucial for application performance. This guide covers advanced indexing strategies, query optimization, partitioning, and performance monitoring techniques.
Prerequisites
- Understanding of SQL and database concepts
- PostgreSQL, MySQL, or similar RDBMS
- Basic performance monitoring knowledge
Step 1: Advanced Indexing Strategies
Create optimized indexes for different query patterns:
-- Composite index for multi-column WHERE clauses
CREATE INDEX idx_users_active_created
ON users (is_active, created_at);
-- Partial index for specific conditions
CREATE INDEX idx_orders_pending
ON orders (customer_id, created_at)
WHERE status = 'PENDING';
-- Expression index for computed values
CREATE INDEX idx_users_email_lower
ON users (LOWER(email));
-- Covering index (includes additional columns)
CREATE INDEX idx_products_category_covering
ON products (category_id)
INCLUDE (name, price, stock_quantity);
-- Hash index for equality lookups (PostgreSQL)
CREATE INDEX idx_sessions_token
ON user_sessions USING HASH (session_token);
-- GIN index for full-text search
CREATE INDEX idx_articles_content_gin
ON articles USING GIN (to_tsvector('english', content));
-- B-tree index with custom sort order
CREATE INDEX idx_products_price_desc
ON products (price DESC, created_at ASC);
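After creating indexes, verify that the planner actually uses them. A minimal check with EXPLAIN ANALYZE, using the tables and index names from the examples above (the literal customer_id value is only for illustration):
-- Confirm the partial index is used for pending-order lookups
EXPLAIN (ANALYZE, BUFFERS)
SELECT customer_id, created_at
FROM orders
WHERE status = 'PENDING' AND customer_id = 42;
-- Look for "Index Scan using idx_orders_pending" (or a Bitmap Index Scan) in the plan;
-- a sequential scan suggests the index is not helping this query.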
Step 2: Query Optimization Techniques
-- Use EXISTS instead of IN for large subqueries
-- Slower:
SELECT * FROM customers
WHERE customer_id IN (
SELECT customer_id FROM orders WHERE total_amount > 1000
);
-- Faster:
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.total_amount > 1000
);
-- Use JOIN instead of correlated subqueries
-- Slower:
SELECT
p.name,
p.price,
(SELECT COUNT(*) FROM order_items oi WHERE oi.product_id = p.product_id) as order_count
FROM products p;
-- Faster:
SELECT
p.name,
p.price,
COALESCE(oi.order_count, 0) as order_count
FROM products p
LEFT JOIN (
SELECT product_id, COUNT(*) as order_count
FROM order_items
GROUP BY product_id
) oi ON p.product_id = oi.product_id;
-- Use LIMIT with ORDER BY efficiently
-- Add index on (created_at DESC) for pagination
SELECT * FROM posts
WHERE created_at < '2023-12-01'
ORDER BY created_at DESC
LIMIT 20;
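The supporting index mentioned in the comment above might look like this (assuming posts has a created_at column):
-- Matches the descending sort and the created_at range filter used for keyset pagination
CREATE INDEX idx_posts_created_at_desc
ON posts (created_at DESC);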
-- Window functions for ranking without correlated subqueries
-- (a window alias cannot be referenced in WHERE, so filter in an outer query)
SELECT customer_id, order_date, total_amount
FROM (
  SELECT
    customer_id,
    order_date,
    total_amount,
    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS order_rank
  FROM orders
) ranked
WHERE order_rank <= 3; -- Get last 3 orders per customer
Step 3: Table Partitioning
Implement table partitioning for large datasets:
-- PostgreSQL: Create partitioned table by date
CREATE TABLE orders_partitioned (
order_id SERIAL,
customer_id INTEGER NOT NULL,
order_date DATE NOT NULL,
total_amount DECIMAL(10,2),
status VARCHAR(20),
created_at TIMESTAMP DEFAULT NOW()
) PARTITION BY RANGE (order_date);
-- Create monthly partitions
CREATE TABLE orders_2024_01 PARTITION OF orders_partitioned
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE orders_2024_02 PARTITION OF orders_partitioned
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Create index on each partition
CREATE INDEX idx_orders_2024_01_customer
ON orders_2024_01 (customer_id);
CREATE INDEX idx_orders_2024_02_customer
ON orders_2024_02 (customer_id);
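On PostgreSQL 11 and later, creating the index once on the partitioned parent propagates it to every existing and future partition, which replaces the per-partition statements above:
-- Cascades to orders_2024_01, orders_2024_02, and any partitions added later
CREATE INDEX idx_orders_partitioned_customer
ON orders_partitioned (customer_id);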
-- Hash partitioning for even distribution
CREATE TABLE user_activities_partitioned (
activity_id SERIAL,
user_id INTEGER NOT NULL,
activity_type VARCHAR(50),
timestamp TIMESTAMP DEFAULT NOW(),
data JSONB
) PARTITION BY HASH (user_id);
-- Create hash partitions
CREATE TABLE user_activities_p0 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 0);
CREATE TABLE user_activities_p1 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 1);
CREATE TABLE user_activities_p2 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 2);
CREATE TABLE user_activities_p3 PARTITION OF user_activities_partitioned
FOR VALUES WITH (modulus 4, remainder 3);
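Two quick sanity checks for the range-partitioned orders_partitioned table: a DEFAULT partition to catch rows outside the declared ranges (PostgreSQL 11+), and EXPLAIN to confirm that partition pruning limits a query to the relevant partition:
-- Rows whose order_date falls outside the monthly ranges land here instead of erroring
CREATE TABLE orders_default PARTITION OF orders_partitioned DEFAULT;
-- Partition pruning: the plan should scan only orders_2024_01
EXPLAIN
SELECT * FROM orders_partitioned
WHERE order_date >= '2024-01-01' AND order_date < '2024-02-01';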
Step 4: Database Connection Pooling
Implement efficient connection pooling with separate pools for reads and writes, plus optional Redis-backed query caching:
const { Pool } = require('pg');
const redis = require('redis');
const crypto = require('crypto');

class DatabasePool {
  constructor(config) {
    // Larger pool for reads (replicas), smaller pool for writes (primary)
    this.readPool = new Pool({
      ...config.read,
      max: 20,
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000,
      maxUses: 7500, // Close connection after 7500 uses
    });
    this.writePool = new Pool({
      ...config.write,
      max: 10,
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000,
    });
    // Assumes node-redis v4, whose clients must be connected explicitly
    this.redisClient = redis.createClient(config.redis);
    this.redisClient.connect().catch((err) => {
      console.error('Redis connection error:', err);
    });
    // Connection monitoring
    this.setupMonitoring();
  }

  setupMonitoring() {
    this.readPool.on('connect', (client) => {
      console.log('Read pool: New client connected');
    });
    this.readPool.on('error', (err) => {
      console.error('Read pool error:', err);
    });
    this.writePool.on('error', (err) => {
      console.error('Write pool error:', err);
    });
    // Log pool statistics every minute; keep the handle so close() can stop it
    this.statsInterval = setInterval(() => {
      console.log('Pool stats:', {
        read: {
          total: this.readPool.totalCount,
          idle: this.readPool.idleCount,
          waiting: this.readPool.waitingCount,
        },
        write: {
          total: this.writePool.totalCount,
          idle: this.writePool.idleCount,
          waiting: this.writePool.waitingCount,
        }
      });
    }, 60000);
  }

  // Read operations (can use read replicas)
  async query(text, params, useCache = false) {
    const cacheKey = useCache ? this.generateCacheKey(text, params) : null;
    if (cacheKey) {
      const cached = await this.redisClient.get(cacheKey);
      if (cached) {
        return JSON.parse(cached);
      }
    }
    const client = await this.readPool.connect();
    try {
      const result = await client.query(text, params);
      if (cacheKey && result.rows.length > 0) {
        // Cache only the serializable parts of the result for 5 minutes
        await this.redisClient.set(
          cacheKey,
          JSON.stringify({ rows: result.rows, rowCount: result.rowCount }),
          { EX: 300 }
        );
      }
      return result;
    } finally {
      client.release();
    }
  }

  // Write operations (must use master)
  async execute(text, params) {
    const client = await this.writePool.connect();
    try {
      return await client.query(text, params);
    } finally {
      client.release();
    }
  }

  // Transaction support
  async transaction(callback) {
    const client = await this.writePool.connect();
    try {
      await client.query('BEGIN');
      const result = await callback(client);
      await client.query('COMMIT');
      return result;
    } catch (error) {
      await client.query('ROLLBACK');
      throw error;
    } finally {
      client.release();
    }
  }

  generateCacheKey(query, params) {
    const key = query + JSON.stringify(params || []);
    return 'query:' + crypto.createHash('md5').update(key).digest('hex');
  }

  async close() {
    clearInterval(this.statsInterval);
    await this.readPool.end();
    await this.writePool.end();
    await this.redisClient.quit();
  }
}
module.exports = DatabasePool;
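A minimal sketch of wiring the pool into an application; the hostnames, credentials, table columns, and the './database-pool' module path are placeholders, not part of the original setup:
const DatabasePool = require('./database-pool'); // placeholder path

const db = new DatabasePool({
  read: { host: 'replica.db.internal', database: 'app', user: 'app', password: 'secret' },
  write: { host: 'primary.db.internal', database: 'app', user: 'app', password: 'secret' },
  redis: { url: 'redis://localhost:6379' }
});

async function getActiveUsers() {
  // Cached read against the replica pool (5-minute TTL set inside query())
  const result = await db.query(
    'SELECT user_id, email FROM users WHERE is_active = true',
    [],
    true
  );
  return result.rows;
}

async function placeOrder(customerId, total) {
  // Writes and multi-statement work go through the primary via transaction()
  return db.transaction(async (client) => {
    const { rows } = await client.query(
      'INSERT INTO orders (customer_id, total_amount, status) VALUES ($1, $2, $3) RETURNING order_id',
      [customerId, total, 'PENDING']
    );
    return rows[0].order_id;
  });
}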
Step 5: Query Performance Monitoring
class QueryMonitor {
  constructor(pool) {
    this.pool = pool;
    this.slowQueries = [];
    this.queryStats = new Map();
  }

  async monitoredQuery(query, params, threshold = 1000) {
    const startTime = process.hrtime.bigint();
    const queryId = this.generateQueryId(query);
    try {
      const result = await this.pool.query(query, params);
      const duration = Number(process.hrtime.bigint() - startTime) / 1000000;
      this.recordQuery(queryId, query, duration, true);
      if (duration > threshold) {
        this.recordSlowQuery(query, params, duration);
      }
      return result;
    } catch (error) {
      const duration = Number(process.hrtime.bigint() - startTime) / 1000000;
      this.recordQuery(queryId, query, duration, false);
      throw error;
    }
  }

  recordQuery(queryId, query, duration, success) {
    if (!this.queryStats.has(queryId)) {
      this.queryStats.set(queryId, {
        query: this.normalizeQuery(query),
        count: 0,
        totalDuration: 0,
        avgDuration: 0,
        minDuration: Infinity,
        maxDuration: 0,
        errors: 0
      });
    }
    const stats = this.queryStats.get(queryId);
    stats.count++;
    stats.totalDuration += duration;
    stats.avgDuration = stats.totalDuration / stats.count;
    stats.minDuration = Math.min(stats.minDuration, duration);
    stats.maxDuration = Math.max(stats.maxDuration, duration);
    if (!success) {
      stats.errors++;
    }
  }

  recordSlowQuery(query, params, duration) {
    this.slowQueries.push({
      query: this.normalizeQuery(query),
      params,
      duration,
      timestamp: new Date().toISOString()
    });
    // Keep only last 100 slow queries
    if (this.slowQueries.length > 100) {
      this.slowQueries.shift();
    }
  }

  normalizeQuery(query) {
    // Replace parameter placeholders with ? for grouping
    return query.replace(/\$\d+/g, '?').replace(/\s+/g, ' ').trim();
  }

  generateQueryId(query) {
    const crypto = require('crypto');
    return crypto.createHash('md5').update(this.normalizeQuery(query)).digest('hex');
  }

  getTopSlowQueries(limit = 10) {
    return Array.from(this.queryStats.values())
      .sort((a, b) => b.avgDuration - a.avgDuration)
      .slice(0, limit);
  }

  getMostFrequentQueries(limit = 10) {
    return Array.from(this.queryStats.values())
      .sort((a, b) => b.count - a.count)
      .slice(0, limit);
  }

  generateReport() {
    return {
      totalQueries: Array.from(this.queryStats.values()).reduce((sum, stat) => sum + stat.count, 0),
      slowQueries: this.slowQueries,
      topSlow: this.getTopSlowQueries(),
      mostFrequent: this.getMostFrequentQueries(),
      errorRate: this.calculateErrorRate()
    };
  }

  calculateErrorRate() {
    const stats = Array.from(this.queryStats.values());
    const totalQueries = stats.reduce((sum, stat) => sum + stat.count, 0);
    const totalErrors = stats.reduce((sum, stat) => sum + stat.errors, 0);
    return totalQueries > 0 ? (totalErrors / totalQueries) * 100 : 0;
  }
}
module.exports = QueryMonitor;
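A short usage sketch; the connection string, the reporting interval, and the './query-monitor' path are placeholders to adapt to your environment:
const { Pool } = require('pg');
const QueryMonitor = require('./query-monitor'); // placeholder path

const pool = new Pool({ connectionString: 'postgres://app:secret@localhost:5432/app' }); // placeholder DSN
const monitor = new QueryMonitor(pool);

async function recentOrders(customerId) {
  // Anything slower than 500 ms is recorded as a slow query
  return monitor.monitoredQuery(
    'SELECT * FROM orders WHERE customer_id = $1 ORDER BY order_date DESC LIMIT 10',
    [customerId],
    500
  );
}

// Emit a summary report every five minutes
setInterval(() => {
  console.log(JSON.stringify(monitor.generateReport(), null, 2));
}, 5 * 60 * 1000);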
Step 6: Database Schema Optimization
-- Use appropriate data types
-- Instead of VARCHAR(255) for everything
CREATE TABLE optimized_users (
user_id INTEGER PRIMARY KEY,
email VARCHAR(320), -- Maximum email length
username VARCHAR(30), -- Reasonable username limit
is_active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMP NOT NULL DEFAULT NOW(),
last_login TIMESTAMP,
settings JSONB -- Use JSONB for flexible data
);
-- Normalize repeated data
CREATE TABLE user_preferences (
preference_id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL
);
CREATE TABLE user_preference_values (
user_id INTEGER REFERENCES users(user_id),
preference_id INTEGER REFERENCES user_preferences(preference_id),
value TEXT,
PRIMARY KEY (user_id, preference_id)
);
-- Use constraints for data integrity
ALTER TABLE products ADD CONSTRAINT check_positive_price
CHECK (price > 0);
ALTER TABLE orders ADD CONSTRAINT check_valid_status
CHECK (status IN ('PENDING', 'CONFIRMED', 'SHIPPED', 'DELIVERED', 'CANCELLED'));
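With the check constraints in place, invalid rows are rejected at write time (assuming products has name and price columns, as in the earlier query examples):
-- Rejected with a check constraint violation on check_positive_price
INSERT INTO products (name, price) VALUES ('Sample item', -5.00);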
-- Use triggers for automatic updates
CREATE OR REPLACE FUNCTION update_modified_time()
RETURNS TRIGGER AS $$
BEGIN
NEW.modified_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_update_modified_time
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_modified_time();
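The trigger assumes the users table has a modified_at column; if it does not, add one first (a minimal sketch):
ALTER TABLE users ADD COLUMN modified_at TIMESTAMP NOT NULL DEFAULT NOW();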
Step 7: Caching Strategies
class MultiLevelCache {
  constructor(redisClient, memoryCache) {
    this.redis = redisClient; // assumes a node-redis v4 (promise-based) client
    this.memory = memoryCache; // any in-process cache exposing get/set/del/keys (e.g. an LRU cache)
    this.metrics = {
      hits: { l1: 0, l2: 0 },
      misses: 0,
      sets: 0
    };
  }

  async get(key) {
    // Level 1: Memory cache
    let value = this.memory.get(key);
    if (value !== undefined) {
      this.metrics.hits.l1++;
      return value;
    }
    // Level 2: Redis cache
    try {
      const serialized = await this.redis.get(key);
      if (serialized) {
        value = JSON.parse(serialized);
        this.memory.set(key, value); // Promote to L1
        this.metrics.hits.l2++;
        return value;
      }
    } catch (error) {
      console.error('Redis get error:', error);
    }
    this.metrics.misses++;
    return null;
  }

  async set(key, value, ttl = 3600) {
    this.metrics.sets++;
    // Set in memory cache
    this.memory.set(key, value);
    // Set in Redis cache with an expiry (node-redis v4 syntax)
    try {
      await this.redis.set(key, JSON.stringify(value), { EX: ttl });
    } catch (error) {
      console.error('Redis set error:', error);
    }
  }

  async invalidate(key) {
    this.memory.del(key);
    try {
      await this.redis.del(key);
    } catch (error) {
      console.error('Redis delete error:', error);
    }
  }

  async invalidatePattern(pattern) {
    // Invalidate memory cache entries matching the pattern
    for (const key of this.memory.keys()) {
      if (this.matchesPattern(key, pattern)) {
        this.memory.del(key);
      }
    }
    // Invalidate Redis entries (KEYS blocks the server; prefer SCAN on large keyspaces)
    try {
      const keys = await this.redis.keys(pattern);
      if (keys.length > 0) {
        await this.redis.del(keys);
      }
    } catch (error) {
      console.error('Redis pattern delete error:', error);
    }
  }

  matchesPattern(str, pattern) {
    // Turn a simple glob (* wildcard) into an anchored regex, escaping other metacharacters
    const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*');
    return new RegExp(`^${escaped}$`).test(str);
  }

  getMetrics() {
    const total = this.metrics.hits.l1 + this.metrics.hits.l2 + this.metrics.misses;
    return {
      ...this.metrics,
      hitRate: total > 0 ? ((this.metrics.hits.l1 + this.metrics.hits.l2) / total) * 100 : 0,
      l1HitRate: total > 0 ? (this.metrics.hits.l1 / total) * 100 : 0,
      l2HitRate: total > 0 ? (this.metrics.hits.l2 / total) * 100 : 0
    };
  }
}
module.exports = MultiLevelCache;
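A usage sketch with node-redis v4 and a simple Map-based stand-in for the in-memory level (a real LRU cache with eviction is preferable in production; the require path and Redis URL are placeholders):
const { createClient } = require('redis');
const MultiLevelCache = require('./multi-level-cache'); // placeholder path

// Map-backed stand-in exposing the get/set/del/keys interface the class expects
const memory = {
  store: new Map(),
  get(key) { return this.store.get(key); },
  set(key, value) { this.store.set(key, value); },
  del(key) { this.store.delete(key); },
  keys() { return this.store.keys(); }
};

async function main() {
  const redisClient = createClient({ url: 'redis://localhost:6379' }); // assumed local Redis
  await redisClient.connect();
  const cache = new MultiLevelCache(redisClient, memory);

  await cache.set('user:42', { name: 'Ada' }, 600);
  console.log(await cache.get('user:42')); // second read is served from L1
  await cache.invalidatePattern('user:*');
  console.log(cache.getMetrics());

  await redisClient.quit();
}

main().catch(console.error);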
Summary
Database optimization involves strategic indexing, query optimization, connection pooling, partitioning, and multi-level caching. Monitor query performance, use appropriate data types, and implement caching strategies for optimal application performance.