Lesson 5.5: Caching Strategies
Application-Level Caching
import json
import redis

redis_client = redis.Redis(host='localhost', port=6379)

def get_user_features(user_id):
    # Check cache first (cache-aside pattern)
    cache_key = f"features:user:{user_id}"
    cached = redis_client.get(cache_key)
    if cached:
        return json.loads(cached)

    # Cache miss: query the database
    features = db.execute("""
        SELECT feature_name, feature_value
        FROM user_features
        WHERE user_id = %s
    """, user_id)

    # Cache for 5 minutes
    redis_client.setex(cache_key, 300, json.dumps(features))
    return features
Materialized Views
-- Expensive aggregation query
CREATE MATERIALIZED VIEW user_stats AS
SELECT
    user_id,
    COUNT(*) AS order_count,
    SUM(total) AS total_spent,
    AVG(total) AS avg_order_value,
    MAX(created_at) AS last_order
FROM orders
GROUP BY user_id;

-- Unique index: required for REFRESH ... CONCURRENTLY, and fast lookups
CREATE UNIQUE INDEX user_stats_user_idx ON user_stats(user_id);

-- Refresh periodically (cron job or manual)
REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
When to use:
- Expensive aggregations (GROUP BY, window functions)
- Data changes infrequently (hourly, daily)
- Can tolerate stale data
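The "refresh periodically" step is often just a cron entry. A sketch, assuming psql is on the PATH and can authenticate non-interactively; the database name mydb and the hourly schedule are placeholders:

```shell
# crontab entry: refresh user_stats at the top of every hour
# (REFRESH ... CONCURRENTLY cannot run inside a transaction block;
#  psql -c runs it as a single autocommitted statement)
0 * * * * psql -d mydb -c "REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;"
```

Refresh frequency sets your staleness bound: hourly refresh means user_stats can be up to an hour behind the orders table.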
Query Result Caching
from functools import lru_cache
import time

@lru_cache(maxsize=128)
def get_model_config(model_id, _cache_time):
    # _cache_time is unused in the body; it only varies the cache key
    return db.execute("SELECT config FROM ml_models WHERE id = %s", model_id)

# Use with time-based expiry
def get_cached_model_config(model_id):
    # The bucket value changes every 5 minutes, forcing a fresh query
    cache_time = int(time.time() / 300)
    return get_model_config(model_id, cache_time)
Key Takeaways
- Multi-layer caching: Redis for hot data, materialized views for aggregations
- Redis caching is essential for high-frequency feature lookups
- Materialized views cache expensive aggregation queries inside the database
- CONCURRENTLY refreshes without blocking reads, but requires a unique index
- The lru_cache decorator plus a time bucket gives simple time-based expiry
- Cache only data that is read frequently and changes infrequently
- Feature stores benefit enormously from Redis caching

