Lesson 7.4: Storing Model Predictions and Logs

Partitioned Predictions Table

-- Main predictions table (partitioned by time)
CREATE TABLE model_predictions (
    id BIGSERIAL,
    user_id BIGINT NOT NULL,
    conversation_id BIGINT,
    model_name TEXT NOT NULL,
    model_version TEXT NOT NULL,
    input_data JSONB NOT NULL,
    prediction JSONB NOT NULL,
    confidence FLOAT,
    latency_ms INT,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);

-- Create monthly partitions
CREATE TABLE model_predictions_2024_01 PARTITION OF model_predictions
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE model_predictions_2024_02 PARTITION OF model_predictions
    FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

-- Indexes per partition (created automatically)
CREATE INDEX model_predictions_2024_01_model_idx
ON model_predictions_2024_01(model_name, model_version, created_at DESC);

-- Model performance metrics (aggregated)
CREATE MATERIALIZED VIEW model_performance_daily AS
SELECT
    DATE(created_at) AS date,
    model_name,
    model_version,
    COUNT(*) AS prediction_count,
    AVG(confidence) AS avg_confidence,
    AVG(latency_ms) AS avg_latency_ms,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms) AS p95_latency_ms
FROM model_predictions
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY DATE(created_at), model_name, model_version;

CREATE INDEX model_performance_date_model_idx
ON model_performance_daily(date DESC, model_name);

Logging Best Practices

# Application layer: Log predictions asynchronously
import asyncio

async def log_prediction(prediction_data):
    await db.execute("""
        INSERT INTO model_predictions (
            user_id, conversation_id, model_name, model_version,
            input_data, prediction, confidence, latency_ms
        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
    """, prediction_data)

# Don't block inference
result = model.predict(input)
asyncio.create_task(log_prediction({...}))  # Fire and forget
return result

Key Takeaways

Partitioning by time enables efficient queries and easy data retention
Monthly partitions balance partition count with query performance
Composite primary key (id, created_at) required for range partitioning
Indexes per partition improve query performance on filtered data
Materialized views pre-aggregate daily metrics for dashboards
Async logging prevents prediction latency from blocking inference
JSONB columns store flexible input/output data without schema changes
Automatic partition routing sends inserts to correct partition based on timestamp

Partitioned Predictions Table

-- Main predictions table (partitioned by time) CREATE TABLE model_predictions ( id BIGSERIAL, user_id BIGINT NOT NULL, conversation_id BIGINT, model_name TEXT NOT NULL, model_version TEXT NOT NULL, input_data JSONB NOT NULL, prediction JSONB NOT NULL, confidence FLOAT, latency_ms INT, created_at TIMESTAMPTZ DEFAULT NOW(), PRIMARY KEY (id, created_at) ) PARTITION BY RANGE (created_at); -- Create monthly partitions CREATE TABLE model_predictions_2024_01 PARTITION OF model_predictions FOR VALUES FROM ('2024-01-01') TO ('2024-02-01'); CREATE TABLE model_predictions_2024_02 PARTITION OF model_predictions FOR VALUES FROM ('2024-02-01') TO ('2024-03-01'); -- Indexes per partition (created automatically) CREATE INDEX model_predictions_2024_01_model_idx ON model_predictions_2024_01(model_name, model_version, created_at DESC); -- Model performance metrics (aggregated) CREATE MATERIALIZED VIEW model_performance_daily AS SELECT DATE(created_at) AS date, model_name, model_version, COUNT(*) AS prediction_count, AVG(confidence) AS avg_confidence, AVG(latency_ms) AS avg_latency_ms, PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms) AS p95_latency_ms FROM model_predictions WHERE created_at > NOW() - INTERVAL '30 days' GROUP BY DATE(created_at), model_name, model_version; CREATE INDEX model_performance_date_model_idx ON model_performance_daily(date DESC, model_name);

Logging Best Practices

# Application layer: Log predictions asynchronously import asyncio async def log_prediction(prediction_data): await db.execute(""" INSERT INTO model_predictions ( user_id, conversation_id, model_name, model_version, input_data, prediction, confidence, latency_ms ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) """, prediction_data) # Don't block inference result = model.predict(input) asyncio.create_task(log_prediction({...})) # Fire and forget return result

Key Takeaways

Partitioning by time enables efficient queries and easy data retention

Monthly partitions balance partition count with query performance

Composite primary key (id, created_at) required for range partitioning

Indexes per partition improve query performance on filtered data

Materialized views pre-aggregate daily metrics for dashboards

Async logging prevents prediction latency from blocking inference

JSONB columns store flexible input/output data without schema changes

Automatic partition routing sends inserts to correct partition based on timestamp