Lesson 7.4: Storing Model Predictions and Logs
Partitioned Predictions Table
CREATE TABLE model_predictions (
id BIGSERIAL,
user_id BIGINT NOT NULL,
conversation_id BIGINT,
model_name TEXT NOT NULL,
model_version TEXT NOT NULL,
input_data JSONB NOT NULL,
prediction JSONB NOT NULL,
confidence FLOAT,
latency_ms INT,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
CREATE TABLE model_predictions_2024_01 PARTITION OF model_predictions
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE model_predictions_2024_02 PARTITION OF model_predictions
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
CREATE INDEX model_predictions_2024_01_model_idx
ON model_predictions_2024_01(model_name, model_version, created_at DESC);
CREATE MATERIALIZED VIEW model_performance_daily AS
SELECT
DATE(created_at) AS date,
model_name,
model_version,
COUNT(*) AS prediction_count,
AVG(confidence) AS avg_confidence,
AVG(latency_ms) AS avg_latency_ms,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms) AS p95_latency_ms
FROM model_predictions
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY DATE(created_at), model_name, model_version;
CREATE INDEX model_performance_date_model_idx
ON model_performance_daily(date DESC, model_name);
Logging Best Practices
import asyncio
async def log_prediction(prediction_data):
await db.execute("""
INSERT INTO model_predictions (
user_id, conversation_id, model_name, model_version,
input_data, prediction, confidence, latency_ms
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
""", prediction_data)
result = model.predict(input)
asyncio.create_task(log_prediction({...}))
return result
Key Takeaways
- Partitioning by time enables efficient queries and easy data retention
- Monthly partitions balance partition count with query performance
- Composite primary key (id, created_at) required for range partitioning
- Indexes per partition improve query performance on filtered data
- Materialized views pre-aggregate daily metrics for dashboards
- Async logging prevents prediction latency from blocking inference
- JSONB columns store flexible input/output data without schema changes
- Automatic partition routing sends inserts to correct partition based on timestamp