Lesson 4.4: Soft Deletes and Audit Tables
Soft Deletes
Why: Never actually delete data (compliance, undo, analytics)
-- Add deleted_at column
CREATE TABLE documents (
id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL,
content TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
deleted_at TIMESTAMPTZ -- NULL = active
);
-- Partial index on active rows
CREATE INDEX documents_active_idx ON documents(user_id)
WHERE deleted_at IS NULL;
-- Soft delete
UPDATE documents SET deleted_at = NOW() WHERE id = 123;
-- Queries filter deleted rows
SELECT * FROM documents WHERE user_id = 456 AND deleted_at IS NULL;
Pros:
- Recoverable (undo deletes)
- Audit trail
- Analytics on deleted data
Cons:
- Table never shrinks
- Queries must always filter deleted_at
- Risk: Forgetting the filter (data leaks)
Solution: Views
-- View hides soft-deleted rows
CREATE VIEW documents_active AS
SELECT * FROM documents WHERE deleted_at IS NULL;
-- Queries use view (automatic filtering)
SELECT * FROM documents_active WHERE user_id = 456;
Audit Tables
Track all changes:
-- Main table
CREATE TABLE user_features (
user_id BIGINT PRIMARY KEY,
feature_vector JSONB,
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Audit table (all historical versions)
CREATE TABLE user_features_audit (
id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL,
feature_vector JSONB,
operation TEXT, -- 'INSERT', 'UPDATE', 'DELETE'
changed_at TIMESTAMPTZ DEFAULT NOW(),
changed_by TEXT -- User/process that made change
);
-- Trigger to log changes
CREATE OR REPLACE FUNCTION audit_user_features() RETURNS TRIGGER AS $$
BEGIN
IF (TG_OP = 'DELETE') THEN
INSERT INTO user_features_audit (user_id, feature_vector, operation)
VALUES (OLD.user_id, OLD.feature_vector, 'DELETE');
ELSIF (TG_OP = 'UPDATE') THEN
INSERT INTO user_features_audit (user_id, feature_vector, operation)
VALUES (NEW.user_id, NEW.feature_vector, 'UPDATE');
ELSIF (TG_OP = 'INSERT') THEN
INSERT INTO user_features_audit (user_id, feature_vector, operation)
VALUES (NEW.user_id, NEW.feature_vector, 'INSERT');
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER user_features_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON user_features
FOR EACH ROW EXECUTE FUNCTION audit_user_features();
Querying history:
-- Get feature values at specific time
SELECT feature_vector
FROM user_features_audit
WHERE user_id = 123
AND changed_at <= '2024-06-01'
ORDER BY changed_at DESC
LIMIT 1;
-- See all changes for user
SELECT changed_at, operation, feature_vector
FROM user_features_audit
WHERE user_id = 123
ORDER BY changed_at DESC;
Key Takeaways
- Soft deletes use a deleted_at timestamp instead of actual deletion
- Partial indexes on active rows keep queries fast
- Views can hide soft-deleted rows automatically
- Audit tables track all changes with triggers
- Audit tables enable time-travel queries and compliance
- Both patterns are essential for AI systems with regulatory requirements
- Trade-off: Additional storage for complete data history

