CTEs (Common Table Expressions) - WITH Clause
Introduction
Subqueries are powerful, but deeply nested subqueries can become hard to read and maintain. Consider this query:
SELECT * FROM (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY customer_id
) AS customer_totals
WHERE total_spent > (
SELECT AVG(total_spent)
FROM (
SELECT SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY customer_id
) AS all_customers
);
Hard to read, right? CTEs (Common Table Expressions) provide a cleaner solution!
What is a CTE?
A CTE is a named temporary result set defined using the WITH clause. Think of it as a named subquery that you can reference multiple times in your main query.
Syntax
WITH cte_name AS (
SELECT ...
FROM ...
WHERE ...
)
SELECT ...
FROM cte_name
WHERE ...;
Basic Example
-- Using CTE
WITH expensive_products AS (
SELECT *
FROM products
WHERE price > 100
)
SELECT name, price
FROM expensive_products
ORDER BY price DESC;
Equivalent subquery:
SELECT name, price
FROM (
SELECT *
FROM products
WHERE price > 100
) AS expensive_products
ORDER BY price DESC;
The CTE version is more readable!
Simple CTE Examples
Customer Total Spending
WITH customer_totals AS (
SELECT
customer_id,
SUM(total) AS total_spent,
COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
)
SELECT
c.name,
ct.total_spent,
ct.order_count
FROM customers c
INNER JOIN customer_totals ct ON c.id = ct.customer_id
WHERE ct.total_spent > 500
ORDER BY ct.total_spent DESC;
Above-Average Customers
WITH customer_spending AS (
SELECT
customer_id,
SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
),
avg_spending AS (
SELECT AVG(total_spent) AS avg
FROM customer_spending
)
SELECT
c.name,
cs.total_spent
FROM customers c
INNER JOIN customer_spending cs ON c.id = cs.customer_id
CROSS JOIN avg_spending
WHERE cs.total_spent > avg_spending.avg
ORDER BY cs.total_spent DESC;
Multiple CTEs
You can define multiple CTEs in a single query:
WITH
cte1 AS (
SELECT ... FROM table1
),
cte2 AS (
SELECT ... FROM table2
),
cte3 AS (
SELECT ... FROM cte1 -- CTEs can reference previous CTEs!
INNER JOIN cte2 ON ...
)
SELECT ...
FROM cte3;
Practical Example: Sales Report
WITH
-- Monthly revenue
monthly_revenue AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total) AS revenue
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY DATE_TRUNC('month', order_date)
),
-- Previous month revenue
revenue_with_previous AS (
SELECT
month,
revenue,
LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue
FROM monthly_revenue
)
-- Calculate growth
SELECT
month,
revenue,
prev_month_revenue,
revenue - prev_month_revenue AS growth,
ROUND(
100.0 * (revenue - prev_month_revenue) / NULLIF(prev_month_revenue, 0),
2
) AS growth_pct
FROM revenue_with_previous
ORDER BY month;
CTEs vs Subqueries
Readability
-- ❌ Hard to read: Nested subqueries
SELECT *
FROM (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
) AS t1
WHERE total_spent > (
SELECT AVG(total_spent)
FROM (
SELECT SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
) AS t2
);
-- ✅ Easy to read: CTEs
WITH customer_totals AS (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
),
avg_total AS (
SELECT AVG(total_spent) AS avg
FROM customer_totals
)
SELECT *
FROM customer_totals
WHERE total_spent > (SELECT avg FROM avg_total);
Reusability
-- CTE can be referenced multiple times
WITH high_value_customers AS (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
HAVING SUM(total) > 1000
)
SELECT
(SELECT COUNT(*) FROM high_value_customers) AS count,
(SELECT SUM(total_spent) FROM high_value_customers) AS total_revenue,
(SELECT AVG(total_spent) FROM high_value_customers) AS avg_spent;
With subqueries, you'd have to repeat the same logic three times!
CTEs for Complex Calculations
Customer Lifetime Value with Segments
WITH customer_metrics AS (
SELECT
c.id,
c.name,
COUNT(o.id) AS order_count,
COALESCE(SUM(o.total), 0) AS lifetime_value,
MAX(o.order_date) AS last_order_date
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name
),
customer_segments AS (
SELECT
*,
CASE
WHEN lifetime_value >= 1000 THEN 'VIP'
WHEN lifetime_value >= 500 THEN 'High Value'
WHEN lifetime_value >= 100 THEN 'Regular'
WHEN lifetime_value > 0 THEN 'Low Value'
ELSE 'No Purchases'
END AS segment,
CURRENT_DATE - last_order_date AS days_since_purchase
FROM customer_metrics
)
SELECT
segment,
COUNT(*) AS customer_count,
SUM(lifetime_value) AS segment_revenue,
ROUND(AVG(lifetime_value), 2) AS avg_lifetime_value,
ROUND(AVG(days_since_purchase), 0) AS avg_days_inactive
FROM customer_segments
GROUP BY segment
ORDER BY segment_revenue DESC;
Product Performance Analysis
WITH product_sales AS (
SELECT
p.id,
p.name,
p.category,
p.price,
COALESCE(SUM(oi.quantity), 0) AS units_sold,
COALESCE(SUM(oi.quantity * oi.price), 0) AS revenue
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.category, p.price
),
category_stats AS (
SELECT
category,
AVG(revenue) AS avg_category_revenue
FROM product_sales
GROUP BY category
)
SELECT
ps.name,
ps.category,
ps.units_sold,
ps.revenue,
cs.avg_category_revenue,
CASE
WHEN ps.revenue > cs.avg_category_revenue THEN 'Above Average'
WHEN ps.revenue = cs.avg_category_revenue THEN 'Average'
ELSE 'Below Average'
END AS performance
FROM product_sales ps
INNER JOIN category_stats cs ON ps.category = cs.category
ORDER BY ps.category, ps.revenue DESC;
Recursive CTEs
Recursive CTEs reference themselves and are powerful for hierarchical or tree-structured data.
Syntax
WITH RECURSIVE cte_name AS (
-- Base case (anchor)
SELECT ...
UNION ALL
-- Recursive case
SELECT ...
FROM cte_name -- References itself!
WHERE ...
)
SELECT * FROM cte_name;
Example: Employee Hierarchy
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
manager_id INTEGER REFERENCES employees(id)
);
INSERT INTO employees VALUES
(1, 'Alice (CEO)', NULL),
(2, 'Bob (VP)', 1),
(3, 'Carol (VP)', 1),
(4, 'Dave (Manager)', 2),
(5, 'Eve (Manager)', 2),
(6, 'Frank (Employee)', 4);
-- Org chart with levels
WITH RECURSIVE org_chart AS (
-- Base case: Top-level employees (no manager)
SELECT
id,
name,
manager_id,
1 AS level,
name AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: Employees under managers
SELECT
e.id,
e.name,
e.manager_id,
oc.level + 1,
oc.path || ' > ' || e.name
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT
level,
REPEAT(' ', level - 1) || name AS org_structure,
path
FROM org_chart
ORDER BY path;
Result:
level | org_structure | path
-------+------------------------+-------------------------------------
1 | Alice (CEO) | Alice (CEO)
2 | Bob (VP) | Alice (CEO) > Bob (VP)
3 | Dave (Manager) | Alice (CEO) > Bob (VP) > Dave (Manager)
4 | Frank (Employee) | Alice (CEO) > Bob (VP) > Dave (Manager) > Frank (Employee)
3 | Eve (Manager) | Alice (CEO) > Bob (VP) > Eve (Manager)
2 | Carol (VP) | Alice (CEO) > Carol (VP)
Example: Number Sequence
-- Generate numbers 1 to 10
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1
FROM numbers
WHERE n < 10
)
SELECT * FROM numbers;
Result:
n
----
1
2
3
4
5
6
7
8
9
10
Example: Date Series
-- Generate all dates in January 2024
WITH RECURSIVE dates AS (
SELECT DATE '2024-01-01' AS date
UNION ALL
SELECT date + INTERVAL '1 day'
FROM dates
WHERE date < '2024-01-31'
)
SELECT
date,
TO_CHAR(date, 'Day') AS day_name
FROM dates;
CTEs for Data Transformation
Pivoting Data
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
category,
SUM(oi.quantity * oi.price) AS revenue
FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE o.order_date >= '2024-01-01'
GROUP BY DATE_TRUNC('month', order_date), category
)
SELECT
month,
SUM(CASE WHEN category = 'Electronics' THEN revenue ELSE 0 END) AS electronics,
SUM(CASE WHEN category = 'Books' THEN revenue ELSE 0 END) AS books,
SUM(CASE WHEN category = 'Clothing' THEN revenue ELSE 0 END) AS clothing
FROM monthly_sales
GROUP BY month
ORDER BY month;
Result:
month | electronics | books | clothing
---------------------+-------------+----------+----------
2024-01-01 00:00:00 | 8750.00 | 2547.50 | 3125.75
2024-02-01 00:00:00 | 9450.75 | 3125.25 | 3847.50
Ranking and Filtering
WITH ranked_products AS (
SELECT
p.category,
p.name,
SUM(oi.quantity * oi.price) AS revenue,
ROW_NUMBER() OVER (
PARTITION BY p.category
ORDER BY SUM(oi.quantity * oi.price) DESC
) AS rank
FROM products p
INNER JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.category, p.name
)
SELECT category, name, revenue
FROM ranked_products
WHERE rank <= 3
ORDER BY category, rank;
CTEs vs Temporary Tables
CTEs
WITH temp_data AS (
SELECT ...
)
SELECT * FROM temp_data;
Pros:
- ✅ Cleaner syntax
- ✅ No cleanup needed
- ✅ Scoped to single query
- ✅ Can be recursive
Cons:
- ❌ Can't be indexed
- ❌ Recreated on each reference (in some databases)
- ❌ Limited to one query
Temporary Tables
CREATE TEMP TABLE temp_data AS
SELECT ...;
SELECT * FROM temp_data;
DROP TABLE temp_data;
Pros:
- ✅ Can be indexed
- ✅ Computed once, reused many times
- ✅ Can be used across multiple queries
Cons:
- ❌ Requires cleanup (DROP TABLE)
- ❌ More verbose
- ❌ Session-scoped (visible to entire session)
Use CTEs for single-query operations. Use temporary tables for complex multi-step processes.
Performance Considerations
CTEs Can Be Materialized or Inlined
PostgreSQL 12+ allows controlling CTE behavior:
-- Materialized: Computed once, stored temporarily
WITH customer_totals AS MATERIALIZED (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
)
SELECT * FROM customer_totals
WHERE total_spent > 1000;
-- Inlined: Merged into main query (like a subquery)
WITH customer_totals AS NOT MATERIALIZED (
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
)
SELECT * FROM customer_totals
WHERE total_spent > 1000;
Default behavior:
- PostgreSQL 12+: NOT MATERIALIZED (inlined) unless used multiple times
- PostgreSQL <12: Always materialized
When CTEs Can Be Slower
-- ❌ Slower: CTE forces materialization (PostgreSQL <12)
WITH recent_orders AS (
SELECT * FROM orders WHERE order_date >= '2024-01-01'
)
SELECT * FROM recent_orders WHERE customer_id = 5;
-- Filters customer_id AFTER computing entire CTE
-- ✅ Faster: Subquery allows optimization
SELECT * FROM (
SELECT * FROM orders WHERE order_date >= '2024-01-01'
) AS recent_orders
WHERE customer_id = 5;
-- Optimizer can push customer_id filter earlier
Use EXPLAIN ANALYZE to check performance!
Common Patterns with CTEs
Pattern 1: Multi-Step Calculations
WITH
step1 AS (SELECT ...),
step2 AS (SELECT ... FROM step1 ...),
step3 AS (SELECT ... FROM step2 ...)
SELECT * FROM step3;
Pattern 2: Filtering Before JOIN
WITH filtered_orders AS (
SELECT * FROM orders
WHERE order_date >= '2024-01-01'
AND total > 100
)
SELECT c.name, fo.total
FROM customers c
INNER JOIN filtered_orders fo ON c.id = fo.customer_id;
Pattern 3: Deduplication
WITH ranked_orders AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS rn
FROM orders
)
SELECT *
FROM ranked_orders
WHERE rn = 1; -- Most recent order per customer
Practice Exercises
Exercise 1
Use a CTE to find customers who spent more than the average customer.
Solution
WITH customer_totals AS (
SELECT
customer_id,
SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
),
avg_spending AS (
SELECT AVG(total_spent) AS avg
FROM customer_totals
)
SELECT
c.name,
ct.total_spent
FROM customers c
INNER JOIN customer_totals ct ON c.id = ct.customer_id
CROSS JOIN avg_spending
WHERE ct.total_spent > avg_spending.avg
ORDER BY ct.total_spent DESC;
Exercise 2
Create a recursive CTE to generate the first 20 Fibonacci numbers.
Solution
WITH RECURSIVE fibonacci AS (
-- Base case: First two numbers
SELECT 1 AS n, 0 AS fib, 1 AS next_fib
UNION ALL
-- Recursive case
SELECT
n + 1,
next_fib,
fib + next_fib
FROM fibonacci
WHERE n < 20
)
SELECT n, fib
FROM fibonacci;
Exercise 3
Use CTEs to find the top 3 products by revenue in each category.
Solution
WITH product_revenue AS (
SELECT
p.id,
p.name,
p.category,
SUM(oi.quantity * oi.price) AS revenue
FROM products p
INNER JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.category
),
ranked_products AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY revenue DESC
) AS rank
FROM product_revenue
)
SELECT category, name, revenue
FROM ranked_products
WHERE rank <= 3
ORDER BY category, rank;
Key Takeaways
- ✅ CTEs are named temporary result sets defined with WITH
- ✅ More readable than nested subqueries
- ✅ Reusable within the same query
- ✅ Multiple CTEs can be chained together
- ✅ Recursive CTEs handle hierarchical data
- ✅ Materialized vs inlined affects performance (PostgreSQL 12+)
- ✅ Use CTEs for readability and single-query operations
- ✅ Use temp tables for multi-query workflows and indexing
- ✅ Use EXPLAIN ANALYZE to check performance
Next Steps
You've mastered CTEs! In the next lesson, we'll learn CASE Statements—conditional logic in SQL for transforming data and creating dynamic columns.
Ready to add more power to your queries!

