PostgreSQL Deep Dive · Lesson 2 of 4

Window Functions, JSONB & pgvector

PostgreSQL is far more capable than its "relational database" label suggests. The features in this guide solve problems that would otherwise require application-level code, separate NoSQL stores, or complex external infrastructure — all within a single Postgres instance.


Window Functions — Full Specification

Window functions compute across a set of rows related to the current row without collapsing the result (unlike GROUP BY). Every window function follows this structure:

SQL
function_name(arg) OVER (
  PARTITION BY partition_col  -- divide rows into groups (optional)
  ORDER BY sort_col           -- order within each partition
  frame_clause                -- which rows in the partition to include
)

Frame Clauses — The Part Most Tutorials Skip

The frame clause defines the "window" of rows visible to the function, relative to the current row. When an ORDER BY is present but no frame clause is given, most aggregate window functions default to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW; with no ORDER BY at all, the frame is the entire partition.

SQL
-- Frame clause syntax:
ROWS BETWEEN <start> AND <end>
RANGE BETWEEN <start> AND <end>   -- uses ORDER BY value, not row position
GROUPS BETWEEN <start> AND <end>  -- counts by peer groups (PostgreSQL 11+)

-- Boundary values:
UNBOUNDED PRECEDING  -- from the first row in partition
N PRECEDING          -- N rows before current
CURRENT ROW          -- current row
N FOLLOWING          -- N rows after current
UNBOUNDED FOLLOWING  -- to the last row in partition
SQL
-- Running total: all rows from start to current
SUM(revenue) OVER (ORDER BY month ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)

-- 3-month rolling average (current + 2 preceding)
AVG(revenue) OVER (ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)

-- Centered 3-month average (1 before, current, 1 after)
AVG(revenue) OVER (ORDER BY month ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)

-- Full partition total (same value for every row in partition)
SUM(revenue) OVER (PARTITION BY region)
-- equivalent to:
SUM(revenue) OVER (PARTITION BY region ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)

ROWS vs RANGE:

  • ROWS: counts by physical row position — precise
  • RANGE: counts by value — if two rows have the same ORDER BY value, they are treated as peers and both included
SQL
-- Data: three rows with revenue on the same date
-- ROWS BETWEEN 1 PRECEDING AND CURRENT ROW  always 2 rows (only 1 for the first row in the partition)
-- RANGE BETWEEN 1 PRECEDING AND CURRENT ROW  varies (peers with same date are all included)

Ranking Functions — Subtle Differences

SQL
SELECT
  product_id,
  category,
  revenue,
  -- RANK: skips numbers after ties (1, 2, 2, 4)
  RANK()        OVER (PARTITION BY category ORDER BY revenue DESC) AS rank,
  -- DENSE_RANK: no gaps (1, 2, 2, 3)
  DENSE_RANK()  OVER (PARTITION BY category ORDER BY revenue DESC) AS dense_rank,
  -- ROW_NUMBER: always unique, arbitrary tie-breaking
  ROW_NUMBER()  OVER (PARTITION BY category ORDER BY revenue DESC) AS row_num,
  -- NTILE: divide into N equal buckets (useful for percentiles)
  NTILE(4)      OVER (PARTITION BY category ORDER BY revenue DESC) AS quartile,
  -- PERCENT_RANK: rank as a fraction (0.0 to 1.0)
  PERCENT_RANK() OVER (PARTITION BY category ORDER BY revenue DESC) AS pct_rank,
  -- CUME_DIST: cumulative distribution (fraction of rows <= current)
  CUME_DIST()   OVER (PARTITION BY category ORDER BY revenue DESC) AS cume_dist
FROM product_sales;

Gap Detection — Finding Missing Sequence Values

SQL
-- Find gaps in an order sequence (useful for audit/compliance)
WITH numbered AS (
  SELECT
    order_id,
    LAG(order_id)  OVER (ORDER BY order_id) AS prev_id,
    LEAD(order_id) OVER (ORDER BY order_id) AS next_id
  FROM orders
)
SELECT
  prev_id + 1 AS gap_start,
  order_id - 1 AS gap_end,
  order_id - prev_id - 1 AS gap_size
FROM numbered
WHERE order_id - prev_id > 1;

Session Analysis — Grouping Consecutive Events

SQL
-- Identify user sessions (gap > 30 minutes = new session)
WITH events AS (
  SELECT
    user_id,
    event_time,
    LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_event
  FROM user_events
),
session_starts AS (
  SELECT
    user_id,
    event_time,
    CASE
      WHEN event_time - prev_event > INTERVAL '30 minutes' OR prev_event IS NULL
      THEN 1 ELSE 0
    END AS is_new_session
  FROM events
)
SELECT
  user_id,
  event_time,
  SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY event_time) AS session_id
FROM session_starts;

CTEs — Beyond the Basics

Materialization Control (PostgreSQL 12+)

By default, CTEs may be materialised (computed once and stored) or inlined (treated as a subquery). PostgreSQL 12+ lets you control this explicitly:

SQL
-- Force materialisation (useful when CTE is used multiple times)
WITH expensive_calc AS MATERIALIZED (
  SELECT product_id, AVG(price) AS avg_price, STDDEV(price) AS stddev_price
  FROM price_history
  GROUP BY product_id
)
SELECT p.name, ec.avg_price, ec.stddev_price
FROM products p
JOIN expensive_calc ec ON ec.product_id = p.id
WHERE ec.stddev_price > 10;

-- Prevent materialisation (allow planner to inline and optimise)
WITH filtered AS NOT MATERIALIZED (
  SELECT * FROM orders WHERE status = 'pending'
)
SELECT * FROM filtered WHERE customer_id = 42;
-- With NOT MATERIALIZED, planner can push customer_id = 42 into the CTE scan

Recursive CTEs for Hierarchical Data

SQL
-- Org chart: find all reports under a manager (any depth)
WITH RECURSIVE reports AS (
  -- Base: the manager themselves
  SELECT id, name, manager_id, 0 AS depth, ARRAY[id] AS path
  FROM employees
  WHERE id = 42  -- starting manager

  UNION ALL

  -- Recursive: their direct reports, then reports of reports...
  SELECT e.id, e.name, e.manager_id, r.depth + 1, r.path || e.id
  FROM employees e
  JOIN reports r ON e.manager_id = r.id
  WHERE NOT e.id = ANY(r.path)  -- cycle protection
)
SELECT depth, REPEAT('  ', depth) || name AS indented_name, path
FROM reports
ORDER BY path;

-- Bill of Materials explosion (nested product components)
WITH RECURSIVE bom AS (
  SELECT product_id, component_id, quantity, 1 AS level
  FROM product_components
  WHERE product_id = 100  -- top-level product

  UNION ALL

  SELECT pc.product_id, pc.component_id, bom.quantity * pc.quantity, bom.level + 1
  FROM product_components pc
  JOIN bom ON bom.component_id = pc.product_id
  WHERE bom.level < 10  -- prevent infinite recursion on circular references
)
SELECT component_id, SUM(quantity) AS total_quantity
FROM bom
GROUP BY component_id;

Writeable CTEs: Multiple Operations in One Statement

SQL
-- Move rows atomically: delete from source, insert to archive
WITH deleted AS (
  DELETE FROM orders
  WHERE created_at < NOW() - INTERVAL '2 years'
  RETURNING *
)
INSERT INTO orders_archive
SELECT * FROM deleted;

-- Upsert with conflict tracking
WITH inserted AS (
  INSERT INTO inventory (product_id, warehouse_id, quantity)
  VALUES (101, 5, 100)
  ON CONFLICT (product_id, warehouse_id) DO UPDATE
    SET quantity = inventory.quantity + EXCLUDED.quantity,
        updated_at = NOW()
  RETURNING product_id, warehouse_id, quantity, xmax
)
SELECT
  product_id,
  CASE WHEN xmax = 0 THEN 'inserted' ELSE 'updated' END AS action
FROM inserted;
-- xmax = 0 means new row; xmax != 0 means updated existing row

JSONB — Production Patterns

Operator Reference

SQL
-- Navigation
col->'key'           -- returns JSONB (preserves type)
col->>'key'          -- returns TEXT (strips quotes)
col#>'{a,b,c}'       -- navigate nested path, returns JSONB
col#>>'{a,b,c}'      -- navigate nested path, returns TEXT

-- Containment
col @> '{"key": "value"}'    -- does col contain this JSON?
'{"key": "value"}' <@ col    -- is this JSON contained in col?

-- Existence
col ? 'key'           -- does key exist at top level?
col ?| ARRAY['a','b'] -- does any of these keys exist?
col ?& ARRAY['a','b'] -- do all of these keys exist?

-- Modification (returns new JSONB, does not mutate)
jsonb_set(col, '{key}', '"new_value"')               -- set nested path
jsonb_set(col, '{address,city}', '"London"', true)   -- create if missing
col - 'key'                                           -- remove key
col - ARRAY['key1', 'key2']                          -- remove multiple keys
col #- '{address,city}'                              -- remove at path

-- Aggregation
jsonb_agg(expr)                -- aggregate rows into a JSON array
jsonb_object_agg(key, value)   -- aggregate into a JSON object

Schema Enforcement with Check Constraints

JSONB is schema-less by default, but you can enforce structure:

SQL
ALTER TABLE products ADD CONSTRAINT valid_attributes CHECK (
  attributes IS NULL OR (
    jsonb_typeof(attributes) = 'object'
    AND (attributes ? 'sku')                           -- sku is required
    AND jsonb_typeof(attributes->'price') = 'number'   -- price must be numeric
    AND (attributes->>'price')::numeric > 0            -- price must be positive
  )
);

Querying Arrays Inside JSONB

SQL
-- Data: {"tags": ["sale", "new", "featured"]}
-- Find products with tag "sale"
WHERE attributes->'tags' @> '["sale"]'

-- Find products with BOTH "sale" and "new" tags
WHERE attributes->'tags' @> '["sale", "new"]'

-- Unnest JSONB array to rows (for reporting)
SELECT
  p.id,
  p.name,
  tag
FROM products p,
  jsonb_array_elements_text(p.attributes->'tags') AS tag
WHERE tag IN ('sale', 'featured');

-- Count products per tag
SELECT tag, COUNT(*) AS product_count
FROM products,
  jsonb_array_elements_text(attributes->'tags') AS tag
GROUP BY tag
ORDER BY product_count DESC;

Table Partitioning — Production Strategies

Partitioning splits one logical table into multiple physical tables (partitions). Queries targeting a partition key automatically prune irrelevant partitions (partition pruning).

Range Partitioning (Time-Series Data)

SQL
-- Partition orders by month
CREATE TABLE orders (
  id          BIGSERIAL,
  customer_id INT NOT NULL,
  total       NUMERIC(12, 2) NOT NULL,
  created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  status      TEXT NOT NULL DEFAULT 'pending'
) PARTITION BY RANGE (created_at);

-- Monthly partitions
CREATE TABLE orders_2026_01 PARTITION OF orders
  FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
CREATE TABLE orders_2026_02 PARTITION OF orders
  FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');
-- ... and so on

-- Default partition catches anything outside defined ranges
CREATE TABLE orders_default PARTITION OF orders DEFAULT;

-- Automate partition creation with pg_partman extension
-- or a scheduled function:
-- Create (idempotently) the monthly partition covering target_date.
-- parent_table defaults to 'orders' for backward compatibility, but any
-- RANGE-partitioned parent keyed on a date/timestamp column can be passed.
CREATE OR REPLACE FUNCTION create_monthly_partition(
  target_date DATE,
  parent_table TEXT DEFAULT 'orders'
)
RETURNS void AS $$
DECLARE
  partition_name TEXT;
  start_date DATE;
  end_date DATE;
BEGIN
  start_date := DATE_TRUNC('month', target_date);
  end_date := start_date + INTERVAL '1 month';
  -- Name from the truncated month start, so every day in a month maps to
  -- the same partition name (e.g. orders_2026_04).
  partition_name := parent_table || '_' || TO_CHAR(start_date, 'YYYY_MM');

  -- %I quotes identifiers, %L quotes literals — safe dynamic DDL.
  -- IF NOT EXISTS makes repeated scheduled runs harmless.
  EXECUTE format(
    'CREATE TABLE IF NOT EXISTS %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
    partition_name, parent_table, start_date, end_date
  );
END;
$$ LANGUAGE plpgsql;
SQL
-- Verify partition pruning is working
EXPLAIN SELECT * FROM orders WHERE created_at >= '2026-04-01' AND created_at < '2026-05-01';
-- Should show: Seq Scan on orders_2026_04  (NOT on other partitions)
-- If you see all partitions: ensure partition pruning is enabled (it is by default)
SHOW enable_partition_pruning;          -- should be 'on' for declarative partitions
-- Note: constraint_exclusion applies only to legacy inheritance-based partitioning,
-- not to declarative (PARTITION BY) tables.

List Partitioning (Status or Region)

SQL
-- Partition by region (EU data stays in EU partition for compliance)
CREATE TABLE customers (
  id     BIGSERIAL,
  name   TEXT,
  email  TEXT,
  region TEXT NOT NULL  -- 'EU', 'US', 'APAC'
) PARTITION BY LIST (region);

CREATE TABLE customers_eu   PARTITION OF customers FOR VALUES IN ('EU');
CREATE TABLE customers_us   PARTITION OF customers FOR VALUES IN ('US');
CREATE TABLE customers_apac PARTITION OF customers FOR VALUES IN ('APAC');
CREATE TABLE customers_other PARTITION OF customers DEFAULT;

-- EU customer queries touch ONLY customers_eu partition
SELECT * FROM customers WHERE region = 'EU' AND email = 'alice@example.com';

Hash Partitioning (Even Distribution)

SQL
-- Hash partition when no natural range/list grouping exists
-- Useful for distributing hot rows across partitions
CREATE TABLE user_events (
  id      BIGSERIAL,
  user_id INT NOT NULL,
  event   TEXT,
  ts      TIMESTAMPTZ DEFAULT NOW()
) PARTITION BY HASH (user_id);

CREATE TABLE user_events_0 PARTITION OF user_events FOR VALUES WITH (MODULUS 4, REMAINDER 0);
CREATE TABLE user_events_1 PARTITION OF user_events FOR VALUES WITH (MODULUS 4, REMAINDER 1);
CREATE TABLE user_events_2 PARTITION OF user_events FOR VALUES WITH (MODULUS 4, REMAINDER 2);
CREATE TABLE user_events_3 PARTITION OF user_events FOR VALUES WITH (MODULUS 4, REMAINDER 3);

Indexes and Constraints on Partitioned Tables

SQL
-- Create index on parent  automatically applies to all partitions
CREATE INDEX ON orders(customer_id);           -- creates on all partitions
CREATE INDEX ON orders(status, created_at);    -- creates on all partitions

-- Unique constraints must include the partition key
-- This FAILS:
CREATE UNIQUE INDEX ON orders(id);
-- This WORKS (id + created_at includes the partition key):
CREATE UNIQUE INDEX ON orders(id, created_at);

-- Dropping old partitions is instant and does not lock the parent table
-- (DETACH ... CONCURRENTLY requires PostgreSQL 14+ and cannot run inside a transaction block)
ALTER TABLE orders DETACH PARTITION orders_2024_01 CONCURRENTLY;
DROP TABLE orders_2024_01;

Row-Level Security (RLS) — Multi-Tenant SaaS

RLS enforces access control at the database level — every query is automatically filtered. Even if application code has a bug and forgets to filter by tenant, the database enforces isolation.

Basic Multi-Tenant Pattern

SQL
ALTER TABLE documents ENABLE ROW LEVEL SECURITY;
ALTER TABLE documents FORCE ROW LEVEL SECURITY;  -- applies to table owner too

-- Policy: users see only their own documents
CREATE POLICY tenant_isolation ON documents
  USING (tenant_id = current_setting('app.current_tenant_id')::INT);

-- Read-only policy for a reporting role
CREATE POLICY reporting_read ON documents
  FOR SELECT
  TO reporting_role
  USING (TRUE);  -- can see all rows, but only SELECT

In your application, set the tenant context at the start of each request:

C#
// .NET — set tenant context on every DB connection
await using var conn = new NpgsqlConnection(connectionString);
await conn.OpenAsync();
await using var cmd = conn.CreateCommand();
// SET cannot take bind parameters in PostgreSQL — use set_config() instead
cmd.CommandText = "SELECT set_config('app.current_tenant_id', @tenantId, false)";
cmd.Parameters.AddWithValue("tenantId", tenantId.ToString());
await cmd.ExecuteNonQueryAsync();

// Now all queries on this connection are automatically filtered to this tenant

Separating Read and Write Policies

SQL
-- Users can see all company documents, but only edit their own
CREATE POLICY documents_select ON documents
  FOR SELECT
  USING (tenant_id = current_setting('app.current_tenant_id')::INT);

CREATE POLICY documents_insert ON documents
  FOR INSERT
  WITH CHECK (
    tenant_id = current_setting('app.current_tenant_id')::INT
    AND created_by = current_setting('app.current_user_id')::INT
  );

CREATE POLICY documents_update ON documents
  FOR UPDATE
  USING (created_by = current_setting('app.current_user_id')::INT)
  WITH CHECK (tenant_id = current_setting('app.current_tenant_id')::INT);

CREATE POLICY documents_delete ON documents
  FOR DELETE
  USING (
    created_by = current_setting('app.current_user_id')::INT
    OR current_setting('app.current_role') = 'admin'
  );

RLS with Partitioned Tables

RLS and partitioning compose — the policy is defined once on the parent, inherited by all partitions:

SQL
-- Partition customers by region, RLS by tenant_id
ALTER TABLE customers ENABLE ROW LEVEL SECURITY;
CREATE POLICY tenant_customers ON customers
  USING (tenant_id = current_setting('app.current_tenant_id')::INT);
-- Policy automatically applies to customers_eu, customers_us, customers_apac

pgvector — AI and Semantic Search

pgvector adds vector similarity search to PostgreSQL — making it a complete vector database without a separate service.

Setup and Schema

SQL
CREATE EXTENSION IF NOT EXISTS vector;

CREATE TABLE documents (
  id          BIGSERIAL PRIMARY KEY,
  content     TEXT NOT NULL,
  source_url  TEXT,
  embedding   vector(1536),    -- OpenAI text-embedding-3-small: 1536 dims
  created_at  TIMESTAMPTZ DEFAULT NOW()
);

-- For images (OpenAI CLIP): vector(512)
-- For text (text-embedding-3-large): vector(3072)
-- For sentence-transformers: vector(384) or vector(768)

Index Types: IVFFlat vs HNSW

IVFFlat — Inverted File with Flat Quantization:

  • Divides vectors into `lists` clusters (centroids)
  • Search: find the nearest `probes` clusters, then scan only those lists
  • Trade-off: fast to build, good recall with right parameters, lower memory

HNSW — Hierarchical Navigable Small World (pgvector 0.5.0+):

  • Multi-layer graph structure — fast approximate nearest neighbour
  • Higher recall, faster queries, higher memory and slower build
  • Generally preferred for production
SQL
-- IVFFlat: good for datasets up to ~1M vectors
-- pgvector guidance: lists = rows / 1000 up to ~1M rows, sqrt(rows) beyond that
CREATE INDEX idx_docs_embedding_ivfflat ON documents
  USING ivfflat (embedding vector_cosine_ops)
  WITH (lists = 100);

-- At query time, probe more lists for better recall (at cost of speed)
SET ivfflat.probes = 10;  -- default 1, 10 gives much better recall

-- HNSW: preferred for production, especially > 1M vectors
CREATE INDEX idx_docs_embedding_hnsw ON documents
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 16, ef_construction = 64);
-- m: max connections per node (default 16)
-- ef_construction: build quality vs time (default 64)

-- At query time, control recall vs speed
SET hnsw.ef_search = 100;  -- default 40; higher = better recall

Distance Operators

SQL
<->   -- L2 distance (Euclidean)  good for spatial data
<#>   -- negative inner product  good for dot-product similarity
<=>   -- cosine distance  best for text embeddings (normalised vectors)
<+>   -- L1 distance (Manhattan)  less common

Production RAG Pattern

SQL
-- 1. Store documents with embeddings (from your application)
INSERT INTO documents (content, source_url, embedding)
VALUES (
  'PostgreSQL is a powerful open-source relational database.',
  'https://postgresql.org',
  '[0.023, -0.015, ...]'::vector  -- embedding from OpenAI API
);

-- 2. Semantic search (find N most similar to a query embedding)
WITH query AS (
  SELECT '[0.018, -0.022, ...]'::vector AS embedding  -- embed the user's question
)
SELECT
  d.id,
  d.content,
  d.source_url,
  1 - (d.embedding <=> q.embedding) AS cosine_similarity
FROM documents d, query q
ORDER BY d.embedding <=> q.embedding   -- order by distance (ascending = closer)
LIMIT 5;

-- 3. Hybrid search: combine semantic + keyword (best recall)
WITH query AS (
  SELECT
    '[0.018, -0.022, ...]'::vector AS vec_embedding,
    plainto_tsquery('english', 'postgresql performance') AS ts_query
),
vector_results AS (
  SELECT id, 1 - (embedding <=> q.vec_embedding) AS vector_score
  FROM documents d, query q
  ORDER BY embedding <=> q.vec_embedding
  LIMIT 50
),
text_results AS (
  SELECT id, ts_rank(to_tsvector('english', content), q.ts_query) AS text_score
  FROM documents d, query q
  WHERE to_tsvector('english', content) @@ q.ts_query
)
SELECT
  d.id,
  d.content,
  COALESCE(vr.vector_score, 0) * 0.7 + COALESCE(tr.text_score, 0) * 0.3 AS combined_score
FROM documents d
LEFT JOIN vector_results vr ON vr.id = d.id
LEFT JOIN text_results tr ON tr.id = d.id
WHERE vr.id IS NOT NULL OR tr.id IS NOT NULL
ORDER BY combined_score DESC
LIMIT 10;

Filtering Before Vector Search

Filtering after vector search wastes time scanning irrelevant vectors. Filter first, then search within the subset. Caveat: when an ANN index (IVFFlat/HNSW) is used, the WHERE filter is applied to the index's candidate results, which can return fewer rows than LIMIT; pgvector 0.8+ adds iterative index scans to compensate.

SQL
-- Tenant-scoped semantic search (for multi-tenant RAG)
SELECT id, content, 1 - (embedding <=> $1) AS similarity
FROM documents
WHERE tenant_id = $2        -- filter BEFORE vector search
  AND created_at > NOW() - INTERVAL '90 days'
ORDER BY embedding <=> $1
LIMIT 5;

-- Ensure the filter columns are indexed for efficiency:
CREATE INDEX ON documents(tenant_id, created_at);

LISTEN/NOTIFY — Real-Time Events from PostgreSQL

PostgreSQL has built-in pub/sub. Any session can publish a notification; any listening session receives it instantly.

SQL
-- Producer: notify a channel with a JSON payload
-- (NEW is only available inside a trigger function — see the trigger below)
SELECT pg_notify('order_events', json_build_object(
  'event', 'OrderPlaced',
  'orderId', NEW.id,
  'customerId', NEW.customer_id
)::text);

-- Trigger to auto-notify on insert
-- Trigger function: publish the full inserted row as JSON on the
-- 'order_events' channel for any listening sessions.
CREATE OR REPLACE FUNCTION notify_order_placed()
RETURNS trigger AS $$
BEGIN
  -- PERFORM discards pg_notify's void result; row_to_json(NEW) serialises
  -- every column of the new row. NOTE: NOTIFY payloads must be shorter
  -- than 8000 bytes (default build), so very wide rows may fail.
  PERFORM pg_notify('order_events', row_to_json(NEW)::text);
  -- AFTER triggers ignore the return value, but returning NEW is conventional.
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER after_order_insert
  AFTER INSERT ON orders
  FOR EACH ROW EXECUTE FUNCTION notify_order_placed();
C#
// .NET consumer — receive notifications asynchronously
await using var conn = new NpgsqlConnection(connectionString);
await conn.OpenAsync();
conn.Notification += (sender, args) =>
{
    var evt = JsonSerializer.Deserialize<OrderEvent>(args.Payload);
    Console.WriteLine($"Order placed: {evt.OrderId}");
};
await using var cmd = new NpgsqlCommand("LISTEN order_events", conn);
await cmd.ExecuteNonQueryAsync();

// Keep polling for notifications
while (true)
{
    await conn.WaitAsync(cancellationToken);  // blocks until notification arrives
}

LISTEN/NOTIFY is useful for lightweight real-time triggers (cache invalidation, WebSocket pushes, simple event propagation) without a separate message broker. For high-throughput or durable messaging, use a proper broker (Service Bus, Kafka).


Advisory Locks — Application-Level Coordination

Advisory locks are session-scoped or transaction-scoped locks that your application can use for distributed coordination — without the overhead of table locks.

SQL
-- Session lock: held until explicitly released or session ends
SELECT pg_try_advisory_lock(42);     -- returns true if acquired, false if not
SELECT pg_advisory_unlock(42);       -- release

-- Transaction lock: automatically released at end of transaction
SELECT pg_try_advisory_xact_lock(hashtext('job:email-batch'));

-- Practical: prevent duplicate cron job execution
BEGIN;
  -- If we get the lock, we're the only instance running this job
  SELECT pg_try_advisory_xact_lock(hashtext('nightly-report-job')) AS acquired;
  -- If acquired = false, another instance is already running — exit early
  -- If acquired = true, proceed with the job
  -- Lock automatically released at COMMIT/ROLLBACK
COMMIT;

Advisory locks are the correct tool for:

  • Preventing duplicate processing in multi-instance deployments
  • Rate limiting with database coordination
  • Distributed mutex patterns

Generated Columns — Computed Stored Values

Generated columns compute and store a value automatically on INSERT/UPDATE:

SQL
-- STORED: computed value written to disk (can be indexed)
ALTER TABLE products
  ADD COLUMN total_value NUMERIC GENERATED ALWAYS AS (price * stock_quantity) STORED;

-- Full-text search vector  stored and indexable
ALTER TABLE articles
  ADD COLUMN search_vector tsvector
  GENERATED ALWAYS AS (
    setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
    setweight(to_tsvector('english', coalesce(body, '')), 'B')
  ) STORED;

CREATE INDEX ON articles USING GIN(search_vector);
-- INSERT/UPDATE automatically maintains search_vector  no application code needed

Quick Reference

SQL
-- Window functions
RANK() / DENSE_RANK() / ROW_NUMBER() / NTILE(n) / PERCENT_RANK()
LAG(col, n, default) / LEAD(col, n, default)
SUM / AVG / MIN / MAX OVER (PARTITION BY ... ORDER BY ... ROWS/RANGE BETWEEN ...)
FIRST_VALUE(col) / LAST_VALUE(col) / NTH_VALUE(col, n) OVER (...)

-- CTE
WITH MATERIALIZED / NOT MATERIALIZED name AS (...)
WITH RECURSIVE name AS (base UNION ALL recursive)

-- JSONB
->  ->>  #>  #>>            -- navigation
@>  <@  ?  ?|  ?&          -- containment / existence
jsonb_set / jsonb_strip_nulls / jsonb_agg / jsonb_object_agg
jsonb_array_elements / jsonb_each / jsonb_object_keys

-- Partitioning
PARTITION BY RANGE / LIST / HASH
CREATE TABLE child PARTITION OF parent FOR VALUES FROM x TO y
CREATE TABLE child PARTITION OF parent FOR VALUES IN (a, b, c)
CREATE TABLE child PARTITION OF parent FOR VALUES WITH (MODULUS n, REMAINDER r)
ALTER TABLE parent DETACH PARTITION child CONCURRENTLY

-- RLS
ALTER TABLE t ENABLE ROW LEVEL SECURITY
CREATE POLICY name ON t [FOR cmd] [TO role] USING (expr) [WITH CHECK (expr)]

-- pgvector
vector(n) column type
<->  <#>  <=>  <+>        -- distance operators
USING ivfflat / hnsw (col ops) WITH (lists/m, ef_construction)
SET ivfflat.probes / SET hnsw.ef_search

-- LISTEN/NOTIFY
LISTEN channel_name
NOTIFY channel_name, 'payload'
SELECT pg_notify('channel', 'payload')

-- Advisory locks
pg_try_advisory_lock(key) / pg_advisory_unlock(key)
pg_try_advisory_xact_lock(key)   -- auto-released at transaction end

Related: PostgreSQL Indexing & Query Performance — B-tree internals, EXPLAIN ANALYZE, slow query diagnosis
Related: SQL Advanced Guide — analytical SQL patterns
Related: Building a Production RAG Pipeline — pgvector in a real AI application