SQL & Database Mastery · Lesson 1 of 6

SQL Fundamentals

What Is SQL?

SQL (Structured Query Language) is the standard language for working with relational databases. Every major company — from startups to Fortune 500 — stores critical data in relational databases and queries it with SQL.

Whether you're a backend engineer, data analyst, or full-stack developer, SQL is a foundational skill you'll use throughout your career.

We'll use a realistic e-commerce database throughout this guide so every query solves a real problem.


The Database We'll Use

SQL
-- Customers table
CREATE TABLE customers (
  customer_id   INT PRIMARY KEY,
  name          VARCHAR(100),
  email         VARCHAR(100),
  city          VARCHAR(50),
  signup_date   DATE,
  tier          VARCHAR(20) -- 'free', 'pro', 'enterprise'
);

-- Products table
CREATE TABLE products (
  product_id    INT PRIMARY KEY,
  name          VARCHAR(100),
  category      VARCHAR(50),
  price         DECIMAL(10,2),
  stock         INT
);

-- Orders table
CREATE TABLE orders (
  order_id      INT PRIMARY KEY,
  customer_id   INT REFERENCES customers(customer_id),
  order_date    DATE,
  status        VARCHAR(20), -- 'pending', 'shipped', 'delivered', 'cancelled'
  total_amount  DECIMAL(10,2)
);

-- Order items table
CREATE TABLE order_items (
  item_id       INT PRIMARY KEY,
  order_id      INT REFERENCES orders(order_id),
  product_id    INT REFERENCES products(product_id),
  quantity      INT,
  unit_price    DECIMAL(10,2)
);

SELECT — Retrieving Data

The foundation of every SQL query.

SQL
-- Get everything from customers
SELECT * FROM customers;

-- Get specific columns only (always prefer this in production)
SELECT customer_id, name, email FROM customers;

-- Give columns readable aliases
SELECT
  customer_id AS id,
  name        AS customer_name,
  signup_date AS joined
FROM customers;

Real-world use: An admin dashboard fetching customer records to display in a table.


WHERE — Filtering Rows

SQL
-- Customers from London
SELECT name, email FROM customers
WHERE city = 'London';

-- Pro and enterprise customers only
SELECT name, tier FROM customers
WHERE tier IN ('pro', 'enterprise');

-- Orders above £500
SELECT order_id, total_amount FROM orders
WHERE total_amount > 500;

-- Orders in Q1 2025
SELECT order_id, order_date FROM orders
WHERE order_date BETWEEN '2025-01-01' AND '2025-03-31';

-- Customers whose name starts with 'A'
SELECT name FROM customers
WHERE name LIKE 'A%';

-- Customers with no email on file
SELECT name FROM customers
WHERE email IS NULL;

Combining conditions:

SQL
-- High-value orders that are still pending
SELECT order_id, total_amount, status
FROM orders
WHERE total_amount > 500
  AND status = 'pending';

-- Pro customers OR customers from New York
SELECT name, tier, city
FROM customers
WHERE tier = 'pro' OR city = 'New York';

ORDER BY — Sorting Results

SQL
-- Most expensive products first
SELECT name, price
FROM products
ORDER BY price DESC;

-- Customers alphabetically
SELECT name FROM customers
ORDER BY name ASC;

-- Orders by date then amount
SELECT order_id, order_date, total_amount
FROM orders
ORDER BY order_date DESC, total_amount DESC;

LIMIT / TOP — Controlling Row Count

SQL
-- PostgreSQL / MySQL: top 10 most expensive products
SELECT name, price
FROM products
ORDER BY price DESC
LIMIT 10;

-- SQL Server equivalent
SELECT TOP 10 name, price
FROM products
ORDER BY price DESC;

-- Pagination: page 3, 20 rows per page
SELECT name, price
FROM products
ORDER BY product_id
LIMIT 20 OFFSET 40;  -- skip first 40 rows

Real-world use: Infinite scroll or paginated product listings.


Aggregate Functions

Compute summaries across rows.

SQL
-- Total revenue
SELECT SUM(total_amount) AS total_revenue
FROM orders
WHERE status = 'delivered';

-- Average order value
SELECT AVG(total_amount) AS avg_order_value
FROM orders;

-- Count of customers per tier
SELECT tier, COUNT(*) AS customer_count
FROM customers
GROUP BY tier;

-- Highest and lowest order values
SELECT
  MAX(total_amount) AS largest_order,
  MIN(total_amount) AS smallest_order
FROM orders;

GROUP BY — Aggregating by Category

SQL
-- Revenue by product category
SELECT
  p.category,
  SUM(oi.quantity * oi.unit_price) AS category_revenue
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
GROUP BY p.category
ORDER BY category_revenue DESC;

Real-world use: Sales dashboard breakdown by category.


HAVING — Filtering Groups

WHERE filters rows before grouping. HAVING filters groups after aggregation.

SQL
-- Categories with more than £10,000 in revenue
SELECT
  p.category,
  SUM(oi.quantity * oi.unit_price) AS revenue
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
GROUP BY p.category
HAVING SUM(oi.quantity * oi.unit_price) > 10000;

-- Customers who placed more than 5 orders
SELECT
  customer_id,
  COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 5;

JOINs — Combining Tables

The most important SQL skill.

INNER JOIN — Only matching rows

SQL
-- Orders with customer names (only customers who ordered)
SELECT
  o.order_id,
  c.name AS customer,
  o.order_date,
  o.total_amount
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id;

LEFT JOIN — All rows from left table

SQL
-- All customers, including those with no orders
SELECT
  c.name,
  c.email,
  COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name, c.email
ORDER BY order_count DESC;

Real-world use: Find customers who signed up but never ordered (marketing follow-up).

Multi-table JOIN

SQL
-- Full order detail: customer + order + items + product names
SELECT
  c.name        AS customer,
  o.order_date,
  p.name        AS product,
  oi.quantity,
  oi.unit_price,
  (oi.quantity * oi.unit_price) AS line_total
FROM orders o
JOIN customers c    ON o.customer_id = c.customer_id
JOIN order_items oi ON o.order_id    = oi.order_id
JOIN products p     ON oi.product_id = p.product_id
WHERE o.status = 'delivered'
ORDER BY o.order_date DESC;

Subqueries

A query inside another query.

SQL
-- Customers who spent above average
SELECT name, email
FROM customers
WHERE customer_id IN (
  SELECT customer_id
  FROM orders
  GROUP BY customer_id
  HAVING SUM(total_amount) > (
    SELECT AVG(total_amount) FROM orders
  )
);

-- Most expensive product in each category
SELECT name, category, price
FROM products
WHERE (category, price) IN (
  SELECT category, MAX(price)
  FROM products
  GROUP BY category
);

Common Table Expressions (CTEs)

CTEs make complex queries readable by naming intermediate results.

SQL
-- Top customers by revenue with their order count
WITH customer_stats AS (
  SELECT
    customer_id,
    COUNT(*)           AS order_count,
    SUM(total_amount)  AS total_spent
  FROM orders
  WHERE status = 'delivered'
  GROUP BY customer_id
),
top_customers AS (
  SELECT customer_id, order_count, total_spent
  FROM customer_stats
  WHERE total_spent > 1000
)
SELECT
  c.name,
  c.tier,
  tc.order_count,
  tc.total_spent
FROM top_customers tc
JOIN customers c ON tc.customer_id = c.customer_id
ORDER BY tc.total_spent DESC;

INSERT, UPDATE, DELETE

SQL
-- Add a new customer
INSERT INTO customers (customer_id, name, email, city, signup_date, tier)
VALUES (1001, 'Jane Smith', 'jane@example.com', 'Manchester', '2025-04-13', 'free');

-- Upgrade a customer to pro
UPDATE customers
SET tier = 'pro'
WHERE customer_id = 1001;

-- Cancel all pending orders older than 30 days
UPDATE orders
SET status = 'cancelled'
WHERE status = 'pending'
  AND order_date < CURRENT_DATE - INTERVAL '30 days';

-- Delete a customer (be careful  check for foreign key constraints first)
DELETE FROM customers
WHERE customer_id = 1001;

Indexes — Why Queries Are Slow

Without indexes, the database scans every row. With indexes, it jumps directly to matching rows.

SQL
-- Index on frequently filtered column
CREATE INDEX idx_orders_customer ON orders(customer_id);
CREATE INDEX idx_orders_status   ON orders(status);
CREATE INDEX idx_orders_date     ON orders(order_date);

-- Composite index for common combined filters
CREATE INDEX idx_orders_status_date ON orders(status, order_date);

When to add an index:

  • Columns you frequently use in WHERE, JOIN ON, or ORDER BY
  • Foreign key columns (customer_id, product_id)
  • Columns used in GROUP BY on large tables

When NOT to add an index:

  • Small tables (< 10,000 rows) — full scan is fine
  • Columns you rarely filter on
  • Tables with very high write volume (indexes slow down INSERT/UPDATE)

NULL Handling

SQL
-- NULL is not equal to anything, even itself
SELECT * FROM customers WHERE email = NULL;    -- WRONG, returns nothing
SELECT * FROM customers WHERE email IS NULL;   -- CORRECT

-- Replace NULL with a default
SELECT name, COALESCE(email, 'no email') AS contact
FROM customers;

-- NULLs sort last by default in ASC
SELECT name, email
FROM customers
ORDER BY email NULLS LAST;

String Functions

SQL
-- Concatenate name parts
SELECT CONCAT(first_name, ' ', last_name) AS full_name FROM customers;

-- Uppercase/lowercase
SELECT UPPER(name), LOWER(email) FROM customers;

-- Extract domain from email
SELECT
  email,
  SUBSTRING(email, POSITION('@' IN email) + 1) AS domain
FROM customers;

-- Trim whitespace
SELECT TRIM(name) FROM customers;

-- String length
SELECT name, LENGTH(name) AS name_length FROM customers;

Date Functions

SQL
-- Current date and time
SELECT CURRENT_DATE, CURRENT_TIMESTAMP;

-- Extract year/month from date
SELECT
  EXTRACT(YEAR  FROM order_date) AS order_year,
  EXTRACT(MONTH FROM order_date) AS order_month,
  COUNT(*) AS orders
FROM orders
GROUP BY 1, 2
ORDER BY 1, 2;

-- Days since signup
SELECT
  name,
  CURRENT_DATE - signup_date AS days_since_signup
FROM customers;

-- Format date
SELECT TO_CHAR(order_date, 'Month DD, YYYY') FROM orders;  -- PostgreSQL
SELECT DATE_FORMAT(order_date, '%M %d, %Y') FROM orders;   -- MySQL

Real-World Query: Monthly Revenue Report

SQL
WITH monthly AS (
  SELECT
    DATE_TRUNC('month', order_date) AS month,
    COUNT(DISTINCT o.order_id)      AS order_count,
    COUNT(DISTINCT o.customer_id)   AS unique_customers,
    SUM(o.total_amount)             AS revenue
  FROM orders o
  WHERE o.status = 'delivered'
    AND o.order_date >= '2025-01-01'
  GROUP BY DATE_TRUNC('month', order_date)
),
with_growth AS (
  SELECT
    month,
    order_count,
    unique_customers,
    revenue,
    LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue
  FROM monthly
)
SELECT
  TO_CHAR(month, 'Month YYYY') AS period,
  order_count,
  unique_customers,
  revenue,
  ROUND(
    100.0 * (revenue - prev_month_revenue) / NULLIF(prev_month_revenue, 0),
    1
  ) AS growth_pct
FROM with_growth
ORDER BY month;

This is the kind of query a data analyst runs every week for a business review meeting.


What to Learn Next

  • Advanced SQL: Window functions, recursive CTEs, PIVOT, performance tuning
  • Real-world project: Build a full e-commerce analytics schema
  • SQL Interview Prep: 300 questions with answers organized by difficulty

SQL mastery comes from writing queries against real data. Set up PostgreSQL locally, load some sample data, and query it every day.