Python Essentials for AI Engineers · Lesson 29 of 36

Broadcasting: NumPy's Superpower

What is Broadcasting?

Broadcasting is NumPy's mechanism for performing operations on arrays of different shapes — without explicitly copying data. NumPy "stretches" smaller arrays conceptually to match larger ones.

Python

import numpy as np

# Broadcasting a scalar: the single value is combined with every element
arr = np.array([1, 2, 3, 4, 5], dtype=float)
print(arr + 10)   # [11. 12. 13. 14. 15.] — 10 is added to each element
print(arr * 2)    # [ 2.  4.  6.  8. 10.] — each element is doubled

# The hand-written equivalent visits the elements one at a time in Python:
result = np.array([element + 10 for element in arr])   # Slow interpreted loop

Broadcasting Rules

NumPy applies these rules to determine if two shapes are compatible:

If arrays have different numbers of dimensions, prepend 1s to the shape of the array with fewer dimensions (shapes are compared right-to-left, starting from the trailing dimension)
Dimensions are compatible if they are equal, or if one of them is 1
If compatible, the 1-dimension is stretched to match the other

Python

# The rules applied to a matrix and a 1-D array:
a = np.arange(1, 10).reshape(3, 3)   # Shape: (3, 3) — rows [1 2 3], [4 5 6], [7 8 9]

b = np.array([10, 20, 30])   # Shape: (3,) → treated as (1, 3) → stretched to (3, 3)

print(a + b)
# [[11 22 33]
#  [14 25 36]
#  [17 28 39]]
# Every row of a receives b: column 0 gets +10, column 1 gets +20, column 2 gets +30.

# Broadcasting a column vector instead
c = np.array([10, 20, 30]).reshape(3, 1)   # Shape: (3, 1) → stretched to (3, 3)

print(a + c)
# [[11 12 13]
#  [24 25 26]
#  [37 38 39]]
# Every column of a receives c: row 0 gets +10, row 1 gets +20, row 2 gets +30.

Visualizing Broadcasting

a: (3, 3)    b: (3,) → (1, 3) → (3, 3)
[[1  2  3]       [[10 20 30]
 [4  5  6]   +    [10 20 30]   → [[11 22 33]
 [7  8  9]]       [10 20 30]]      [14 25 36]
                                   [17 28 39]]

NumPy doesn't actually create the expanded array — it's a virtual operation that runs in C.

Shape Compatibility Check

Python

# Compatible shapes:
# (3, 4) + (4,)    → (3, 4) — b broadcasts along rows
# (3, 1) + (1, 4)  → (3, 4) — both broadcast
# (3, 4) + (1,)    → (3, 4) — a one-element array broadcasts like a scalar
# (3, 4) + (3, 4)  → (3, 4) — same shape, no broadcasting needed

# Incompatible shapes (ValueError):
# (3, 4) + (3,)    → error: 4 ≠ 3
# (3, 4) + (2, 4)  → error: 3 ≠ 2

def test_broadcast(a_shape: tuple, b_shape: tuple) -> None:
    """Print the shape produced by adding arrays of two shapes, or the error raised.

    Args:
        a_shape: Shape of the left operand.
        b_shape: Shape of the right operand.

    Prints one line: "<a_shape> + <b_shape> → <result shape>" on success, or
    "<a_shape> + <b_shape> → ERROR: <message>" when NumPy raises ValueError.
    """
    try:
        # Build two arrays of ones and add them; NumPy raises ValueError
        # when the shapes cannot be broadcast together.
        result = np.ones(a_shape) + np.ones(b_shape)
    except ValueError as e:
        print(f"{a_shape} + {b_shape} → ERROR: {e}")
    else:
        print(f"{a_shape} + {b_shape} → {result.shape}")


# The "test_" prefix would make pytest collect this demo helper as a test (and
# fail on its unknown "fixtures") if it is imported into a test module.
test_broadcast.__test__ = False

# Try two compatible shape pairs followed by one incompatible pair
shape_pairs = [
    ((3, 4), (4,)),    # prints: (3, 4) + (4,) → (3, 4)
    ((3, 1), (1, 4)),  # prints: (3, 1) + (1, 4) → (3, 4)
    ((3, 4), (3,)),    # prints an ERROR line: trailing dims 4 and 3 differ
]
for a_shape, b_shape in shape_pairs:
    test_broadcast(a_shape, b_shape)

Key AI Use Case: Normalizing Embeddings

Python

# Manual version: normalize one embedding at a time in a Python loop (slow)
embeddings = np.random.randn(100, 1536)   # 100 embeddings, 1536 dimensions each
normalized = np.zeros(embeddings.shape)
for i, row in enumerate(embeddings):
    norm = np.linalg.norm(row)
    normalized[i] = row / norm


# Broadcast version: a single vectorized expression (fast)
# keepdims=True keeps the reduced axis, so norms has shape (100, 1): one norm per row
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)

# (100, 1536) ÷ (100, 1): norms is stretched across the 1536 columns,
# giving a (100, 1536) result in which every row is divided by its own norm
normalized = embeddings / norms

# Check that every row ended up with unit length
row_norms = np.linalg.norm(normalized, axis=1)
print(np.allclose(row_norms, 1.0))   # True — all unit norms

Batch Cosine Similarity

Python

def cosine_similarity_matrix(queries: np.ndarray, documents: np.ndarray) -> np.ndarray:
    """
    Compute pairwise cosine similarity between queries and documents.

    A row whose norm is zero has no direction; its similarity to every
    other vector is reported as 0.0 instead of NaN.

    queries:   (n_queries, dim)
    documents: (n_docs, dim)
    Returns:   (n_queries, n_docs) — similarity[i, j] = sim(query_i, doc_j)
    """
    # Per-row norms; keepdims=True keeps them as columns so they broadcast per row
    q_norms = np.linalg.norm(queries, axis=1, keepdims=True)   # (n_queries, 1)
    d_norms = np.linalg.norm(documents, axis=1, keepdims=True) # (n_docs, 1)

    # Dividing by a zero norm would produce NaN (and a RuntimeWarning).
    # Replacing zero norms with 1 leaves all-zero rows as zeros, so every
    # dot product involving them comes out as 0.
    q_norms = np.where(q_norms == 0, 1.0, q_norms)
    d_norms = np.where(d_norms == 0, 1.0, d_norms)

    q_normalized = queries / q_norms      # (n_queries, dim)
    d_normalized = documents / d_norms    # (n_docs, dim)

    # Matrix multiply: (n_queries, dim) @ (dim, n_docs) = (n_queries, n_docs)
    return q_normalized @ d_normalized.T


queries   = np.random.randn(10, 1536)   # 10 query embeddings, 1536 dimensions each
documents = np.random.randn(100, 1536)  # 100 document embeddings

similarity = cosine_similarity_matrix(queries, documents)
print(similarity.shape)   # (10, 100) — row i holds query i's similarity to every doc

# Rank the documents for each query and report the five best matches
for i, query_sims in enumerate(similarity):
    ranked = np.argsort(query_sims)   # indices ordered by ascending similarity
    top_5 = ranked[-5:][::-1]         # last five, flipped so the best comes first
    print(f"Query {i}: top docs = {top_5}")

Softmax with Broadcasting

Python

def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """Numerically stable softmax using broadcasting.

    Args:
        logits: Array of raw scores.
        axis: Axis along which the probabilities are normalized. Defaults to
            the last axis, e.g. the class axis of a (batch, n_classes) array.

    Returns:
        Array with the same shape as ``logits`` whose entries sum to 1
        along ``axis``.
    """
    # Subtract the max for numerical stability (prevents overflow in exp).
    # keepdims=True: (batch, n_classes) → max shape (batch, 1) → broadcasts back
    shifted = logits - np.max(logits, axis=axis, keepdims=True)

    exp_vals = np.exp(shifted)
    # The sum also keeps its reduced axis so it broadcasts against exp_vals
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)


logits = np.array([[2.0, 1.0, 0.1], [1.0, 3.0, 0.2]])
probs  = softmax(logits)
print(probs)
# Rounded to three decimals:
# [[0.659 0.242 0.099]
#  [0.113 0.836 0.051]]
print(probs.sum(axis=1))   # [1. 1.] — each row sums to 1

Z-Score Normalization

Python

# Standardize each feature (column) independently: (x - mean) / std
features = np.random.randn(1000, 50)   # 1000 samples, 50 features

# Wrong approach — one global mean/std mixes all features together:
# features_norm = (features - features.mean()) / features.std()

# Right approach: reduce over the sample axis to get per-column statistics
mean = np.mean(features, axis=0)   # (50,) — one mean per feature
std  = np.std(features, axis=0)    # (50,) — one std per feature

# (1000, 50) with (50,) broadcasts to (1000, 50): each column uses its own mean/std
centered = features - mean
features_norm = centered / std

# Sanity check: every column should now have mean ≈ 0 and std ≈ 1
print(np.allclose(features_norm.mean(axis=0), 0, atol=1e-10))  # True — mean ≈ 0 per feature
print(np.allclose(features_norm.std(axis=0),  1, atol=1e-10))  # True — std ≈ 1 per feature

Broadcasting Pitfalls

Python

# Pitfall 1: leaving out keepdims when the reduced axis is still needed
embeddings = np.random.randn(100, 1536)

# WRONG: without keepdims the norms collapse to shape (100,)
norms = np.linalg.norm(embeddings, axis=-1)   # Shape: (100,)
# embeddings (100, 1536) / norms (100,) → ValueError: trailing dims 1536 and 100 differ
# (with a square matrix it would silently divide by column instead of by row)

# CORRECT: keepdims=True keeps the reduced axis → shape (100, 1) → broadcasts per row
norms = np.linalg.norm(embeddings, axis=-1, keepdims=True)   # Shape: (100, 1)
normalized = embeddings / norms   # Each row is divided by its own norm


# Pitfall 2: a 1-D array of shape (n,) is not a column of shape (n, 1)
a = np.ones((3, 4))
b = np.ones((3,))     # Shape (3,) — NOT (3, 1)
# a + b → ERROR: (3, 4) + (3,) incompatible (trailing dims: 4 ≠ 3)

b_col = np.ones((3, 1))   # Shape (3, 1) — an explicit column vector
# a + b_col → OK: (3, 4) + (3, 1) → (3, 4)

# Fix: give b a trailing axis — reshape(-1, 1) is equivalent to b[:, np.newaxis]
b_reshaped = b.reshape(-1, 1)   # (3,) → (3, 1)
print(b_reshaped.shape)   # (3, 1)

Quick Reference

| Operation | Shape | Example |
|---|---|---|
| Add scalar | (n,) + scalar | arr + 5 |
| Add row vector to matrix | (m, n) + (n,) | mat + row |
| Add column vector to matrix | (m, n) + (m, 1) | mat + col[:, np.newaxis] |
| Normalize rows | (m, n) / (m, 1) | mat / norms where norms = np.linalg.norm(mat, axis=1, keepdims=True) |
| Pairwise similarity | (m, d) @ (d, n) | A @ B.T |
| Per-feature normalization | (m, n) - (n,) | (X - mean) / std |

NumPy Slicing and Indexing

Next Lesson

NumPy Math Operations for ML