Python Essentials for AI Engineers · Lesson 29 of 36
Broadcasting: NumPy's Superpower
What is Broadcasting?
Broadcasting is NumPy's mechanism for performing operations on arrays of different shapes — without explicitly copying data. NumPy "stretches" smaller arrays conceptually to match larger ones.
import numpy as np
# Scalar + array: scalar is broadcast to every element
arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(arr + 10) # [11. 12. 13. 14. 15.] — 10 applied to each element
print(arr * 2) # [ 2.  4.  6.  8. 10.] — 2 applied to each element
# Without broadcasting you'd need:
result = np.array([x + 10 for x in arr]) # Slow Python loop
Broadcasting Rules
NumPy applies these rules to determine if two shapes are compatible:
- If arrays have different numbers of dimensions, prepend 1s to the shape of the smaller array
- Dimensions are compatible if they are equal, or if one of them is 1
- If compatible, the 1-dimension is stretched to match the other
# Rule in action:
a = np.array([[1, 2, 3], # Shape: (3, 3)
[4, 5, 6],
[7, 8, 9]])
b = np.array([10, 20, 30]) # Shape: (3,) → becomes (1, 3) → stretched to (3, 3)
print(a + b)
# [[11 22 33]
# [14 25 36]
# [17 28 39]]
# b[0]=10 added to entire first column, b[1]=20 to second column, etc.
# Column-wise broadcast
c = np.array([[10], # Shape: (3, 1) → stretched to (3, 3)
[20],
[30]])
print(a + c)
# [[11 12 13]
# [24 25 26]
# [37 38 39]]
# c[0]=10 added to entire first row, c[1]=20 to second row, etc.
Visualizing Broadcasting
a: (3, 3)        b: (3,) → (1, 3) → (3, 3)
[[1 2 3]         [[10 20 30]         [[11 22 33]
 [4 5 6]    +     [10 20 30]    →     [14 25 36]
 [7 8 9]]         [10 20 30]]         [17 28 39]]
NumPy doesn't actually create the expanded array — it's a virtual operation that runs in C.
Shape Compatibility Check
# Compatible shapes:
# (3, 4) + (4,) → (3, 4) — b broadcasts along rows
# (3, 1) + (1, 4) → (3, 4) — both broadcast
# (3, 4) + (1,) → (3, 4) — scalar broadcast
# (3, 4) + (3, 4) → (3, 4) — same shape, no broadcasting needed
# Incompatible shapes (ValueError):
# (3, 4) + (3,) → error: 4 ≠ 3
# (3, 4) + (2, 4) → error: 3 ≠ 2
def test_broadcast(a_shape: tuple, b_shape: tuple) -> None:
    """Report how NumPy broadcasts two operand shapes.

    An array of ones is built for each shape and the two are added. On
    success the resulting shape is printed; if NumPy raises ValueError
    (for example because the shapes are incompatible), the exception
    message is printed instead. Nothing is returned.
    """
    try:
        # Array construction stays inside the try so a ValueError raised
        # by np.ones itself is reported the same way as a broadcast error.
        total = np.ones(a_shape) + np.ones(b_shape)
    except ValueError as err:
        print(f"{a_shape} + {b_shape} → ERROR: {err}")
        return
    print(f"{a_shape} + {b_shape} → {total.shape}")
test_broadcast((3, 4), (4,)) # (3, 4) + (4,) → (3, 4)
test_broadcast((3, 1), (1, 4)) # (3, 1) + (1, 4) → (3, 4)
test_broadcast((3, 4), (3,)) # ERROR
Key AI Use Case: Normalizing Embeddings
# Without broadcasting (slow, manual)
embeddings = np.random.randn(100, 1536) # 100 embeddings
normalized = np.zeros_like(embeddings)
for i in range(len(embeddings)):
norm = np.linalg.norm(embeddings[i])
normalized[i] = embeddings[i] / norm
# With broadcasting (fast, vectorized)
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
# norms shape: (100, 1) — one norm per row, keepdims=True preserves the dimension
normalized = embeddings / norms
# embeddings: (100, 1536) ÷ norms: (100, 1) → broadcasts norms across 1536 columns
# Result: (100, 1536) — each row divided by its own norm
# Verify: each row now has unit norm
row_norms = np.linalg.norm(normalized, axis=1)
print(np.allclose(row_norms, 1.0)) # True — all unit norms
Batch Cosine Similarity
def _unit_rows(matrix: np.ndarray) -> np.ndarray:
    """Scale every row of ``matrix`` to unit L2 norm; all-zero rows stay zero."""
    # keepdims=True keeps shape (n_rows, 1) so the division broadcasts per row.
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # A zero row has norm 0 and would produce 0/0 = NaN (plus a RuntimeWarning).
    # Dividing by 1 instead leaves the row as zeros, i.e. similarity 0.
    norms[norms == 0] = 1.0
    return matrix / norms


def cosine_similarity_matrix(queries: np.ndarray, documents: np.ndarray) -> np.ndarray:
    """
    Compute pairwise cosine similarity between queries and documents.

    queries: (n_queries, dim)
    documents: (n_docs, dim)
    Returns: (n_queries, n_docs) — similarity[i, j] = sim(query_i, doc_j)

    Rows that are entirely zero have no direction; their similarity to
    every other vector is reported as 0 rather than NaN.
    """
    q_normalized = _unit_rows(queries)  # (n_queries, dim)
    d_normalized = _unit_rows(documents)  # (n_docs, dim)
    # Dot products of unit vectors are cosines:
    # (n_queries, dim) @ (dim, n_docs) = (n_queries, n_docs)
    return q_normalized @ d_normalized.T
queries = np.random.randn(10, 1536) # 10 query embeddings
documents = np.random.randn(100, 1536) # 100 document embeddings
similarity = cosine_similarity_matrix(queries, documents)
print(similarity.shape) # (10, 100) — one row per query, one col per doc
# Get top-5 documents for each query
for i, query_sims in enumerate(similarity):
top_5 = np.argsort(query_sims)[::-1][:5]
print(f"Query {i}: top docs = {top_5}")
Softmax with Broadcasting
def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """Numerically stable softmax using broadcasting.

    logits: array of raw scores, e.g. (batch, n_classes)
    axis: axis along which the probabilities are normalized; the default
        of -1 normalizes over the last axis (one distribution per row for
        a 2-D input).
    Returns: array of the same shape whose entries along ``axis`` sum to 1.
    """
    # Subtract the max along `axis` for numerical stability: softmax is
    # unchanged by a constant shift, and the largest exponent becomes
    # exp(0) = 1, so np.exp cannot overflow.
    # keepdims=True keeps a length-1 axis so the max broadcasts back.
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exp_vals = np.exp(shifted)
    # Same keepdims trick: the per-slice sums broadcast across `axis`.
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)
logits = np.array([[2.0, 1.0, 0.1], [1.0, 3.0, 0.2]])
probs = softmax(logits)
print(probs)
# [[0.659 0.242 0.099]
# [0.113 0.836 0.051]]
print(probs.sum(axis=1)) # [1. 1.] — each row sums to 1
Z-Score Normalization
# Normalize features: (x - mean) / std — per feature (column)
features = np.random.randn(1000, 50) # 1000 samples, 50 features
# Global statistics (wrong for per-feature scaling — a single scalar mean/std is applied to every column)
# features_norm = (features - features.mean()) / features.std()
# With broadcasting: per-column mean and std
mean = features.mean(axis=0) # (50,) — one mean per feature
std = features.std(axis=0) # (50,)
# (1000, 50) - (50,) broadcasts to (1000, 50)
features_norm = (features - mean) / std
# Verify
print(np.allclose(features_norm.mean(axis=0), 0, atol=1e-10)) # True — mean ≈ 0 per feature
print(np.allclose(features_norm.std(axis=0), 1, atol=1e-10)) # True — std ≈ 1 per feature
Broadcasting Pitfalls
# Pitfall 1: Forgetting keepdims when you need it
embeddings = np.random.randn(100, 1536)
# WRONG: norms has shape (100,) — division fails or broadcasts incorrectly
norms = np.linalg.norm(embeddings, axis=1) # Shape: (100,)
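# embeddings (100, 1536) / norms (100,) → ValueError (trailing dims 1536 ≠ 100); with a square (n, n) matrix it would silently divide each column instead of each row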
# CORRECT: keepdims=True gives shape (100, 1) → broadcasts correctly
norms = np.linalg.norm(embeddings, axis=1, keepdims=True) # Shape: (100, 1)
normalized = embeddings / norms # Correct: divides each row by its norm
# Pitfall 2: Shape (n,) vs shape (n, 1)
a = np.ones((3, 4))
b = np.ones(3) # Shape (3,) — NOT (3, 1)
# a + b → ERROR: (3, 4) + (3,) incompatible (last dims: 4 ≠ 3)
b_col = np.ones((3, 1)) # Shape (3, 1) — explicit column vector
# a + b_col → OK: (3, 4) + (3, 1) → (3, 4)
# Fix: reshape or use [:, np.newaxis]
b_reshaped = b[:, np.newaxis] # (3,) → (3, 1)
print(b_reshaped.shape) # (3, 1)
Quick Reference
| Operation | Shape | Example |
|---|---|---|
| Add scalar | (n,) + scalar | arr + 5 |
| Add row vector to matrix | (m, n) + (n,) | mat + row |
| Add column vector to matrix | (m, n) + (m, 1) | mat + col[:, np.newaxis] |
| Normalize rows | (m, n) / (m, 1) | mat / norms where norms = np.linalg.norm(mat, axis=1, keepdims=True) |
| Pairwise similarity | (m, d) @ (d, n) | A @ B.T |
| Per-feature normalization | (m, n) - (n,) | (X - mean) / std |