Broadcasting: NumPy's Superpower
Understand NumPy broadcasting: how arrays of different shapes operate together, the broadcasting rules, common patterns for embedding normalization and similarity computation, and pitfalls to avoid.
What is Broadcasting?
Broadcasting is NumPy's mechanism for performing operations on arrays of different shapes — without explicitly copying data. NumPy conceptually "stretches" smaller arrays to match larger ones.
import numpy as np
# Scalar broadcasting: a scalar operand is combined with every element.
arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
shifted = arr + 10
doubled = arr * 2
print(shifted)  # [11. 12. 13. 14. 15.] — 10 added to each element
print(doubled)  # [ 2.  4.  6.  8. 10.] — each element multiplied by 2
# The equivalent without broadcasting is an explicit Python loop:
result = np.array([x + 10 for x in arr]) # Slow Python loop
Broadcasting Rules
NumPy applies these rules to determine if two shapes are compatible:
- If arrays have different numbers of dimensions, prepend 1s to the shape of the smaller array
- Dimensions are compatible if they are equal, or if one of them is 1
- If compatible, the 1-dimension is stretched to match the other
# The rules applied to a matrix and a 1-D row vector.
a = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)  # shape (3, 3)
b = np.array([10, 20, 30])  # shape (3,) → treated as (1, 3) → stretched to (3, 3)
row_broadcast = a + b
print(row_broadcast)
# [[11 22 33]
#  [14 25 36]
#  [17 28 39]]
# Every row of `a` receives b: column 0 gains b[0]=10, column 1 gains b[1]=20, ...

# A column vector is stretched across the columns instead.
c = np.array([[10], [20], [30]])  # shape (3, 1) → stretched to (3, 3)
col_broadcast = a + c
print(col_broadcast)
# [[11 12 13]
#  [24 25 26]
#  [37 38 39]]
# c[0]=10 added to entire first row, c[1]=20 to second row, etc.
Visualizing Broadcasting
a: (3, 3)        b: (3,) → (1, 3) → (3, 3)
[[1 2 3]         [[10 20 30]
 [4 5 6]    +     [10 20 30]     →    [[11 22 33]
 [7 8 9]]         [10 20 30]]          [14 25 36]
                                       [17 28 39]]
NumPy doesn't actually create the expanded array — it's a virtual operation that runs in C.
Shape Compatibility Check
# Compatible shapes:
# (3, 4) + (4,) → (3, 4) — b is applied to every row
# (3, 1) + (1, 4) → (3, 4) — both operands are stretched
# (3, 4) + (1,) → (3, 4) — single-element array, behaves like a scalar
# (3, 4) + (3, 4) → (3, 4) — same shape, no broadcasting needed
# Incompatible shapes (ValueError):
# (3, 4) + (3,) → error: 4 ≠ 3
# (3, 4) + (2, 4) → error: 3 ≠ 2
def test_broadcast(a_shape: tuple, b_shape: tuple) -> None:
    """Print the shape produced by adding two arrays, or the broadcast error.

    Builds an array of ones for each shape, adds them, and prints one line:
    ``<a_shape> + <b_shape> → <result shape>`` when the addition succeeds, or
    ``<a_shape> + <b_shape> → ERROR: <message>`` when NumPy raises
    ``ValueError``.

    Args:
        a_shape: Shape of the left operand.
        b_shape: Shape of the right operand.
    """
    try:
        a = np.ones(a_shape)
        b = np.ones(b_shape)
        result = a + b
    except ValueError as e:
        print(f"{a_shape} + {b_shape} → ERROR: {e}")
    else:
        print(f"{a_shape} + {b_shape} → {result.shape}")


# This is a demo helper, not a unit test: the ``test_`` prefix would otherwise
# make pytest try to collect it and fail on its two required arguments.
test_broadcast.__test__ = False
# Compatible pairs: each call prints the broadcast result shape.
test_broadcast(a_shape=(3, 4), b_shape=(4,))    # trailing 4 matches: result (3, 4)
test_broadcast(a_shape=(3, 1), b_shape=(1, 4))  # both operands stretch: result (3, 4)
test_broadcast((3, 4), (3,)) # ERROR — trailing dimensions 4 and 3 are incompatible
Key AI Use Case: Normalizing Embeddings
# Without broadcasting (slow, manual): normalize one row at a time.
embeddings = np.random.randn(100, 1536)  # 100 embeddings of dimension 1536
normalized = np.zeros_like(embeddings)
for i, row in enumerate(embeddings):
    normalized[i] = row / np.linalg.norm(row)

# With broadcasting (fast, vectorized)
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
# norms shape: (100, 1) — one norm per row; keepdims=True keeps the reduced axis
normalized = embeddings / norms
# embeddings (100, 1536) ÷ norms (100, 1): norms is stretched across the 1536 columns
# Result: (100, 1536) — each row divided by its own norm

# Verify: each row now has unit norm
row_norms = np.linalg.norm(normalized, axis=1)
print(np.allclose(row_norms, 1.0)) # True — all unit norms
Batch Cosine Similarity
def cosine_similarity_matrix(queries: np.ndarray, documents: np.ndarray) -> np.ndarray:
    """Compute pairwise cosine similarity between queries and documents.

    Each row is scaled to unit length (the ``(n, 1)`` norm column broadcasts
    across ``dim``), then one matrix product yields every pairwise dot
    product.

    Args:
        queries: Array of shape (n_queries, dim).
        documents: Array of shape (n_docs, dim).

    Returns:
        Array of shape (n_queries, n_docs) where ``result[i, j]`` is the
        cosine similarity of ``queries[i]`` and ``documents[j]``. A row whose
        norm is zero gets similarity 0 to everything rather than NaN.
    """
    q_norms = np.linalg.norm(queries, axis=1, keepdims=True)  # (n_queries, 1)
    d_norms = np.linalg.norm(documents, axis=1, keepdims=True)  # (n_docs, 1)

    # An all-zero row divided by its zero norm would be NaN (with a runtime
    # warning); dividing by 1 instead leaves the row at zero.
    q_norms[q_norms == 0] = 1.0
    d_norms[d_norms == 0] = 1.0

    q_normalized = queries / q_norms  # (n_queries, dim)
    d_normalized = documents / d_norms  # (n_docs, dim)

    # (n_queries, dim) @ (dim, n_docs) = (n_queries, n_docs)
    return q_normalized @ d_normalized.T
# Example: 10 query embeddings scored against 100 document embeddings.
queries = np.random.randn(10, 1536)
documents = np.random.randn(100, 1536)
similarity = cosine_similarity_matrix(queries=queries, documents=documents)
print(similarity.shape)  # (10, 100) — one row per query, one column per document
# Get top-5 documents for each query
for i, query_sims in enumerate(similarity):
top_5 = np.argsort(query_sims)[::-1][:5]
print(f"Query {i}: top docs = {top_5}")Softmax with Broadcasting
def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """Numerically stable softmax using broadcasting.

    Args:
        logits: Array of scores, e.g. shape (batch, n_classes).
        axis: Axis along which the probabilities are normalized. Defaults to
            the last axis.

    Returns:
        Array with the same shape as ``logits`` whose entries sum to 1 along
        ``axis``.
    """
    # Subtracting the per-slice max leaves the result unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    # keepdims=True keeps the reduced axis as size 1 so it broadcasts back.
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exp_vals = np.exp(shifted)
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)
# Two rows of raw scores; softmax converts each row into probabilities.
logits = np.array(
    [
        [2.0, 1.0, 0.1],
        [1.0, 3.0, 0.2],
    ]
)
probs = softmax(logits=logits)
print(probs)
# Rounded to three decimals:
# [[0.659 0.242 0.099]
#  [0.113 0.836 0.051]]
print(probs.sum(axis=1)) # [1. 1.] — each row sums to 1
Z-Score Normalization
# Z-score each feature (column): (x - mean) / std.
features = np.random.randn(1000, 50)  # 1000 samples, 50 features

# Pitfall: reducing over the whole array yields one global mean/std, which is
# not a per-feature normalization:
# features_norm = (features - features.mean()) / features.std()

# Reduce over axis 0 to get one statistic per column.
mean = np.mean(features, axis=0)  # shape (50,)
std = np.std(features, axis=0)  # shape (50,)

# (1000, 50) - (50,) → (1000, 50): the (50,) vectors are applied to every row.
centered = features - mean
features_norm = centered / std

# Verify
mean_is_zero = np.allclose(features_norm.mean(axis=0), 0, atol=1e-10)
print(mean_is_zero)  # True — mean ≈ 0 per feature
print(np.allclose(features_norm.std(axis=0), 1, atol=1e-10)) # True — std ≈ 1 per feature
Broadcasting Pitfalls
# Pitfall 1: dropping keepdims when the reduced axis is still needed
embeddings = np.random.randn(100, 1536)

# WRONG: without keepdims the norms come back with shape (100,).
norms = np.linalg.norm(embeddings, axis=1)  # shape (100,)
# embeddings (100, 1536) / norms (100,) → ValueError here (1536 ≠ 100); with a
# square matrix it would silently divide columns instead of rows.

# CORRECT: keepdims=True yields shape (100, 1), which lines up with the rows.
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)  # shape (100, 1)
normalized = embeddings / norms  # each row divided by its own norm

# Pitfall 2: shape (n,) is not shape (n, 1)
a = np.ones(shape=(3, 4))
b = np.ones(shape=3)  # shape (3,) — NOT (3, 1)
# a + b → ERROR: (3, 4) + (3,) is incompatible (trailing dims: 4 ≠ 3)

b_col = np.ones(shape=(3, 1))  # shape (3, 1) — an explicit column vector
# a + b_col → OK: (3, 4) + (3, 1) → (3, 4)

# Fix: reshape, or add an axis with [:, np.newaxis]
b_reshaped = b[:, np.newaxis]  # (3,) → (3, 1)
print(b_reshaped.shape) # (3, 1)
Quick Reference
| Operation | Shape | Example |
|---|---|---|
| Add scalar | (n,) + scalar | arr + 5 |
| Add row vector to matrix | (m, n) + (n,) | mat + row |
| Add column vector to matrix | (m, n) + (m, 1) | mat + col[:, np.newaxis] |
| Normalize rows | (m, n) / (m, 1) | mat / norms where norms = np.linalg.norm(mat, axis=1, keepdims=True) |
| Pairwise similarity | (m, d) @ (d, n) | A @ B.T |
| Per-feature normalization | (m, n) - (n,) | (X - mean) / std |
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.