Learnixo

Python Essentials for AI Engineers · Lesson 29 of 36

Broadcasting: NumPy's Superpower

What is Broadcasting?

Broadcasting is NumPy's mechanism for performing operations on arrays of different shapes — without explicitly copying data. NumPy "stretches" smaller arrays conceptually to match larger ones.

Python
import numpy as np

# Scalar + array: the scalar is broadcast to every element
arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(arr + 10)   # [11. 12. 13. 14. 15.] — 10 is added to each element
print(arr * 2)    # [ 2.  4.  6.  8. 10.] — each element is multiplied by 2

# Without broadcasting you'd need an explicit element-by-element loop:
result = np.array([x + 10 for x in arr])   # Slow Python loop

Broadcasting Rules

NumPy applies these rules to determine if two shapes are compatible:

  1. If the arrays have different numbers of dimensions, prepend 1s to the shape of the array with fewer dimensions until both shapes have the same length
  2. Dimensions are compatible if they are equal, or if one of them is 1
  3. If compatible, the 1-dimension is stretched to match the other
Python
# Rule in action:
a = np.array([[1, 2, 3],   # Shape: (3, 3)
              [4, 5, 6],
              [7, 8, 9]])

b = np.array([10, 20, 30]) # Shape: (3,) → treated as (1, 3) → stretched to (3, 3)

print(a + b)
# [[11 22 33]
#  [14 25 36]
#  [17 28 39]]
# b[0]=10 is added to the entire first column, b[1]=20 to the second column, etc.

# Column vector: broadcast across the columns
c = np.array([[10],   # Shape: (3, 1) → stretched to (3, 3)
              [20],
              [30]])

print(a + c)
# [[11 12 13]
#  [24 25 26]
#  [37 38 39]]
# c[0]=10 is added to the entire first row, c[1]=20 to the second row, etc.

Visualizing Broadcasting

a: (3, 3)    b: (3,) → (1, 3) → (3, 3)
[[1  2  3]       [[10 20 30]
 [4  5  6]   +    [10 20 30]   → [[11 22 33]
 [7  8  9]]       [10 20 30]]      [14 25 36]
                                   [17 28 39]]

NumPy doesn't actually create the expanded array — it's a virtual operation that runs in C.


Shape Compatibility Check

Python
# Compatible shapes:
# (3, 4) + (4,)    → (3, 4)  b is repeated for every row
# (3, 1) + (1, 4)  → (3, 4)  both operands are stretched
# (3, 4) + (1,)    → (3, 4)  single-element array broadcasts like a scalar
# (3, 4) + (3, 4)  → (3, 4)  same shape, no broadcasting needed

# Incompatible shapes (ValueError):
# (3, 4) + (3,)    → error: trailing dims 4 ≠ 3
# (3, 4) + (2, 4)  → error: leading dims 3 ≠ 2

def test_broadcast(a_shape: tuple, b_shape: tuple) -> None:
    """Print the result of adding two all-ones arrays of the given shapes.

    When the shapes broadcast, the shape of the sum is printed; when NumPy
    rejects them with ValueError, the error text is printed instead of
    being raised.
    """
    try:
        # The values are irrelevant; only the shapes decide the outcome.
        result = np.add(np.ones(a_shape), np.ones(b_shape))
    except ValueError as e:
        print(f"{a_shape} + {b_shape} → ERROR: {e}")
    else:
        print(f"{a_shape} + {b_shape} → {result.shape}")

test_broadcast((3, 4), (4,))    # prints: (3, 4) + (4,) → (3, 4)
test_broadcast((3, 1), (1, 4))  # prints: (3, 1) + (1, 4) → (3, 4)
test_broadcast((3, 4), (3,))    # prints an ERROR line: trailing dims 4 and 3 are incompatible

Key AI Use Case: Normalizing Embeddings

Python
# Manual row-by-row loop (slow): one norm computed and applied per iteration
embeddings = np.random.randn(100, 1536)   # 100 embeddings
normalized = np.zeros_like(embeddings)
for i in range(len(embeddings)):
    norm = np.linalg.norm(embeddings[i])
    normalized[i] = embeddings[i] / norm


# With broadcasting (fast, vectorized)
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
# norms shape: (100, 1) — one norm per row; keepdims=True keeps the reduced axis as size 1

normalized = embeddings / norms
# embeddings: (100, 1536) ÷ norms: (100, 1) → norms is broadcast across the 1536 columns
# Result: (100, 1536) — each row divided by its own norm

# Verify: each row now has unit norm
row_norms = np.linalg.norm(normalized, axis=1)
print(np.allclose(row_norms, 1.0))   # True — all rows have unit norm

Batch Cosine Similarity

Python
def _unit_rows(matrix: np.ndarray) -> np.ndarray:
    """Scale each row of ``matrix`` to unit L2 norm; all-zero rows stay zero."""
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)   # (n_rows, 1)
    # Divide zero-norm rows by 1 instead of 0 so they stay zero rather than
    # turning into NaN (and triggering a RuntimeWarning).
    safe_norms = np.where(norms == 0, 1.0, norms)
    # (n_rows, dim) / (n_rows, 1): each row is divided by its own norm.
    return matrix / safe_norms


def cosine_similarity_matrix(queries: np.ndarray, documents: np.ndarray) -> np.ndarray:
    """
    Compute pairwise cosine similarity between queries and documents.

    Every row of both inputs is scaled to unit L2 norm, so the matrix
    product of the normalized inputs is the cosine of the angle between
    each query/document pair.

    queries:   (n_queries, dim)
    documents: (n_docs, dim)
    Returns:   (n_queries, n_docs) — similarity[i, j] = sim(query_i, doc_j)

    An all-zero row has no direction; its similarity to every other row is
    returned as 0.0 rather than NaN.
    """
    q_normalized = _unit_rows(queries)     # (n_queries, dim)
    d_normalized = _unit_rows(documents)   # (n_docs, dim)

    # Matrix multiply: (n_queries, dim) @ (dim, n_docs) = (n_queries, n_docs)
    return q_normalized @ d_normalized.T


queries   = np.random.randn(10, 1536)   # 10 query embeddings
documents = np.random.randn(100, 1536)  # 100 document embeddings

similarity = cosine_similarity_matrix(queries, documents)
print(similarity.shape)   # (10, 100) — one row per query, one column per document

# Get top-5 documents for each query
for i, query_sims in enumerate(similarity):
    # argsort sorts ascending, so reverse it to get the highest similarities
    # first, then keep the first five document indices.
    top_5 = np.argsort(query_sims)[::-1][:5]
    print(f"Query {i}: top docs = {top_5}")

Softmax with Broadcasting

Python
def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """Numerically stable softmax using broadcasting.

    logits:  array of raw scores, e.g. (batch, n_classes)
    axis:    axis along which the scores are normalized; defaults to the
             last axis
    Returns: array of the same shape as ``logits`` whose values sum to 1
             along ``axis``
    """
    # Subtract the max for numerical stability (prevents overflow in exp).
    # keepdims=True leaves the reduced axis with size 1, e.g. (batch, 1),
    # so the max broadcasts back against (batch, n_classes).
    shifted = logits - np.max(logits, axis=axis, keepdims=True)

    exp_vals = np.exp(shifted)
    # The per-slice sum also keeps its axis so the division broadcasts.
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)


logits = np.array([[2.0, 1.0, 0.1], [1.0, 3.0, 0.2]])
probs  = softmax(logits)
print(probs)
# Approximately (rounded to 3 decimals):
# [[0.659 0.242 0.099]
#  [0.113 0.836 0.051]]
print(probs.sum(axis=1))   # [1. 1.] — each row sums to 1

Z-Score Normalization

Python
# Normalize features: (x - mean) / std, computed per feature (column)
features = np.random.randn(1000, 50)   # 1000 samples, 50 features

# Global statistics (wrong here — a single mean/std for the whole matrix, not one per feature)
# features_norm = (features - features.mean()) / features.std()

# With broadcasting: per-column mean and std
mean = features.mean(axis=0)   # (50,) — one mean per feature
std  = features.std(axis=0)    # (50,) — one standard deviation per feature

# (1000, 50) - (50,): the (50,) vector is treated as (1, 50) and broadcast to (1000, 50)
features_norm = (features - mean) / std

# Verify
print(np.allclose(features_norm.mean(axis=0), 0, atol=1e-10))  # True — mean ≈ 0 per feature
print(np.allclose(features_norm.std(axis=0),  1, atol=1e-10))  # True — std ≈ 1 per feature

Broadcasting Pitfalls

Python
# Pitfall 1: Forgetting keepdims when you need it
embeddings = np.random.randn(100, 1536)

# WRONG: norms has shape (100,), which lines up with the LAST axis, not the rows
norms = np.linalg.norm(embeddings, axis=1)   # Shape: (100,)
# embeddings (100, 1536) / norms (100,) → ValueError: trailing dims 1536 ≠ 100
# (for a square matrix the shapes would match and column j would be divided
#  by the norm of row j — a silently wrong result)

# CORRECT: keepdims=True gives shape (100, 1), which broadcasts across the columns
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)   # Shape: (100, 1)
normalized = embeddings / norms   # Correct: divides each row by its norm


# Pitfall 2: Shape (n,) vs shape (n, 1)
a = np.ones((3, 4))
b = np.ones(3)        # Shape (3,) — NOT (3, 1)
# a + b → ERROR: (3, 4) + (3,) incompatible (last dims: 4 ≠ 3)

b_col = np.ones((3, 1))   # Shape (3, 1) — explicit column vector
# a + b_col → OK: (3, 4) + (3, 1) → (3, 4)

# Fix: reshape or use [:, np.newaxis]
b_reshaped = b[:, np.newaxis]   # (3,) → (3, 1)
print(b_reshaped.shape)   # (3, 1)

Quick Reference

| Operation | Shape | Example |
|---|---|---|
| Add scalar | (n,) + scalar | `arr + 5` |
| Add row vector to matrix | (m, n) + (n,) | `mat + row` |
| Add column vector to matrix | (m, n) + (m, 1) | `mat + col[:, np.newaxis]` |
| Normalize rows | (m, n) / (m, 1) | `mat / norms` where `norms = np.linalg.norm(mat, axis=1, keepdims=True)` |
| Pairwise similarity | (m, d) @ (d, n) | `A @ B.T` |
| Per-feature normalization | (m, n) - (n,) | `(X - mean) / std` |