Python Essentials for AI Engineers · Lesson 30 of 36

NumPy Math Operations for ML

Matrix Multiplication

Matrix multiplication is the foundation of neural networks:

Python

import numpy as np

# Element-wise (Hadamard) product: *
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(a * b)
# [[ 5 12]
#  [21 32]]

# Matrix multiplication: @ (Python 3.5+)
print(a @ b)
# [[19 22]
#  [43 50]]

# Same as np.matmul
print(np.matmul(a, b))

# Dot product for 1D vectors
v1 = np.array([1.0, 2.0, 3.0])
v2 = np.array([4.0, 5.0, 6.0])
print(np.dot(v1, v2))   # 32.0 = 1*4 + 2*5 + 3*6


# Neural network forward pass
def linear_layer(X: np.ndarray, W: np.ndarray, b: np.ndarray) -> np.ndarray:
    """One fully-connected layer: output = XW + b"""
    return X @ W + b   # (batch, in_features) @ (in_features, out_features) + (out_features,)

batch_size, in_features, out_features = 32, 128, 64
X = np.random.randn(batch_size, in_features)
W = np.random.randn(in_features, out_features) * 0.01   # Small initialization
b = np.zeros(out_features)

output = linear_layer(X, W, b)
print(output.shape)   # (32, 64)

Common Linear Algebra Operations

Python

A = np.array([[4.0, 7.0], [2.0, 6.0]])

# Transpose
print(A.T)
# [[4. 2.]
#  [7. 6.]]

# Determinant
print(np.linalg.det(A))   # 10.0

# Inverse (A⁻¹)
A_inv = np.linalg.inv(A)
print(A @ A_inv)   # Identity matrix (approximately)

# Solve linear system Ax = b
b_vec = np.array([1.0, 2.0])
x = np.linalg.solve(A, b_vec)   # More stable than A_inv @ b
print(A @ x - b_vec)   # ~[0. 0.] — verified solution

# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors shape:", eigenvectors.shape)   # (2, 2) — columns are eigenvectors

# L2 norm (vector length)
v = np.array([3.0, 4.0])
print(np.linalg.norm(v))          # 5.0 (Pythagorean theorem)
print(np.linalg.norm(v, ord=1))   # 7.0 — L1 norm (sum of abs values)
print(np.linalg.norm(v, ord=np.inf))  # 4.0 — L∞ norm (max abs value)

# Matrix norms
M = np.random.randn(4, 4)
print(np.linalg.norm(M, 'fro'))   # Frobenius norm: sqrt(sum of squared elements)

Statistical Operations

Python

data = np.array([[85, 90, 78, 92], [73, 88, 95, 81], [91, 76, 84, 87]])
# Shape: (3, 4) — 3 samples, 4 features

# Global statistics
print(np.mean(data))      # 85.08
print(np.median(data))    # 86.0
print(np.std(data))       # 6.27 (standard deviation)
print(np.var(data))       # 39.32 (variance)

# Per-feature statistics (axis=0: collapse across samples → one value per feature)
print(np.mean(data, axis=0))   # [83.  84.67 85.67 86.67]
print(np.std(data, axis=0))    # Per-feature std dev

# Per-sample statistics (axis=1: one value per sample)
print(np.mean(data, axis=1))   # [86.25 84.25 84.5]

# Percentile and quantile
scores = np.random.randn(1000)
print(np.percentile(scores, 95))           # 95th percentile
print(np.quantile(scores, [0.25, 0.5, 0.75]))  # Quartiles

# Correlation matrix (useful for feature analysis)
features = np.random.randn(100, 5)
corr_matrix = np.corrcoef(features.T)   # (5, 5) correlation matrix
print(corr_matrix.shape)   # (5, 5)

Random Number Generation

Python

rng = np.random.default_rng(seed=42)   # Modern API — reproducible

# Distributions
uniform   = rng.uniform(0, 1, size=(3, 4))        # [0, 1) uniform
normal    = rng.normal(loc=0, scale=1, size=1000)  # Standard normal
integers  = rng.integers(0, 10, size=20)           # Random ints in [0, 10)
binomial  = rng.binomial(n=10, p=0.3, size=100)    # Binomial distribution

# Shuffling
arr = np.arange(10)
rng.shuffle(arr)   # In-place shuffle
permuted = rng.permutation(arr)   # Returns new shuffled array

# Random choice (sampling)
population = np.array(["warfarin", "aspirin", "metformin", "lisinopril"])
sample = rng.choice(population, size=2, replace=False)   # Without replacement
print(sample)   # e.g., ["metformin", "aspirin"]

# Weight sampling (non-uniform probability)
probs = np.array([0.4, 0.3, 0.2, 0.1])
weighted = rng.choice(population, size=5, p=probs)


# Reproducibility: always set a seed for ML experiments
np.random.seed(42)   # Legacy API
# or
rng = np.random.default_rng(seed=42)   # Preferred modern API

Activation Functions (Neural Networks)

Python

def relu(x: np.ndarray) -> np.ndarray:
    return np.maximum(0, x)

def sigmoid(x: np.ndarray) -> np.ndarray:
    return 1 / (1 + np.exp(-x))

def softmax(x: np.ndarray) -> np.ndarray:
    """Numerically stable softmax."""
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / np.sum(e_x, axis=-1, keepdims=True)

def tanh(x: np.ndarray) -> np.ndarray:
    return np.tanh(x)   # Built-in


# Test
logits = np.array([[2.0, 1.0, 0.1], [0.5, 1.5, 2.0]])
print(relu(logits))
# [[2.  1.  0.1]
#  [0.5 1.5 2.0]] — already all positive

print(softmax(logits))
# Each row sums to 1 — probability distribution

Useful Aggregation Functions

Python

arr = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])

print(np.sum(arr))        # 39
print(np.cumsum(arr))     # [3 4 8 9 14 23 25 31 36 39]
print(np.prod(arr))       # Product of all elements
print(np.cumprod(arr))    # Cumulative product

print(np.sort(arr))       # [1 1 2 3 3 4 5 5 6 9] — new sorted array
print(np.argsort(arr))    # Indices that would sort the array
print(np.argmax(arr))     # 5 — index of maximum value
print(np.argmin(arr))     # 1 — index of minimum value (first occurrence)

# Count unique values
unique, counts = np.unique(arr, return_counts=True)
print(dict(zip(unique, counts)))   # {1: 2, 2: 1, 3: 2, 4: 1, 5: 2, 6: 1, 9: 1}

# Boolean aggregations
flags = np.array([True, False, True, True, False])
print(np.all(flags))    # False — not all True
print(np.any(flags))    # True — at least one True
print(np.sum(flags))    # 3 — count of True values (True == 1)

Matrix Decompositions for ML

Python

# Singular Value Decomposition (SVD) — used in PCA, recommendation systems
A = np.random.randn(100, 50)
U, S, Vt = np.linalg.svd(A, full_matrices=False)
print(U.shape, S.shape, Vt.shape)   # (100, 50), (50,), (50, 50)

# Principal Component Analysis using SVD
def pca(X: np.ndarray, n_components: int) -> np.ndarray:
    """Reduce dimensionality of X to n_components."""
    X_centered = X - X.mean(axis=0)
    U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)
    return X_centered @ Vt[:n_components].T   # Project to principal components

features = np.random.randn(1000, 1536)   # 1000 embeddings
reduced = pca(features, n_components=128)
print(reduced.shape)   # (1000, 128)

Operations Summary

| Operation | NumPy syntax | Description | |---|---|---| | Element-wise | a * b, a + b | Apply op to each pair | | Matrix multiply | a @ b | Dot product / matmul | | Dot product | np.dot(a, b) | Sum of element products | | Transpose | a.T | Swap axes | | Inverse | np.linalg.inv(A) | Matrix inverse | | L2 norm | np.linalg.norm(v) | Euclidean length | | Normalize | v / np.linalg.norm(v) | Unit vector | | Eigendecomposition | np.linalg.eig(A) | Eigenvalues + vectors | | SVD | np.linalg.svd(A) | Singular value decomp | | Mean per axis | np.mean(X, axis=0) | Column means | | Std per axis | np.std(X, axis=1) | Row std devs | | Argsort | np.argsort(scores)[::-1] | Indices sorted desc |

Broadcasting: NumPy's Superpower

Next Lesson

Interview: NumPy Problem Walk-Through