NumPy Math Operations for ML
Master NumPy mathematical operations for machine learning: linear algebra, matrix multiplication, statistical functions, random generation, and ML-specific patterns like dot products and eigenvectors.
Matrix Multiplication
Matrix multiplication is the foundation of neural networks:
import numpy as np
# Element-wise (Hadamard) product: *
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(a * b)
# [[ 5 12]
# [21 32]]
# Matrix multiplication: @ (Python 3.5+)
print(a @ b)
# [[19 22]
# [43 50]]
# Same as np.matmul
print(np.matmul(a, b))
# Dot product for 1D vectors
v1 = np.array([1.0, 2.0, 3.0])
v2 = np.array([4.0, 5.0, 6.0])
print(np.dot(v1, v2)) # 32.0 = 1*4 + 2*5 + 3*6
# Neural network forward pass
def linear_layer(X: np.ndarray, W: np.ndarray, b: np.ndarray) -> np.ndarray:
"""One fully-connected layer: output = XW + b"""
return X @ W + b # (batch, in_features) @ (in_features, out_features) + (out_features,)
batch_size, in_features, out_features = 32, 128, 64
X = np.random.randn(batch_size, in_features)
W = np.random.randn(in_features, out_features) * 0.01 # Small initialization
b = np.zeros(out_features)
output = linear_layer(X, W, b)
print(output.shape) # (32, 64)Common Linear Algebra Operations
A = np.array([[4.0, 7.0], [2.0, 6.0]])
# Transpose
print(A.T)
# [[4. 2.]
# [7. 6.]]
# Determinant
print(np.linalg.det(A)) # 10.0
# Inverse (Aโปยน)
A_inv = np.linalg.inv(A)
print(A @ A_inv) # Identity matrix (approximately)
# Solve linear system Ax = b
b_vec = np.array([1.0, 2.0])
x = np.linalg.solve(A, b_vec) # More stable than A_inv @ b
print(A @ x - b_vec) # ~[0. 0.] โ verified solution
# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors shape:", eigenvectors.shape) # (2, 2) โ columns are eigenvectors
# L2 norm (vector length)
v = np.array([3.0, 4.0])
print(np.linalg.norm(v)) # 5.0 (Pythagorean theorem)
print(np.linalg.norm(v, ord=1)) # 7.0 โ L1 norm (sum of abs values)
print(np.linalg.norm(v, ord=np.inf)) # 4.0 โ Lโ norm (max abs value)
# Matrix norms
M = np.random.randn(4, 4)
print(np.linalg.norm(M, 'fro')) # Frobenius norm: sqrt(sum of squared elements)Statistical Operations
data = np.array([[85, 90, 78, 92], [73, 88, 95, 81], [91, 76, 84, 87]])
# Shape: (3, 4) โ 3 samples, 4 features
# Global statistics
print(np.mean(data)) # 85.08
print(np.median(data)) # 86.0
print(np.std(data)) # 6.27 (standard deviation)
print(np.var(data)) # 39.32 (variance)
# Per-feature statistics (axis=0: collapse across samples โ one value per feature)
print(np.mean(data, axis=0)) # [83. 84.67 85.67 86.67]
print(np.std(data, axis=0)) # Per-feature std dev
# Per-sample statistics (axis=1: one value per sample)
print(np.mean(data, axis=1)) # [86.25 84.25 84.5]
# Percentile and quantile
scores = np.random.randn(1000)
print(np.percentile(scores, 95)) # 95th percentile
print(np.quantile(scores, [0.25, 0.5, 0.75])) # Quartiles
# Correlation matrix (useful for feature analysis)
features = np.random.randn(100, 5)
corr_matrix = np.corrcoef(features.T) # (5, 5) correlation matrix
print(corr_matrix.shape) # (5, 5)Random Number Generation
rng = np.random.default_rng(seed=42) # Modern API โ reproducible
# Distributions
uniform = rng.uniform(0, 1, size=(3, 4)) # [0, 1) uniform
normal = rng.normal(loc=0, scale=1, size=1000) # Standard normal
integers = rng.integers(0, 10, size=20) # Random ints in [0, 10)
binomial = rng.binomial(n=10, p=0.3, size=100) # Binomial distribution
# Shuffling
arr = np.arange(10)
rng.shuffle(arr) # In-place shuffle
permuted = rng.permutation(arr) # Returns new shuffled array
# Random choice (sampling)
population = np.array(["warfarin", "aspirin", "metformin", "lisinopril"])
sample = rng.choice(population, size=2, replace=False) # Without replacement
print(sample) # e.g., ["metformin", "aspirin"]
# Weight sampling (non-uniform probability)
probs = np.array([0.4, 0.3, 0.2, 0.1])
weighted = rng.choice(population, size=5, p=probs)
# Reproducibility: always set a seed for ML experiments
np.random.seed(42) # Legacy API
# or
rng = np.random.default_rng(seed=42) # Preferred modern APIActivation Functions (Neural Networks)
def relu(x: np.ndarray) -> np.ndarray:
return np.maximum(0, x)
def sigmoid(x: np.ndarray) -> np.ndarray:
return 1 / (1 + np.exp(-x))
def softmax(x: np.ndarray) -> np.ndarray:
"""Numerically stable softmax."""
e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
return e_x / np.sum(e_x, axis=-1, keepdims=True)
def tanh(x: np.ndarray) -> np.ndarray:
return np.tanh(x) # Built-in
# Test
logits = np.array([[2.0, 1.0, 0.1], [0.5, 1.5, 2.0]])
print(relu(logits))
# [[2. 1. 0.1]
# [0.5 1.5 2.0]] โ already all positive
print(softmax(logits))
# Each row sums to 1 โ probability distributionUseful Aggregation Functions
arr = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])
print(np.sum(arr)) # 39
print(np.cumsum(arr)) # [3 4 8 9 14 23 25 31 36 39]
print(np.prod(arr)) # Product of all elements
print(np.cumprod(arr)) # Cumulative product
print(np.sort(arr)) # [1 1 2 3 3 4 5 5 6 9] โ new sorted array
print(np.argsort(arr)) # Indices that would sort the array
print(np.argmax(arr)) # 5 โ index of maximum value
print(np.argmin(arr)) # 1 โ index of minimum value (first occurrence)
# Count unique values
unique, counts = np.unique(arr, return_counts=True)
print(dict(zip(unique, counts))) # {1: 2, 2: 1, 3: 2, 4: 1, 5: 2, 6: 1, 9: 1}
# Boolean aggregations
flags = np.array([True, False, True, True, False])
print(np.all(flags)) # False โ not all True
print(np.any(flags)) # True โ at least one True
print(np.sum(flags)) # 3 โ count of True values (True == 1)Matrix Decompositions for ML
# Singular Value Decomposition (SVD) โ used in PCA, recommendation systems
A = np.random.randn(100, 50)
U, S, Vt = np.linalg.svd(A, full_matrices=False)
print(U.shape, S.shape, Vt.shape) # (100, 50), (50,), (50, 50)
# Principal Component Analysis using SVD
def pca(X: np.ndarray, n_components: int) -> np.ndarray:
"""Reduce dimensionality of X to n_components."""
X_centered = X - X.mean(axis=0)
U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)
return X_centered @ Vt[:n_components].T # Project to principal components
features = np.random.randn(1000, 1536) # 1000 embeddings
reduced = pca(features, n_components=128)
print(reduced.shape) # (1000, 128)Operations Summary
| Operation | NumPy syntax | Description |
|---|---|---|
| Element-wise | a * b, a + b | Apply op to each pair |
| Matrix multiply | a @ b | Dot product / matmul |
| Dot product | np.dot(a, b) | Sum of element products |
| Transpose | a.T | Swap axes |
| Inverse | np.linalg.inv(A) | Matrix inverse |
| L2 norm | np.linalg.norm(v) | Euclidean length |
| Normalize | v / np.linalg.norm(v) | Unit vector |
| Eigendecomposition | np.linalg.eig(A) | Eigenvalues + vectors |
| SVD | np.linalg.svd(A) | Singular value decomp |
| Mean per axis | np.mean(X, axis=0) | Column means |
| Std per axis | np.std(X, axis=1) | Row std devs |
| Argsort | np.argsort(scores)[::-1] | Indices sorted desc |
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.