NumPy Math Operations for ML

Matrix Multiplication

Matrix multiplication is the foundation of neural networks:

Python

import numpy as np

# Element-wise (Hadamard) product: *
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(a * b)
# [[ 5 12]
#  [21 32]]

# Matrix multiplication: @ (Python 3.5+)
print(a @ b)
# [[19 22]
#  [43 50]]

# Same as np.matmul
print(np.matmul(a, b))

# Dot product for 1D vectors
v1 = np.array([1.0, 2.0, 3.0])
v2 = np.array([4.0, 5.0, 6.0])
print(np.dot(v1, v2))   # 32.0 = 1*4 + 2*5 + 3*6


# Neural network forward pass
def linear_layer(X: np.ndarray, W: np.ndarray, b: np.ndarray) -> np.ndarray:
    """One fully-connected layer: output = XW + b"""
    return X @ W + b   # (batch, in_features) @ (in_features, out_features) + (out_features,)

batch_size, in_features, out_features = 32, 128, 64
X = np.random.randn(batch_size, in_features)
W = np.random.randn(in_features, out_features) * 0.01   # Small initialization
b = np.zeros(out_features)

output = linear_layer(X, W, b)
print(output.shape)   # (32, 64)

Common Linear Algebra Operations

Python

A = np.array([[4.0, 7.0], [2.0, 6.0]])

# Transpose
print(A.T)
# [[4. 2.]
#  [7. 6.]]

# Determinant
print(np.linalg.det(A))   # 10.0

# Inverse (A⁻¹)
A_inv = np.linalg.inv(A)
print(A @ A_inv)   # Identity matrix (approximately)

# Solve linear system Ax = b
b_vec = np.array([1.0, 2.0])
x = np.linalg.solve(A, b_vec)   # More stable than A_inv @ b
print(A @ x - b_vec)   # ~[0. 0.] — verified solution

# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors shape:", eigenvectors.shape)   # (2, 2) — columns are eigenvectors

# L2 norm (vector length)
v = np.array([3.0, 4.0])
print(np.linalg.norm(v))          # 5.0 (Pythagorean theorem)
print(np.linalg.norm(v, ord=1))   # 7.0 — L1 norm (sum of abs values)
print(np.linalg.norm(v, ord=np.inf))  # 4.0 — L∞ norm (max abs value)

# Matrix norms
M = np.random.randn(4, 4)
print(np.linalg.norm(M, 'fro'))   # Frobenius norm: sqrt(sum of squared elements)

Statistical Operations

Python

data = np.array([[85, 90, 78, 92], [73, 88, 95, 81], [91, 76, 84, 87]])
# Shape: (3, 4) — 3 samples, 4 features

# Global statistics
print(np.mean(data))      # 85.08
print(np.median(data))    # 86.0
print(np.std(data))       # 6.27 (standard deviation)
print(np.var(data))       # 39.32 (variance)

# Per-feature statistics (axis=0: collapse across samples → one value per feature)
print(np.mean(data, axis=0))   # [83.  84.67 85.67 86.67]
print(np.std(data, axis=0))    # Per-feature std dev

# Per-sample statistics (axis=1: one value per sample)
print(np.mean(data, axis=1))   # [86.25 84.25 84.5]

# Percentile and quantile
scores = np.random.randn(1000)
print(np.percentile(scores, 95))           # 95th percentile
print(np.quantile(scores, [0.25, 0.5, 0.75]))  # Quartiles

# Correlation matrix (useful for feature analysis)
features = np.random.randn(100, 5)
corr_matrix = np.corrcoef(features.T)   # (5, 5) correlation matrix
print(corr_matrix.shape)   # (5, 5)

Random Number Generation

Python

rng = np.random.default_rng(seed=42)   # Modern API — reproducible

# Distributions
uniform   = rng.uniform(0, 1, size=(3, 4))        # [0, 1) uniform
normal    = rng.normal(loc=0, scale=1, size=1000)  # Standard normal
integers  = rng.integers(0, 10, size=20)           # Random ints in [0, 10)
binomial  = rng.binomial(n=10, p=0.3, size=100)    # Binomial distribution

# Shuffling
arr = np.arange(10)
rng.shuffle(arr)   # In-place shuffle
permuted = rng.permutation(arr)   # Returns new shuffled array

# Random choice (sampling)
population = np.array(["warfarin", "aspirin", "metformin", "lisinopril"])
sample = rng.choice(population, size=2, replace=False)   # Without replacement
print(sample)   # e.g., ["metformin", "aspirin"]

# Weight sampling (non-uniform probability)
probs = np.array([0.4, 0.3, 0.2, 0.1])
weighted = rng.choice(population, size=5, p=probs)


# Reproducibility: always set a seed for ML experiments
np.random.seed(42)   # Legacy API
# or
rng = np.random.default_rng(seed=42)   # Preferred modern API

Activation Functions (Neural Networks)

Python

def relu(x: np.ndarray) -> np.ndarray:
    return np.maximum(0, x)

def sigmoid(x: np.ndarray) -> np.ndarray:
    return 1 / (1 + np.exp(-x))

def softmax(x: np.ndarray) -> np.ndarray:
    """Numerically stable softmax."""
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / np.sum(e_x, axis=-1, keepdims=True)

def tanh(x: np.ndarray) -> np.ndarray:
    return np.tanh(x)   # Built-in


# Test
logits = np.array([[2.0, 1.0, 0.1], [0.5, 1.5, 2.0]])
print(relu(logits))
# [[2.  1.  0.1]
#  [0.5 1.5 2.0]] — already all positive

print(softmax(logits))
# Each row sums to 1 — probability distribution

Useful Aggregation Functions

Python

arr = np.array([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])

print(np.sum(arr))        # 39
print(np.cumsum(arr))     # [3 4 8 9 14 23 25 31 36 39]
print(np.prod(arr))       # Product of all elements
print(np.cumprod(arr))    # Cumulative product

print(np.sort(arr))       # [1 1 2 3 3 4 5 5 6 9] — new sorted array
print(np.argsort(arr))    # Indices that would sort the array
print(np.argmax(arr))     # 5 — index of maximum value
print(np.argmin(arr))     # 1 — index of minimum value (first occurrence)

# Count unique values
unique, counts = np.unique(arr, return_counts=True)
print(dict(zip(unique, counts)))   # {1: 2, 2: 1, 3: 2, 4: 1, 5: 2, 6: 1, 9: 1}

# Boolean aggregations
flags = np.array([True, False, True, True, False])
print(np.all(flags))    # False — not all True
print(np.any(flags))    # True — at least one True
print(np.sum(flags))    # 3 — count of True values (True == 1)

Matrix Decompositions for ML

Python

# Singular Value Decomposition (SVD) — used in PCA, recommendation systems
A = np.random.randn(100, 50)
U, S, Vt = np.linalg.svd(A, full_matrices=False)
print(U.shape, S.shape, Vt.shape)   # (100, 50), (50,), (50, 50)

# Principal Component Analysis using SVD
def pca(X: np.ndarray, n_components: int) -> np.ndarray:
    """Reduce dimensionality of X to n_components."""
    X_centered = X - X.mean(axis=0)
    U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)
    return X_centered @ Vt[:n_components].T   # Project to principal components

features = np.random.randn(1000, 1536)   # 1000 embeddings
reduced = pca(features, n_components=128)
print(reduced.shape)   # (1000, 128)

Operations Summary

| Operation | NumPy syntax | Description | |---|---|---| | Element-wise | a * b, a + b | Apply op to each pair | | Matrix multiply | a @ b | Dot product / matmul | | Dot product | np.dot(a, b) | Sum of element products | | Transpose | a.T | Swap axes | | Inverse | np.linalg.inv(A) | Matrix inverse | | L2 norm | np.linalg.norm(v) | Euclidean length | | Normalize | v / np.linalg.norm(v) | Unit vector | | Eigendecomposition | np.linalg.eig(A) | Eigenvalues + vectors | | SVD | np.linalg.svd(A) | Singular value decomp | | Mean per axis | np.mean(X, axis=0) | Column means | | Std per axis | np.std(X, axis=1) | Row std devs | | Argsort | np.argsort(scores)[::-1] | Indices sorted desc |

NumPy Math Operations for ML

Matrix Multiplication

Common Linear Algebra Operations

Statistical Operations

Random Number Generation

Activation Functions (Neural Networks)

Useful Aggregation Functions

Matrix Decompositions for ML

Operations Summary

Enjoyed this article?

Leave a comment