Python Essentials for AI Engineers · Lesson 28 of 36
NumPy Slicing and Indexing
Basic Indexing
Python
import numpy as np
arr = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])
# Single element
print(arr[0]) # 10 — first element
print(arr[-1]) # 90 — last element
print(arr[-2]) # 80 — second to last
# Slicing: arr[start:stop:step]
print(arr[2:5]) # [30 40 50] — indices 2, 3, 4 (stop is exclusive)
print(arr[:3]) # [10 20 30] — first 3
print(arr[6:]) # [70 80 90] — from index 6 to end
print(arr[::2]) # [10 30 50 70 90] — every other element
print(arr[::-1]) # [90 80 70 60 50 40 30 20 10] — reversed
2D Indexing
NumPy uses [row, col] syntax for 2D arrays:
Python
matrix = np.array([
[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12],
])
# Shape: (3, 4) — 3 rows, 4 columns
# Single element
print(matrix[0, 0]) # 1 — row 0, col 0
print(matrix[1, 2]) # 7 — row 1, col 2
print(matrix[-1, -1]) # 12 — last row, last col
# Row slice
print(matrix[0, :]) # [1 2 3 4] — entire first row
print(matrix[1]) # [5 6 7 8] — shorthand for matrix[1, :]
# Column slice
print(matrix[:, 0]) # [1 5 9] — entire first column
print(matrix[:, 2]) # [3 7 11] — entire third column
# Sub-matrix
print(matrix[0:2, 1:3]) # Rows 0-1, cols 1-2
# [[2 3]
# [6 7]]
# Every other row
print(matrix[::2, :]) # Rows 0 and 2
# [[ 1 2 3 4]
# [ 9 10 11 12]]
Slicing in AI: Embedding Batches
Python
# Common AI pattern: batch of embeddings
embeddings = np.random.randn(1000, 1536) # 1000 documents, 1536-dim embeddings
# Get first 32 embeddings (one batch)
batch = embeddings[:32]
print(batch.shape) # (32, 1536)
# Get embeddings for specific documents by index
doc_indices = [0, 5, 42, 99]
selected = embeddings[doc_indices]
print(selected.shape) # (4, 1536)
# Get the first 128 dimensions of all embeddings (dimension reduction)
reduced = embeddings[:, :128]
print(reduced.shape) # (1000, 128)
# Normalize each embedding (divide by its L2 norm)
norms = np.linalg.norm(embeddings, axis=1, keepdims=True) # Shape: (1000, 1)
normalized = embeddings / norms # Broadcasting: (1000, 1536) / (1000, 1)
print(np.linalg.norm(normalized[0])) # ~1.0 — each row has unit norm
Boolean (Mask) Indexing
Boolean arrays select elements where the condition is True:
Python
scores = np.array([0.92, 0.45, 0.78, 0.61, 0.89, 0.33, 0.75])
# Create a boolean mask
mask = scores >= 0.7
print(mask) # [ True False  True False  True False  True]
# Apply mask — returns elements where True
passing = scores[mask]
print(passing) # [0.92 0.78 0.89 0.75]
# Inline — same result
passing = scores[scores >= 0.7]
# Compound conditions
good_range = scores[(scores >= 0.7) & (scores < 0.95)]
# Use & (not 'and'), | (not 'or'), ~ (not 'not')
print(good_range) # [0.92 0.78 0.89 0.75]
# Boolean indexing with 2D arrays
data = np.array([[1.0, 0.9, 0.8], [0.5, 0.4, 0.3], [0.95, 0.85, 0.75]])
# Get rows where the first column score is >= 0.7
high_first_col = data[data[:, 0] >= 0.7]
print(high_first_col)
# [[1.0 0.9 0.8]
# [0.95 0.85 0.75]]
Fancy Indexing
Use an array of indices to select elements:
Python
arr = np.array([10, 20, 30, 40, 50, 60, 70, 80])
# Integer array indexing
indices = np.array([0, 2, 5, 7])
print(arr[indices]) # [10 30 60 80] — elements at those indices
# Use argsort to get top-k indices
scores = np.array([0.45, 0.92, 0.78, 0.61, 0.89])
top_k = np.argsort(scores)[::-1][:3] # Top 3 indices by score (descending)
print(top_k) # [1 4 2] — indices of scores 0.92, 0.89, 0.78
print(scores[top_k]) # [0.92 0.89 0.78]
# 2D fancy indexing — select specific rows
embeddings = np.random.randn(100, 1536)
top_doc_indices = np.array([3, 17, 42, 71])
top_embeddings = embeddings[top_doc_indices]
print(top_embeddings.shape) # (4, 1536)
# Fancy indexing returns a COPY, not a view
copy = arr[np.array([0, 2])]
copy[0] = 999
print(arr) # [10 20 30 ...] — original unchanged
np.where: Conditional Element Selection
Python
scores = np.array([0.92, 0.45, 0.78, 0.61, 0.89])
# np.where(condition, value_if_true, value_if_false)
labels = np.where(scores >= 0.7, "pass", "fail")
print(labels) # ['pass' 'fail' 'pass' 'fail' 'pass']
# Replace below-threshold values with 0
clamped = np.where(scores >= 0.7, scores, 0.0)
print(clamped) # [0.92 0. 0.78 0. 0.89]
# Use indices of True elements
passing_indices = np.where(scores >= 0.7)[0] # np.where(cond) returns a tuple of index arrays (one per axis); [0] takes the array for axis 0
print(passing_indices) # [0 2 4]
Slicing for ML Data Splitting
Python
# Common ML pattern: split dataset into train/val/test
def split_dataset(X: np.ndarray, y: np.ndarray, train_pct: float = 0.7, val_pct: float = 0.15):
    """Split features and labels into contiguous train/validation/test parts.

    The split is purely positional (nothing is shuffled here): the leading
    ``train_pct`` of the rows form the training set, the following
    ``val_pct`` form the validation set, and the remainder is the test set.
    Shuffle ``X`` and ``y`` with the same permutation beforehand if the rows
    are stored in a meaningful order.

    Args:
        X: Feature array, sliced along its first axis.
        y: Label array; must have the same length as ``X``.
        train_pct: Fraction of rows assigned to the training set.
        val_pct: Fraction of rows assigned to the validation set.

    Returns:
        ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``, each
        piece obtained by basic slicing of the inputs.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, if a fraction is
            negative, or if the two fractions sum to more than 1.
    """
    # Absorbs binary floating-point error such as 0.7 + 0.1 == 0.7999999999999999,
    # which would otherwise truncate a boundary one row too low.
    eps = 1e-9

    n = len(X)
    if len(y) != n:
        raise ValueError(
            f"X and y must have the same length, got {n} and {len(y)}"
        )
    if train_pct < 0 or val_pct < 0 or train_pct + val_pct > 1 + eps:
        raise ValueError(
            "train_pct and val_pct must be non-negative and sum to at most 1, "
            f"got {train_pct} and {val_pct}"
        )

    train_end = int(n * train_pct + eps)
    val_end = int(n * (train_pct + val_pct) + eps)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
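# Shuffle before splitting (important!) — X and y are your full feature/label arrays;
# pass X_shuffled and y_shuffled to split_dataset so each split is a random sample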
np.random.seed(42)
indices = np.random.permutation(len(X)) # Random permutation of indices
X_shuffled = X[indices] # Fancy indexing with permuted indices
y_shuffled = y[indices]
# Mini-batch generation for training
def get_batches(X: np.ndarray, y: np.ndarray, batch_size: int = 32):
    """Yield consecutive ``(X_batch, y_batch)`` mini-batches.

    Batches are taken in order along the first axis. The final batch is
    shorter than ``batch_size`` when the number of rows is not a multiple
    of it. Because this is a generator, argument validation runs when
    iteration starts, not when the function is called.

    Args:
        X: Feature array, sliced along its first axis.
        y: Label array; must have the same length as ``X``.
        batch_size: Maximum number of rows per batch; must be at least 1.

    Yields:
        Tuples ``(X[start:stop], y[start:stop])`` covering every row once.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``X`` and ``y``
            differ in length.
    """
    # A negative step would make range() empty and silently yield no batches.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    n = len(X)
    if len(y) != n:
        raise ValueError(
            f"X and y must have the same length, got {n} and {len(y)}"
        )

    for start in range(0, n, batch_size):
        # Slicing clamps an out-of-range stop, so no explicit min() is needed.
        stop = start + batch_size
        yield X[start:stop], y[start:stop]
for X_batch, y_batch in get_batches(X_train, y_train):
    loss = train_step(X_batch, y_batch)
Indexing Quick Reference
| Operation | Syntax | Returns |
|---|---|---|
| Single element | arr[i] | scalar |
| Slice (1D) | arr[start:stop:step] | view |
| Row (2D) | mat[i, :] or mat[i] | view |
| Column (2D) | mat[:, j] | view |
| Sub-matrix | mat[r1:r2, c1:c2] | view |
| Boolean mask | arr[arr > 0] | copy |
| Fancy indexing | arr[np.array([0,2,5])] | copy |
| Conditional select | np.where(cond, a, b) | copy |
| Top-k indices | np.argsort(arr)[::-1][:k] | new array of indices |