map(), filter(), and zip() in Practice
Master Python's built-in map(), filter(), and zip() functions. Understand when to use them instead of list comprehensions, and learn practical patterns for AI/ML data preprocessing.
map(): Apply a Function to Each Element
map(function, iterable) applies a function to every item in an iterable and returns a lazy iterator (not a list):
# Without map: an explicit loop that builds the result list by hand
drug_names = ["warfarin", "aspirin", "METFORMIN"]
normalized = []
for name in drug_names:
    normalized.append(name.lower())
# ["warfarin", "aspirin", "metformin"]

# With map: str.lower is passed as a function reference and applied to each item
normalized = list(map(str.lower, drug_names))
# ["warfarin", "aspirin", "metformin"]

# map returns a lazy iterator — wrap it in list() to materialize the results
print(type(map(str.lower, drug_names)))  # <class 'map'>
print(list(map(str.lower, drug_names)))  # ["warfarin", "aspirin", "metformin"]

# With a lambda
scores = [0.92, 0.876, 0.8321, 0.7456]
rounded = list(map(lambda s: round(s, 2), scores))
# [0.92, 0.88, 0.83, 0.75]

# map with multiple iterables — the function receives one item from each
# iterable per call, and iteration stops when the shortest one is exhausted
a = [1, 2, 3]
b = [10, 20, 30]
sums = list(map(lambda x, y: x + y, a, b)) # [11, 22, 33]
filter(): Keep Elements Matching a Condition
filter(function, iterable) keeps only the items for which the function returns a truthy value, and (like map) returns a lazy iterator:
scores = [0.92, 0.45, 0.78, 0.61, 0.89, 0.33]
# Keep passing scores: the lambda is the predicate, 0.7 is the inclusive cut-off
passing = list(filter(lambda s: s >= 0.7, scores))
# [0.92, 0.78, 0.89] — original order is preserved
# filter with None as the function keeps the items that are themselves truthy
values = [1, 0, "hello", "", None, 42, False, [1, 2], []]
truthy = list(filter(None, values))
# [1, "hello", 42, [1, 2]] — removes 0, "", None, False, []
# Multi-condition filter
patients = [
    {"id": "P001", "inr": 2.4, "active": True},
    {"id": "P002", "inr": 1.6, "active": False},
    {"id": "P003", "inr": 4.8, "active": True},
    {"id": "P004", "inr": 2.1, "active": True},
]


def needs_review(patient: dict, *, low: float = 2.0, high: float = 3.0) -> bool:
    """Return True when an active patient's INR lies outside ``[low, high]``.

    Args:
        patient: Mapping with at least the keys ``"active"`` and ``"inr"``.
        low: Inclusive lower bound of the acceptable INR range.
        high: Inclusive upper bound of the acceptable INR range.

    Returns:
        ``False`` for any patient whose ``"active"`` value is falsy, otherwise
        whether ``patient["inr"]`` is below ``low`` or above ``high``.

    Raises:
        KeyError: If ``"active"`` is missing, or ``"inr"`` is missing for an
            active patient.
    """
    # Return a real bool rather than leaking a falsy "active" value such as
    # None or 0; "inr" is only read for active patients.
    if not patient["active"]:
        return False
    inr = patient["inr"]
    return inr < low or inr > high


# The bounds are keyword-only with defaults, so the function still works as a
# one-argument predicate for filter().
review_list = list(filter(needs_review, patients))
# [{"id": "P003", ...}] — the only active patient with an out-of-range INR (4.8); P002 (INR 1.6) is excluded because it is not active
map() vs List Comprehension
Both produce the same result; the convention in modern Python is to prefer the comprehension unless you already have a function to pass:
# Each pair below computes the same list twice; the second assignment simply
# rebinds the name so the two spellings can be compared side by side.
drug_names = ["warfarin", "ASPIRIN", "Metformin"]
# map — functional style
normalized = list(map(str.lower, drug_names))
# Comprehension — more readable (preferred by most style guides)
normalized = [name.lower() for name in drug_names]
# Either way: ["warfarin", "aspirin", "metformin"]
# With lambda — comprehension is almost always clearer
scores = [0.92, 0.78, 0.65]
doubled = list(map(lambda s: s * 2, scores)) # map + lambda
doubled = [s * 2 for s in scores] # comprehension — clearer
# When map wins: passing a function reference (no lambda needed)
normalized = list(map(str.lower, drug_names)) # Cleaner than comprehension
processed = list(map(json.loads, json_strings)) # No lambda needed (assumes `import json` and an iterable of JSON strings named json_strings)
zip(): Pair Elements from Multiple Iterables
zip(*iterables) pairs up elements from multiple iterables into tuples and stops as soon as the shortest iterable is exhausted:
drugs = ["warfarin", "aspirin", "metformin"]
doses = [5, 81, 500]
routes = ["PO", "PO", "PO"]

# Zip two sequences
for drug, dose in zip(drugs, doses):
    print(f"{drug}: {dose}mg")
# warfarin: 5mg
# aspirin: 81mg
# metformin: 500mg

# Zip three sequences
prescriptions = list(zip(drugs, doses, routes))
# [("warfarin", 5, "PO"), ("aspirin", 81, "PO"), ("metformin", 500, "PO")]

# Build dict from two lists: the first iterable supplies keys, the second values
drug_dose_map = dict(zip(drugs, doses))
# {"warfarin": 5, "aspirin": 81, "metformin": 500}

# Stops at shortest — no IndexError, but the surplus items are dropped silently
# (on Python 3.10+, zip(..., strict=True) raises ValueError on a length mismatch)
long_list = [1, 2, 3, 4, 5]
short_list = ["a", "b", "c"]
print(list(zip(long_list, short_list)))
# [(1, "a"), (2, "b"), (3, "c")] — 4 and 5 are dropped

# zip_longest: fill missing values with a default
from itertools import zip_longest

print(list(zip_longest(long_list, short_list, fillvalue=None)))
# [(1, "a"), (2, "b"), (3, "c"), (4, None), (5, None)]
zip() for Parallel Iteration
# Three lists aligned by index: questions[i], answers[i] and scores[i]
# all describe the same item.
questions = [
    "What is warfarin?",
    "What is metformin?",
    "What is aspirin?",
]
answers = [
    "Warfarin is an anticoagulant that inhibits VKORC1.",
    "Metformin is a biguanide antidiabetic that activates AMPK.",
    "Aspirin is an NSAID that inhibits COX-1 and COX-2.",
]
scores = [0.92, 0.87, 0.95]

# Iterate all three in parallel — no index bookkeeping needed
for q, a, s in zip(questions, answers, scores):
    print(f"Q: {q}")
    print(f"A: {a}")
    print(f"Score: {s:.2f}\n")
# Evaluating RAG outputs — pair expected vs actual
expected = ["VKORC1", "AMPK", "COX-1"]
actual = ["VKORC1", "AMP-activated kinase", "COX-1 and COX-2"]
for exp, act in zip(expected, actual):
match = exp.lower() in act.lower()
print(f"Expected: {exp} | Got: {act} | Match: {match}")
Unzipping with zip(*matrix)
# Transpose a matrix (here a list of equal-length row tuples)
matrix = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
# Unzip = transpose: * passes each row as a separate argument, so zip groups
# the first items together, then the second items, and so on
transposed = list(zip(*matrix))
# [(1, 4, 7), (2, 5, 8), (3, 6, 9)]
# Common in ML: separate (question, answer) pairs
qa_pairs = [
("What is warfarin?", "An anticoagulant."),
("What is aspirin?", "An NSAID."),
("What is metformin?", "A biguanide."),
]
# zip(*qa_pairs) yields exactly two tuples here (all questions, all answers).
# With an empty qa_pairs there would be nothing to unpack and this line would
# raise ValueError.
questions, answers = zip(*qa_pairs)
print(list(questions)) # ["What is warfarin?", ...]
print(list(answers)) # ["An anticoagulant.", ...]
Practical AI Patterns
# 1. Embedding a batch of texts with map
# map calls the lambda once per item, so this issues one embeddings.create
# call per text and keeps the first result's embedding from each response.
# NOTE(review): the embeddings endpoint reportedly also accepts a list for
# `input`; a single batched request would avoid N round-trips — confirm
# against the OpenAI API reference before changing.
from openai import OpenAI
client = OpenAI()
texts = ["warfarin mechanism", "metformin side effects", "aspirin dose"]
embeddings = list(map(
lambda t: client.embeddings.create(input=t, model="text-embedding-3-small").data[0].embedding,
texts
))
# 2. Filter retrieved docs by score threshold
def filter_by_relevance(docs: list, scores: list[float], threshold: float = 0.75) -> list:
    """Return the docs whose paired score is at least ``threshold``.

    ``docs`` and ``scores`` are matched by position. Because ``zip`` stops at
    the shorter input, any unpaired trailing docs or scores are ignored.

    Args:
        docs: Retrieved documents, in any representation.
        scores: Relevance score for the document at the same index.
        threshold: Inclusive minimum score for a document to be kept.

    Returns:
        A new list with the kept documents in their original order.
    """
    return [doc for doc, score in zip(docs, scores) if score >= threshold]
# 3. Build Q&A dataset aligned by index
def build_dataset(questions: list[str], answers: list[str], sources: list[str]) -> list[dict]:
    """Combine three index-aligned lists into a list of record dicts.

    Args:
        questions: Question text for each record.
        answers: Answer text for the question at the same index.
        sources: Source label for the question at the same index.

    Returns:
        One ``{"question": ..., "answer": ..., "source": ...}`` dict per
        position. Because ``zip`` stops at the shortest input, the result has
        as many records as the shortest of the three lists.
    """
    return [
        {"question": q, "answer": a, "source": s}
        for q, a, s in zip(questions, answers, sources)
    ]
# 4. Map over a list of model configs to create models
# NOTE(review): "claude-haiku-4-5" looks like an Anthropic model name, yet every
# entry is passed to ChatOpenAI — confirm the configured endpoint can serve it.
model_names = ["gpt-4o", "gpt-4o-mini", "claude-haiku-4-5"]
from langchain_openai import ChatOpenAI
# Builds one ChatOpenAI instance per name, each with temperature=0
models = list(map(lambda m: ChatOpenAI(model=m, temperature=0), model_names))
# 5. Compare two drug lists for differences
def find_added_removed(old: list[str], new: list[str]) -> tuple[list, list]:
    """Report which items appear only in ``new`` and which only in ``old``.

    Args:
        old: The previous list of items.
        new: The current list of items.

    Returns:
        ``(added, removed)``: ``added`` holds the items of ``new`` that are
        absent from ``old``, and ``removed`` the items of ``old`` that are
        absent from ``new``. Each list is de-duplicated and keeps the order
        of first appearance in its source list, so the output is
        deterministic (a plain set difference has no defined order).
    """
    old_set = set(old)
    new_set = set(new)
    # dict.fromkeys drops duplicates while preserving insertion order.
    added = list(dict.fromkeys(item for item in new if item not in old_set))
    removed = list(dict.fromkeys(item for item in old if item not in new_set))
    return added, removed


added, removed = find_added_removed(
    ["warfarin", "aspirin"],
    ["warfarin", "metformin", "lisinopril"],
)
print(f"Added: {added}") # ["metformin", "lisinopril"]
print(f"Removed: {removed}") # ["aspirin"]
Summary
| Function | Returns | When to use |
|---|---|---|
| map(fn, iterable) | lazy iterator | Apply function to every element; cleaner with method references |
| filter(fn, iterable) | lazy iterator | Keep elements where fn returns a truthy value (with fn=None, keep elements that are themselves truthy) |
| zip(*iterables) | lazy iterator of tuples | Parallel iteration over same-length sequences |
| zip_longest(...) | lazy iterator | zip with unequal lengths |
| zip(*matrix) | lazy iterator of tuples (the transposed rows) | Swap rows and columns |
| List comprehension | list | Most general; usually more readable than map/filter |
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.