Learnixo

Python Essentials for AI Engineers · Lesson 21 of 36

map(), filter(), and zip() in Practice

map(): Apply a Function to Each Element

map(function, iterable) applies a function to every item in an iterable and returns a lazy iterator (not a list):

Python
# Without map: build the result by hand, one append per element
drug_names = ["warfarin", "aspirin", "METFORMIN"]
normalized = []
for name in drug_names:
    normalized.append(name.lower())
# ["warfarin", "aspirin", "metformin"]

# With map: str.lower is passed by reference and applied to every element
normalized = list(map(str.lower, drug_names))
# ["warfarin", "aspirin", "metformin"]

# map returns an iterator — wrap in list() to materialize
print(type(map(str.lower, drug_names)))   # <class 'map'>
print(list(map(str.lower, drug_names)))   # ['warfarin', 'aspirin', 'metformin']


# With a lambda
scores = [0.92, 0.876, 0.8321, 0.7456]
rounded = list(map(lambda s: round(s, 2), scores))
# [0.92, 0.88, 0.83, 0.75]


# map with multiple iterables — the function receives one item from each,
# and iteration stops as soon as the shortest iterable is exhausted
a = [1, 2, 3]
b = [10, 20, 30]
sums = list(map(lambda x, y: x + y, a, b))   # [11, 22, 33]

filter(): Keep Elements Matching a Condition

filter(function, iterable) keeps only the items for which the function returns a truthy value. Like map(), it returns a lazy iterator rather than a list:

Python
scores = [0.92, 0.45, 0.78, 0.61, 0.89, 0.33]

# Keep passing scores (0.7 or higher)
passing = list(filter(lambda s: s >= 0.7, scores))
# [0.92, 0.78, 0.89]

# filter with None as the function keeps items that are truthy themselves,
# i.e. it removes every falsy value
values = [1, 0, "hello", "", None, 42, False, [1, 2], []]
truthy = list(filter(None, values))
# [1, "hello", 42, [1, 2]] — removes 0, "", None, False, []


# Multi-condition filter
patients = [
    {"id": "P001", "inr": 2.4, "active": True},
    {"id": "P002", "inr": 1.6, "active": False},
    {"id": "P003", "inr": 4.8, "active": True},
    {"id": "P004", "inr": 2.1, "active": True},
]


def needs_review(patient: dict) -> bool:
    """Flag an active patient whose INR is below 2.0 or above 3.0."""
    is_active = patient["active"]
    if not is_active:
        # Inactive patients are never flagged; their INR is not even read.
        return is_active
    inr = patient["inr"]
    return inr < 2.0 or inr > 3.0


review_list = list(filter(needs_review, patients))
# [{"id": "P003", "inr": 4.8, "active": True}]
# P002 (INR 1.6) is out of range as well, but it is inactive, so it is excluded.

map() vs List Comprehension

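Both produce the same result. The convention in modern Python is to prefer a comprehension, and to reach for map() mainly when you can pass an existing function by reference: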

Python
import json  # needed for json.loads in the last example

drug_names = ["warfarin", "ASPIRIN", "Metformin"]

# map — functional style
normalized = list(map(str.lower, drug_names))

# Comprehension — more readable (preferred by most style guides)
normalized = [name.lower() for name in drug_names]


# With lambda — comprehension is almost always clearer
scores = [0.92, 0.78, 0.65]
doubled = list(map(lambda s: s * 2, scores))     # map + lambda
doubled = [s * 2 for s in scores]                # comprehension — clearer


# When map wins: passing a function reference (no lambda needed)
json_strings = ['{"drug": "warfarin"}', '{"drug": "aspirin"}']  # sample input
normalized = list(map(str.lower, drug_names))    # Cleaner than comprehension
processed  = list(map(json.loads, json_strings))  # No lambda needed
# [{"drug": "warfarin"}, {"drug": "aspirin"}]

zip(): Pair Elements from Multiple Iterables

zip(*iterables) pairs up elements from multiple iterables, position by position. It stops as soon as the shortest one is exhausted:

Python
drugs   = ["warfarin", "aspirin", "metformin"]
doses   = [5, 81, 500]
routes  = ["PO", "PO", "PO"]

# Zip two sequences
for drug, dose in zip(drugs, doses):
    print(f"{drug}: {dose}mg")
# warfarin: 5mg
# aspirin: 81mg
# metformin: 500mg

# Zip three sequences — each result tuple has one item per input
prescriptions = list(zip(drugs, doses, routes))
# [("warfarin", 5, "PO"), ("aspirin", 81, "PO"), ("metformin", 500, "PO")]


# Build dict from two lists (first list supplies keys, second supplies values)
drug_dose_map = dict(zip(drugs, doses))
# {"warfarin": 5, "aspirin": 81, "metformin": 500}


# Stops at shortest — no IndexError
long_list  = [1, 2, 3, 4, 5]
short_list = ["a", "b", "c"]
print(list(zip(long_list, short_list)))
# [(1, "a"), (2, "b"), (3, "c")] — 4 and 5 are silently dropped


# zip_longest: fill missing values with a default
# (fillvalue=None is already the default; it is spelled out here for clarity)
from itertools import zip_longest
print(list(zip_longest(long_list, short_list, fillvalue=None)))
# [(1, "a"), (2, "b"), (3, "c"), (4, None), (5, None)]

zip() for Parallel Iteration

Python
# Three lists aligned by index: questions[i] goes with answers[i] and scores[i]
questions = [
    "What is warfarin?",
    "What is metformin?",
    "What is aspirin?",
]
answers = [
    "Warfarin is an anticoagulant that inhibits VKORC1.",
    "Metformin is a biguanide antidiabetic that activates AMPK.",
    "Aspirin is an NSAID that inhibits COX-1 and COX-2.",
]
scores  = [0.92, 0.87, 0.95]

# Iterate all three in parallel
for q, a, s in zip(questions, answers, scores):
    print(f"Q: {q}")
    print(f"A: {a}")
    print(f"Score: {s:.2f}\n")


# Evaluating RAG outputs — pair expected vs actual
expected = ["VKORC1", "AMPK", "COX-1"]
actual   = ["VKORC1", "AMP-activated kinase", "COX-1 and COX-2"]

for exp, act in zip(expected, actual):
    # Case-insensitive substring check: True for the first and third pairs,
    # False for the second ("ampk" is not a substring of "amp-activated kinase").
    match = exp.lower() in act.lower()
    print(f"Expected: {exp} | Got: {act} | Match: {match}")

Unzipping with zip(*matrix)

Python
# Transpose a matrix (here a list of row tuples)
matrix = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

# Unzip = transpose: * unpacks the rows so zip pairs them up column by column
transposed = list(zip(*matrix))
# [(1, 4, 7), (2, 5, 8), (3, 6, 9)]


# Common in ML: separate (question, answer) pairs
qa_pairs = [
    ("What is warfarin?", "An anticoagulant."),
    ("What is aspirin?", "An NSAID."),
    ("What is metformin?", "A biguanide."),
]

# zip yields tuples, so questions and answers are tuples, not lists.
# Unpacking into two names needs at least one pair: with an empty qa_pairs,
# zip(*qa_pairs) yields nothing and this line raises ValueError.
questions, answers = zip(*qa_pairs)
print(list(questions))   # ["What is warfarin?", ...]
print(list(answers))     # ["An anticoagulant.", ...]

Practical AI Patterns

Python
# 1. Batch embedding with map
from openai import OpenAI
client = OpenAI()

texts = ["warfarin mechanism", "metformin side effects", "aspirin dose"]
# Send the whole list in ONE request: the embeddings endpoint accepts a list
# of inputs, so a separate network round-trip per text is unnecessary.
response = client.embeddings.create(input=texts, model="text-embedding-3-small")
# Each returned item carries the index of the input it belongs to; sorting by
# it guarantees embeddings[i] corresponds to texts[i].
ordered_items = sorted(response.data, key=lambda item: item.index)
# map pulls the vector out of every response item.
embeddings = list(map(lambda item: item.embedding, ordered_items))

# 2. Filter retrieved docs by score threshold
def filter_by_relevance(docs: list, scores: list[float], threshold: float = 0.75) -> list:
    """Return the docs whose paired score is at least ``threshold``.

    Docs and scores are matched by position; pairing stops at the shorter of
    the two lists, so unmatched trailing items are ignored.
    """
    relevant = []
    for doc, score in zip(docs, scores):
        if score >= threshold:
            relevant.append(doc)
    return relevant


# 3. Build Q&A dataset aligned by index
def build_dataset(questions: list[str], answers: list[str], sources: list[str]) -> list[dict]:
    """Combine three index-aligned lists into a list of Q&A records.

    Each record is a dict with the keys "question", "answer" and "source".
    Rows are paired by position and pairing stops at the shortest list.
    """
    fields = ("question", "answer", "source")
    rows = zip(questions, answers, sources)
    return [dict(zip(fields, row)) for row in rows]


# 4. Map over a list of model configs to create models
from langchain_openai import ChatOpenAI

# ChatOpenAI sends requests to the OpenAI API, so every name in this list must
# be an OpenAI model. An Anthropic model such as "claude-haiku-4-5" would need
# a different client class, so it is not mixed in here.
model_names = ["gpt-4o", "gpt-4o-mini", "gpt-4.1-mini"]
# One client per model name; temperature=0 is passed to every client.
models = list(map(lambda m: ChatOpenAI(model=m, temperature=0), model_names))


# 5. Compare two drug lists for differences
def find_added_removed(old: list[str], new: list[str]) -> tuple[list, list]:
    """Return ``(added, removed)`` describing how ``new`` differs from ``old``.

    ``added`` holds the names present in ``new`` but not in ``old``;
    ``removed`` holds the names present in ``old`` but not in ``new``.
    Each name appears once, in the order it first occurs in its source list,
    so the result is deterministic (a bare set difference has arbitrary order).
    """
    old_set = set(old)
    new_set = set(new)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    added = [name for name in dict.fromkeys(new) if name not in old_set]
    removed = [name for name in dict.fromkeys(old) if name not in new_set]
    return added, removed


added, removed = find_added_removed(
    ["warfarin", "aspirin"],
    ["warfarin", "metformin", "lisinopril"],
)
print(f"Added: {added}")     # ['metformin', 'lisinopril']
print(f"Removed: {removed}") # ['aspirin']

Summary

| Function | Returns | When to use |
|---|---|---|
| map(fn, iterable) | lazy iterator | Apply function to every element; cleaner with method references |
| filter(fn, iterable) | lazy iterator | Keep elements where fn returns a truthy value |
| zip(*iterables) | lazy iterator of tuples | Parallel iteration over same-length sequences |
| zip_longest(...) | lazy iterator of tuples | zip with unequal lengths |
| zip(*matrix) | lazy iterator of tuples (the transpose) | Swap rows and columns |
| List comprehension | list | Most general; usually more readable than map/filter |