Python Essentials for AI Engineers · Lesson 6 of 36

What is a Dictionary?

A Python dictionary is a hash map: a collection of key-value pairs in which every key is unique and hashable, and looking up, inserting, or deleting a value by its key takes O(1) time on average.

Python

# Creation: a dict literal is a brace-enclosed list of key: value pairs
drug_info = {
    "name": "warfarin",
    "class": "anticoagulant",   # "class" is a keyword, but fine as a string key
    "dose_mg": 5,
    "indication": "atrial fibrillation",
}

# Keys can be any hashable type, and a single dict may mix key types
mixed_keys = {
    "string_key": 1,
    42: "int key",
    (1, 2): "tuple key",   # Tuples are hashable when all their elements are
    # [1, 2]: "list key",  # Lists are NOT hashable — TypeError: unhashable type: 'list'
}

Since Python 3.7, dictionaries are guaranteed to preserve insertion order: iterating over a dict yields its keys in the order they were first inserted.

Accessing Values

Python

patient = {"id": "P001", "age": 67, "inr": 2.4}

# Direct access — raises KeyError if key missing
print(patient["id"])       # "P001"
try:
    print(patient["weight"])
except KeyError as exc:
    # Caught here so the remaining examples still run; left uncaught, this
    # lookup would stop the script at this line.
    print(f"KeyError: {exc}")   # KeyError: 'weight'

# .get() — returns default instead of raising
print(patient.get("id"))              # "P001"
print(patient.get("weight"))          # None
print(patient.get("weight", 70.0))   # 70.0 — custom default

# Membership test — checks keys, not values
print("id" in patient)       # True
print("weight" in patient)   # False

Modifying Dictionaries

Python

config = {"model": "gpt-4o", "temperature": 0}

# Add or update — the same assignment syntax does both
config["max_tokens"] = 500     # Add new key
config["temperature"] = 0.2   # Update existing key

# Delete
del config["max_tokens"]                     # Raises KeyError if the key is missing
removed = config.pop("temperature")          # Remove + return value (removed == 0.2)
config.pop("missing_key", None)              # Safe delete — default returned, no KeyError

# Conditional add (only if key doesn't exist)
config.setdefault("timeout", 30)   # Adds "timeout": 30 if not present
config.setdefault("model", "gpt-3.5")  # Does NOT overwrite existing "model"
print(config["model"])   # Still "gpt-4o"

# Update from another dict
overrides = {"temperature": 0.3, "stream": True}
config.update(overrides)   # In-place merge — values from `overrides` win on conflict
# config is now {"model": "gpt-4o", "timeout": 30, "temperature": 0.3, "stream": True}

# Merge operator (Python 3.9+)
defaults = {"temperature": 0, "max_tokens": 500}
result = defaults | overrides   # New dict — right operand wins on conflict; inputs unchanged
# {"temperature": 0.3, "max_tokens": 500, "stream": True}

Iterating

Python

drug_doses = {"warfarin": 5, "metformin": 500, "lisinopril": 10}

# Iterate keys (default) — a dict iterates over its keys in insertion order
for drug in drug_doses:
    print(drug)

# Iterate values
for dose in drug_doses.values():
    print(dose)

# Iterate key-value pairs (most common) — each item unpacks into (key, value)
for drug, dose in drug_doses.items():
    print(f"{drug}: {dose}mg")

# List of keys, values, items — list() copies each view into a standalone list
keys   = list(drug_doses.keys())    # ["warfarin", "metformin", "lisinopril"]
values = list(drug_doses.values())  # [5, 500, 10]
items  = list(drug_doses.items())   # [("warfarin", 5), ("metformin", 500), ("lisinopril", 10)]

Nested Dictionaries

Python

# Common in AI: config objects, JSON from APIs
# Outer dict is keyed by patient id; each value is itself a dict that holds
# a list ("medications") and another dict ("labs").
patient_records = {
    "P001": {
        "name": "Jane Smith",
        "medications": ["warfarin", "aspirin"],
        "labs": {"inr": 2.4, "hba1c": 7.2},
    },
    "P002": {
        "name": "John Doe",
        "medications": ["metformin"],
        "labs": {"hba1c": 8.1},
    },
}

# Access nested values — raises KeyError if any key along the path is missing
print(patient_records["P001"]["labs"]["inr"])   # 2.4

# Safe nested access with .get(): the {} default gives the next .get() an
# empty dict to search when a level is absent, so the chain ends in None.
inr = patient_records.get("P001", {}).get("labs", {}).get("inr")
print(inr)   # 2.4 — would be None if any key on the path were missing
# e.g. the same chain for "P002" yields None because its labs have no "inr".
# This only guards against missing keys: a key that is present but holds None
# (or any non-dict) would still make the next .get() fail.

Dictionary Comprehensions

Python

# {key: value for item in iterable}
drugs = ["warfarin", "aspirin", "metformin"]
lengths = {drug: len(drug) for drug in drugs}
# {"warfarin": 8, "aspirin": 7, "metformin": 9}

# With condition — only items passing the `if` produce an entry
long_names = {drug: len(drug) for drug in drugs if len(drug) > 7}
# {"warfarin": 8, "metformin": 9}

# From list of tuples — each tuple unpacks into (key, value);
# dict(pairs) builds the same mapping without a comprehension
pairs = [("warfarin", 5), ("metformin", 500), ("lisinopril", 10)]
dose_map = {drug: dose for drug, dose in pairs}
# {"warfarin": 5, "metformin": 500, "lisinopril": 10}

# Invert a dictionary (value → key)
# Only lossless when the values are unique and hashable: if two keys shared a
# value, the later one would overwrite the earlier one in the result.
original = {"warfarin": "anticoagulant", "metformin": "antidiabetic"}
inverted = {v: k for k, v in original.items()}
# {"anticoagulant": "warfarin", "antidiabetic": "metformin"}

`defaultdict`: Auto-Initialize Missing Keys

Python

from collections import defaultdict

# Problem with regular dict: a missing key has no list to append to
drug_categories = {}
try:
    drug_categories["anticoagulant"].append("warfarin")
except KeyError as exc:
    # Caught so the example keeps running and the fix below is reached;
    # left uncaught, this lookup would stop the script at this line.
    print(f"KeyError: {exc}")   # KeyError: 'anticoagulant'

# Solution: defaultdict calls list() to create the value on first access
drug_categories = defaultdict(list)
drug_categories["anticoagulant"].append("warfarin")   # Works
drug_categories["anticoagulant"].append("heparin")
drug_categories["antidiabetic"].append("metformin")

# dict(...) converts to a plain dict so the output shows just the mapping
print(dict(drug_categories))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin"]}


# Common pattern: grouping / counting
from collections import defaultdict

def group_by_category(drug_list: list[tuple[str, str]]) -> dict[str, list[str]]:
    """Bucket drug names under their category.

    Args:
        drug_list: ``(drug, category)`` pairs, in any order.

    Returns:
        A plain ``dict`` mapping each category to the drugs listed for it.
        Categories appear in first-seen order and drugs keep their input
        order within a category.
    """
    by_category: defaultdict[str, list[str]] = defaultdict(list)
    for pair in drug_list:
        name, bucket = pair
        by_category[bucket].append(name)
    # Hand back a regular dict so that looking up an unknown category later
    # raises KeyError instead of silently inserting an empty list.
    return dict(by_category)


# Example input: (drug, category) pairs. Pairs sharing a category end up in
# the same list, in the order they appear here.
drugs = [
    ("warfarin", "anticoagulant"),
    ("heparin", "anticoagulant"),
    ("metformin", "antidiabetic"),
    ("glipizide", "antidiabetic"),
]
print(group_by_category(drugs))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin", "glipizide"]}

`Counter`: Count Elements

Python

from collections import Counter

# Count word/token frequencies
tokens = ["the", "drug", "warfarin", "the", "drug", "drug", "aspirin", "the"]
counts = Counter(tokens)
print(counts)
# Counter({"the": 3, "drug": 3, "warfarin": 1, "aspirin": 1})

print(counts["the"])          # 3
print(counts["missing"])      # 0 — no KeyError, and the key is not added
print(counts.most_common(2))  # [("the", 3), ("drug", 3)] — ties keep first-seen order

# Counter arithmetic — each operator returns a new Counter and keeps only
# entries whose resulting count is positive
tokens_2 = ["drug", "warfarin", "new", "new"]
counts_2 = Counter(tokens_2)
# Counter({"new": 2, "drug": 1, "warfarin": 1})

combined = counts + counts_2          # Add counts
# {"the": 3, "drug": 4, "warfarin": 2, "aspirin": 1, "new": 2}
difference = counts - counts_2        # Subtract; zero/negative results are dropped
# {"the": 3, "drug": 2, "aspirin": 1}
intersection = counts & counts_2      # Min of each count (keys present in both)
# {"drug": 1, "warfarin": 1}
union = counts | counts_2             # Max of each count (keys present in either)
# {"the": 3, "drug": 3, "warfarin": 1, "aspirin": 1, "new": 2}

# Common in RAG/NLP: vocabulary analysis
def analyze_vocabulary(texts: list[str]) -> dict:
    """Summarise whitespace-token frequencies across ``texts``.

    Each text is lowercased and split on whitespace; punctuation is not
    stripped, so ``"drug."`` and ``"drug"`` count as different tokens.

    Args:
        texts: Documents to tokenize.

    Returns:
        A dict with three keys: ``"total_tokens"`` (number of tokens seen),
        ``"unique_tokens"`` (number of distinct tokens) and ``"top_10"``
        (up to ten ``(token, count)`` pairs, most frequent first).
    """
    frequencies = Counter()
    for document in texts:
        # update() adds to the running counts, so no combined word list is
        # built before counting.
        frequencies.update(document.lower().split())

    total = sum(frequencies.values())
    distinct = len(frequencies)
    return {
        "total_tokens": total,
        "unique_tokens": distinct,
        "top_10": frequencies.most_common(10),
    }

Patterns in AI/ML Code

Python

# 1. LLM call configuration
# A flat dict of request settings kept in one place.
# NOTE(review): the unit of "timeout" is not shown here — presumably seconds; confirm.
llm_config = {
    "model": "gpt-4o",
    "temperature": 0,
    "max_tokens": 1000,
    "timeout": 30,
}

# 2. Results accumulation
# NOTE(review): `experiments` is not defined in this snippet — assumed to be an
# iterable of objects exposing `.name` and `.score`; confirm against the caller.
results: dict[str, list[float]] = {}
for experiment in experiments:
    # setdefault returns the existing list for this name, or inserts and
    # returns a new empty one, so append always has a list to extend.
    results.setdefault(experiment.name, []).append(experiment.score)

# 3. Caching LLM responses
# Module-level store mapping each prompt to the response first returned for it.
cache: dict[str, str] = {}


def cached_llm_call(prompt: str) -> str:
    """Return the response for ``prompt``, invoking the model only on a miss.

    NOTE(review): ``llm`` is not defined in this snippet — it is assumed to be
    an object created elsewhere whose ``invoke(prompt)`` returns a ``str``.
    Nothing here evicts entries, so ``cache`` gains one entry per distinct
    prompt for as long as the module lives.
    """
    if prompt in cache:
        return cache[prompt]
    # Miss: call the model once and remember the answer for later calls.
    response = llm.invoke(prompt)
    cache[prompt] = response
    return response

# 4. Metadata for vector store documents
# A flat dict of descriptive fields about one document; values can mix types.
doc_metadata = {
    "source": "warfarin_guidelines.pdf",
    "page": 42,                   # stored as an int
    "category": "anticoagulant",
    "version": "2026",            # stored as a string, unlike "page"
}

# 5. Routing logic
# Lookup table from a query-complexity label to the model name that serves it.
MODEL_ROUTES: dict[str, str] = {
    "simple": "gpt-4o-mini",
    "complex": "gpt-4o",
    "analysis": "claude-opus-4-7",
}


def route_query(complexity: str) -> str:
    """Pick the model name for a complexity label.

    Args:
        complexity: A key of ``MODEL_ROUTES`` such as ``"simple"``.

    Returns:
        The mapped model name, or ``"gpt-4o-mini"`` when the label is not a
        key of ``MODEL_ROUTES``.
    """
    fallback_model = "gpt-4o-mini"
    return MODEL_ROUTES.get(complexity, fallback_model)

Time Complexity

| Operation | Average | Worst Case |
|---|---|---|
| `d[key]` | O(1) | O(n) — hash collision |
| `d[key] = value` | O(1) | O(n) |
| `key in d` | O(1) | O(n) |
| `del d[key]` | O(1) | O(n) |
| Iteration | O(n) | O(n) |
| `len(d)` | O(1) | O(1) |

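Hash collisions (the cause of the worst case) are rare in practice with Python's hash function. For typical string and int keys, every single-key operation — lookup, insertion, membership test, and deletion — is effectively O(1); only iterating over the whole dictionary is O(n).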

What is the difference between List and Tuple?

Next Lesson

What is a Set and when should we use it?

What is a Dictionary?