Python Essentials for AI Engineers · Lesson 6 of 36
What is a Dictionary?
What is a Dictionary?
A Python dictionary is a hash map: a collection of key-value pairs in which every key is unique (and must be hashable), and looking up a value by its key takes O(1) time on average.
Python
# Creation
drug_info = {
    "name": "warfarin",
    "class": "anticoagulant",
    "dose_mg": 5,
    "indication": "atrial fibrillation",
}

# Keys can be any hashable type
mixed_keys = {
    "string_key": 1,
    42: "int key",
    (1, 2): "tuple key",  # Tuples are hashable (when all their elements are)
    # [1, 2]: "list key",  # Lists are NOT hashable — TypeError
}

# Since Python 3.7, dictionaries preserve insertion order.
Accessing Values
Python
patient = {"id": "P001", "age": 67, "inr": 2.4}

# Direct access — raises KeyError if key missing
print(patient["id"])  # "P001"
# print(patient["weight"])  # KeyError! (commented out so the rest of the snippet runs)

# .get() — returns default instead of raising
print(patient.get("id"))  # "P001"
print(patient.get("weight"))  # None
print(patient.get("weight", 70.0))  # 70.0 — custom default

# Membership test
print("id" in patient)  # True
print("weight" in patient)  # False


# Modifying Dictionaries
Python
config = {"model": "gpt-4o", "temperature": 0}

# Add or update
config["max_tokens"] = 500  # Add new key
config["temperature"] = 0.2  # Update existing key

# Delete
del config["max_tokens"]  # Would raise KeyError if the key were absent
removed = config.pop("temperature")  # Remove + return value (0.2 here)
config.pop("missing_key", None)  # Safe delete — no KeyError

# Conditional add (only if key doesn't exist)
config.setdefault("timeout", 30)  # Adds "timeout": 30 if not present
config.setdefault("model", "gpt-3.5")  # Does NOT overwrite existing "model"
print(config["model"])  # Still "gpt-4o"

# Update from another dict
overrides = {"temperature": 0.3, "stream": True}
config.update(overrides)  # In-place merge — values from overrides win on conflict

# Merge operator (Python 3.9+)
defaults = {"temperature": 0, "max_tokens": 500}
result = defaults | overrides  # New dict — right operand wins; operands unchanged
# {"temperature": 0.3, "max_tokens": 500, "stream": True}


# Iterating
Python
drug_doses = {"warfarin": 5, "metformin": 500, "lisinopril": 10}

# Iterate keys (default)
for drug in drug_doses:
    print(drug)

# Iterate values
for dose in drug_doses.values():
    print(dose)

# Iterate key-value pairs (most common)
for drug, dose in drug_doses.items():
    print(f"{drug}: {dose}mg")

# List of keys, values, items (materialized in insertion order)
keys = list(drug_doses.keys())  # ["warfarin", "metformin", "lisinopril"]
values = list(drug_doses.values())  # [5, 500, 10]
items = list(drug_doses.items())  # [("warfarin", 5), ("metformin", 500), ...]


# Nested Dictionaries
Python
# Common in AI: config objects, JSON from APIs
patient_records = {
    "P001": {
        "name": "Jane Smith",
        "medications": ["warfarin", "aspirin"],
        "labs": {"inr": 2.4, "hba1c": 7.2},
    },
    "P002": {
        "name": "John Doe",
        "medications": ["metformin"],
        "labs": {"hba1c": 8.1},
    },
}

# Access nested values — raises KeyError if any level is missing
print(patient_records["P001"]["labs"]["inr"])  # 2.4

# Safe nested access with .get(): each level falls back to an empty dict,
# so a missing key yields None at the end instead of raising.
# Caveat: a key that is present but holds None still breaks the chain,
# because the {} default is only used when the key is absent.
inr = patient_records.get("P001", {}).get("labs", {}).get("inr")
print(inr)  # 2.4 — safe even if any level missing


# Dictionary Comprehensions
Python
# {key: value for item in iterable}
drugs = ["warfarin", "aspirin", "metformin"]
lengths = {drug: len(drug) for drug in drugs}
# {"warfarin": 8, "aspirin": 7, "metformin": 9}

# With condition
long_names = {drug: len(drug) for drug in drugs if len(drug) > 7}
# {"warfarin": 8, "metformin": 9}

# From list of tuples (dict(pairs) builds the same mapping)
pairs = [("warfarin", 5), ("metformin", 500), ("lisinopril", 10)]
dose_map = {drug: dose for drug, dose in pairs}

# Invert a dictionary (value → key)
# Only lossless when the values are unique and hashable: if two keys share
# a value, the later key overwrites the earlier one in the inverted dict.
original = {"warfarin": "anticoagulant", "metformin": "antidiabetic"}
inverted = {v: k for k, v in original.items()}
# {"anticoagulant": "warfarin", "antidiabetic": "metformin"}


# defaultdict: Auto-Initialize Missing Keys
Python
from collections import defaultdict
# Problem with regular dict
drug_categories = {}
# drug_categories["anticoagulant"].append("warfarin")  # KeyError! (commented out so the snippet runs)

# Solution: defaultdict
# Looking up a missing key calls list() and stores the new empty list under
# that key, so .append() always has something to append to.
drug_categories = defaultdict(list)
drug_categories["anticoagulant"].append("warfarin")  # Works
drug_categories["anticoagulant"].append("heparin")
drug_categories["antidiabetic"].append("metformin")
print(dict(drug_categories))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin"]}
# Common pattern: grouping / counting
from collections import defaultdict
def group_by_category(drug_list: list[tuple[str, str]]) -> dict[str, list[str]]:
    """Group (drug, category) pairs by category.

    Args:
        drug_list: ``(drug, category)`` pairs, in any order.

    Returns:
        A plain ``dict`` mapping each category to the drugs listed under it.
        Categories appear in first-seen order and the drugs within a category
        keep their input order. An empty input yields an empty dict.
    """
    groups: defaultdict[str, list[str]] = defaultdict(list)
    for drug, category in drug_list:
        groups[category].append(drug)
    # Return a plain dict so that a caller looking up an unknown category
    # gets a KeyError instead of silently inserting an empty list.
    return dict(groups)
# Example input: (drug, category) pairs
drugs = [
    ("warfarin", "anticoagulant"),
    ("heparin", "anticoagulant"),
    ("metformin", "antidiabetic"),
    ("glipizide", "antidiabetic"),
]
print(group_by_category(drugs))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin", "glipizide"]}


# Counter: Count Elements
Python
from collections import Counter
# Count word/token frequencies
tokens = ["the", "drug", "warfarin", "the", "drug", "drug", "aspirin", "the"]
counts = Counter(tokens)
print(counts)
# Counter({"the": 3, "drug": 3, "warfarin": 1, "aspirin": 1})
print(counts["the"])  # 3
print(counts["missing"])  # 0 — no KeyError, and the key is not inserted
print(counts.most_common(2))  # [("the", 3), ("drug", 3)]

# Counter arithmetic — every operator returns a new Counter that keeps
# only the elements whose resulting count is positive
tokens_2 = ["drug", "warfarin", "new", "new"]
counts_2 = Counter(tokens_2)
combined = counts + counts_2  # Add counts
difference = counts - counts_2  # Subtract (zero and negative results are dropped)
intersection = counts & counts_2  # Min of each
union = counts | counts_2  # Max of each
# Common in RAG/NLP: vocabulary analysis
def analyze_vocabulary(texts: list[str]) -> dict:
    """Summarize token frequencies across a collection of texts.

    Each text is lower-cased and split on whitespace; punctuation is not
    stripped, so "drug" and "drug." count as different tokens.

    Args:
        texts: The strings to analyze.

    Returns:
        A dict with three keys:
        ``"total_tokens"`` — number of tokens across all texts,
        ``"unique_tokens"`` — number of distinct tokens,
        ``"top_10"`` — up to ten ``(token, count)`` pairs, most frequent first.
        An empty input yields zero counts and an empty ``"top_10"`` list.
    """
    counter = Counter()
    for text in texts:
        # Feed tokens straight into the Counter; no intermediate list of
        # every word is needed.
        counter.update(text.lower().split())
    return {
        "total_tokens": sum(counter.values()),
        "unique_tokens": len(counter),
        "top_10": counter.most_common(10),
    }


# Patterns in AI/ML Code
Python
# 1. LLM call configuration
# Plain dict holding the settings for one LLM call.
llm_config = {
    "model": "gpt-4o",
    "temperature": 0,
    "max_tokens": 1000,
    "timeout": 30,
}
# 2. Results accumulation
# NOTE(review): `experiments` is not defined in this snippet — it is assumed
# to be an iterable of objects exposing `.name` and `.score`.
results: dict[str, list[float]] = {}
for experiment in experiments:
    # setdefault returns the list already stored under the name, or inserts
    # a fresh empty list first, so one line handles both cases.
    results.setdefault(experiment.name, []).append(experiment.score)
# 3. Caching LLM responses
# Maps each prompt to the response stored for it. The dict is never pruned,
# so it grows with the number of distinct prompts.
cache: dict[str, str] = {}


def cached_llm_call(prompt: str) -> str:
    """Return the response for ``prompt``, invoking the LLM only on a cache miss.

    NOTE(review): ``llm`` is not defined in this snippet; it is assumed to be
    a client object whose ``invoke(prompt)`` returns the response text.

    Args:
        prompt: The exact prompt text; it is also the cache key, so any
            difference in the string is treated as a new prompt.

    Returns:
        The cached response if this prompt was seen before, otherwise the
        fresh result of ``llm.invoke(prompt)`` (which is then cached).
    """
    if prompt not in cache:
        cache[prompt] = llm.invoke(prompt)
    return cache[prompt]
# 4. Metadata for vector store documents
# Descriptive fields attached to one document; note "version" is a string.
doc_metadata = {
    "source": "warfarin_guidelines.pdf",
    "page": 42,
    "category": "anticoagulant",
    "version": "2026",
}
# 5. Routing logic
# Maps a query-complexity label to the model that should handle it.
MODEL_ROUTES: dict[str, str] = {
    "simple": "gpt-4o-mini",
    "complex": "gpt-4o",
    "analysis": "claude-opus-4-7",
}

# Model used when the complexity label has no entry in MODEL_ROUTES.
DEFAULT_MODEL: str = "gpt-4o-mini"


def route_query(complexity: str) -> str:
    """Return the model name to use for a query of the given complexity.

    Args:
        complexity: A label such as "simple", "complex" or "analysis".

    Returns:
        The model registered for ``complexity`` in ``MODEL_ROUTES``, or
        ``DEFAULT_MODEL`` when the label is unknown. Never raises for an
        unrecognized label.
    """
    return MODEL_ROUTES.get(complexity, DEFAULT_MODEL)


# Time Complexity
| Operation | Average | Worst Case |
|---|---|---|
| d[key] | O(1) | O(n) — hash collision |
| d[key] = value | O(1) | O(n) |
| key in d | O(1) | O(n) |
| del d[key] | O(1) | O(n) |
| Iteration | O(n) | O(n) |
| len(d) | O(1) | O(1) |
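The O(n) worst case arises only when many keys collide into the same hash-table slots. Individual collisions are normal and handled cheaply, but such pathological collision chains are rare in practice with Python's hash function. For typical string and int keys, all operations are effectively O(1).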