Learnixo

Python Essentials for AI Engineers · Lesson 6 of 36

What is a Dictionary?

What is a Dictionary?

A Python dictionary is a hash map: a collection of key-value pairs where keys are unique and access is O(1) average time.

Python
# Creation
drug_info = {
    "name": "warfarin",
    "class": "anticoagulant",
    "dose_mg": 5,
    "indication": "atrial fibrillation",
}

# Keys can be any hashable type
mixed_keys = {
    "string_key": 1,
    42: "int key",
    (1, 2): "tuple key",   # Tuples are hashable
    # [1, 2]: "list key",  # Lists are NOT hashable → TypeError
}

Since Python 3.7, dictionaries preserve insertion order.


Accessing Values

Python
patient = {"id": "P001", "age": 67, "inr": 2.4}

# Direct access — raises KeyError if key missing
print(patient["id"])       # "P001"
print(patient["weight"])   # KeyError!

# .get() — returns a default instead of raising
print(patient.get("id"))              # "P001"
print(patient.get("weight"))          # None
print(patient.get("weight", 70.0))   # 70.0 — custom default

# Membership test
print("id" in patient)       # True
print("weight" in patient)   # False

Modifying Dictionaries

Python
config = {"model": "gpt-4o", "temperature": 0}

# Add or update
config["max_tokens"] = 500     # Add new key
config["temperature"] = 0.2   # Update existing key

# Delete
del config["max_tokens"]
removed = config.pop("temperature")          # Remove + return value
config.pop("missing_key", None)              # Safe delete — no KeyError

# Conditional add (only if key doesn't exist)
config.setdefault("timeout", 30)   # Adds "timeout": 30 if not present
config.setdefault("model", "gpt-3.5")  # Does NOT overwrite existing "model"
print(config["model"])   # Still "gpt-4o"

# Update from another dict
overrides = {"temperature": 0.3, "stream": True}
config.update(overrides)   # Merges in place — values from overrides win on conflict

# Merge operator (Python 3.9+)
defaults = {"temperature": 0, "max_tokens": 500}
result = defaults | overrides   # New dict — defaults + overrides
# {"temperature": 0.3, "max_tokens": 500, "stream": True}

Iterating

Python
drug_doses = {"warfarin": 5, "metformin": 500, "lisinopril": 10}

# Iterate keys (default)
for drug in drug_doses:
    print(drug)

# Iterate values
for dose in drug_doses.values():
    print(dose)

# Iterate key-value pairs (most common)
for drug, dose in drug_doses.items():
    print(f"{drug}: {dose}mg")

# List of keys, values, items
keys   = list(drug_doses.keys())    # ["warfarin", "metformin", "lisinopril"]
values = list(drug_doses.values())  # [5, 500, 10]
items  = list(drug_doses.items())   # [("warfarin", 5), ("metformin", 500), ...]

Nested Dictionaries

Python
# Common in AI: config objects, JSON from APIs
patient_records = {
    "P001": {
        "name": "Jane Smith",
        "medications": ["warfarin", "aspirin"],
        "labs": {"inr": 2.4, "hba1c": 7.2},
    },
    "P002": {
        "name": "John Doe",
        "medications": ["metformin"],
        "labs": {"hba1c": 8.1},
    },
}

# Access nested values
print(patient_records["P001"]["labs"]["inr"])   # 2.4

# Safe nested access with .get()
inr = patient_records.get("P001", {}).get("labs", {}).get("inr")
print(inr)   # 2.4 — yields None instead of raising if any key along the path is missing

Dictionary Comprehensions

Python
# {key: value for item in iterable}
drugs = ["warfarin", "aspirin", "metformin"]
lengths = {drug: len(drug) for drug in drugs}
# {"warfarin": 8, "aspirin": 7, "metformin": 9}

# With condition
long_names = {drug: len(drug) for drug in drugs if len(drug) > 7}
# {"warfarin": 8, "metformin": 9}

# From list of tuples
pairs = [("warfarin", 5), ("metformin", 500), ("lisinopril", 10)]
dose_map = {drug: dose for drug, dose in pairs}

# Invert a dictionary (value → key)
original = {"warfarin": "anticoagulant", "metformin": "antidiabetic"}
inverted = {v: k for k, v in original.items()}
# {"anticoagulant": "warfarin", "antidiabetic": "metformin"}

defaultdict: Auto-Initialize Missing Keys

Python
from collections import defaultdict

# Problem with regular dict
drug_categories = {}
drug_categories["anticoagulant"].append("warfarin")   # KeyError!

# Solution: defaultdict
drug_categories = defaultdict(list)
drug_categories["anticoagulant"].append("warfarin")   # Works
drug_categories["anticoagulant"].append("heparin")
drug_categories["antidiabetic"].append("metformin")

print(dict(drug_categories))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin"]}


# Common pattern: grouping / counting
from collections import defaultdict

def group_by_category(drug_list: list[tuple[str, str]]) -> dict[str, list[str]]:
    """Bucket drug names under their category.

    Args:
        drug_list: ``(drug, category)`` pairs.

    Returns:
        A plain ``dict`` mapping each category to the list of drugs seen
        for it, in the order the pairs appear in ``drug_list``.
    """
    # defaultdict(list) creates the empty list the first time a category
    # is touched, so no "is the key there yet?" check is needed.
    by_category: dict = defaultdict(list)
    for pair in drug_list:
        name, label = pair
        by_category[label].append(name)
    # Convert to a regular dict so later lookups of unknown categories
    # do not silently create new entries.
    return dict(by_category)


drugs = [
    ("warfarin", "anticoagulant"),
    ("heparin", "anticoagulant"),
    ("metformin", "antidiabetic"),
    ("glipizide", "antidiabetic"),
]
print(group_by_category(drugs))
# {"anticoagulant": ["warfarin", "heparin"], "antidiabetic": ["metformin", "glipizide"]}

Counter: Count Elements

Python
from collections import Counter

# Count word/token frequencies
tokens = ["the", "drug", "warfarin", "the", "drug", "drug", "aspirin", "the"]
counts = Counter(tokens)
print(counts)
# Counter({"the": 3, "drug": 3, "warfarin": 1, "aspirin": 1})

print(counts["the"])          # 3
print(counts["missing"])      # 0 — no KeyError
print(counts.most_common(2))  # [("the", 3), ("drug", 3)]

# Counter arithmetic
tokens_2 = ["drug", "warfarin", "new", "new"]
counts_2 = Counter(tokens_2)

combined = counts + counts_2          # Add counts
difference = counts - counts_2        # Subtract (floor at 0)
intersection = counts & counts_2      # Min of each
union = counts | counts_2             # Max of each

# Common in RAG/NLP: vocabulary analysis
def analyze_vocabulary(texts: list[str]) -> dict:
    """Summarise word frequencies across a collection of texts.

    Every text is lowercased and split on whitespace; the resulting
    words are tallied together.

    Args:
        texts: The strings to analyse.

    Returns:
        A dict with three keys:
        ``"total_tokens"`` (number of words counted, repeats included),
        ``"unique_tokens"`` (number of distinct words), and
        ``"top_10"`` (up to ten ``(word, count)`` pairs, most frequent first).
    """
    frequencies = Counter()
    for document in texts:
        # Counter.update adds one to the tally of each word it is given.
        frequencies.update(document.lower().split())

    token_total = sum(frequencies.values())
    distinct_total = len(frequencies)
    most_frequent = frequencies.most_common(10)
    return {
        "total_tokens": token_total,
        "unique_tokens": distinct_total,
        "top_10": most_frequent,
    }

Patterns in AI/ML Code

Python
# 1. LLM call configuration
llm_config = {
    "model": "gpt-4o",
    "temperature": 0,
    "max_tokens": 1000,
    "timeout": 30,
}

# 2. Results accumulation
results: dict[str, list[float]] = {}
for experiment in experiments:
    results.setdefault(experiment.name, []).append(experiment.score)

# 3. Caching LLM responses
# Module-level memo: prompt text -> response already obtained for it.
cache: dict[str, str] = {}
def cached_llm_call(prompt: str) -> str:
    """Return the response for ``prompt``, reusing a stored one if present.

    On a cache miss the prompt is passed to ``llm.invoke`` and the result is
    stored in ``cache`` under the prompt before being returned. ``llm`` is
    not defined in this snippet and must be supplied by the surrounding code.
    """
    # Cache hit: no call to the model at all.
    if prompt in cache:
        return cache[prompt]
    response = llm.invoke(prompt)
    cache[prompt] = response
    return response

# 4. Metadata for vector store documents
doc_metadata = {
    "source": "warfarin_guidelines.pdf",
    "page": 42,
    "category": "anticoagulant",
    "version": "2026",
}

# 5. Routing logic
# Lookup table: query-complexity label -> name of the model to use for it.
MODEL_ROUTES: dict[str, str] = {
    "simple": "gpt-4o-mini",
    "complex": "gpt-4o",
    "analysis": "claude-opus-4-7",
}

def route_query(complexity: str) -> str:
    """Pick the model name for a given complexity label.

    Args:
        complexity: A label expected to be one of the ``MODEL_ROUTES`` keys.

    Returns:
        The mapped model name, or ``"gpt-4o-mini"`` when the label is not
        in ``MODEL_ROUTES``.
    """
    # Unknown labels fall back to a fixed model instead of raising.
    fallback_model = "gpt-4o-mini"
    return MODEL_ROUTES.get(complexity, fallback_model)

Time Complexity

| Operation | Average | Worst Case |
|---|---|---|
| d[key] | O(1) | O(n) — hash collision |
| d[key] = value | O(1) | O(n) |
| key in d | O(1) | O(n) |
| del d[key] | O(1) | O(n) |
| Iteration | O(n) | O(n) |
| len(d) | O(1) | O(1) |

Hash collisions (the worst case) are rare in practice with Python's hash function. For typical string and int keys, lookups, insertions, deletions, and membership tests are effectively O(1); iterating over the whole dictionary is always O(n).