Python Essentials for AI Engineers · Lesson 2 of 36
What are Python's built-in data types?
Python's Core Built-in Types
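Python's core built-in types fall into seven categories: numeric, text, boolean, sequence, mapping, set, and null. Every value in Python is an object, and most everyday values are instances of one of the types below: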
# Numeric
x: int = 42
y: float = 3.14
z: complex = 2 + 3j
# Text
name: str = "warfarin"
# Boolean
flag: bool = True
# Sequence types
items: list = [1, 2, 3] # Mutable, ordered
point: tuple = (1, 2, 3) # Immutable, ordered
# Mapping
data: dict = {"drug": "warfarin", "dose": 5}
# Set types
unique: set = {1, 2, 3} # Mutable, unordered, no duplicates
frozen: frozenset = frozenset({1, 2, 3}) # Immutable set
# Null
value: None = None
int and float
# int: arbitrary precision — no overflow in Python
big = 10 ** 100 # Works fine, no overflow
print(type(big)) # <class 'int'>
# float: 64-bit IEEE 754 — has precision limits
print(0.1 + 0.2) # 0.30000000000000004 — floating point imprecision
print(0.1 + 0.2 == 0.3) # False
# For financial/clinical precision, use Decimal
from decimal import Decimal
print(Decimal("0.1") + Decimal("0.2")) # 0.3 — exact
# Integer division
print(7 // 2) # 3 (floor division)
print(7 % 2) # 1 (modulo)
print(7 / 2) # 3.5 (true division — always returns float)
# Useful for AI: checking if the step count divides evenly into batches
batch_size = 32
step = 64  # Example value so the snippet runs on its own
if step % batch_size == 0:  # True only when step is a multiple of batch_size
    print(f"Completed batch {step // batch_size}")
str
# Strings are immutable sequences of Unicode characters
drug = "Warfarin"
print(drug.lower()) # "warfarin"
print(drug.upper()) # "WARFARIN"
print(drug.startswith("War")) # True
print(drug.replace("W", "w")) # "warfarin" — new string, original unchanged
# f-strings (preferred for formatting)
dose = 5.0
print(f"{drug}: {dose}mg daily") # "Warfarin: 5.0mg daily"
# Multi-line strings (common in LLM prompts)
system_prompt = """You are a clinical pharmacist.
Answer drug questions accurately and cite sources.
Never invent drug information."""
# String methods used in AI/NLP
text = " Warfarin 5mg PO daily "
print(text.strip()) # "Warfarin 5mg PO daily"
print(text.split()) # ["Warfarin", "5mg", "PO", "daily"]
words = ["warfarin", "aspirin"]
print(", ".join(words)) # "warfarin, aspirin"
# Slicing
print(drug[0]) # "W"
print(drug[-1]) # "n"
print(drug[1:4]) # "arf"
print(drug[::-1]) # "nirafraW"
bool
# bool is a subclass of int: True == 1, False == 0
print(True + True) # 2
print(True * 5) # 5
print(False + 1) # 1
# Truthy and falsy values
falsy_values = [False, 0, 0.0, "", [], {}, set(), None]
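# Other zero values and empty containers (e.g. 0j, (), b"", frozenset(), range(0)) are falsy too; everything else is truthy by default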
# Common in AI code
# NOTE: process() and compute_embeddings() are not defined in this snippet;
# they stand in for functions supplied elsewhere.
results = []
if results:  # Empty list is falsy — this block skips
    process(results)
embeddings = None
if embeddings is None:  # Explicit None check — preferred
    embeddings = compute_embeddings()
list
# List: mutable, ordered, allows duplicates, O(1) append
drugs = ["warfarin", "aspirin", "metformin"]
# Indexing
print(drugs[0]) # "warfarin"
print(drugs[-1]) # "metformin"
# Slicing
print(drugs[0:2]) # ["warfarin", "aspirin"]
print(drugs[::2]) # Every other element: ["warfarin", "metformin"]
# Mutation
drugs.append("lisinopril") # Add to end
drugs.insert(1, "amiodarone") # Insert at index
drugs.remove("aspirin") # Remove by value (first occurrence)
popped = drugs.pop() # Remove and return last element
popped_at = drugs.pop(0) # Remove and return element at index 0
# Common list operations in AI
scores = [0.92, 0.87, 0.95, 0.78]
print(max(scores)) # 0.95
print(min(scores)) # 0.78
print(sorted(scores, reverse=True)) # [0.95, 0.92, 0.87, 0.78]
# List comprehension (covered in detail separately)
squared = [x ** 2 for x in range(5)] # [0, 1, 4, 9, 16]
filtered = [x for x in scores if x > 0.9] # [0.92, 0.95]
Time complexity:
| Operation | Complexity |
|---|---|
| list[i] | O(1) |
| list.append(x) | O(1) amortized |
| list.insert(i, x) | O(n) |
| x in list | O(n) |
| list.sort() | O(n log n) |
tuple
# Tuple: immutable, ordered, allows duplicates
point = (10.5, 20.3) # x, y coordinates
rgb = (255, 0, 128) # color values
# Unpacking
x, y = point
r, g, b = rgb
# Named tuple (cleaner than plain tuple for data)
from collections import namedtuple
Drug = namedtuple("Drug", ["name", "dose_mg", "frequency"])
warfarin = Drug(name="warfarin", dose_mg=5, frequency="daily")
print(warfarin.name) # "warfarin"
print(warfarin[0]) # "warfarin" — still indexable
# Tuples as dict keys (lists cannot be dict keys — they're mutable)
interaction_db = {
("warfarin", "aspirin"): "Major",
("metformin", "contrast"): "Major",
}
key = ("warfarin", "aspirin")
print(interaction_db.get(key)) # "Major"
dict
# Dict: mutable, ordered (Python 3.7+), key-value pairs
patient = {
"id": "P001",
"name": "Jane Smith",
"medications": ["warfarin", "aspirin"],
"inr": 2.4,
}
# Access
print(patient["name"]) # "Jane Smith"
print(patient.get("weight", None)) # None — safe default, no KeyError
# Mutation
patient["inr"] = 2.8 # Update
patient["age"] = 67 # Add new key
del patient["name"] # Delete key
# Iteration
for key in patient:  # Iterating a dict yields its keys
    print(key, patient[key])
for key, value in patient.items():  # Preferred: key-value pairs
    print(f"{key}: {value}")
# Merging dicts (Python 3.9+)
defaults = {"temperature": 0, "max_tokens": 500}
overrides = {"model": "gpt-4o", "temperature": 0.2}
config = defaults | overrides # Merge — rightmost wins
# {'temperature': 0.2, 'max_tokens': 500, 'model': 'gpt-4o'}
# Dict comprehension (covered separately)
token_counts = {drug: len(drug.split()) for drug in ["warfarin", "metformin hydrochloride"]}
Time complexity:
| Operation | Complexity |
|---|---|
| dict[key] | O(1) average |
| dict[key] = value | O(1) average |
| key in dict | O(1) average |
| Iteration | O(n) |
set
# Set: mutable, unordered, no duplicates, O(1) membership test
drug_set = {"warfarin", "aspirin", "metformin"}
# Membership (this is why you use sets over lists for lookup)
print("warfarin" in drug_set) # True — O(1)
print("ibuprofen" in drug_set) # False — O(1)
# Set operations (useful for comparing drug lists)
patient_a_drugs = {"warfarin", "aspirin", "lisinopril"}
patient_b_drugs = {"warfarin", "metformin", "atorvastatin"}
shared = patient_a_drugs & patient_b_drugs # Intersection: {"warfarin"}
all_drugs = patient_a_drugs | patient_b_drugs # Union: all 5 drugs
only_a = patient_a_drugs - patient_b_drugs # Difference: {"aspirin", "lisinopril"}
# Remove duplicates from a list (order not preserved)
drug_list_with_dupes = ["warfarin", "aspirin", "warfarin", "metformin"]
unique_drugs = list(set(drug_list_with_dupes))
# ["warfarin", "aspirin", "metformin"] — order may vary
None
# None: Python's null — a singleton object
# NOTE: compute() is not defined in this snippet; it stands in for any
# function that produces the real value.
result = None
# Check for None with `is`, not `==`
if result is None:  # Correct
    result = compute()
if result == None:  # Works but not idiomatic
    result = compute()
# None as a default argument (common pattern)
def get_drug_info(drug_name: str, category: str | None = None) -> dict:
"""category=None means 'no filter'."""
...
# Function with no return statement returns None implicitly
def log_event(event: str) -> None:
    """Print *event* to stdout prefixed with "[LOG] ".

    Args:
        event: Text to print after the prefix.

    Returns:
        None — there is no return statement.
    """
    print(f"[LOG] {event}")
# No return needed — type hint None makes this explicit
Type Summary
| Type | Mutable | Ordered | Duplicates | Key Use |
|---|---|---|---|---|
| int | N/A | N/A | N/A | Counts, indices |
| float | N/A | N/A | N/A | Scores, probabilities |
| str | No | Yes | Yes | Text, prompts |
| bool | N/A | N/A | N/A | Flags, conditions |
| list | Yes | Yes | Yes | Sequences, batches |
| tuple | No | Yes | Yes | Immutable records, dict keys |
| dict | Yes | Yes (3.7+) | No (keys) | Config, metadata |
| set | Yes | No | No | Dedup, fast lookup |
| None | N/A | N/A | N/A | Missing/unset values |