Python Essentials for AI Engineers · Lesson 2 of 36

What are Python's built-in data types?

Python's Core Built-in Types

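Python's core built-in types are shown below: three numeric types (int, float, complex), str, bool, two sequence types (list, tuple), dict, two set types (set, frozenset), and None. Every value in Python is an object with exactly one type; these are the types you will use most often, although others (such as bytes, range, functions, and classes) also exist: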

Python

# Numeric
x: int   = 42                   # Arbitrary-precision integer
y: float = 3.14                 # 64-bit floating point
z: complex = 2 + 3j             # Complex number; j marks the imaginary part

# Text
name: str = "warfarin"          # Immutable sequence of Unicode characters

# Boolean
flag: bool = True               # True or False (bool is a subclass of int)

# Sequence types
items: list  = [1, 2, 3]        # Mutable, ordered
point: tuple = (1, 2, 3)        # Immutable, ordered

# Mapping
data: dict = {"drug": "warfarin", "dose": 5}   # Mutable key -> value pairs

# Set types
unique: set       = {1, 2, 3}    # Mutable, unordered, no duplicates
frozen: frozenset = frozenset({1, 2, 3})  # Immutable (and therefore hashable) set

# Null
value: None = None              # None is the only instance of NoneType

int and float

Python

# int: arbitrary precision — no overflow in Python
big = 10 ** 100   # Works fine, no overflow
print(type(big))  # <class 'int'>

# float: 64-bit IEEE 754 — has precision limits
print(0.1 + 0.2)         # 0.30000000000000004 — floating point imprecision
print(0.1 + 0.2 == 0.3)  # False
# Compare floats with a tolerance instead of ==:
# math.isclose(0.1 + 0.2, 0.3) is True

# For financial/clinical precision, use Decimal
# Build Decimals from strings: Decimal(0.1) would inherit the float's binary error
from decimal import Decimal
print(Decimal("0.1") + Decimal("0.2"))  # 0.3 — exact

# Division operators
print(7 // 2)   # 3  (floor division — rounds toward negative infinity, so -7 // 2 == -4)
print(7 % 2)    # 1  (modulo)
print(7 / 2)    # 3.5 (true division — always returns float)

# Useful for AI: checking whether a step counter lands exactly on a batch boundary
batch_size = 32
step = 64   # Example step counter; in real code this comes from your training loop
if step % batch_size == 0:   # Remainder 0 means step is a whole multiple of batch_size
    print(f"Completed batch {step // batch_size}")   # Completed batch 2

str

Python

# Strings are immutable sequences of Unicode characters
# (every method below returns a new string; `drug` itself never changes)
drug = "Warfarin"
print(drug.lower())            # "warfarin"
print(drug.upper())            # "WARFARIN"
print(drug.startswith("War")) # True
print(drug.replace("W", "w")) # "warfarin" — replaces every occurrence; new string, original unchanged

# f-strings (preferred for formatting)
dose = 5.0
print(f"{drug}: {dose}mg daily")   # "Warfarin: 5.0mg daily"
# A format spec controls the rendering, e.g. {dose:.0f} would print "5"

# Multi-line strings (common in LLM prompts)
# Triple quotes keep the embedded newlines exactly as written
system_prompt = """You are a clinical pharmacist.
Answer drug questions accurately and cite sources.
Never invent drug information."""

# String methods used in AI/NLP
text = "  Warfarin 5mg PO daily  "
print(text.strip())              # "Warfarin 5mg PO daily" — leading/trailing whitespace removed
print(text.split())              # ["Warfarin", "5mg", "PO", "daily"] — splits on runs of whitespace
words = ["warfarin", "aspirin"]
print(", ".join(words))          # "warfarin, aspirin"

# Indexing and slicing (start index inclusive, stop index exclusive)
print(drug[0])    # "W"
print(drug[-1])   # "n" — negative indices count from the end
print(drug[1:4])  # "arf" — characters at indices 1, 2, 3
print(drug[::-1]) # "nirafraW" — a step of -1 yields a reversed copy

bool

Python

# bool is a subclass of int: True == 1, False == 0
print(True + True)    # 2
print(True * 5)       # 5
print(False + 1)      # 1

# Truthy and falsy values
falsy_values = [False, 0, 0.0, "", [], {}, set(), None]
# Also falsy: 0j, (), range(0), b"", frozenset(), and any object whose
# __bool__ returns False or whose __len__ returns 0.
# Everything else is truthy.

# Common in AI code
# NOTE: process() and compute_embeddings() are placeholders for your own
# functions — they are not defined in this snippet.
results = []
if results:    # Empty list is falsy — this block skips
    process(results)

embeddings = None
# `if not embeddings:` would also fire for an empty list or 0, so test
# for None explicitly when None means "not computed yet".
if embeddings is None:    # Explicit None check — preferred
    embeddings = compute_embeddings()

list

Python

# List: mutable, ordered, allows duplicates, amortized O(1) append
drugs = ["warfarin", "aspirin", "metformin"]

# Indexing
print(drugs[0])    # "warfarin"
print(drugs[-1])   # "metformin"

# Slicing (returns a new list)
print(drugs[0:2])  # ["warfarin", "aspirin"]
print(drugs[::2])  # Every other element: ["warfarin", "metformin"]

# Mutation (each call changes `drugs` in place)
drugs.append("lisinopril")           # Add to end -> ["warfarin", "aspirin", "metformin", "lisinopril"]
drugs.insert(1, "amiodarone")        # Insert at index -> ["warfarin", "amiodarone", "aspirin", "metformin", "lisinopril"]
drugs.remove("aspirin")              # Remove by value (first occurrence); raises ValueError if absent
popped = drugs.pop()                 # Remove and return last element -> "lisinopril"
popped_at = drugs.pop(0)             # Remove and return element at index 0 -> "warfarin" (O(n): later items shift left)
# drugs is now ["amiodarone", "metformin"]

# Common list operations in AI
scores = [0.92, 0.87, 0.95, 0.78]
print(max(scores))                   # 0.95
print(min(scores))                   # 0.78
print(sorted(scores, reverse=True))  # [0.95, 0.92, 0.87, 0.78] — new list; scores is unchanged

# List comprehension (covered in detail separately)
squared = [x ** 2 for x in range(5)]   # [0, 1, 4, 9, 16]
filtered = [x for x in scores if x > 0.9]  # [0.92, 0.95]

Time complexity:

| Operation | Complexity |
|---|---|
| list[i] | O(1) |
| list.append(x) | O(1) amortized |
| list.insert(i, x) | O(n) |
| x in list | O(n) |
| list.sort() | O(n log n) |

tuple

Python

# Tuple: immutable, ordered, allows duplicates
point = (10.5, 20.3)   # x, y coordinates
rgb   = (255, 0, 128)  # color values

# Unpacking (the number of names must match the tuple length, else ValueError)
x, y = point
r, g, b = rgb

# Named tuple (cleaner than plain tuple for data)
from collections import namedtuple
Drug = namedtuple("Drug", ["name", "dose_mg", "frequency"])
warfarin = Drug(name="warfarin", dose_mg=5, frequency="daily")
print(warfarin.name)   # "warfarin"
print(warfarin[0])     # "warfarin" — still indexable

# Tuples as dict keys (lists cannot be dict keys — they're mutable, hence unhashable)
# Tuple keys are order-sensitive: ("aspirin", "warfarin") would NOT match the
# first entry below. Use frozenset keys if the pair should be unordered.
interaction_db = {
    ("warfarin", "aspirin"): "Major",
    ("metformin", "contrast"): "Major",
}
key = ("warfarin", "aspirin")
print(interaction_db.get(key))   # "Major"

dict

Python

# Dict: mutable, insertion-ordered (Python 3.7+), key-value pairs
patient = {
    "id": "P001",
    "name": "Jane Smith",
    "medications": ["warfarin", "aspirin"],
    "inr": 2.4,
}

# Access
print(patient["name"])              # "Jane Smith" — raises KeyError if the key is missing
print(patient.get("weight", None))  # None — safe default, no KeyError (None is already .get's default)

# Mutation
patient["inr"] = 2.8              # Update
patient["age"] = 67               # Add new key
del patient["name"]               # Delete key — raises KeyError if the key is missing

# Iteration
for key in patient:               # Iterates keys
    print(key, patient[key])

for key, value in patient.items():  # Preferred: key-value pairs
    print(f"{key}: {value}")

# Merging dicts (Python 3.9+)
defaults = {"temperature": 0, "max_tokens": 500}
overrides = {"model": "gpt-4o", "temperature": 0.2}
config = defaults | overrides      # Merge into a new dict — rightmost wins; inputs are unchanged
# {'temperature': 0.2, 'max_tokens': 500, 'model': 'gpt-4o'}

# Dict comprehension (covered separately)
token_counts = {drug: len(drug.split()) for drug in ["warfarin", "metformin hydrochloride"]}
# {'warfarin': 1, 'metformin hydrochloride': 2}

Time complexity:

| Operation | Complexity |
|---|---|
| dict[key] | O(1) average |
| dict[key] = value | O(1) average |
| key in dict | O(1) average |
| Iteration | O(n) |

set

Python

# Set: mutable, unordered, no duplicates, O(1) average membership test
# (elements must be hashable, so lists and dicts cannot be set members)
drug_set = {"warfarin", "aspirin", "metformin"}

# Membership (this is why you use sets over lists for lookup)
print("warfarin" in drug_set)   # True — O(1) on average
print("ibuprofen" in drug_set)  # False — O(1) on average

# Set operations (useful for comparing drug lists); each returns a new set
patient_a_drugs = {"warfarin", "aspirin", "lisinopril"}
patient_b_drugs = {"warfarin", "metformin", "atorvastatin"}

shared    = patient_a_drugs & patient_b_drugs   # Intersection: {"warfarin"}
all_drugs = patient_a_drugs | patient_b_drugs   # Union: all 5 drugs
only_a    = patient_a_drugs - patient_b_drugs   # Difference: {"aspirin", "lisinopril"}

# Remove duplicates from a list (order not preserved)
drug_list_with_dupes = ["warfarin", "aspirin", "warfarin", "metformin"]
unique_drugs = list(set(drug_list_with_dupes))
# ["warfarin", "aspirin", "metformin"] — order may vary
# To keep first-seen order instead: list(dict.fromkeys(drug_list_with_dupes))

None

Python

# None: Python's null — a singleton object
result = None

# Check for None with `is`, not `==`
# `is` compares identity and cannot be fooled; `==` calls __eq__, which a
# class may override to return anything.
# NOTE: compute() is a placeholder — it is not defined in this snippet.
if result is None:       # Correct
    result = compute()

if result == None:       # Works but not idiomatic
    result = compute()

# None as a default argument (common pattern)
# The `str | None` annotation syntax requires Python 3.10+.
def get_drug_info(drug_name: str, category: str | None = None) -> dict:
    """Stub illustrating a None default; the body is elided with ``...``.

    category=None means 'no filter'.
    """
    ...

# A function that never executes `return` hands back None implicitly
def log_event(event: str) -> None:
    """Print *event* to stdout with a ``[LOG]`` prefix."""
    line = f"[LOG] {event}"
    print(line)
    # Falling off the end returns None — the `-> None` hint states that explicitly

Type Summary

| Type | Mutable | Ordered | Duplicates | Key Use |
|---|---|---|---|---|
| int | N/A | N/A | N/A | Counts, indices |
| float | N/A | N/A | N/A | Scores, probabilities |
| str | No | Yes | Yes | Text, prompts |
| bool | N/A | N/A | N/A | Flags, conditions |
| list | Yes | Yes | Yes | Sequences, batches |
| tuple | No | Yes | Yes | Immutable records, dict keys |
| dict | Yes | Yes (3.7+) | No (keys) | Config, metadata |
| set | Yes | No | No | Dedup, fast lookup |
| None | N/A | N/A | N/A | Missing/unset values |

Previous Lesson

What is Python? Why is it widely used in AI?

Next Lesson

What is dynamic typing?