Python Types for AI Code
Master Python type hints: str, int, float, list, dict, Optional, Union, Any, TypedDict, Literal, and dataclasses — and understand why types are essential for AI engineering.
Python Types for AI Code
Type hints are not just documentation — they are a contract between your code and your tools. In AI engineering, where data flows through LLMs, tools, APIs, and databases, types catch mistakes before they reach production and make your code self-documenting.
1. Basic Type Hints
# Basic scalars: one annotated module-level value per builtin scalar type.
name: str = "gpt-4o"  # str
max_tokens: int = 1024  # int
temperature: float = 0.7  # float
is_streaming: bool = True  # bool
# Functions with typed signatures
def truncate(text: str, max_chars: int) -> str:
    """Return the first ``max_chars`` characters of ``text``.

    Args:
        text: String to shorten.
        max_chars: Upper bound of the slice taken from the start of ``text``.

    Returns:
        ``text[:max_chars]``; the whole string when it is already shorter.
    """
    return text[:max_chars]
def compute_cost(tokens: int, price_per_1k: float) -> float:
    """Return the cost of ``tokens`` tokens at ``price_per_1k`` per 1000 tokens.

    Args:
        tokens: Number of tokens used.
        price_per_1k: Price charged for each block of 1000 tokens.

    Returns:
        ``(tokens / 1000) * price_per_1k``.
    """
    return (tokens / 1000) * price_per_1k
# Return None explicitly
def log_event(event: str) -> None:
    """Print ``event`` to stdout prefixed with ``[LOG]``.

    Args:
        event: Text to print after the prefix.
    """
    print(f"[LOG] {event}")


# 2. Collection Types
from typing import List, Dict, Tuple, Set # Python 3.8 style
# Python 3.9+ can use list[], dict[], tuple[], set[] directly
# Lists
messages: list[str] = ["hello", "world"]
embeddings: list[float] = [0.1, -0.3, 0.8]

# Dicts
metadata: dict[str, str] = {"model": "gpt-4o", "user": "alice"}
token_counts: dict[str, int] = {"prompt": 120, "completion": 80}

# Nested types
conversation: list[dict[str, str]] = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
]

# Tuple — fixed length, fixed types
point: tuple[float, float] = (1.0, 2.0)
result: tuple[str, int, float] = ("gpt-4o", 200, 0.002)


# 3. Optional and Union
from typing import Optional, Union
# Optional[X] is shorthand for Union[X, None]
def get_system_prompt(role: Optional[str] = None) -> str:
    """Build a system prompt, optionally specialised for ``role``.

    Args:
        role: Area of expertise to mention; ``None`` selects the generic prompt.

    Returns:
        The generic assistant prompt when ``role`` is ``None``, otherwise
        ``"You are a {role} expert."``.
    """
    if role is None:
        return "You are a helpful assistant."
    return f"You are a {role} expert."
# Union — value can be one of several types
def parse_response(response: Union[str, dict]) -> str:
    """Extract the text of a response given as a string or a dict.

    Args:
        response: Either the text itself, or a dict holding it under
            the ``"content"`` key.

    Returns:
        ``response["content"]`` (``""`` when the key is missing) for a dict,
        otherwise ``response`` unchanged.
    """
    if isinstance(response, dict):
        return response.get("content", "")
    return response
# Python 3.10+ syntax (pipe operator)
def load_config(path: str | None = None) -> dict | None:
if path is None:
return None
import json
return json.loads(open(path).read())4. Any ā the Escape Hatch
from typing import Any
# Use sparingly — turns off type checking for that value
def log_arbitrary(data: Any) -> None:
    """Print ``data`` to stdout, whatever its type.

    Args:
        data: Any object; it is passed straight to ``print``.
    """
    print(data)
# Common in AI: raw LLM responses before parsing
def call_llm_raw(prompt: str) -> Any:
    """Placeholder for an SDK call whose return type is not known statically.

    Args:
        prompt: Text to send to the model.

    Returns:
        Whatever the SDK gives back. The body is a stub (``...``), so this
        example currently returns ``None``.
    """
    # Returns whatever the SDK gives back
    ...
# Better: narrow the type as soon as possible
def call_llm(prompt: str) -> str:
    """Call the model and narrow the untyped result to ``str``.

    Args:
        prompt: Text forwarded to ``call_llm_raw``.

    Returns:
        The raw result when it is already a ``str``, otherwise ``str(raw)``.
    """
    raw: Any = call_llm_raw(prompt)
    if isinstance(raw, str):
        return raw
    return str(raw)


# 5. Callable Types
from typing import Callable
# A function that takes a string and returns a string
Processor = Callable[[str], str]


def apply_processor(text: str, fn: Processor) -> str:
    """Run ``fn`` on ``text`` and return its result.

    Args:
        text: Input string.
        fn: Callable taking one string and returning a string.

    Returns:
        ``fn(text)``.
    """
    return fn(text)
# Used in AI: passing functions as tools
ToolFunction = Callable[..., Any]


def register_tool(name: str, fn: ToolFunction) -> None:
    """Announce that a tool has been registered.

    This example only prints the tool name; ``fn`` is accepted but not
    stored or called.

    Args:
        name: Name shown in the printed message.
        fn: The tool callable, with any signature.
    """
    print(f"Registered tool: {name}")


# 6. TypedDict — Structured Dictionaries
TypedDict gives you a typed dict schema without creating a full class. Great for LLM message formats.
from typing import TypedDict, Required, NotRequired
class Message(TypedDict):
    """Dict schema for one chat message; both keys are required."""

    role: str  # "user" | "assistant" | "system"
    content: str
class ToolCall(TypedDict):
    """Dict schema for a tool call; all three keys are required."""

    id: str
    type: str  # always "function"
    function: dict[str, str]
class LLMResponse(TypedDict):
    """Dict schema for a model response; all four keys are required."""

    id: str
    model: str
    choices: list[dict]
    usage: dict[str, int]
# Using TypedDict
def format_message(role: str, content: str) -> Message:
    """Build a ``Message`` dict from its two fields.

    Args:
        role: Value stored under the ``"role"`` key.
        content: Value stored under the ``"content"`` key.

    Returns:
        ``{"role": role, "content": content}``.
    """
    return {"role": role, "content": content}


user_msg: Message = format_message("user", "What is RAG?")
print(user_msg["role"])  # type-safe access


# Total vs. Non-Total TypedDict
class Config(TypedDict, total=False):
    """Dict schema in which every key may be omitted."""

    # All keys optional when total=False
    temperature: float
    max_tokens: int
    top_p: float
# Mix required and optional (Python 3.11+)
class FullConfig(TypedDict):
model: str # required
temperature: NotRequired[float] # optional
max_tokens: NotRequired[int] # optional7. Literal ā Enumerated String Values
from typing import Literal
# Closed sets of allowed string values, one alias per concept.
Role = Literal[
    "user",
    "assistant",
    "system",
    "tool",
]
Model = Literal[
    "gpt-4o",
    "gpt-4o-mini",
    "o1",
    "o1-mini",
]
FinishReason = Literal[
    "stop",
    "length",
    "tool_calls",
    "content_filter",
]
def create_message(role: Role, content: str) -> Message:
    """Build a ``Message`` whose role is restricted to the ``Role`` literals.

    Args:
        role: One of the values allowed by ``Role``.
        content: Message text.

    Returns:
        ``{"role": role, "content": content}``.
    """
    return {"role": role, "content": content}


# This call is valid
create_message("user", "Hello")
# A type checker catches this mistake immediately:
# create_message("admin", "Hello") # Error: "admin" not in Literal
def choose_model(task: Literal["chat", "embed", "rerank"]) -> Model:
    """Pick the model name to use for ``task``.

    Args:
        task: Kind of work to perform.

    Returns:
        ``"gpt-4o"`` for ``"chat"``; ``"gpt-4o-mini"`` for every other task.
    """
    # "embed" and "rerank" map to the same model, so only "chat" needs a
    # branch of its own; a separate "embed" arm would repeat the fallthrough.
    if task == "chat":
        return "gpt-4o"
    return "gpt-4o-mini"


# 8. Dataclasses
Dataclasses generate __init__, __repr__, and __eq__ automatically. Perfect for config objects and message types.
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class LLMConfig:
    """Configuration values for a model call; every field has a default."""

    model: str = "gpt-4o"
    temperature: float = 0.7
    max_tokens: int = 1024
    top_p: float = 1.0
    stream: bool = False
    system_prompt: Optional[str] = None
@dataclass
class ChatMessage:
    """One chat message: ``role`` and ``content`` required, the rest optional."""

    role: str
    content: str
    name: Optional[str] = None
    tool_call_id: Optional[str] = None
# Usage: build each object, then print the auto-generated repr.
config = LLMConfig(
    model="gpt-4o-mini",
    temperature=0.0,
)
msg = ChatMessage(
    role="user",
    content="Explain async/await",
)

print(config)
# LLMConfig(model='gpt-4o-mini', temperature=0.0, max_tokens=1024, ...)
print(msg)
# ChatMessage(role='user', content='Explain async/await', name=None, ...)
field() for mutable defaults
@dataclass
class Conversation:
    """A list of chat messages plus free-form metadata.

    ``field(default_factory=...)`` gives every instance its own list and
    dict instead of sharing one mutable default across instances.
    """

    messages: list[ChatMessage] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)
    max_turns: int = 20

    def add_message(self, role: str, content: str) -> None:
        """Append a new ``ChatMessage`` built from ``role`` and ``content``."""
        self.messages.append(ChatMessage(role=role, content=content))

    def token_estimate(self) -> int:
        """Return a rough token count: total content characters // 4."""
        total_chars = sum(len(m.content) for m in self.messages)
        return total_chars // 4  # rough: 4 chars per token
# Usage: start empty, add two messages, then report size and token estimate.
convo = Conversation()
convo.add_message("user", "What is an embedding?")
convo.add_message("assistant", "An embedding is a vector representation...")
print(f"Messages: {len(convo.messages)}, ~tokens: {convo.token_estimate()}")


# __post_init__ for validation
@dataclass
class EmbeddingRequest:
    """Embedding request that validates its fields on construction.

    Raises:
        ValueError: From ``__post_init__`` when ``text`` is empty or
            whitespace-only, or when ``dimensions`` is not one of
            256, 512, 1024, 1536, 3072.
    """

    text: str
    model: str = "text-embedding-3-small"
    dimensions: int = 1536

    def __post_init__(self) -> None:
        """Validate ``text`` and ``dimensions`` after the generated ``__init__``."""
        if not self.text.strip():
            raise ValueError("text cannot be empty")
        if self.dimensions not in (256, 512, 1024, 1536, 3072):
            raise ValueError(f"Invalid dimensions: {self.dimensions}")
# This raises ValueError
# EmbeddingRequest(text="", dimensions=100)
Frozen Dataclasses (Immutable)
@dataclass(frozen=True)
class ModelVersion:
    """Immutable description of a model; frozen, so instances are hashable."""

    name: str
    context_window: int
    supports_vision: bool = False

    def fits(self, token_count: int) -> bool:
        """Return True when ``token_count`` is strictly below ``context_window``."""
        return token_count < self.context_window
# Two module-level model descriptions.
GPT4O = ModelVersion(
    name="gpt-4o",
    context_window=128_000,
    supports_vision=True,
)
GPT4O_MINI = ModelVersion(
    name="gpt-4o-mini",
    context_window=128_000,
)

# Frozen dataclasses are hashable — can be used as dict keys
model_costs: dict[ModelVersion, float] = {GPT4O: 0.005, GPT4O_MINI: 0.00015}
# GPT4O.name = "other" # raises FrozenInstanceError
9. Type Aliases
from typing import TypeAlias
# Simple aliases for readability
Embedding: TypeAlias = list[float]
TokenCount: TypeAlias = int
ConversationHistory: TypeAlias = list[dict[str, str]]
# In Python 3.12, use the `type` statement
type Embedding = list[float]
type TokenCount = int
def cosine_similarity(a: Embedding, b: Embedding) -> float:
    """Return the cosine similarity of two equal-length vectors.

    Args:
        a: First vector.
        b: Second vector; must have the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either norm is zero
        (which includes empty vectors).

    Raises:
        ValueError: If ``a`` and ``b`` differ in length.
    """
    import math

    # zip() stops at the shorter input while the norms below use the full
    # vectors, so mismatched lengths used to produce a meaningless score.
    if len(a) != len(b):
        raise ValueError(f"Embedding length mismatch: {len(a)} != {len(b)}")

    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x ** 2 for x in a))
    norm_b = math.sqrt(sum(y ** 2 for y in b))
    if not norm_a or not norm_b:
        return 0.0
    return dot / (norm_a * norm_b)


# 10. Why Types Matter for AI Engineering
Schema Validation
# Without types: bugs are silent
def build_request(model, messages, temperature, max_tokens):
    """Assemble a request dict from the four arguments, unchecked.

    Deliberately left unannotated to contrast with the typed variant:
    nothing constrains what callers pass in.

    Returns:
        A dict with the keys ``"model"``, ``"messages"``, ``"temperature"``
        and ``"max_tokens"`` holding the arguments as given.
    """
    return {"model": model, "messages": messages,
            "temperature": temperature, "max_tokens": max_tokens}
# With types: mistakes are caught before runtime
def build_request_typed(
    model: Model,
    messages: list[Message],
    temperature: float = 0.7,
    max_tokens: int = 1024,
) -> dict:
    """Assemble a request dict from typed, range-checked arguments.

    Args:
        model: One of the names allowed by ``Model``.
        messages: Messages to send.
        temperature: Sampling temperature; must lie in ``[0.0, 2.0]``.
        max_tokens: Value stored under ``"max_tokens"``.

    Returns:
        A dict with the keys ``"model"``, ``"messages"``, ``"temperature"``
        and ``"max_tokens"``.

    Raises:
        ValueError: If ``temperature`` is outside ``[0.0, 2.0]``.
    """
    # Raise explicitly instead of using assert: assertions are removed when
    # Python runs with -O, which would silently disable this check.
    if not 0.0 <= temperature <= 2.0:
        raise ValueError("temperature out of range")
    return {"model": model, "messages": messages,
            "temperature": temperature, "max_tokens": max_tokens}


# IDE Support
When everything is typed, your IDE autocompletes field names, flags wrong argument types, and shows inline documentation — a huge productivity boost when working with complex LLM response structures.
Runtime Validation with Pydantic
from pydantic import BaseModel, Field
class ToolSchema(BaseModel):
    """Pydantic model describing a tool: name, description and parameters."""

    name: str
    description: str
    parameters: dict[str, Any]
class LLMRequest(BaseModel):
    """Pydantic model for a request, with range constraints on two fields."""

    model: Model
    messages: list[Message]
    # ge/le bounds are declared on the field, so Pydantic enforces them
    # whenever an instance is created.
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    max_tokens: int = Field(default=1024, ge=1, le=128_000)
    model_config = {"arbitrary_types_allowed": True}
# Pydantic validates at runtime — catches bad data from external sources
from pydantic import ValidationError

try:
    req = LLMRequest(model="gpt-4o", messages=[], temperature=5.0)
except ValidationError as e:
    # Catch only the validation failure so unrelated errors still surface.
    print(e)  # temperature must be <= 2.0


# Summary
| Feature | Use Case |
|---|---|
| str, int, float, bool | Basic scalars |
| list[T], dict[K, V] | Collections |
| Optional[T] | Nullable values |
| Union[A, B] | Multiple valid types |
| Literal[...] | Enumerated string values |
| TypedDict | Typed dict schemas (LLM messages) |
| @dataclass | Config objects, value types |
| frozen=True | Immutable, hashable objects |
| Callable[[A], B] | Function types (tool dispatch) |
| Any | Escape hatch — use sparingly |
Type hints pay dividends when your AI app grows to dozens of tools, multiple agents, and complex data pipelines.
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.