Python Types for AI Code

Type hints are not just documentation — they are a contract between your code and your tools. In AI engineering, where data flows through LLMs, tools, APIs, and databases, types catch mistakes before they reach production and make your code self-documenting.

1. Basic Type Hints

Python

# Basic scalars — each annotation names the type of the value assigned to it
name: str = "gpt-4o"
max_tokens: int = 1024
temperature: float = 0.7
is_streaming: bool = True

# Functions with typed signatures
def truncate(text: str, max_chars: int) -> str:
    """Return the part of ``text`` that ends at index ``max_chars``.

    Plain slice semantics apply: a ``max_chars`` at or beyond the length of
    ``text`` returns the whole string, and a negative value counts from the
    end of the string.
    """
    clipped = text[0:max_chars]
    return clipped


def compute_cost(tokens: int, price_per_1k: float) -> float:
    """Return the cost of ``tokens`` tokens at ``price_per_1k`` per 1,000 tokens."""
    thousands_of_tokens = tokens / 1000
    return thousands_of_tokens * price_per_1k


# Return None explicitly
def log_event(event: str) -> None:
    """Print ``event`` to standard output, prefixed with ``[LOG]``."""
    line = f"[LOG] {event}"
    print(line)

2. Collection Types

Python

from typing import List, Dict, Tuple, Set   # Python 3.8 style
# Python 3.9+ can use list[], dict[], tuple[], set[] directly

# Lists — one element type shared by every item
messages: list[str] = ["hello", "world"]
embeddings: list[float] = [0.1, -0.3, 0.8]

# Dicts — key type first, value type second
metadata: dict[str, str] = {"model": "gpt-4o", "user": "alice"}
token_counts: dict[str, int] = {"prompt": 120, "completion": 80}

# Nested types — a list whose items are str-to-str dicts
conversation: list[dict[str, str]] = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
]

# Tuple — fixed length, fixed types (one type per position)
point: tuple[float, float] = (1.0, 2.0)
result: tuple[str, int, float] = ("gpt-4o", 200, 0.002)

3. Optional and Union

Python

from typing import Optional, Union

# Optional[X] is shorthand for Union[X, None]
def get_system_prompt(role: Optional[str] = None) -> str:
    """Return a system prompt, specialised for ``role`` when one is given.

    With ``role`` left as ``None`` the generic assistant prompt is returned;
    any other value (including an empty string) is interpolated into the
    expert prompt.
    """
    if role is not None:
        return f"You are a {role} expert."
    return "You are a helpful assistant."


# Union — value can be one of several types
def parse_response(response: Union[str, dict]) -> str:
    """Return the text carried by ``response``.

    A non-dict value is returned unchanged. For a dict, the value stored
    under ``"content"`` is returned, or ``""`` when that key is absent.
    """
    if not isinstance(response, dict):
        return response
    return response.get("content", "")


# Python 3.10+ syntax (pipe operator)
def load_config(path: str | None = None) -> dict | None:
    if path is None:
        return None
    import json
    return json.loads(open(path).read())

4. Any — the Escape Hatch

Python

from typing import Any

# Use sparingly — turns off type checking for that value
def log_arbitrary(data: Any) -> None:
    """Print ``data`` to standard output; a value of any type is accepted."""
    print(data, end="\n")


# Common in AI: raw LLM responses before parsing
def call_llm_raw(prompt: str) -> Any:
    """Placeholder for a raw SDK call; the body is intentionally left as ``...``.

    As written it performs no call and returns ``None``.
    """
    # Returns whatever the SDK gives back
    ...


# Better: narrow the type as soon as possible
def call_llm(prompt: str) -> str:
    """Call ``call_llm_raw`` and narrow its result to ``str``.

    A string result is returned unchanged; anything else is converted
    with ``str()``.
    """
    raw: Any = call_llm_raw(prompt)
    return raw if isinstance(raw, str) else str(raw)

5. Callable Types

Python

from typing import Callable

# A function that takes a string and returns a string
Processor = Callable[[str], str]


def apply_processor(text: str, fn: Processor) -> str:
    """Call ``fn`` with ``text`` and return whatever it produces."""
    processed = fn(text)
    return processed


# Used in AI: passing functions as tools
ToolFunction = Callable[..., Any]


def register_tool(name: str, fn: ToolFunction) -> None:
    """Announce a tool registration on standard output.

    Only ``name`` is used; ``fn`` is accepted but is neither stored nor
    called.
    """
    announcement = f"Registered tool: {name}"
    print(announcement)

6. TypedDict — Structured Dictionaries

TypedDict gives you a typed dict schema without creating a full class. Great for LLM message formats.

Python

from typing import TypedDict, Required, NotRequired


class Message(TypedDict):
    """Typed dict schema for a chat message: a ``role`` key and a ``content`` key."""
    # The values listed below are a convention only; the annotation accepts any str.
    role: str       # "user" | "assistant" | "system"
    content: str


class ToolCall(TypedDict):
    """Typed dict schema for a tool call: ``id``, ``type`` and ``function`` keys."""
    id: str
    type: str       # always "function"
    # str-to-str mapping; the expected keys are not declared here
    function: dict[str, str]


class LLMResponse(TypedDict):
    """Typed dict schema for a response: ``id``, ``model``, ``choices`` and ``usage``."""
    id: str
    model: str
    # Items are untyped dicts; their inner shape is not described by this schema.
    choices: list[dict]
    usage: dict[str, int]


# Using TypedDict
def format_message(role: str, content: str) -> Message:
    """Build a ``Message`` dict from ``role`` and ``content``."""
    return Message(role=role, content=content)


# Build a message through the typed helper, then read one of its declared keys.
user_msg: Message = format_message("user", "What is RAG?")
print(user_msg["role"])     # type-safe access; prints "user"

Total vs. Non-Total TypedDict

Python

class Config(TypedDict, total=False):
    """Typed dict schema declared with ``total=False``, so any key may be omitted."""
    # All keys optional when total=False
    temperature: float
    max_tokens: int
    top_p: float


# Mix required and optional (Python 3.11+)
class FullConfig(TypedDict):
    """Typed dict schema that mixes one required key with ``NotRequired`` keys."""
    model: str                          # required
    temperature: NotRequired[float]     # optional
    max_tokens: NotRequired[int]        # optional

7. Literal — Enumerated String Values

Python

from typing import Literal

# Each alias restricts a value to exactly the string constants listed.
Role = Literal["user", "assistant", "system", "tool"]
Model = Literal["gpt-4o", "gpt-4o-mini", "o1", "o1-mini"]
FinishReason = Literal["stop", "length", "tool_calls", "content_filter"]


def create_message(role: Role, content: str) -> Message:
    """Build a ``Message`` dict whose ``role`` is limited to the ``Role`` literals."""
    return Message(role=role, content=content)


# This call is valid
create_message("user", "Hello")

# A type checker catches this mistake immediately:
# create_message("admin", "Hello")  # Error: "admin" not in Literal


def choose_model(task: Literal["chat", "embed", "rerank"]) -> Model:
    """Return the model name to use for ``task``.

    ``"chat"`` maps to ``"gpt-4o"``; every other task (``"embed"`` and
    ``"rerank"`` alike) maps to ``"gpt-4o-mini"``.
    """
    return "gpt-4o" if task == "chat" else "gpt-4o-mini"

8. Dataclasses

Dataclasses generate __init__, __repr__, and __eq__ automatically. Perfect for config objects and message types.

Python

from dataclasses import dataclass, field
from typing import Optional


@dataclass
class LLMConfig:
    """Bundle of request settings; every field has a default, so ``LLMConfig()`` is valid."""
    model: str = "gpt-4o"
    temperature: float = 0.7
    max_tokens: int = 1024
    top_p: float = 1.0
    stream: bool = False
    system_prompt: Optional[str] = None  # None means no system prompt was supplied


@dataclass
class ChatMessage:
    """A single chat message; ``role`` and ``content`` are required, the rest default to None."""
    role: str
    content: str
    name: Optional[str] = None
    tool_call_id: Optional[str] = None


# Usage — fields that are not passed keep their declared defaults
config = LLMConfig(model="gpt-4o-mini", temperature=0.0)
msg = ChatMessage(role="user", content="Explain async/await")

# The generated __repr__ lists every field in declaration order.
print(config)
# LLMConfig(model='gpt-4o-mini', temperature=0.0, max_tokens=1024, ...)

print(msg)
# ChatMessage(role='user', content='Explain async/await', name=None, ...)

field() for mutable defaults

Python

@dataclass
class Conversation:
    """An ordered list of chat messages plus free-form string metadata.

    ``messages`` and ``metadata`` use ``default_factory`` so every instance
    gets its own list and dict. ``max_turns`` is stored but is not read by
    any method of this class.
    """

    messages: list[ChatMessage] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)
    max_turns: int = 20

    def add_message(self, role: str, content: str) -> None:
        """Append a new ``ChatMessage`` built from ``role`` and ``content``."""
        new_message = ChatMessage(role=role, content=content)
        self.messages.append(new_message)

    def token_estimate(self) -> int:
        """Return a rough token count: total content characters divided by four."""
        char_total = 0
        for message in self.messages:
            char_total += len(message.content)
        return char_total // 4   # rough: 4 chars per token


# Start from the defaults (empty message list), then add two messages.
convo = Conversation()
convo.add_message("user", "What is an embedding?")
convo.add_message("assistant", "An embedding is a vector representation...")
# 21 + 42 content characters -> 63 // 4 = 15, so this prints "Messages: 2, ~tokens: 15"
print(f"Messages: {len(convo.messages)}, ~tokens: {convo.token_estimate()}")

__post_init__ for validation

Python

@dataclass
class EmbeddingRequest:
    """Request parameters that are validated as soon as the instance is created."""

    text: str
    model: str = "text-embedding-3-small"
    dimensions: int = 1536

    def __post_init__(self) -> None:
        """Validate the fields right after the generated ``__init__`` runs.

        Raises:
            ValueError: If ``text`` is empty or whitespace-only, or if
                ``dimensions`` is not one of the accepted sizes. ``text``
                is checked first.
        """
        has_text = bool(self.text.strip())
        if not has_text:
            raise ValueError("text cannot be empty")
        allowed_dimensions = (256, 512, 1024, 1536, 3072)
        if self.dimensions in allowed_dimensions:
            return
        raise ValueError(f"Invalid dimensions: {self.dimensions}")


# This raises ValueError
# EmbeddingRequest(text="", dimensions=100)

Frozen Dataclasses (Immutable)

Python

@dataclass(frozen=True)
class ModelVersion:
    """Immutable description of a model: its name, context size and vision flag."""

    name: str
    context_window: int
    supports_vision: bool = False

    def fits(self, token_count: int) -> bool:
        """Return True when ``token_count`` is strictly below ``context_window``."""
        return self.context_window > token_count


# Two module-level instances; they differ by name, so they are distinct dict keys.
GPT4O = ModelVersion("gpt-4o", 128_000, supports_vision=True)
GPT4O_MINI = ModelVersion("gpt-4o-mini", 128_000)

# Frozen dataclasses are hashable — can be used as dict keys
# NOTE(review): the unit of these prices is not stated here — confirm before reuse.
model_costs: dict[ModelVersion, float] = {
    GPT4O: 0.005,
    GPT4O_MINI: 0.00015,
}

# GPT4O.name = "other"  # raises FrozenInstanceError

9. Type Aliases

Python

from typing import TypeAlias

# Simple aliases for readability
Embedding: TypeAlias = list[float]
TokenCount: TypeAlias = int
ConversationHistory: TypeAlias = list[dict[str, str]]

# In Python 3.12, use the `type` statement
type Embedding = list[float]
type TokenCount = int


def cosine_similarity(a: Embedding, b: Embedding) -> float:
    import math
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x ** 2 for x in a))
    norm_b = math.sqrt(sum(y ** 2 for y in b))
    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0

10. Why Types Matter for AI Engineering

Schema Validation

Python

# Without types: bugs are silent
def build_request(model, messages, temperature, max_tokens):
    """Bundle the four arguments into a request dict without checking any of them."""
    return dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )

# With types: mistakes are caught before runtime
def build_request_typed(
    model: Model,
    messages: list[Message],
    temperature: float = 0.7,
    max_tokens: int = 1024,
) -> dict:
    """Build a request dict after checking that ``temperature`` is in range.

    Args:
        model: Model name, limited to the ``Model`` literals.
        messages: Messages to send, in order.
        temperature: Sampling temperature; must lie in ``[0.0, 2.0]``.
        max_tokens: Value stored under the ``"max_tokens"`` key.

    Returns:
        A dict with the keys ``model``, ``messages``, ``temperature`` and
        ``max_tokens``.

    Raises:
        ValueError: If ``temperature`` is outside ``[0.0, 2.0]``.
    """
    # Raise explicitly: an ``assert`` is removed under ``python -O``, which
    # would let out-of-range values through unchecked.
    if not 0.0 <= temperature <= 2.0:
        raise ValueError("temperature out of range")
    return {"model": model, "messages": messages,
            "temperature": temperature, "max_tokens": max_tokens}

IDE Support

When everything is typed, your IDE autocompletes field names, flags wrong argument types, and shows inline documentation — a huge productivity boost when working with complex LLM response structures.

Runtime Validation with Pydantic

Python

from pydantic import BaseModel, Field


class ToolSchema(BaseModel):
    """Pydantic model describing a tool: a name, a description and a parameters mapping."""
    name: str
    description: str
    # Keys are strings; values are unconstrained (Any).
    parameters: dict[str, Any]


class LLMRequest(BaseModel):
    """Pydantic model for a request, with bounded ``temperature`` and ``max_tokens``."""
    model: Model
    messages: list[Message]
    # ge/le declare the lower/upper bounds passed to Field for each value.
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    max_tokens: int = Field(default=1024, ge=1, le=128_000)

    # NOTE(review): presumably set for the Model/Message field types — confirm it is needed.
    model_config = {"arbitrary_types_allowed": True}


# Pydantic validates at runtime — catches bad data from external sources
from pydantic import ValidationError

try:
    req = LLMRequest(model="gpt-4o", messages=[], temperature=5.0)
except ValidationError as e:
    # Catch only validation failures so unrelated errors still surface.
    print(e)  # reports that temperature exceeds the le=2.0 bound

Summary

| Feature | Use Case |
|---|---|
| str, int, float, bool | Basic scalars |
| list[T], dict[K, V] | Collections |
| Optional[T] | Nullable values |
| Union[A, B] | Multiple valid types |
| Literal[...] | Enumerated string values |
| TypedDict | Typed dict schemas (LLM messages) |
| @dataclass | Config objects, value types |
| frozen=True | Immutable, hashable objects |
| Callable[[A], B] | Function types (tool dispatch) |
| Any | Escape hatch — use sparingly |

Type hints pay dividends when your AI app grows to dozens of tools, multiple agents, and complex data pipelines.

Python Types for AI Code

Python Types for AI Code

1. Basic Type Hints

2. Collection Types

3. Optional and Union

4. Any — the Escape Hatch

5. Callable Types

6. TypedDict — Structured Dictionaries

Total vs. Non-Total TypedDict

7. Literal — Enumerated String Values

8. Dataclasses

field() for mutable defaults

__post_init__ for validation

Frozen Dataclasses (Immutable)

9. Type Aliases

10. Why Types Matter for AI Engineering

Schema Validation

IDE Support

Runtime Validation with Pydantic

Summary

Enjoyed this article?

Leave a comment