Python Essentials for AI Engineers · Lesson 12 of 36
Default Arguments and Keyword Arguments
Default Argument Values
Default values make parameters optional — callers can omit them if the default is appropriate:
Python
def create_llm_config(
    model: str = "gpt-4o",
    temperature: float = 0.0,
    max_tokens: int = 500,
    timeout: float = 30.0,
) -> dict:
    """Build an LLM configuration dictionary.

    Every parameter has a default, so callers pass only the settings they
    want to override.

    Args:
        model: Value stored under the ``"model"`` key.
        temperature: Value stored under the ``"temperature"`` key.
        max_tokens: Value stored under the ``"max_tokens"`` key.
        timeout: Value stored under the ``"timeout"`` key.

    Returns:
        A new dict with the keys ``"model"``, ``"temperature"``,
        ``"max_tokens"`` and ``"timeout"``.
    """
    return {
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "timeout": timeout,
    }
# Use all defaults
config = create_llm_config()
# {"model": "gpt-4o", "temperature": 0.0, "max_tokens": 500, "timeout": 30.0}
# Override one
config = create_llm_config(model="gpt-4o-mini")
# {"model": "gpt-4o-mini", "temperature": 0.0, "max_tokens": 500, "timeout": 30.0}
# Override several
config = create_llm_config(model="claude-sonnet-4-6", temperature=0.3, max_tokens=1000)
Positional vs Keyword Calls
Python
def calculate_dose(drug: str, weight_kg: float, dose_per_kg: float = 15.0) -> float:
    """Return the total dose as ``weight_kg * dose_per_kg``.

    Args:
        drug: Drug name. Accepted so call sites can name the drug, but not
            used in the calculation itself.
        weight_kg: Weight multiplied by the per-kilogram dose.
        dose_per_kg: Dose per kilogram; defaults to ``15.0``.

    Returns:
        The product of ``weight_kg`` and ``dose_per_kg``.
    """
    return weight_kg * dose_per_kg
# Positional — order determines which parameter gets which value
dose = calculate_dose("vancomycin", 70.0, 20.0)
# Keyword — explicitly name each parameter (order doesn't matter)
dose = calculate_dose(dose_per_kg=20.0, drug="vancomycin", weight_kg=70.0)
# Mixed — positional first, keyword after
dose = calculate_dose("vancomycin", 70.0, dose_per_kg=20.0)
# This is an error — positional after keyword
dose = calculate_dose(drug="vancomycin", 70.0, 20.0) # SyntaxError
Keyword-Only Parameters
Parameters after * can only be passed as keyword arguments:
Python
def embed_text(
    text: str,
    *,  # Everything after this is keyword-only
    model: str = "text-embedding-3-small",
    batch_size: int = 100,
    normalize: bool = True,
) -> list[float]:
    """Signature-only example of keyword-only parameters.

    ``text`` may be passed positionally or by keyword. ``model``,
    ``batch_size`` and ``normalize`` follow the bare ``*`` and therefore must
    be passed by keyword; passing them positionally raises ``TypeError``.

    The body is a placeholder (``...``), so as written the function returns
    ``None`` rather than the annotated ``list[float]``.
    """
    ...
# Correct — keyword arguments
embed_text("warfarin mechanism", model="text-embedding-3-large", normalize=False)
# Wrong — positional for keyword-only parameter
embed_text("warfarin mechanism", "text-embedding-3-large") # TypeError
# Why keyword-only?
# 1. Prevents accidental positional mismatches
# 2. Makes the API self-documenting at the call site
# 3. Common in LangChain, FastAPI, and ML APIs
Positional-Only Parameters
Parameters before / can only be passed positionally (rarely used, mostly for C extensions):
Python
def normalize(value: float, /, min_val: float, max_val: float) -> float:
    """Rescale ``value`` relative to the range ``min_val``..``max_val``.

    ``value`` precedes the ``/`` marker, so it is positional-only: passing
    ``value=...`` raises ``TypeError``. ``min_val`` and ``max_val`` may be
    passed positionally or by keyword.

    Returns:
        ``(value - min_val) / (max_val - min_val)``.

    Raises:
        ZeroDivisionError: If ``max_val`` equals ``min_val``.
    """
    return (value - min_val) / (max_val - min_val)
normalize(0.5, 0.0, 1.0) # OK
normalize(value=0.5, min_val=0.0, max_val=1.0) # TypeError — value is positional-only
The Mutable Default Argument Bug
This is one of Python's most common bugs:
Python
# BUG: the default [] is created ONCE, not on each call
def add_observation(obs: str, history: list = []) -> list:
    """Append ``obs`` to ``history`` and return it (deliberately buggy demo).

    This is the lesson's counter-example and the bug is intentional: the
    default ``[]`` is created once, when the ``def`` statement runs. Every
    call that omits ``history`` therefore appends to, and returns, that same
    list object, so observations accumulate across unrelated calls.

    Args:
        obs: Observation to append.
        history: List to append to. When omitted, the single shared default
            list is used.

    Returns:
        The same list object that was appended to.
    """
    history.append(obs)
    return history
print(add_observation("INR 2.4")) # ["INR 2.4"]
print(add_observation("INR 2.8")) # ["INR 2.4", "INR 2.8"] — wrong!
print(add_observation("dose adjusted")) # ["INR 2.4", "INR 2.8", "dose adjusted"]
# The same list is reused across all calls — it's shared state!
# FIX: use None, create inside the function
def add_observation_fixed(obs: str, history: list | None = None) -> list:
if history is None:
history = [] # Fresh list on every call where None is passed
history.append(obs)
return history
print(add_observation_fixed("INR 2.4")) # ["INR 2.4"]
print(add_observation_fixed("INR 2.8")) # ["INR 2.8"] — independent
# The same bug applies to dict and set defaults — apply the same None fix
def add_tag(tag: str, tags: dict | None = None) -> dict:
    if tags is None:
        tags = {}
    tags[tag] = True
    return tags
Rule: Never use list, dict, or set as a default argument value. Use None.
Default Values are Evaluated Once
Python
import time
# The default is the timestamp AT FUNCTION DEFINITION TIME
def log_event(event: str, timestamp: float = time.time()) -> str:
    """Format ``event`` with a timestamp prefix (deliberately buggy demo).

    This is the lesson's counter-example and the bug is intentional: the
    default ``time.time()`` is evaluated once, when the ``def`` statement
    runs. Every call that omits ``timestamp`` therefore reports that same
    definition-time value rather than the current time.

    Args:
        event: Text placed after the bracketed timestamp.
        timestamp: Value to print, formatted with no decimal places.

    Returns:
        A string of the form ``"[<timestamp>] <event>"``.
    """
    return f"[{timestamp:.0f}] {event}"
print(log_event("start")) # [1747400000] start
time.sleep(1)
print(log_event("end")) # [1747400000] end — SAME timestamp, not current time!
# FIX: compute the default inside the function
def log_event_fixed(event: str, timestamp: float | None = None) -> str:
    if timestamp is None:
        timestamp = time.time() # Current time at call time
    return f"[{timestamp:.0f}] {event}"
Argument Ordering Rules
Parameters must follow this order:
def func(pos_only, /, positional, *, keyword_only)
Python
def full_example(
    a: int,  # Positional or keyword
    b: int,  # Positional or keyword
    c: int = 10,  # Optional, positional or keyword
    *args: int,  # Variable positional (covered separately)
    d: int,  # Keyword-only, required
    e: int = 20,  # Keyword-only, optional
    **kwargs,  # Variable keyword (covered separately)
) -> None:
    """Print every argument to show how each parameter kind is bound.

    ``a``, ``b`` and ``c`` accept positional or keyword arguments; extra
    positional arguments are collected in ``args``. ``d`` and ``e`` follow
    ``*args`` and so are keyword-only (``d`` is required). Any other keyword
    arguments are collected in ``kwargs``.

    Returns:
        None. The values are written to standard output with ``print``.
    """
    print(a, b, c, args, d, e, kwargs)
full_example(1, 2, d=4)
# a=1, b=2, c=10 (default), args=(), d=4, e=20 (default), kwargs={}
full_example(1, 2, 3, 4, 5, d=6, e=7, extra=8)
# a=1, b=2, c=3, args=(4, 5), d=6, e=7, kwargs={"extra": 8}
Keyword Arguments in AI/ML APIs
LangChain, FastAPI, and scikit-learn all use keyword-heavy APIs:
Python
from langchain_openai import ChatOpenAI
# LangChain: almost all configuration is keyword-only
model = ChatOpenAI(
model="gpt-4o",
temperature=0,
max_tokens=500,
timeout=30,
max_retries=2,
)
# scikit-learn: consistent keyword convention
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(
n_estimators=100,
max_depth=10,
random_state=42,
n_jobs=-1, # Use all CPU cores
)
# FastAPI: query parameters are declared as function parameters with keyword-configured Query(...) defaults
from fastapi import FastAPI, Query
app = FastAPI()
@app.get("/drugs")
def search_drugs(
    query: str = Query(..., min_length=2, max_length=100),
    limit: int = Query(default=10, ge=1, le=100),
    category: str | None = Query(default=None),
):
    ...
Using keyword arguments at the call site makes code self-documenting:
Python
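# Hard to read — what do 100, 10, True, False mean? (Recent scikit-learn releases also reject this call: parameters after n_estimators are keyword-only.)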
clf = RandomForestClassifier(100, None, None, 10, "gini", 0.0, None, 1, True, False, None, 0, 42, -1)
# Clear — parameter names document their purpose
clf = RandomForestClassifier(
n_estimators=100,
max_depth=10,
random_state=42,
n_jobs=-1,
)