Python Essentials for AI Engineers · Lesson 23 of 36
__init__ and self Explained
What is __init__?
__init__ is the initializer method — it runs automatically when a new object is created. It sets up the initial state of the object.
class Drug:
    """A drug described by its name, category, and dose in milligrams."""

    def __init__(self, name: str, category: str, dose_mg: float):
        # Runs automatically on Drug(...); stores each argument on the
        # instance that is being initialized.
        self.name, self.category, self.dose_mg = name, category, dose_mg
# When you call Drug(...), Python:
# 1. Creates a new empty object
# 2. Calls Drug.__init__(new_object, "warfarin", "anticoagulant", 5.0)
# 3. Returns the initialized object
warfarin = Drug("warfarin", "anticoagulant", 5.0)

__init__ is not exactly a constructor (that's __new__) — it's an initializer. The object already exists when __init__ runs; __init__ just fills in its attributes.
What is self?
self is a reference to the current instance — the specific object that a method is being called on.
class Drug:
    """A named drug with a dose that can describe itself."""

    def __init__(self, name: str, dose_mg: float):
        # `self` is the instance being initialized, so these attributes
        # belong to that one object only.
        self.name = name
        self.dose_mg = dose_mg

    def describe(self) -> str:
        """Return a short summary built from this instance's own attributes."""
        summary = f"{self.name} at {self.dose_mg}mg"
        return summary
# Two separate instances: each one stores its own name and dose_mg.
warfarin = Drug("warfarin", 5.0)
aspirin = Drug("aspirin", 81.0)
# When you call warfarin.describe():
# Python calls Drug.describe(warfarin)
# self IS warfarin — points to that specific object
print(warfarin.describe()) # "warfarin at 5.0mg"
print(aspirin.describe()) # "aspirin at 81.0mg"

self is not a keyword — it's just the conventional name for the first parameter. You could name it anything, but always use self.
__init__ Parameters and Defaults
class LLMConfig:
    """Settings for an LLM call; every parameter has a default."""

    def __init__(
        self,
        model: str = "gpt-4o",
        temperature: float = 0.0,
        max_tokens: int = 500,
        timeout: float = 30.0,
    ):
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout = timeout
        # Derived attribute: computed once, at construction time, from a
        # substring check on the model name.
        self.is_reasoning_model = any(tag in model for tag in ("o1", "o3"))

    def to_dict(self) -> dict:
        """Return model, temperature, and max_tokens as a plain dict."""
        exported = ("model", "temperature", "max_tokens")
        return {key: getattr(self, key) for key in exported}
# Use all defaults (model="gpt-4o", temperature=0.0, max_tokens=500, timeout=30.0)
config = LLMConfig()
# Override some by keyword; max_tokens and timeout keep their defaults
config = LLMConfig(model="gpt-4o-mini", temperature=0.3)
# Override all, passing the four values positionally in signature order
config = LLMConfig("claude-sonnet-4-6", 0.2, 1000, 45.0)

Initialization Patterns
Validation in __init__
class PatientRecord:
    """A patient record whose fields are validated at construction time.

    Args:
        patient_id: Identifier; must contain at least one non-whitespace
            character. It is stored exactly as given (not stripped).
        age: Must satisfy 0 <= age <= 150.
        weight_kg: Must satisfy 0 < weight_kg <= 600.

    Raises:
        ValueError: If any argument fails its check, including NaN values
            for age or weight_kg.
    """

    def __init__(self, patient_id: str, age: int, weight_kg: float):
        if not patient_id.strip():
            raise ValueError("patient_id cannot be empty")
        # The range checks are written as positive chained comparisons: NaN
        # compares False against everything, so a pair of "too small or too
        # large" tests would let it through, while `not (lo <= x <= hi)`
        # rejects it.
        if not (0 <= age <= 150):
            raise ValueError(f"Invalid age: {age}")
        if not (0 < weight_kg <= 600):
            raise ValueError(f"Invalid weight: {weight_kg}kg")
        # Only reached when every check above passed.
        self.patient_id = patient_id
        self.age = age
        self.weight_kg = weight_kg
# Validation fires at creation time — you never get an invalid PatientRecord
try:
invalid = PatientRecord("", 67, 80.0) # ValueError immediately
except ValueError as e:
print(e)Lazy Initialization
class VectorStore:
    """Wrapper around a Chroma collection that is opened lazily on first use."""

    def __init__(self, collection_name: str, persist_dir: str):
        # Only the settings are stored here; nothing is loaded.
        self.collection_name = collection_name
        self.persist_dir = persist_dir
        self._store = None  # Not loaded yet

    def _ensure_loaded(self):
        """Load the store on first use — not at init time."""
        if self._store is not None:
            return  # already loaded on an earlier call
        # Imported inside the method so that creating a VectorStore does not
        # import these packages.
        from langchain_chroma import Chroma
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self._store = Chroma(
            collection_name=self.collection_name,
            embedding_function=embeddings,
            persist_directory=self.persist_dir,
        )

    def search(self, query: str, k: int = 4):
        """Run similarity_search on the (lazily loaded) store and return its result."""
        self._ensure_loaded()
        hits = self._store.similarity_search(query, k=k)
        return hits
# Object created instantly — __init__ only stores the two strings and sets
# _store to None, so Chroma is not loaded yet
store = VectorStore("drugs", "./chroma_db")
# Chroma loads on first search call
results = store.search("warfarin interactions")

Mutable Default Argument — The Fix
# WRONG: mutable default in __init__ is shared across instances
class BadAgent:
    """Deliberately buggy example: the default tool list is shared by all instances."""

    def __init__(self, tools: list = []):  # BUG: all instances share this list
        # The default [] is created once, when the def statement runs, so
        # every call that omits `tools` receives the very same list object.
        self.tools = tools
# Neither call passes `tools`, so both instances hold the same default list.
agent_a = BadAgent()
agent_b = BadAgent()
# Appending through agent_a mutates that shared list...
agent_a.tools.append("search")
# ...so the change is visible through agent_b as well.
print(agent_b.tools) # ['search'] — contaminated!
# CORRECT: use None as default, create inside __init__
class GoodAgent:
def __init__(self, tools: list | None = None):
self.tools = tools if tools is not None else [] # Fresh list per instance
# Each call omits `tools`, so __init__ builds a new empty list for each instance.
agent_a = GoodAgent()
agent_b = GoodAgent()
# Only agent_a's own list is modified here.
agent_a.tools.append("search")
print(agent_b.tools) # [] — independent

__init__ in Inheritance
class BaseRetriever:
    """Base class: counts retrieve() calls and leaves the search to subclasses."""

    def __init__(self, k: int = 4):
        self.k = k
        self._call_count = 0  # incremented by every retrieve() call

    def retrieve(self, query: str) -> list:
        """Record the call, then return whatever _search(query) produces."""
        self._call_count = self._call_count + 1
        results = self._search(query)
        return results

    def _search(self, query: str) -> list:
        """Hook for subclasses to override; this base version always raises."""
        raise NotImplementedError("Subclasses must implement _search")
class VectorRetriever(BaseRetriever):
    """Retriever that answers queries through a vector store's similarity_search."""

    def __init__(self, vectorstore, k: int = 4, score_threshold: float = 0.7):
        # Call parent __init__ FIRST so k and _call_count exist before the
        # subclass adds its own state.
        super().__init__(k=k)
        # Then set subclass attributes
        self.vectorstore, self.score_threshold = vectorstore, score_threshold

    def _search(self, query: str) -> list:
        """Return the store's similarity_search result for query, limited to self.k."""
        # NOTE(review): score_threshold is stored but not applied here.
        matches = self.vectorstore.similarity_search(query, k=self.k)
        return matches
class HybridRetriever(VectorRetriever):
    """VectorRetriever that additionally keeps a BM25 corpus."""

    def __init__(self, vectorstore, bm25_corpus: list, k: int = 4):
        # Chain up to VectorRetriever → BaseRetriever. score_threshold is not
        # forwarded, so VectorRetriever's default applies.
        super().__init__(vectorstore=vectorstore, k=k)
        self.bm25_corpus = bm25_corpus
# NOTE(review): `vs` and `docs` are not defined in this snippet — presumably an
# existing vector store and a list of documents; define them before running.
retriever = HybridRetriever(vectorstore=vs, bm25_corpus=docs, k=6)
print(retriever.k) # 6 — set by BaseRetriever via super()
print(retriever.score_threshold) # 0.7 — set by VectorRetriever default
print(retriever._call_count) # 0 — set by BaseRetriever

__init__ in LangChain's Class Design
LangChain's classes (like BaseTool) use Pydantic's BaseModel for validation instead of raw __init__:
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field
from typing import Type
# Argument schema for the drug-lookup tool. Fields are declared as annotated
# class attributes; no __init__ is written by hand.
class DrugLookupInput(BaseModel):
    # No default is given, so a value must be supplied; carries a description.
    drug_name: str = Field(description="Generic drug name")
    # Defaults to True when omitted.
    include_interactions: bool = Field(default=True)
# Tool definition: configuration is declared as annotated class attributes
# instead of being assigned in a hand-written __init__.
class DrugLookupTool(BaseTool):
    name: str = "drug_lookup"
    description: str = "Look up clinical drug information"
    args_schema: Type[BaseModel] = DrugLookupInput

    # Pydantic handles __init__ automatically — just declare fields
    # If you need custom init logic, override model_post_init:
    def model_post_init(self, __context) -> None:
        """Called after Pydantic initializes the model."""
        # custom setup here if needed
        pass

    # NOTE(review): include_interactions is accepted but not used in the result.
    def _run(self, drug_name: str, include_interactions: bool = True) -> str:
        return f"Clinical info for {drug_name}"

self vs cls vs No First Parameter
| Method type | First parameter | How to declare | Can access |
|---|---|---|---|
| Instance method | self | Normal def | Instance + class |
| Class method | cls | @classmethod | Class only (not instance) |
| Static method | None | @staticmethod | Neither (pure utility) |
class Drug:
    """Drug that records every created instance in a class-level registry."""

    # Class attribute shared by all instances: maps a name to the Drug most
    # recently created with that name.
    registry: dict = {}

    def __init__(self, name: str):
        self.name = name
        # Written through the class, not self, so there is one shared dict.
        Drug.registry[name] = self

    def info(self) -> str:  # Instance method — self is this Drug
        """Return a label built from this instance's name."""
        label = f"Drug: {self.name}"
        return label

    @classmethod
    def from_name(cls, name: str):  # Class method — cls is Drug (or subclass)
        """Alternate constructor: build an instance of cls from a name."""
        instance = cls(name=name)
        return instance

    @staticmethod
    def normalize(name: str) -> str:  # Static method — no self or cls
        """Return name lowercased with surrounding whitespace removed."""
        lowered = name.lower()
        return lowered.strip()