Learnixo

Python Essentials for AI Engineers · Lesson 23 of 36

__init__ and self Explained

What is __init__?

__init__ is the initializer method — it runs automatically when a new object is created. It sets up the initial state of the object.

Python
class Drug:
    """A drug record holding a name, a category, and a dose in milligrams."""

    def __init__(self, name: str, category: str, dose_mg: float) -> None:
        """Copy the constructor arguments onto the new instance.

        Args:
            name: Drug name, e.g. "warfarin".
            category: Drug category, e.g. "anticoagulant".
            dose_mg: Dose in milligrams.
        """
        # Set up instance attributes
        self.name = name
        self.category = category
        self.dose_mg = dose_mg

# When you call Drug(...), Python:
# 1. Creates a new empty object (this is the job of __new__)
# 2. Calls Drug.__init__(new_object, "warfarin", "anticoagulant", 5.0)
# 3. Returns the initialized object

warfarin = Drug("warfarin", "anticoagulant", 5.0)
# Now warfarin.name == "warfarin", warfarin.category == "anticoagulant",
# and warfarin.dose_mg == 5.0

__init__ is not exactly a constructor (that's __new__) — it's an initializer. The object already exists when __init__ runs; __init__ just fills in its attributes.


What is self?

self is a reference to the current instance — the specific object that a method is being called on.

Python
class Drug:
    """A drug with a name and a dose that can describe itself."""

    def __init__(self, name: str, dose_mg: float) -> None:
        """Store ``name`` and ``dose_mg`` on the instance being initialized."""
        self.name = name      # Store on THIS instance
        self.dose_mg = dose_mg

    def describe(self) -> str:
        """Return "<name> at <dose_mg>mg" built from this instance's attributes."""
        return f"{self.name} at {self.dose_mg}mg"  # Access THIS instance's attributes

# Two separate instances, each carrying its own name and dose_mg
warfarin = Drug("warfarin", 5.0)
aspirin  = Drug("aspirin", 81.0)

# When you call warfarin.describe():
# Python calls Drug.describe(warfarin)
# self IS warfarin — points to that specific object
print(warfarin.describe())   # "warfarin at 5.0mg"
print(aspirin.describe())    # "aspirin at 81.0mg"

self is not a keyword — it's just the conventional name for the first parameter. You could name it anything, but always use self.


__init__ Parameters and Defaults

Python
class LLMConfig:
    """Settings for an LLM call; every field has a default value."""

    def __init__(
        self,
        model: str = "gpt-4o",
        temperature: float = 0.0,
        max_tokens: int = 500,
        timeout: float = 30.0,
    ) -> None:
        """Store the given settings and derive ``is_reasoning_model``.

        Args:
            model: Model name.
            temperature: Sampling temperature.
            max_tokens: Token limit.
            timeout: Timeout value.
        """
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.timeout = timeout
        # Derived attribute, computed once here: True when the model name
        # contains "o1" or "o3".
        self.is_reasoning_model = any(tag in model for tag in ("o1", "o3"))

    def to_dict(self) -> dict:
        """Return model, temperature and max_tokens as a plain dict.

        ``timeout`` and ``is_reasoning_model`` are not part of the result.
        """
        return dict(
            model=self.model,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

# Use all defaults
config = LLMConfig()

# Override some (the rest keep their defaults)
config = LLMConfig(model="gpt-4o-mini", temperature=0.3)

# Override all (positional order: model, temperature, max_tokens, timeout)
config = LLMConfig("claude-sonnet-4-6", 0.2, 1000, 45.0)

Initialization Patterns

Validation in __init__

Python
class PatientRecord:
    """A patient record whose fields are validated at creation time."""

    def __init__(self, patient_id: str, age: int, weight_kg: float) -> None:
        """Validate the arguments, then store them on the instance.

        Args:
            patient_id: Identifier; must contain a non-whitespace character.
            age: Age, accepted from 0 to 150 inclusive.
            weight_kg: Weight in kilograms, greater than 0 and at most 600.

        Raises:
            ValueError: If ``patient_id`` is empty or whitespace-only, or if
                ``age`` or ``weight_kg`` is outside its range (NaN included).
        """
        if not patient_id.strip():
            raise ValueError("patient_id cannot be empty")
        # Ranges are checked as "not (lo <= x <= hi)" rather than
        # "x < lo or x > hi": NaN compares False against everything, so the
        # second form would silently accept it.
        if not (0 <= age <= 150):
            raise ValueError(f"Invalid age: {age}")
        if not (0 < weight_kg <= 600):
            raise ValueError(f"Invalid weight: {weight_kg}kg")

        self.patient_id = patient_id
        self.age = age
        self.weight_kg = weight_kg

# Validation fires at creation time — you never get an invalid PatientRecord
try:
    invalid = PatientRecord("", 67, 80.0)   # ValueError immediately
except ValueError as e:
    print(e)   # patient_id cannot be empty

Lazy Initialization

Python
class VectorStore:
    """Wrapper around a Chroma collection that is built on first use."""

    def __init__(self, collection_name: str, persist_dir: str) -> None:
        """Record the settings only; the underlying store is not created here."""
        self.collection_name = collection_name
        self.persist_dir = persist_dir
        self._store = None   # Filled in by _ensure_loaded()

    def _ensure_loaded(self) -> None:
        """Build the store the first time it is needed — not at init time."""
        if self._store is not None:
            return   # Already built by an earlier call

        # Imported inside the method so that creating a VectorStore does not
        # import these packages.
        from langchain_chroma import Chroma
        from langchain_openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self._store = Chroma(
            collection_name=self.collection_name,
            embedding_function=embeddings,
            persist_directory=self.persist_dir,
        )

    def search(self, query: str, k: int = 4):
        """Ensure the store exists, then return its similarity_search(query, k=k)."""
        self._ensure_loaded()
        return self._store.similarity_search(query, k=k)

# Object created instantly — Chroma not loaded yet
store = VectorStore("drugs", "./chroma_db")
# Chroma loads on first search call
results = store.search("warfarin interactions")

Mutable Default Argument — The Fix

Python
# WRONG: mutable default in __init__ is shared across instances
class BadAgent:
    """Deliberately buggy example of a mutable default argument."""

    def __init__(self, tools: list = []):   # BUG: all instances share this list
        # The default [] is a single list object; every call that omits
        # `tools` stores that same object on the new instance.
        self.tools = tools

agent_a = BadAgent()
agent_b = BadAgent()
agent_a.tools.append("search")   # Mutates the shared default list
print(agent_b.tools)   # ["search"] — contaminated!

# CORRECT: use None as default, create inside __init__
class GoodAgent:
    def __init__(self, tools: list | None = None):
        self.tools = tools if tools is not None else []   # Fresh list per instance

agent_a = GoodAgent()
agent_b = GoodAgent()
agent_a.tools.append("search")   # Only agent_a's own list changes
print(agent_b.tools)   # [] — independent

__init__ in Inheritance

Python
class BaseRetriever:
    """Base retriever: counts retrieve() calls and delegates to _search()."""

    def __init__(self, k: int = 4) -> None:
        """Store ``k`` and start the call counter at zero."""
        self.k = k
        self._call_count = 0

    def retrieve(self, query: str) -> list:
        """Count this call, then return the result of ``_search(query)``."""
        # The counter is bumped before searching, so a search that raises
        # is still counted.
        self._call_count += 1
        results = self._search(query)
        return results

    def _search(self, query: str) -> list:
        """Hook for subclasses; this base version always raises.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Subclasses must implement _search")


class VectorRetriever(BaseRetriever):
    """Retriever that delegates the lookup to a vector store object."""

    def __init__(self, vectorstore, k: int = 4, score_threshold: float = 0.7):
        """Initialize the parent part first, then the subclass attributes."""
        # Call parent __init__ FIRST so the attributes it sets exist before
        # anything subclass-specific is added.
        super().__init__(k)
        self.vectorstore = vectorstore
        self.score_threshold = score_threshold

    def _search(self, query: str) -> list:
        """Return ``vectorstore.similarity_search(query, k=self.k)``."""
        # NOTE(review): score_threshold is stored above but not applied here.
        matches = self.vectorstore.similarity_search(query, k=self.k)
        return matches


class HybridRetriever(VectorRetriever):
    """VectorRetriever that additionally keeps a BM25 corpus."""

    def __init__(self, vectorstore, bm25_corpus: list, k: int = 4):
        """Chain up through the parent initializers, then store the corpus."""
        # Chain up to VectorRetriever → BaseRetriever. score_threshold is not
        # passed, so VectorRetriever's default applies.
        super().__init__(vectorstore=vectorstore, k=k)
        self.bm25_corpus = bm25_corpus


# `vs` and `docs` are not defined in this snippet; they stand in for an
# existing vector store and a document corpus.
retriever = HybridRetriever(vectorstore=vs, bm25_corpus=docs, k=6)
print(retriever.k)               # 6 — set by BaseRetriever via super()
print(retriever.score_threshold)  # 0.7 — set by VectorRetriever default
print(retriever._call_count)      # 0 — set by BaseRetriever

__init__ in LangChain's Class Design

LangChain's classes (like BaseTool) use Pydantic's BaseModel for validation instead of raw __init__:

Python
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field
from typing import Type

class DrugLookupInput(BaseModel):
    """Input fields for the drug lookup tool, declared as model fields."""

    # No default is given for this field; only a description is attached.
    drug_name: str = Field(description="Generic drug name")
    # Defaults to True when not supplied.
    include_interactions: bool = Field(default=True)

class DrugLookupTool(BaseTool):
    """Tool configured through declared fields instead of a hand-written __init__."""

    name: str = "drug_lookup"
    description: str = "Look up clinical drug information"
    args_schema: Type[BaseModel] = DrugLookupInput

    # Pydantic handles __init__ automatically — just declare fields
    # If you need custom init logic, override model_post_init:
    def model_post_init(self, __context) -> None:
        """Called after Pydantic initializes the model."""
        # custom setup here if needed
        pass

    def _run(self, drug_name: str, include_interactions: bool = True) -> str:
        """Return a placeholder info string for ``drug_name``.

        ``include_interactions`` is accepted but not used in this example body.
        """
        return f"Clinical info for {drug_name}"

self vs cls vs No First Parameter

| Method type | First parameter | How to declare | Can access |
|---|---|---|---|
| Instance method | self | Normal def | Instance + class |
| Class method | cls | @classmethod | Class only (not instance) |
| Static method | (none) | @staticmethod | Neither (pure utility) |

Python
class Drug:
    """Drug that records every created instance in a class-level registry."""

    # A single dict stored on the class, keyed by drug name.
    registry: dict = {}

    def __init__(self, name: str):
        """Store ``name`` and register this instance under it."""
        self.name = name
        Drug.registry[name] = self

    def info(self) -> str:           # Instance method — self is this Drug
        """Return "Drug: <name>" for this instance."""
        return f"Drug: {self.name}"

    @classmethod
    def from_name(cls, name: str):   # Class method — cls is Drug (or subclass)
        """Alternate constructor: build an instance of ``cls`` from a name."""
        instance = cls(name=name)
        return instance

    @staticmethod
    def normalize(name: str) -> str: # Static method — no self or cls
        """Return ``name`` lower-cased with surrounding whitespace removed."""
        lowered = name.lower()
        return lowered.strip()