LangChain Mastery · Lesson 24 of 33
Interview: Build a Research Agent with LangChain
Q1: Build a drug research agent that searches a database, checks interactions, and formats a clinical summary. Walk through your implementation.
Answer:
A clinical research agent needs: clear tool boundaries, a structured system prompt, and safety limits.
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from pydantic import BaseModel, Field
from typing import Optional
# --- Tools ---
@tool
def search_drug_database(query: str) -> str:
    """
    Search the clinical pharmacology database for drug information.
    Use for: mechanism of action, standard dosing, drug class, indications.
    Do NOT use for drug-drug interactions — use check_drug_interaction instead.
    """
    # The docstring above is the tool description the model reads when picking
    # a tool, so it is kept word-for-word.
    monographs = {
        "warfarin": "Class: Anticoagulant. Mechanism: VKORC1 inhibitor. Dose: 2-10mg/day INR-guided.",
        "aspirin": "Class: NSAID/antiplatelet. Mechanism: COX-1/COX-2 inhibitor. Dose: 81-325mg/day.",
        "amiodarone": "Class: Antiarrhythmic (Class III). Dose: 200-400mg/day maintenance.",
    }
    # Case-insensitive substring match; the first drug (in dict order) whose
    # name appears anywhere in the query wins.
    needle = query.lower()
    match = next(
        (text for name, text in monographs.items() if name in needle),
        None,
    )
    if match is not None:
        return match
    return f"No exact match for '{query}'. Try the generic drug name."
@tool
def check_drug_interaction(drug_a: str, drug_b: str) -> str:
    """
    Check for clinically significant interactions between two drugs.
    Returns severity (Major/Moderate/Minor), mechanism, and recommendation.
    Always use both generic names.
    """
    # Pairs are keyed by frozenset so the lookup does not depend on argument
    # order. The previous sorted-tuple lookup produced ("aspirin", "warfarin")
    # and ("amiodarone", "warfarin"), which never matched the unsorted tuple
    # keys, so every documented MAJOR interaction was reported as "none".
    interactions = {
        frozenset({"warfarin", "aspirin"}): "MAJOR: Additive anticoagulation + antiplatelet effect. Avoid unless high cardiac risk. Monitor INR closely.",
        frozenset({"warfarin", "amiodarone"}): "MAJOR: Amiodarone inhibits CYP2C9, raises INR significantly. Reduce warfarin 30-50%, check INR weekly.",
    }
    # Normalise case and stray whitespace before building the lookup key.
    pair = frozenset({drug_a.strip().lower(), drug_b.strip().lower()})
    result = interactions.get(pair)
    return result or f"No major documented interaction between {drug_a} and {drug_b}."
@tool
def format_clinical_summary(
    patient_context: str,
    findings: str,
    recommendation: str,
) -> str:
    """
    Format research findings into a structured clinical summary.
    Use as the FINAL step after gathering all drug information.
    """
    # Fixed layout: banner, the three caller-supplied sections, closing rule,
    # then a standing verification note.
    layout = (
        "=== CLINICAL SUMMARY ===\n"
        "Patient Context: {patient_context}\n\n"
        "Findings:\n{findings}\n\n"
        "Recommendation:\n{recommendation}\n"
        "========================\n"
        "Note: Always verify with current clinical references before prescribing."
    )
    return layout.format(
        patient_context=patient_context,
        findings=findings,
        recommendation=recommendation,
    )
# --- Agent setup ---
# All three tools are exposed to the agent; the system prompt below names
# format_clinical_summary as the required closing call.
tools = [search_drug_database, check_drug_interaction, format_clinical_summary]
model = ChatOpenAI(model="gpt-4o", temperature=0)
# Message order: system instructions, prior turns, the user's question, then
# the agent's own scratchpad of tool calls and observations.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a clinical pharmacist. Answer drug questions using your tools. "
     "Always check interactions when multiple drugs are involved. "
     "End every response by calling format_clinical_summary to structure your findings."),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
agent = create_tool_calling_agent(model, tools, prompt)
executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=8,  # upper bound on agent steps
    max_execution_time=30.0,  # wall-clock limit for one run
    handle_parsing_errors=True,
    return_intermediate_steps=True,  # result["intermediate_steps"] is read after invoke
    verbose=False,
)
result = executor.invoke({
"input": "My patient is on warfarin and needs aspirin for a recent MI. What should I know?",
"chat_history": [],
})
print(result["output"])
print(f"\nTool calls made: {len(result['intermediate_steps'])}")
for action, observation in result["intermediate_steps"]:
print(f" → {action.tool}({action.tool_input})")
Key design decisions:
- Tool descriptions tell the model when not to use the tool (avoids wrong-tool selection)
- format_clinical_summary is a structured final step — prevents the agent from ending with raw tool output
- max_iterations=8 prevents runaway loops; handle_parsing_errors=True prevents crashes on malformed model output
- return_intermediate_steps=True gives an audit trail for clinical accountability
Q2: Your agent sometimes loops indefinitely or times out. How do you diagnose and fix it?
Answer:
Agent loops are caused by five failure modes. Diagnose by checking intermediate_steps:
def diagnose_agent_loop(result: dict) -> dict:
    """Analyze agent execution for loop patterns.

    Args:
        result: Output dict of an executor run. ``intermediate_steps`` is
            expected to hold ``(action, observation)`` pairs where ``action``
            has ``tool`` and ``tool_input`` attributes. A missing or ``None``
            value is treated as "no steps".

    Returns:
        A dict with ``total_steps``, ``repeated_calls`` (every occurrence of a
        call whose tool and input were seen more than once, in call order),
        ``error_count``, ``scratchpad_chars`` and ``likely_cause``.
    """
    # Imported here so the snippet needs nothing added at file level.
    from collections import Counter

    steps = result.get("intermediate_steps") or []

    # Check for repeated tool calls (infinite loop pattern). Counting once up
    # front avoids rescanning the whole list for every call.
    tool_calls = [(action.tool, str(action.tool_input)) for action, _ in steps]
    call_counts = Counter(tool_calls)
    repeated = [call for call in tool_calls if call_counts[call] > 1]

    # Check for error cascades (tool keeps failing): an observation counts as
    # an error when its text contains "error", case-insensitively.
    error_count = sum(1 for _, obs in steps if "error" in str(obs).lower())

    # Check for scratchpad bloat (context window filling up).
    total_obs_chars = sum(len(str(obs)) for _, obs in steps)

    # Causes are checked in priority order; the first match wins.
    if repeated:
        likely_cause = "infinite_loop"
    elif error_count > 3:
        likely_cause = "error_cascade"
    elif total_obs_chars > 20000:
        likely_cause = "context_overflow"
    else:
        likely_cause = "legitimate_complexity"

    return {
        "total_steps": len(steps),
        "repeated_calls": repeated,
        "error_count": error_count,
        "scratchpad_chars": total_obs_chars,
        "likely_cause": likely_cause,
    }
# Fix 1: Iteration + time limits
executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=6,         # Hard stop on the number of agent steps
    max_execution_time=20.0,  # Wall-clock hard stop
    # "force" ends the run with a fixed "agent stopped" answer when a limit is
    # hit. Agents built with create_tool_calling_agent are runnable agents and
    # only support "force"; "generate" raises ValueError at the moment the
    # limit is reached, i.e. it crashes instead of degrading gracefully.
    early_stopping_method="force",
)
# Fix 2: Tool output truncation (prevents scratchpad overflow)
@tool
def safe_search(query: str) -> str:
    """Search the drug database for clinical information."""
    # Only the first 500 characters of the search result are handed back to
    # the agent, which bounds how much each call adds to the scratchpad.
    max_chars = 500
    return expensive_search(query)[:max_chars]
# Fix 3: Detect and break loops in a custom callback
from langchain_core.callbacks import BaseCallbackHandler
class LoopDetectionCallback(BaseCallbackHandler):
    """Abort a run once a single tool has been started too many times.

    Each ``on_tool_start`` event increments a per-tool-name counter; when a
    counter exceeds ``max_same_tool`` a ``ValueError`` is raised.

    Usage notes:
      * The exception aborts the run; it is not fed back to the model, so the
        caller must catch it.
      * Tool events only reach handlers that are inherited by child runs.
        Pass the handler at call time, e.g.
        ``executor.invoke(inputs, config={"callbacks": [handler]})``;
        constructor callbacks are scoped to the object they are attached to.
      * Counters live for the lifetime of the instance and are never reset,
        so create a fresh handler for every invocation.
    """

    # By default the callback manager logs exceptions raised inside handlers
    # and carries on, which would silently defeat the loop breaker.
    raise_error = True

    def __init__(self, max_same_tool: int = 2):
        super().__init__()
        self.tool_call_counts: dict = {}  # tool name -> number of starts seen
        self.max_same_tool = max_same_tool

    def on_tool_start(self, serialized, input_str, **kwargs):
        """Count the call and raise ValueError once the limit is exceeded."""
        tool_name = (serialized or {}).get("name", "unknown")
        count = self.tool_call_counts.get(tool_name, 0) + 1
        self.tool_call_counts[tool_name] = count
        if count > self.max_same_tool:
            raise ValueError(
                f"Loop detected: tool '{tool_name}' called {count} times. "
                "Please synthesize what you have and give a final answer."
            )
executor_with_loop_detection = AgentExecutor(
agent=agent,
tools=tools,
max_iterations=8,
callbacks=[LoopDetectionCallback(max_same_tool=2)],
handle_parsing_errors=True,
)
Common root causes:
| Symptom | Cause | Fix |
|---|---|---|
| Same tool called 3+ times | Tool description too vague — LLM doesn't know when to stop | Sharpen description, add "use once" guidance |
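| Tool always returns error | Tool broken, LLM keeps retrying | Fix the tool; set handle_tool_error=True on it so a raised ToolException comes back as an observation (handle_parsing_errors=True only covers malformed model output) |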
| Stops at max_iterations | Task genuinely needs more steps | Increase max_iterations or decompose task |
| Context window exceeded | Tool outputs too large | Truncate tool outputs to 500 chars |
| Timeout on first tool call | External API slow | Add timeout to tool, return cached/partial result |
Q3: You need two agents to collaborate — a research agent and a writing agent. How do you coordinate them?
Answer:
The simplest reliable pattern is sequential orchestration: one agent's output becomes the next agent's input. Avoid peer-to-peer agent communication — it's brittle.
from langchain_core.runnables import RunnableLambda
from langchain_core.output_parsers import StrOutputParser
# --- Agent 1: Research Agent ---
# The research agent gets only the lookup tools; formatting is left to the
# writing stage.
research_tools = [search_drug_database, check_drug_interaction]
research_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a drug research specialist. Use tools to gather comprehensive, accurate drug data. "
     "Return structured findings with: drug names, mechanisms, interactions, severity levels. "
     "Do NOT write prose — output raw research findings only."),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])
research_agent = create_tool_calling_agent(model, research_tools, research_prompt)
research_executor = AgentExecutor(
    agent=research_agent,
    tools=research_tools,
    max_iterations=5,
    # Callers read result["intermediate_steps"] to count tool calls; without
    # this flag the key is absent and the count is always 0.
    return_intermediate_steps=True,
    verbose=False,
)
# --- Agent 2: Writing Agent (no tools needed) ---
# One system message with the writer's rules and one human message that
# carries the research text in through the {research} variable.
# NOTE(review): the system text asks for patient-oriented plain language while
# the human turn asks for a summary "for a physician" — confirm the intended
# audience.
writing_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a clinical medical writer. Transform raw drug research into clear, "
     "structured patient-safe summaries. Use plain language. Include: "
     "what the drugs do, the interaction risk, what the patient should do. "
     "Never invent information not present in the research."),
    ("human", "Research findings:\n{research}\n\nWrite a clinical summary for a physician."),
])
# Prompt -> model -> plain string; no tools or executor are involved.
writing_chain = writing_prompt | model | StrOutputParser()
# --- Orchestrator: wires them together ---
def run_multi_agent_pipeline(question: str) -> dict:
    """Answer *question* in two stages: research first, then write-up.

    The research executor's ``output`` is passed unchanged to the writing
    chain as ``research``.

    Returns:
        A dict with the original ``question``, the ``raw_research`` text, the
        ``clinical_summary`` from the writing chain, and ``tool_calls``: the
        number of entries under ``intermediate_steps`` in the research result
        (0 when that key is absent).
    """
    # Stage 1: gather facts with the tool-using agent.
    research_payload = {"input": question, "chat_history": []}
    research_result = research_executor.invoke(research_payload)
    findings = research_result["output"]

    # Stage 2: turn the findings into the final write-up.
    summary = writing_chain.invoke({"research": findings})

    steps_taken = research_result.get("intermediate_steps", [])
    return {
        "question": question,
        "raw_research": findings,
        "clinical_summary": summary,
        "tool_calls": len(steps_taken),
    }
result = run_multi_agent_pipeline(
"What are the risks of combining warfarin and aspirin post-MI?"
)
print(result["clinical_summary"])
Why sequential over peer-to-peer:
- Deterministic flow — easy to test each agent independently
- No circular dependencies or deadlock risk
- Each agent has a single responsibility
- Failures are isolated and easy to retry at the failed stage
When to use parallel agents:
import asyncio
async def parallel_research(drugs: list[str]) -> list[dict]:
    """Run one research query per drug concurrently.

    Results come back in the same order as *drugs*. An exception raised by
    any query propagates out of the ``gather`` call.
    """
    pending = []
    for drug in drugs:
        payload = {"input": f"Research {drug}", "chat_history": []}
        pending.append(research_executor.ainvoke(payload))
    results = await asyncio.gather(*pending)
    return results
# Use parallel when: independent queries, no shared state, latency matters
Q4: When would you choose ReAct over a tool calling agent, and vice versa?
Answer:
The decision comes down to model support, reliability needs, and debuggability requirements.
# Tool Calling Agent — use by default with GPT-4o, Claude, Gemini
from langchain.agents import create_tool_calling_agent
# Tool-calling agent built from the shared `tools` and `prompt`, then wrapped
# in an executor that keeps every AgentExecutor default.
tc_agent = create_tool_calling_agent(
    ChatOpenAI(model="gpt-4o", temperature=0),
    tools,
    prompt,  # Standard {agent_scratchpad} placeholder
)
tc_executor = AgentExecutor(agent=tc_agent, tools=tools)
# Advantages:
# - Structured JSON tool calls — no parser failures
# - Parallel tool execution in one API response
# - Pydantic schema validation on tool inputs
# - Less prompt sensitivity
# ReAct Agent — use when model doesn't support tool calling
from langchain.agents import create_react_agent
from langchain import hub
# The ReAct prompt is pulled from the hub at import time.
react_prompt = hub.pull("hwchase17/react")
# NOTE(review): ReAct hands each tool its "Action Input" as one text string.
# Tools in `tools` that take several arguments (check_drug_interaction,
# format_clinical_summary) may fail input validation under this agent —
# confirm before relying on it.
react_agent = create_react_agent(
    ChatOpenAI(model="gpt-4o", temperature=0),
    tools,
    react_prompt,  # MUST use hwchase17/react format
)
react_executor = AgentExecutor(
    agent=react_agent,
    tools=tools,
    handle_parsing_errors=True,  # Critical: text parser can fail
)
# Advantages:
# - Works with any model (open-source, fine-tuned, older GPT)
# - Explicit Thought/Action reasoning visible in logs
# - Easier to debug reasoning errors
# - Useful for audit trails where reasoning transparency matters
# Hybrid: use ReAct for debugging, switch to tool calling for production
import os
def create_agent(debug_mode: bool = False):
    """Build a ReAct or tool-calling executor depending on mode and model.

    Args:
        debug_mode: When true, always build the verbose ReAct executor.

    Returns:
        An AgentExecutor. ReAct (verbose, with parsing-error handling) is used
        in debug mode or when the MODEL_NAME environment variable names a
        model that ``supports_tool_calling`` rejects; otherwise a tool-calling
        executor built from the module-level ``prompt`` is returned.
    """
    if debug_mode or not supports_tool_calling(os.getenv("MODEL_NAME")):
        # This local must not be called `prompt`: assigning to that name
        # anywhere in the function makes it function-local, and the
        # tool-calling branch below would then fail with UnboundLocalError
        # instead of reading the module-level prompt.
        react_prompt = hub.pull("hwchase17/react")
        agent = create_react_agent(model, tools, react_prompt)
        return AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True, verbose=True)

    agent = create_tool_calling_agent(model, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools, verbose=False)
def supports_tool_calling(model_name: Optional[str]) -> bool:
    """Return True when *model_name* contains a known tool-calling family marker.

    Matching is by case-insensitive substring, so versioned names such as
    "gpt-4o-2024-08-06" or "GPT-4o-mini" are accepted. ``None`` and the empty
    string (e.g. an unset environment variable) return False.
    """
    supported = ("gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "claude-3", "claude-sonnet", "gemini")
    normalized = (model_name or "").lower()
    return any(marker in normalized for marker in supported)


# Comparison:
| Factor | Tool Calling | ReAct |
|---|---|---|
| Reliability | High (structured JSON) | Lower (text parsing can fail) |
| Model requirement | Tool calling API support required | Any model |
| Parallel tools | Yes | No |
| Reasoning visibility | Low (implicit) | High (Thought/Action explicit) |
| Prompt sensitivity | Low | High (depends on exact format) |
| Best for | Production systems | Debugging, open-source models |
Q5: Design a production clinical agent system. What are the non-negotiables?
Answer:
A production clinical agent has four non-negotiable properties: auditability, cost control, safety fallbacks, and latency guarantees.
from langchain_core.runnables import RunnableConfig
from langchain_core.callbacks import BaseCallbackHandler
import logging
import time
logger = logging.getLogger("clinical_agent")
# --- 1. Audit Callback (every tool call logged) ---
class ClinicalAuditCallback(BaseCallbackHandler):
    """Record every tool call of a run for audit purposes.

    Each tool start appends an entry (tool name, input, timestamp, session and
    user ids) to ``tool_calls`` and logs it. When the tool finishes, the entry
    gains ``output_chars``; when it fails, the entry gains ``error`` with the
    exception class name.

    Tool events are only delivered to handlers inherited by child runs, so
    pass the handler at call time (``config={"callbacks": [handler]}``) rather
    than relying on a constructor ``callbacks=[...]`` argument alone.
    """

    def __init__(self, session_id: str, user_id: str):
        super().__init__()
        self.session_id = session_id
        self.user_id = user_id
        self.tool_calls: list = []
        # run_id -> entry for calls that have started but not yet finished, so
        # an end/error event updates the call it belongs to even when several
        # tool runs overlap.
        self._open_calls: dict = {}

    def on_tool_start(self, serialized, input_str, *, run_id=None, **kwargs):
        """Append and log an audit entry for the tool that is starting."""
        entry = {
            "tool": (serialized or {}).get("name"),
            "input": input_str,
            "timestamp": time.time(),
            "session_id": self.session_id,
            "user_id": self.user_id,
        }
        self.tool_calls.append(entry)
        if run_id is not None:
            self._open_calls[run_id] = entry
        logger.info("tool_call", extra=entry)

    def on_tool_end(self, output, *, run_id=None, **kwargs):
        """Attach the output size to the matching audit entry."""
        entry = self._open_calls.pop(run_id, None)
        if entry is None and self.tool_calls:
            # No run id to correlate with: fall back to the latest call.
            entry = self.tool_calls[-1]
        if entry is not None:
            entry["output_chars"] = len(str(output))

    def on_tool_error(self, error, *, run_id=None, **kwargs):
        """Mark the matching audit entry as failed."""
        entry = self._open_calls.pop(run_id, None)
        if entry is not None:
            entry["error"] = type(error).__name__
# --- 2. Safety validator (never pass raw clinical output without checks) ---
DANGER_KEYWORDS = ["overdose", "lethal dose", "maximum fatal", "how to get high"]


def validate_query(query: str) -> str:
    """Return *query* unchanged if it passes the safety gate.

    Raises:
        ValueError: if the lower-cased query contains any entry of
            DANGER_KEYWORDS (the first entry in list order is reported), or,
            failing that, if the query is longer than 2000 characters.
    """
    normalized = query.lower()
    flagged = next((kw for kw in DANGER_KEYWORDS if kw in normalized), None)
    if flagged is not None:
        raise ValueError(f"Query flagged for safety review: contains '{flagged}'")
    if len(query) > 2000:
        raise ValueError("Query too long — maximum 2000 characters")
    return query
# --- 3. Production executor factory ---
def create_clinical_agent(
    session_id: str,
    user_id: str,
) -> tuple[AgentExecutor, ClinicalAuditCallback]:
    """Build a guarded executor together with its audit handler.

    Args:
        session_id: Session identifier stored on every audit entry.
        user_id: User identifier stored on every audit entry.

    Returns:
        ``(executor, audit_callback)``. The handler is returned so the caller
        can read ``audit_callback.tool_calls`` after the run and can also pass
        it in the invoke config, which is what makes tool-level events reach
        it.
    """
    audit_callback = ClinicalAuditCallback(session_id, user_id)
    executor = AgentExecutor(
        agent=create_tool_calling_agent(
            ChatOpenAI(model="gpt-4o", temperature=0),
            tools,
            prompt,
        ),
        tools=tools,
        max_iterations=6,          # Hard cap — clinical queries rarely need more
        max_execution_time=25.0,   # 25s SLA
        handle_parsing_errors=True,
        # Runnable agents (create_tool_calling_agent) only support "force";
        # "generate" raises ValueError when a limit is hit instead of
        # returning a fallback answer.
        early_stopping_method="force",
        return_intermediate_steps=True,
        callbacks=[audit_callback],
        verbose=False,
    )
    return executor, audit_callback
# --- 4. Invocation with all guardrails ---
def clinical_query(
    question: str,
    session_id: str,
    user_id: str,
) -> dict:
    """Run one clinical question through the safety gate and the agent.

    Args:
        question: The user's question; checked by ``validate_query`` first.
        session_id: Session identifier used for logging and run metadata.
        user_id: User identifier used for auditing and run metadata.

    Returns:
        A dict that always contains ``answer``, ``success`` and
        ``disclaimer``. On success it also has ``tool_calls`` and
        ``latency_ms``. On failure it has ``retriable``: False when the query
        was rejected by the safety gate, True when the agent run failed.
        No exception escapes this function.
    """
    disclaimer = "This output is for educational purposes only. Always verify with current clinical references."
    start = time.perf_counter()

    # Safety gate. Only validation errors are treated as rejections; a
    # ValueError raised later inside the agent run is an execution failure
    # and must not be reported to the user as a flagged query.
    try:
        question = validate_query(question)
    except ValueError as e:
        logger.warning("query_rejected", extra={"reason": str(e), "session_id": session_id})
        return {
            "answer": str(e),
            "success": False,
            "retriable": False,
            "disclaimer": disclaimer,
        }

    try:
        executor, audit = create_clinical_agent(session_id, user_id)
        result = executor.invoke(
            {"input": question, "chat_history": []},
            config=RunnableConfig(
                # Request-time callbacks are inherited by the tool runs, so
                # the audit handler actually receives tool start/end events.
                callbacks=[audit],
                tags=["clinical", "production"],
                metadata={"session_id": session_id, "user_id": user_id},
            ),
        )
    except Exception as e:
        # LLM or tool failure — safe fallback; internals are logged, never
        # echoed back to the user.
        logger.exception("agent_error", extra={"error": str(e), "session_id": session_id})
        return {
            "answer": "I was unable to complete this query. Please consult a pharmacist or clinical reference directly.",
            "success": False,
            "retriable": True,
            "disclaimer": disclaimer,
        }

    latency_ms = round((time.perf_counter() - start) * 1000)
    tool_call_count = len(audit.tool_calls)
    logger.info("agent_success", extra={
        "session_id": session_id,
        "latency_ms": latency_ms,
        "tool_calls": tool_call_count,
    })
    return {
        "answer": result["output"],
        "tool_calls": tool_call_count,
        "latency_ms": latency_ms,
        "success": True,
        "disclaimer": disclaimer,
    }


# Production non-negotiables checklist:
| Requirement | Implementation |
|---|---|
| Audit every tool call | ClinicalAuditCallback logs tool name, input, session, user |
| Safety input filter | validate_query() blocks harmful patterns before any LLM call |
| Hard iteration limit | max_iterations=6 — never unbounded |
| Wall-clock timeout | max_execution_time=25.0 — SLA enforcement |
| Graceful degradation | early_stopping_method="generate" + exception fallback message |
| Cost control | Per-session callbacks record each tool call and its output size; extend them with token-usage tracking and alert on anomalies |
| Disclaimer on output | Appended to every response — never omit in clinical context |
| Structured logging | JSON logs with session_id for tracing and incident investigation |