AI Systems · Advanced
Function Calling Internals: How Tool Use Works
How LLM function calling works under the hood: JSON schema injection, token patterns, multi-tool orchestration, error recovery, and building reliable tool-using agents.
Asma Hafeez Khan · May 16, 2026 · 7 min read
LLM · Function Calling · Tool Use · Agents · API
What Happens Under the Hood
Function calling is not a special capability separate from language generation — it's a prompt engineering technique combined with output parsing and constrained generation. When you pass tool definitions to the API:
- The tool schemas are serialized into the model's context (often into the system prompt)
- The model is fine-tuned to produce structured JSON when it decides to invoke a tool
- The API (or a wrapper) intercepts the structured output and routes it to the calling code
- The tool result is injected back into the conversation as a new message
- The model generates its final response using the tool result
Tool Schemas: JSON Schema Format
Tools are defined using JSON Schema. The model reads these schemas as part of its context:
Python
from openai import OpenAI
import json
# Module-level OpenAI client; the examples in this article call
# client.chat.completions.create on it.
client = OpenAI()
# Tool definitions in the OpenAI "function" tool format. Each "parameters"
# value is a JSON Schema object that the model reads as part of its context,
# so every property carries a description the model can rely on.
CLINICAL_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "lookup_drug_interaction",
            "description": "Look up the severity and management of a drug-drug interaction. Use this when asked about interactions between two specific medications.",
            "parameters": {
                "type": "object",
                "properties": {
                    "drug_a": {
                        "type": "string",
                        "description": "First drug name (generic name preferred)",
                    },
                    "drug_b": {
                        "type": "string",
                        "description": "Second drug name (generic name preferred)",
                    },
                    "patient_context": {
                        "type": "string",
                        "description": "Optional patient context (age, comorbidities, indication)",
                    },
                },
                # patient_context is deliberately left out of "required".
                "required": ["drug_a", "drug_b"],
                "additionalProperties": False,
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "calculate_renal_dose",
            "description": "Calculate the appropriate dose of a drug given the patient's renal function (eGFR).",
            "parameters": {
                "type": "object",
                "properties": {
                    "drug_name": {
                        "type": "string",
                        "description": "Drug name (generic name preferred)",
                    },
                    "standard_dose": {
                        "type": "string",
                        "description": "Normal dose (e.g., '500mg twice daily')",
                    },
                    "egfr": {
                        "type": "number",
                        "description": "eGFR in mL/min/1.73m²",
                    },
                },
                "required": ["drug_name", "standard_dose", "egfr"],
                "additionalProperties": False,
            },
        },
    },
]

# The Full Tool-Calling Loop
Python
def _lookup_drug_interaction(drug_a: str, drug_b: str, patient_context: str = "") -> dict:
    """Return mock interaction data for a pair of drugs.

    The pair is matched case-insensitively and in either order.
    ``patient_context`` is accepted so the call signature matches the tool
    arguments, but this mock does not use it.

    Returns:
        The interaction record, or
        ``{"severity": "unknown", "message": "No data found"}`` when the pair
        is not in the table.
    """
    # Simplified mock — replace with Lexicomp/Micromedex API call.
    # Keys MUST be alphabetically sorted tuples: the lookup key below is
    # sorted, so an unsorted key could never be matched.
    interactions = {
        ("clarithromycin", "warfarin"): {
            "severity": "major",
            "mechanism": "CYP2C9/3A4 inhibition by clarithromycin increases warfarin exposure",
            "effect": "Increased bleeding risk; INR may double or triple",
            "management": "Avoid combination. If unavoidable, reduce warfarin dose by 30-50% and monitor INR every 2-3 days.",
        }
    }
    key = tuple(sorted((drug_a.strip().lower(), drug_b.strip().lower())))
    return interactions.get(key, {"severity": "unknown", "message": "No data found"})


def _calculate_renal_dose(drug_name: str, standard_dose: str, egfr: float) -> dict:
    """Return a mock renal dose recommendation.

    Only metformin has rules here: unchanged at eGFR >= 45, reduced at
    30-44, contraindicated below 30. Any other drug gets ``standard_dose``
    back with a note that no adjustment data was found.
    """
    # Simplified mock — replace with renal dosing database
    if drug_name.lower() != "metformin":
        return {"dose": standard_dose, "notes": "No specific renal adjustment data found"}
    if egfr >= 45:
        return {"dose": standard_dose, "notes": "No adjustment required"}
    if egfr >= 30:
        return {"dose": "Reduce dose, monitor closely", "notes": "eGFR 30-44: use with caution"}
    return {"dose": "Contraindicated", "notes": f"eGFR {egfr} < 30: contraindicated"}


def _dispatch_tool_call(tool_functions: dict, function_name: str, raw_arguments: str) -> dict:
    """Parse one tool call's JSON arguments and run the matching function.

    Failures (malformed JSON, unknown tool, exception inside the tool) are
    returned as ``{"error": ...}`` dicts instead of being raised, so the
    caller can always answer the tool call and let the model recover.
    """
    try:
        arguments = json.loads(raw_arguments)
    except (json.JSONDecodeError, TypeError) as e:
        # The model produced arguments that are not valid JSON.
        return {"error": f"Invalid JSON arguments for '{function_name}': {e}"}
    print(f"Calling tool: {function_name}({arguments})")
    if function_name not in tool_functions:
        return {"error": f"Tool '{function_name}' not found"}
    try:
        return tool_functions[function_name](**arguments)
    except Exception as e:
        # Also covers arguments that are valid JSON but not an object, or
        # that do not match the function's parameters.
        return {"error": str(e)}


def run_clinical_agent(user_query: str, max_iterations: int = 5) -> str:
    """Run an agent that uses clinical tools to answer pharmacy questions.

    Each iteration sends the conversation to the model. If the model asks
    for tools, every requested call is executed and its result is appended
    as a ``role: "tool"`` message; otherwise the model's text is returned.

    Args:
        user_query: The pharmacy question to answer.
        max_iterations: Maximum number of model round-trips.

    Returns:
        The model's final text, or a fixed notice when ``max_iterations``
        round-trips pass without a final answer.
    """
    # Tool implementations (in production, these call real databases)
    tool_functions = {
        "lookup_drug_interaction": _lookup_drug_interaction,
        "calculate_renal_dose": _calculate_renal_dose,
    }
    messages = [
        {"role": "system", "content": "You are a clinical pharmacist. Use the available tools to provide accurate, evidence-based answers."},
        {"role": "user", "content": user_query},
    ]
    for _ in range(max_iterations):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=CLINICAL_TOOLS,
            tool_choice="auto",  # Let model decide when to use tools
            temperature=0,
        )
        message = response.choices[0].message
        if not message.tool_calls:
            # Model gave a final response (no tool calls)
            return message.content
        # Add model's tool call decision to conversation
        messages.append(message)
        # Every tool call must be answered with a message carrying its id.
        for tool_call in message.tool_calls:
            result = _dispatch_tool_call(
                tool_functions,
                tool_call.function.name,
                tool_call.function.arguments,
            )
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })
    return "Max iterations reached without a final answer."
# Test the agent: module-level call that sends one sample
# warfarin/clarithromycin question through run_clinical_agent.
result = run_clinical_agent(
    "My patient is on warfarin and needs clarithromycin for pneumonia. Is this safe?"
)
print(result)
Parallel Tool Calls
GPT-4o can request several tool calls in a single response, and the calling code can execute them concurrently:
Python
def handle_parallel_tool_calls(user_query: str) -> str:
    """Answer a query, running the first turn's tool calls in a thread pool.

    Sends the query once with the clinical tools available. When the model
    answers directly, that text is returned. Otherwise each requested call
    is handed to a worker thread, the workers' results are appended as
    ``role: "tool"`` messages, and a second request (without tools) produces
    the final text.

    Note: the worker is a placeholder that returns a fixed mock payload.
    """
    from concurrent.futures import ThreadPoolExecutor

    conversation = [{"role": "user", "content": user_query}]
    first_turn = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        tools=CLINICAL_TOOLS,
        tool_choice="auto",
        temperature=0,
    )
    assistant_message = first_turn.choices[0].message
    requested_calls = assistant_message.tool_calls
    if not requested_calls:
        return assistant_message.content

    conversation.append(assistant_message)

    def run_one_call(tool_call):
        # Placeholder worker: the name and parsed arguments are read but the
        # mock does not use them yet.
        tool_name = tool_call.function.name
        parsed_args = json.loads(tool_call.function.arguments)
        # ... execute and return result
        return tool_call.id, json.dumps({"result": "mock_result"})

    # One worker per requested call; map() yields results in request order.
    with ThreadPoolExecutor(max_workers=len(requested_calls)) as pool:
        outcomes = list(pool.map(run_one_call, requested_calls))

    # Add all results to conversation
    conversation.extend(
        {"role": "tool", "tool_call_id": call_id, "content": payload}
        for call_id, payload in outcomes
    )

    # Get final response
    closing_turn = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0,
    )
    return closing_turn.choices[0].message.content

# Forcing Tool Use vs Auto
Python
# Keyword-argument snippets for the `tool_choice` request parameter.

# "auto": the model decides for itself whether to call a tool.
tool_choice_auto = dict(tool_choice="auto")

# Named function: the model MUST call exactly this tool.
# (Despite the variable name, this is not the "required" mode below.)
tool_choice_required = dict(
    tool_choice=dict(
        type="function",
        function=dict(name="lookup_drug_interaction"),
    ),
)

# "none": the model MUST answer in text and may not call tools.
tool_choice_none = dict(tool_choice="none")

# "required": the model MUST call at least one tool, of its own choosing.
tool_choice_any = dict(tool_choice="required")
# Example: force structured extraction
def extract_medications_structured(clinical_note: str) -> list[dict]:
    """Extract medications from a clinical note as structured records.

    A single tool, ``record_medications``, is offered and forced through
    ``tool_choice``, so the model's output arrives as that tool's JSON
    arguments. The ``medications`` array from those arguments is returned.
    """
    tool_name = "record_medications"

    # Schema of one medication entry; only name and status are mandatory.
    medication_fields = {
        "name": {"type": "string"},
        "dose": {"type": "string"},
        "frequency": {"type": "string"},
        "route": {"type": "string"},
        "status": {"type": "string", "enum": ["active", "discontinued", "prn"]},
    }
    medication_schema = {
        "type": "object",
        "properties": medication_fields,
        "required": ["name", "status"],
    }
    extraction_tool = {
        "type": "function",
        "function": {
            "name": tool_name,
            "description": "Record extracted medications from clinical text",
            "parameters": {
                "type": "object",
                "properties": {
                    "medications": {"type": "array", "items": medication_schema},
                },
                "required": ["medications"],
            },
        },
    }

    prompt = f"Extract all medications from:\n\n{clinical_note}"
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        tools=[extraction_tool],
        tool_choice={"type": "function", "function": {"name": tool_name}},
        temperature=0,
    )

    # The forced tool choice means the first tool call is record_medications.
    forced_call = response.choices[0].message.tool_calls[0]
    return json.loads(forced_call.function.arguments)["medications"]

# Error Recovery in Tool Calling
Python
def _tool_message(tool_call_id: str, payload: dict) -> dict:
    """Build the ``role: "tool"`` message that answers one tool call."""
    return {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": json.dumps(payload),
    }


def _fallback_prompt(errors_seen: list) -> str:
    """Choose the user message that asks the model to finish without tools.

    The "tools have been unavailable" wording is used only when a tool call
    actually failed; if every call succeeded, the model is told to answer
    from the results it already has.
    """
    if errors_seen:
        return "The tools have been unavailable. Please answer based on your knowledge and note any limitations."
    return "No further tool calls are allowed. Please answer now using the tool results above and note any limitations."


def resilient_tool_call(user_query: str, max_retries: int = 2) -> str:
    """Tool-calling agent with error recovery.

    Runs up to ``max_retries + 1`` model round-trips. Tool failures are not
    raised; each becomes an error payload in the tool message so the model
    can retry with different arguments. If the model is still requesting
    tools after the last round-trip, one final request with tool calls
    disabled produces the answer.

    Args:
        user_query: The question to answer.
        max_retries: Extra round-trips allowed after the first one.

    Returns:
        The model's final text.
    """
    # NOTE(review): execute_tool(name, args) is not defined in this section
    # of the file; it is assumed to exist at module level. If it is missing,
    # the NameError is caught below and reported to the model as a tool
    # error — confirm the helper exists.
    messages = [{"role": "user", "content": user_query}]
    errors_seen = []
    for _ in range(max_retries + 1):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=CLINICAL_TOOLS,
            tool_choice="auto",
            temperature=0,
        )
        message = response.choices[0].message
        if not message.tool_calls:
            return message.content
        messages.append(message)
        # Every tool call gets exactly one reply, success or failure.
        for tool_call in message.tool_calls:
            name = tool_call.function.name
            try:
                args = json.loads(tool_call.function.arguments)
                reply = _tool_message(tool_call.id, execute_tool(name, args))
            except json.JSONDecodeError as e:
                # Model generated invalid JSON for arguments
                error_msg = f"Tool call failed: invalid JSON in arguments for {name}: {e}"
                errors_seen.append(error_msg)
                reply = _tool_message(tool_call.id, {"error": error_msg})
            except Exception as e:
                errors_seen.append(str(e))
                reply = _tool_message(
                    tool_call.id,
                    {"error": str(e), "hint": "Try different parameters"},
                )
            messages.append(reply)
    # Round-trips exhausted: ask the model to answer without further tools.
    messages.append({"role": "user", "content": _fallback_prompt(errors_seen)})
    fallback = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        # tool_choice is only accepted when tools are supplied, so the tool
        # list is passed even though calling is disabled.
        tools=CLINICAL_TOOLS,
        tool_choice="none",  # Prevent further tool calls
        temperature=0,
    )
    return fallback.choices[0].message.content

# Anthropic Tool Use (Different API Format)
Python
import anthropic
# Module-level Anthropic client; run_claude_agent calls
# claude_client.messages.create on it.
claude_client = anthropic.Anthropic()
# Tool list in the shape passed to claude_client.messages.create: flat entries
# whose JSON Schema sits under "input_schema".
ANTHROPIC_TOOLS = [
    dict(
        name="lookup_drug_interaction",
        description="Look up drug interaction data between two medications",
        input_schema={
            "type": "object",
            # Both drug arguments are plain strings that differ only in label.
            "properties": {
                field: {"type": "string", "description": label}
                for field, label in (
                    ("drug_a", "First drug"),
                    ("drug_b", "Second drug"),
                )
            },
            "required": ["drug_a", "drug_b"],
        },
    ),
]
def run_claude_agent(user_query: str, max_iterations: int = 10) -> str:
    """Tool-calling agent using Anthropic's API.

    Repeats request/tool-execution rounds until the model stops asking for
    tools. The assistant turn is echoed back with its full content blocks,
    and all tool results for a turn are returned together in a single
    ``role: "user"`` message of ``tool_result`` blocks.

    Args:
        user_query: The question to answer.
        max_iterations: Upper bound on model round-trips, so a model that
            keeps requesting tools cannot loop forever.

    Returns:
        The first text block of an ``end_turn`` response, or
        ``"No response generated"`` when the model ends without text, stops
        for any other reason, or the iteration budget runs out.
    """
    # NOTE(review): execute_tool(name, args) is not defined in this section
    # of the file; it is assumed to exist at module level — confirm.
    messages = [{"role": "user", "content": user_query}]
    for _ in range(max_iterations):
        response = claude_client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            tools=ANTHROPIC_TOOLS,
            messages=messages,
        )
        if response.stop_reason != "tool_use":
            if response.stop_reason == "end_turn":
                # Extract text response
                for block in response.content:
                    if block.type == "text":
                        return block.text
            # Any non-tool stop without returnable text ends the run;
            # re-sending the unchanged conversation would loop forever.
            break
        # Process tool calls
        messages.append({"role": "assistant", "content": response.content})
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": json.dumps(execute_tool(block.name, block.input)),
            }
            for block in response.content
            if block.type == "tool_use"
        ]
        messages.append({"role": "user", "content": tool_results})
    return "No response generated"

# Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.