Function Calling: LLMs as Orchestrators

How Function Calling Works

Function calling (also called "tool use") lets the model decide when to call a function and with what arguments. The model doesn't execute the function — it returns structured JSON describing the call; your code executes it and feeds the result back.

1. You send: message + list of function schemas
2. Model decides: "I need to call drug_database(name='warfarin')"
3. Model returns: tool_call JSON instead of text
4. Your code executes: drug_database('warfarin') → result
5. You send: tool result back to the model
6. Model generates: final answer using the tool result

Defining Tools

Python

from openai import OpenAI
import json

client = OpenAI()

# Tool definitions: one JSON-schema entry per callable tool. Each entry tells
# the model the function's name, when to use it, and which arguments it takes.
# Every "parameters" object sets "additionalProperties": False to declare that
# no undeclared arguments are expected; the Python implementations are invoked
# with **kwargs, so an unexpected key would raise TypeError.
DRUG_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_drug_interactions",
            "description": "Look up drug-drug interactions between a pair of medications. Returns severity, mechanism, and management recommendations.",
            "parameters": {
                "type": "object",
                "properties": {
                    "drug_a": {
                        "type": "string",
                        "description": "First drug name (generic preferred)",
                    },
                    "drug_b": {
                        "type": "string",
                        "description": "Second drug name (generic preferred)",
                    },
                },
                "required": ["drug_a", "drug_b"],
                "additionalProperties": False,
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_drug_dosing",
            "description": "Get standard dosing information for a drug, including renal and hepatic dose adjustments.",
            "parameters": {
                "type": "object",
                "properties": {
                    "drug_name": {
                        "type": "string",
                        "description": "Generic drug name",
                    },
                    "indication": {
                        "type": "string",
                        "description": "The clinical indication (e.g., 'atrial fibrillation', 'hypertension')",
                    },
                    "egfr": {
                        "type": "number",
                        "description": "Patient's eGFR in mL/min/1.73m² (optional, for renal adjustment)",
                    },
                },
                # Only the drug name is mandatory; indication and egfr are optional.
                "required": ["drug_name"],
                "additionalProperties": False,
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "calculate_creatinine_clearance",
            "description": "Calculate creatinine clearance using the Cockcroft-Gault formula.",
            "parameters": {
                "type": "object",
                "properties": {
                    "age": {"type": "number", "description": "Patient age in years"},
                    "weight_kg": {"type": "number", "description": "Patient weight in kilograms"},
                    "creatinine_mg_dl": {"type": "number", "description": "Serum creatinine in mg/dL"},
                    "sex": {"type": "string", "enum": ["male", "female"], "description": "Patient biological sex"},
                },
                "required": ["age", "weight_kg", "creatinine_mg_dl", "sex"],
                "additionalProperties": False,
            },
        },
    },
]

# Actual function implementations
def get_drug_interactions(drug_a: str, drug_b: str) -> dict:
    """Simulated drug interaction lookup.

    The lookup is case-insensitive, ignores surrounding whitespace, and does
    not depend on the order in which the two drugs are given.

    Args:
        drug_a: Name of the first drug.
        drug_b: Name of the second drug.

    Returns:
        A dict with "severity", "mechanism", "effect" and "management" for a
        known pair; otherwise a dict with "severity": "unknown" and a
        "message" naming the pair that was requested.
    """
    # Keys are frozensets so a pair matches regardless of argument order.
    # (Sorting the names into a tuple only works if the table keys are
    # themselves stored in sorted order, which is easy to get wrong.)
    interactions_db = {
        frozenset({"warfarin", "clarithromycin"}): {
            "severity": "major",
            "mechanism": "Clarithromycin inhibits CYP3A4 and CYP2C9, reducing warfarin metabolism",
            "effect": "Increased warfarin levels and bleeding risk",
            "management": "Monitor INR within 3-5 days; consider warfarin dose reduction of 25-50%",
        },
        frozenset({"warfarin", "aspirin"}): {
            "severity": "major",
            "mechanism": "Pharmacodynamic: additive bleeding risk via platelet inhibition",
            "effect": "Significantly increased bleeding risk",
            "management": "Combination sometimes appropriate (e.g., mechanical valve + AFib); use lowest effective aspirin dose; monitor closely",
        },
    }

    key = frozenset({drug_a.strip().lower(), drug_b.strip().lower()})
    return interactions_db.get(key, {"severity": "unknown", "message": f"No interaction data for {drug_a} + {drug_b}"})

def get_drug_dosing(drug_name: str, indication: str = None, egfr: float = None) -> dict:
    """Return simulated dosing information for a drug.

    The drug name is matched case-insensitively against a small in-memory
    table. ``indication`` and ``egfr`` are accepted so the signature matches
    the tool schema, but this simulation does not use them.

    Returns:
        The dosing record for a known drug, or a dict with a single
        "message" key when the drug is not in the table.
    """
    warfarin_record = {
        "standard_dose": "2-10mg daily (highly individualized; titrate to INR)",
        "monitoring": "INR 2-3 times weekly until stable, then every 4-12 weeks",
        "renal_note": "No dose adjustment required for renal impairment, but INR monitoring may be more variable",
    }
    records_by_drug = {"warfarin": warfarin_record}

    lookup_name = drug_name.lower()
    if lookup_name in records_by_drug:
        return records_by_drug[lookup_name]
    return {"message": f"No dosing data for {drug_name}"}

def calculate_creatinine_clearance(age: float, weight_kg: float, creatinine_mg_dl: float, sex: str) -> dict:
    """Estimate creatinine clearance with the Cockcroft-Gault formula.

    Computes ``((140 - age) * weight_kg) / (72 * creatinine_mg_dl)`` and
    multiplies by 0.85 for female patients.

    Args:
        age: Patient age in years.
        weight_kg: Patient weight in kilograms.
        creatinine_mg_dl: Serum creatinine in mg/dL; must be positive.
        sex: "male" or "female" (case-insensitive, surrounding whitespace
            ignored).

    Returns:
        A dict with the clearance rounded to one decimal place
        ("crcl_ml_per_min"), the formula name, and a note on whether the
        female adjustment was applied.

    Raises:
        ValueError: If ``sex`` is not "male"/"female" or if
            ``creatinine_mg_dl`` is not positive.
    """
    # Normalize first: a differently-cased "Female" must not silently skip
    # the 0.85 factor and produce an overestimated clearance.
    normalized_sex = sex.strip().lower()
    if normalized_sex not in ("male", "female"):
        raise ValueError(f"sex must be 'male' or 'female', got {sex!r}")
    if creatinine_mg_dl <= 0:
        # Zero would divide by zero; a negative value yields a meaningless result.
        raise ValueError(f"creatinine_mg_dl must be positive, got {creatinine_mg_dl!r}")

    is_female = normalized_sex == "female"
    cockcroft_gault = ((140 - age) * weight_kg) / (72 * creatinine_mg_dl)
    if is_female:
        cockcroft_gault *= 0.85
    return {
        "crcl_ml_per_min": round(cockcroft_gault, 1),
        "formula": "Cockcroft-Gault",
        "sex_adjustment": "×0.85 applied for female" if is_female else "No adjustment",
    }

# Dispatch table mapping each tool name the model may request to the Python
# callable that implements it. Keys are taken from the functions' own names,
# which are the same names used in the tool schemas.
TOOL_IMPLEMENTATIONS = {
    tool.__name__: tool
    for tool in (
        get_drug_interactions,
        get_drug_dosing,
        calculate_creatinine_clearance,
    )
}

The Tool Use Loop

Python

def _execute_tool_call(tool_call) -> dict:
    """Run one requested tool call and return its result, or an error dict on failure."""
    function_name = tool_call.function.name

    try:
        function_args = json.loads(tool_call.function.arguments)
    except (json.JSONDecodeError, TypeError) as exc:
        # TypeError covers a missing (None) arguments payload.
        return {"error": f"Invalid arguments JSON: {exc}"}

    print(f"  → Calling {function_name}({function_args})")

    if function_name not in TOOL_IMPLEMENTATIONS:
        return {"error": f"Unknown function: {function_name}"}
    if not isinstance(function_args, dict):
        return {"error": f"Arguments for {function_name} must be a JSON object"}

    try:
        return TOOL_IMPLEMENTATIONS[function_name](**function_args)
    except Exception as exc:
        # Boundary handler: report the failure to the model instead of
        # aborting the whole conversation.
        return {"error": f"Tool execution failed: {exc}"}


def run_with_tools(user_message: str, max_iterations: int = 5) -> str:
    """Run a conversation with tool use until the model generates a final answer.

    Each iteration sends the conversation to the model. If the reply contains
    tool calls, every call is executed and its result (or an error
    description) is appended as a "tool" message before the next iteration.

    Args:
        user_message: The user's question.
        max_iterations: Maximum number of model round-trips before giving up.

    Returns:
        The model's final text answer (an empty string if the final message
        has no text content), or "Maximum iterations reached." if the model
        is still requesting tools after ``max_iterations`` round-trips.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a clinical pharmacist assistant. Use the available tools to look up accurate drug information before answering.",
        },
        {"role": "user", "content": user_message},
    ]

    for _ in range(max_iterations):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=DRUG_TOOLS,
            tool_choice="auto",  # Model decides when to use tools
        )

        message = response.choices[0].message
        messages.append(message)

        # The presence of tool calls, not finish_reason, decides whether the
        # model is done: returning while tool calls are pending would leave
        # them unanswered.
        if not message.tool_calls:
            return message.content or ""

        # Every tool call gets exactly one "tool" reply, even when it fails,
        # so the conversation stays well-formed for the next request.
        for tool_call in message.tool_calls:
            result = _execute_tool_call(tool_call)
            messages.append({
                "role": "tool",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id,
            })

    return "Maximum iterations reached."

# Example: a multi-step clinical question that calls for several tool lookups
# (renal function, the warfarin/clarithromycin interaction, dosing).
answer = run_with_tools(
    "A 68-year-old female patient weighing 65kg with a creatinine of 1.4 mg/dL is on warfarin 5mg daily for AFib. She's starting clarithromycin for pneumonia. What should we do?"
)
# Heading and answer are emitted on separate lines by a single print call.
print("\n=== Final Answer ===", answer, sep="\n")

Parallel Tool Calls

OpenAI models can request multiple tool calls in a single response when the pieces of information they need are independent of each other:

Python

def run_with_parallel_tools(user_message: str) -> str:
    """Handle parallel tool calls efficiently.

    Makes one model request with tools enabled, executes every requested tool
    call concurrently in a thread pool, appends all results to the
    conversation, and makes a second request (without tools) for the final
    answer.

    Args:
        user_message: The user's question.

    Returns:
        The model's text answer. If the first reply contains no tool calls
        its content is returned directly. An empty string is returned when
        the relevant message has no text content.
    """
    messages = [
        {"role": "system", "content": "You are a clinical pharmacist. Use tools to gather information."},
        {"role": "user", "content": user_message},
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=DRUG_TOOLS,
        tool_choice="auto",
        parallel_tool_calls=True,  # Allow multiple simultaneous tool calls
    )

    message = response.choices[0].message
    messages.append(message)

    if not message.tool_calls:
        return message.content or ""

    print(f"Model requested {len(message.tool_calls)} parallel tool call(s)")

    # Execute all calls (can be done in parallel with threading)
    import concurrent.futures

    def execute_tool(tool_call):
        """Run one tool call; always return (tool_call_id, JSON string)."""
        fn_name = tool_call.function.name
        try:
            fn_args = json.loads(tool_call.function.arguments)
            if fn_name not in TOOL_IMPLEMENTATIONS:
                result = {"error": f"Unknown function: {fn_name}"}
            elif not isinstance(fn_args, dict):
                result = {"error": f"Arguments for {fn_name} must be a JSON object"}
            else:
                result = TOOL_IMPLEMENTATIONS[fn_name](**fn_args)
            payload = json.dumps(result)
        except Exception as exc:
            # An exception escaping here would propagate out of executor.map
            # and discard the results of every other call; report it to the
            # model as this call's result instead.
            payload = json.dumps({"error": f"Tool execution failed: {exc}"})
        return tool_call.id, payload

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in the same order as message.tool_calls.
        results = list(executor.map(execute_tool, message.tool_calls))

    # Add all results, one "tool" message per tool_call_id
    for tool_call_id, result_json in results:
        messages.append({"role": "tool", "content": result_json, "tool_call_id": tool_call_id})

    # Get final answer (no tools passed, so the model must answer in text)
    final_response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )
    return final_response.choices[0].message.content or ""

Forcing Specific Tool Use

Python

# Force the model to always use a specific tool: tool_choice is given as a
# dict naming the function instead of the string "auto".
# NOTE(review): with a forced function the reply presumably carries the call
# in message.tool_calls rather than text in message.content — confirm against
# the API reference before reading the response.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Check warfarin + clarithromycin interaction"}],
    tools=DRUG_TOOLS,
    tool_choice={
        "type": "function",
        "function": {"name": "get_drug_interactions"}
    },  # Force this specific function
)

# Disable tool use entirely for this call: the tools are still sent in the
# request, but tool_choice="none" tells the model not to call any of them.
response_no_tools = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Summarize what you know about warfarin from your training."}],
    tools=DRUG_TOOLS,
    tool_choice="none",  # Model cannot call tools even though they're defined
)

Tool Schema Design

Well-designed schemas reduce errors:

Python

# BAD: vague parameter descriptions
# The name and description do not say what is looked up or when to use the
# tool, the single "query" parameter has no description, and there is no
# "required" list, so nothing tells the model that an argument is mandatory.
bad_tool = {
    "name": "lookup",
    "description": "Look something up",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string"},
        },
    },
}

# GOOD: specific, unambiguous schema
# The description states both what the tool does and when to use it, each
# parameter is described (with an example of the expected form), both
# parameters are listed as required, and extra fields are disallowed.
# NOTE(review): unlike the entries in DRUG_TOOLS, this dict is not wrapped in
# {"type": "function", "function": ...}; it presumably shows only the inner
# function object and would need that wrapper before being passed as a tool.
good_tool = {
    "name": "get_drug_interactions",
    "description": "Look up clinically significant drug-drug interactions. Use this when asked about safety of combining medications or when analyzing a polypharmacy patient.",
    "parameters": {
        "type": "object",
        "properties": {
            "drug_a": {
                "type": "string",
                "description": "Generic name of the first drug (e.g., 'warfarin', not 'Coumadin')",
            },
            "drug_b": {
                "type": "string",
                "description": "Generic name of the second drug",
            },
        },
        "required": ["drug_a", "drug_b"],
        "additionalProperties": False,  # Prevents model from adding extra fields
    },
}

Error Handling in Tool Responses

Python

def safe_tool_execute(tool_call) -> str:
    """Execute a tool call with comprehensive error handling.

    Never raises for a bad tool call: every failure is converted into a JSON
    string of the form ``{"error": "..."}`` that can be sent back to the
    model as the tool result.

    Args:
        tool_call: An object exposing ``function.name`` and
            ``function.arguments`` (a JSON-encoded string of keyword
            arguments).

    Returns:
        A JSON string containing either the tool's result or an error
        description.
    """
    fn_name = tool_call.function.name

    try:
        fn_args = json.loads(tool_call.function.arguments)
    except (json.JSONDecodeError, TypeError) as e:
        # json.loads raises TypeError, not JSONDecodeError, when the
        # arguments payload is missing (None).
        return json.dumps({"error": f"Invalid arguments JSON: {e}"})

    if not isinstance(fn_args, dict):
        # Valid JSON that is not an object (list, number, null, ...) cannot
        # be expanded into keyword arguments.
        return json.dumps({"error": f"Arguments for {fn_name} must be a JSON object, got {type(fn_args).__name__}"})

    if fn_name not in TOOL_IMPLEMENTATIONS:
        return json.dumps({"error": f"Unknown function: {fn_name}. Available: {list(TOOL_IMPLEMENTATIONS.keys())}"})

    try:
        result = TOOL_IMPLEMENTATIONS[fn_name](**fn_args)
    except TypeError as e:
        return json.dumps({"error": f"Wrong arguments for {fn_name}: {e}"})
    except Exception as e:
        return json.dumps({"error": f"Tool execution failed: {str(e)}"})

    # Serialize outside the call's try block so a non-serializable result is
    # not misreported as a wrong-arguments error.
    try:
        return json.dumps(result)
    except (TypeError, ValueError) as e:
        return json.dumps({"error": f"Tool {fn_name} returned a non-serializable result: {e}"})

# Always return a valid JSON string, even on error
# The model will receive the error and can either retry or explain the limitation

Models generally handle tool errors gracefully when the error is described clearly: given a descriptive error message, the model can retry with corrected arguments or tell the user "I was unable to look up X because..." rather than failing silently.

Function Calling: LLMs as Orchestrators

How Function Calling Works

Defining Tools

The Tool Use Loop

Parallel Tool Calls

Forcing Specific Tool Use

Tool Schema Design

Error Handling in Tool Responses

Enjoyed this article?

Leave a comment