LangChain Mastery · Lesson 16 of 33

ConversationBufferMemory: Simple History

ConversationBufferMemory

ConversationBufferMemory stores every conversation turn verbatim. It's the simplest memory type — use it when your conversations are short enough to fit in the context window.

Python

from langchain.memory import ConversationBufferMemory
from langchain_core.messages import HumanMessage, AIMessage

# Buffer memory that returns Message objects under the "chat_history" key
memory = ConversationBufferMemory(
    memory_key="chat_history",   # Prompt variable that receives the history
    input_key="input",           # Where save_context() looks for the user text
    output_key="output",         # Where save_context() looks for the model text
    return_messages=True,        # Message objects instead of one joined string
)

# Record two complete turns by hand: (user text, model text)
turns = [
    (
        "What is warfarin?",
        "Warfarin is an anticoagulant that inhibits vitamin K recycling.",
    ),
    (
        "What is the typical dose for AFib?",
        "For atrial fibrillation, the typical warfarin dose starts at 2-5mg daily.",
    ),
]
for user_text, model_text in turns:
    memory.save_context(
        inputs={"input": user_text},
        outputs={"output": model_text},
    )

# Read the stored history back out
history = memory.load_memory_variables({})
print(history["chat_history"])
# [HumanMessage("What is warfarin?"), AIMessage("..."), HumanMessage("What is the typical dose..."), AIMessage("...")]

# Each saved turn contributes one human and one AI message
print(len(memory.chat_memory.messages))  # 4 (2 human + 2 AI)

Integration with LCEL (Modern Approach)

The modern way to use memory is explicit history management in LCEL:

Python

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage

# Prompt layout: system instructions, then prior turns, then the new question
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a clinical pharmacist. Answer questions about medications."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# Chat model (temperature 0) followed by a parser that yields a plain string
model = ChatOpenAI(temperature=0, model="gpt-4o")
parser = StrOutputParser()

# prompt -> model -> parser
chain = prompt | model | parser

# Manage history manually (explicit — preferred in LCEL)
def chat_with_history(question: str, history: list) -> tuple[str, list]:
    """Run one chat turn against ``chain`` with caller-managed history.

    Args:
        question: The user's message for this turn.
        history: Messages from earlier turns; passed to the prompt as
            ``chat_history``. The list itself is left unmodified.

    Returns:
        ``(response, updated_history)``, where ``updated_history`` is a new
        list holding ``history`` plus this turn's human and AI messages.
    """
    payload = {"question": question, "chat_history": history}
    response = chain.invoke(payload)

    # Build a fresh list rather than extending the caller's list in place
    this_turn = [HumanMessage(content=question), AIMessage(content=response)]
    return response, history + this_turn


# Three-turn conversation: each call's returned history feeds the next call
history = []
r1, history = chat_with_history(question="What is warfarin?", history=history)
r2, history = chat_with_history(question="What dose for AFib?", history=history)
r3, history = chat_with_history(question="What monitoring is needed?", history=history)

# Show the first 80 characters of every reply
for turn, reply in enumerate((r1, r2, r3), start=1):
    print(f"Turn {turn}: {reply[:80]}")
print(f"History length: {len(history)} messages")

Using ConversationChain (Legacy but Common)

The ConversationChain class wraps memory management automatically:

Python

from langchain.chains import ConversationChain

memory = ConversationBufferMemory(return_messages=True)
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Pass verbose=True instead to see memory injection
conversation = ConversationChain(llm=llm, memory=memory, verbose=False)

# The chain reads from and writes to `memory` on every predict() call
r1 = conversation.predict(input="What is warfarin?")
r2 = conversation.predict(input="What dose for AFib?")  # Model knows context
r3 = conversation.predict(input="What are the main interactions?")

# Inspect the stored history: first as one string, then message by message
print(memory.buffer_as_str)
for message in memory.chat_memory.messages:
    speaker = "Bot" if message.type != "human" else "User"
    print(f"{speaker}: {message.content[:60]}")

Session Management: Multiple Users

In production, each user needs their own memory instance:

Python

from typing import Optional

class ClinicalChatbot:
    """Multi-user clinical pharmacist chatbot.

    Every session ID maps to its own message history, so conversations with
    different IDs never share context. Histories live in this object's
    ``sessions`` dict and are trimmed to a rough character budget after
    each turn.
    """

    def __init__(self, max_history_tokens: int = 4000):
        """Build the prompt | model | parser chain and empty session storage.

        Args:
            max_history_tokens: Approximate token budget for the retained
                history; enforced as ``max_history_tokens * 4`` characters.
        """
        self.model = ChatOpenAI(model="gpt-4o", temperature=0)
        self.parser = StrOutputParser()
        self.max_history_tokens = max_history_tokens
        self.sessions: dict[str, list] = {}  # session_id → message history
        # Completed turns per session. Tracked separately because trimming
        # shortens the stored history, so its length cannot number turns.
        self._turn_counts: dict[str, int] = {}

        self.prompt = ChatPromptTemplate.from_messages([
            ("system",
             "You are a clinical pharmacist. "
             "Provide evidence-based drug information. "
             "Reference previous turns of the conversation when relevant."),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ])
        self.chain = self.prompt | self.model | self.parser

    def get_session(self, session_id: str) -> list:
        """Return the history list for ``session_id``, creating it if absent."""
        return self.sessions.setdefault(session_id, [])

    def _trim_history(self, history: list) -> list:
        """Drop the oldest message pairs until the history fits the budget.

        The budget is ``max_history_tokens * 4`` characters, measured with
        ``len(message.content)``. Messages are removed two at a time from
        the front (oldest human + oldest AI) while the total is over budget
        and at least two messages remain.

        Args:
            history: Message list to trim. It is modified in place.

        Returns:
            The same list object that was passed in.
        """
        max_chars = self.max_history_tokens * 4
        total_chars = sum(len(m.content) for m in history)

        # Find how many leading messages must go, then delete them in one
        # slice instead of repeated O(n) pop(0) calls.
        drop = 0
        while total_chars > max_chars and len(history) - drop >= 2:
            total_chars -= len(history[drop].content)
            total_chars -= len(history[drop + 1].content)
            drop += 2
        if drop:
            del history[:drop]

        return history

    def chat(self, session_id: str, question: str) -> dict:
        """Process one chat turn for a session.

        Args:
            session_id: Identifies which history to use; created on first use.
            question: The user's message for this turn.

        Returns:
            A dict with ``response`` (the chain output), ``session_id``, and
            ``turn_number`` (1-based count of turns in this session, which
            keeps increasing even after old messages have been trimmed).
        """
        history = self.get_session(session_id)
        # Seed the counter from the stored history the first time a session
        # is counted, so a pre-populated session continues its numbering.
        turn_number = self._turn_counts.get(session_id, len(history) // 2) + 1

        response = self.chain.invoke({
            "question": question,
            "chat_history": history,
        })

        # Record the turn only after the chain call has succeeded
        history.extend([
            HumanMessage(content=question),
            AIMessage(content=response),
        ])
        self.sessions[session_id] = self._trim_history(history)
        self._turn_counts[session_id] = turn_number

        return {
            "response": response,
            "session_id": session_id,
            "turn_number": turn_number,
        }

    def reset_session(self, session_id: str) -> None:
        """Clear the history and turn counter for a session, if present."""
        self.sessions.pop(session_id, None)
        self._turn_counts.pop(session_id, None)

    def get_session_summary(self, session_id: str) -> dict:
        """Summarise the history currently stored for a session.

        Unknown session IDs yield zero counts and are not created.

        Returns:
            A dict with ``turns`` (stored messages // 2) and
            ``total_messages`` (number of stored messages).
        """
        # Read-only lookup: a summary request must not allocate a session
        history = self.sessions.get(session_id, [])
        return {
            "turns": len(history) // 2,
            "total_messages": len(history),
        }


# Usage
bot = ClinicalChatbot()

# Two turns in one session, then a single turn in a separate session
r1 = bot.chat(session_id="session_001", question="What is warfarin?")
r2 = bot.chat(session_id="session_001", question="What dose for AFib?")
r3 = bot.chat(session_id="session_002", question="What is metformin?")

# Expected output, in order:
#   {'turns': 2, 'total_messages': 4}
#   {'turns': 1, 'total_messages': 2}
for sid in ("session_001", "session_002"):
    print(bot.get_session_summary(sid))

Persisting Memory to Database

In production, store history in a database rather than in process memory:

Python

import json
import redis
from langchain_core.messages import messages_to_dict, messages_from_dict

class RedisConversationMemory:
    """Conversation history stored in Redis, one JSON value per session."""

    def __init__(self, redis_url: str = "redis://localhost:6379", ttl: int = 86400):
        """Create the Redis client.

        Args:
            redis_url: Connection URL handed to ``redis.from_url``.
            ttl: Expiry in seconds applied on every save (default 86400,
                i.e. 24 hours).
        """
        self.ttl = ttl
        self.client = redis.from_url(redis_url, decode_responses=True)

    def _key(self, session_id: str) -> str:
        """Return the Redis key under which a session's history is stored."""
        return f"chat_history:{session_id}"

    def load(self, session_id: str) -> list:
        """Return the stored messages for a session, or ``[]`` if none exist."""
        raw = self.client.get(self._key(session_id))
        return messages_from_dict(json.loads(raw)) if raw else []

    def save(self, session_id: str, history: list) -> None:
        """Serialize ``history`` to JSON and store it with the configured TTL."""
        payload = messages_to_dict(history)
        self.client.setex(self._key(session_id), self.ttl, json.dumps(payload))

    def clear(self, session_id: str) -> None:
        """Delete the stored history for a session."""
        self.client.delete(self._key(session_id))


# Module-level store used by persistent_chat()
redis_memory = RedisConversationMemory()


def persistent_chat(session_id: str, question: str, chain) -> str:
    """Run one chat turn whose history is loaded from and saved to Redis.

    Args:
        session_id: Selects which stored history to load and update.
        question: The user's message for this turn.
        chain: Object whose ``invoke`` accepts ``question`` and
            ``chat_history`` keys and returns the reply text.

    Returns:
        The chain's response for this turn.
    """
    history = redis_memory.load(session_id)
    payload = {"question": question, "chat_history": history}
    response = chain.invoke(payload)

    # Append this turn and write the full history back
    history.append(HumanMessage(content=question))
    history.append(AIMessage(content=response))
    redis_memory.save(session_id, history)
    return response

When to Switch Away from Buffer Memory

| Trigger | Symptom | Switch to |
|---|---|---|
| Over 4000 tokens in history | API errors or slow responses | Window or Summary memory |
| Conversations lasting hours | History too long for context | SummaryBuffer memory |
| User re-explains old context | Lost early context | Entity or Vector memory |
| Multiple users | Memory leaks between users | Per-session instances |
| Server restart loses memory | Sessions disappear | Redis-backed persistence |

Types of Memory in LangChain

Next Lesson

ConversationSummaryMemory: Compressed History