Retrievers: Advanced Retrieval Strategies
Go beyond basic vector search. Build multi-query, contextual compression, BM25 hybrid, parent-document, and self-querying retrievers for production RAG pipelines.
The Retriever Interface
Every retriever in LangChain exposes the same entry point: invoke(query: str) -> list[Document]. This uniform interface lets you swap retrieval strategies without changing your RAG chain.
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
# Embedding model; reused below by the vector store and by EmbeddingsFilter.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Chroma collection backed by the ./chroma_db directory.
vectorstore = Chroma(
    collection_name="drug_formulary",
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

# Basic vector retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
docs = retriever.invoke("warfarin drug interactions")
for doc in docs:
    # Print the "drug" metadata tag ("?" when the key is absent) and the
    # first 80 characters of the chunk.
    print(f"[{doc.metadata.get('drug', '?')}] {doc.page_content[:80]}")

MultiQueryRetriever
A single query can miss documents phrased differently. MultiQueryRetriever generates multiple query variants and combines the results.
from langchain.retrievers import MultiQueryRetriever
from langchain_openai import ChatOpenAI
# Chat model that rewrites the user's question; temperature 0 keeps the
# generated variants as repeatable as possible.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# No prompt is passed, so the default one is used.
# Default: generates 3 alternative queries from the original
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
)

# "warfarin aspirin interaction" generates queries like:
#   1. "warfarin and aspirin drug-drug interaction severity"
#   2. "anticoagulant NSAID combination bleeding risk"
#   3. "coumadin aspirin co-administration safety"
# Results from all 3 queries are deduplicated and returned
docs = multi_query_retriever.invoke("warfarin aspirin interaction")
print(f"Retrieved {len(docs)} unique docs across all query variants")
# Custom query generation prompt
from langchain_core.prompts import PromptTemplate
custom_prompt = PromptTemplate(
input_variables=["question"],
template="""You are a clinical pharmacist. Generate 3 different search queries
to find relevant drug information for this question. Focus on:
- Drug names (both brand and generic)
- Mechanism terms
- Clinical scenario terms
Original question: {question}
Output 3 queries, one per line:""",
)
multi_query_custom = MultiQueryRetriever.from_llm(
retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
llm=llm,
prompt=custom_prompt,
)Contextual Compression Retriever
Retrieved chunks often contain irrelevant surrounding text. This retriever extracts only the relevant portion of each chunk.
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
# Compressor: extracts only the relevant part of each retrieved chunk
compressor = LLMChainExtractor.from_llm(llm)

# The base retriever fetches 4 candidate chunks; the compressor then
# processes what was retrieved.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
)

docs = compression_retriever.invoke("warfarin monitoring frequency")
for doc in docs:
    # Each doc now contains only the relevant sentences from the original chunk
    print(f"Compressed: {doc.page_content}")
# Alternative compressor: LLMChainFilter (removes irrelevant chunks entirely)
from langchain.retrievers.document_compressors import LLMChainFilter
# LLM-backed filter built from the shared chat model.
filter_compressor = LLMChainFilter.from_llm(llm)

# Six candidates go in, because the filter is expected to discard some.
candidate_retriever = vectorstore.as_retriever(search_kwargs={"k": 6})
filter_retriever = ContextualCompressionRetriever(
    base_retriever=candidate_retriever,
    base_compressor=filter_compressor,
)
# Only the chunks judged relevant are returned; the rest are removed entirely.
# EmbeddingsFilter: faster, no LLM calls for compression
from langchain.retrievers.document_compressors import EmbeddingsFilter
embeddings_filter = EmbeddingsFilter(
embeddings=embeddings,
similarity_threshold=0.76, # Only return chunks above this similarity to query
)
fast_compression_retriever = ContextualCompressionRetriever(
base_compressor=embeddings_filter,
base_retriever=vectorstore.as_retriever(search_kwargs={"k": 8}),
)BM25 Retriever (Keyword Search)
BM25 (Best Match 25) is a classical keyword search algorithm. It finds exact term matches — complementary to semantic search.
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
# BM25 works on a corpus of documents
corpus = [
Document(page_content="Warfarin inhibits vitamin K epoxide reductase.", metadata={"drug": "warfarin"}),
Document(page_content="Metformin reduces hepatic glucose production.", metadata={"drug": "metformin"}),
Document(page_content="Aspirin inhibits COX-1 and COX-2 enzymes.", metadata={"drug": "aspirin"}),
Document(page_content="Warfarin dose is adjusted based on INR values.", metadata={"drug": "warfarin"}),
]
bm25_retriever = BM25Retriever.from_documents(corpus)
bm25_retriever.k = 3 # Return top 3
results = bm25_retriever.invoke("INR warfarin monitoring")
# Finds "INR" exactly — semantic search might miss this if embedding space is crowdedHybrid Retriever (BM25 + Vector)
Combines keyword and semantic search with reciprocal rank fusion:
from langchain.retrievers import EnsembleRetriever
# Keyword side. The default BM25 tokenizer is a case-sensitive whitespace
# split, so lowercase documents and queries to let "warfarin" match "Warfarin".
bm25_retriever = BM25Retriever.from_documents(
    corpus,
    preprocess_func=lambda text: text.lower().split(),
)
bm25_retriever.k = 4

# Semantic side: same k so both retrievers contribute equally sized rankings.
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# EnsembleRetriever: merges results using Reciprocal Rank Fusion
hybrid_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.4, 0.6],  # BM25 gets 40% weight, vector gets 60%
)

# Use when: documents have specific terminology (drug names, lab values, ICD codes)
# that might not be well-represented in embedding space
docs = hybrid_retriever.invoke("warfarin INR 2.5 anticoagulation target")

ParentDocumentRetriever
Store small chunks for precise retrieval, but return the larger parent chunks for LLM context:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Parent chunks are kept in an in-memory document store, not in the vector store.
docstore = InMemoryStore()

# Parents: 2000-character chunks with 100 overlap — what is returned to the LLM.
parent_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=2000)

# Children: 200-character chunks with no overlap — what gets embedded and searched.
child_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=200)

# NOTE: this rebinds the module-level name `retriever` that the basic vector
# example assigned earlier.
retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=docstore,
    vectorstore=vectorstore,
)
# Add documents: splits into parents (stored in docstore) and children (embedded)
from langchain_community.document_loaders import PyPDFLoader
# Load the guideline PDF and index it through the parent-document retriever.
loader = PyPDFLoader("warfarin_guidelines.pdf")
raw_docs = loader.load()
retriever.add_documents(raw_docs)

# Query: finds small precise child chunks, returns their large parent chunks
docs = retriever.invoke("CYP2C9 warfarin metabolism")
print(f"Returned {len(docs)} parent chunks")
if docs:
    # Guard the index: docs[0] would raise IndexError when nothing matched.
    print(f"First parent size: {len(docs[0].page_content)} chars")
# Parent chunks: ~2000 chars — much more context than a 200-char child chunk

SelfQueryRetriever
Let the LLM translate natural language questions into structured metadata filters:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
# (field name, description) pairs for every filterable metadata field.
# All three fields are plain strings.
_FIELD_SPECS = (
    ("drug", "The generic drug name (e.g., warfarin, metformin, aspirin)"),
    ("category", "Drug category: anticoagulant, antidiabetic, nsaid, antihypertensive"),
    ("severity", "Interaction severity: Major, Moderate, Minor"),
)

# These descriptions tell the LLM which filters it is allowed to build.
metadata_field_info = [
    AttributeInfo(name=field, description=text, type="string")
    for field, text in _FIELD_SPECS
]
self_query_retriever = SelfQueryRetriever.from_llm(
llm=ChatOpenAI(model="gpt-4o", temperature=0),
vectorstore=vectorstore,
document_contents="Clinical drug information including mechanisms, dosing, and interactions",
metadata_field_info=metadata_field_info,
verbose=True,
)
# LLM extracts: filter={"category": "anticoagulant"}, query="renal dosing"
docs = self_query_retriever.invoke(
"What are the renal dosing guidelines for anticoagulants?"
)
# LLM extracts: filter={"severity": "Major"}, query="warfarin interactions"
docs = self_query_retriever.invoke(
"Show me major severity interactions for warfarin"
)Custom Retriever
from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from typing import Any
class ClinicalRetriever(BaseRetriever):
    """Retrieves from vector store but always includes a safety disclaimer document.

    The disclaimer is appended after the similarity hits, so a query yields
    the hits returned by the vector store plus one extra document.
    """

    # Object providing ``similarity_search(query, k=...)``; typed as Any so
    # that any vector store implementation can be supplied.
    vectorstore: Any
    # Number of similarity hits requested per query.
    k: int = 4

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> list[Document]:
        """Return the similarity hits for ``query`` followed by the disclaimer.

        Args:
            query: Free-text search string passed to the vector store.
            run_manager: Callback manager for this retrieval run; not used
                by this implementation.

        Returns:
            The documents from ``vectorstore.similarity_search`` with one
            disclaimer document appended as the last element.
        """
        # Base retrieval
        docs = self.vectorstore.similarity_search(query, k=self.k)

        # Always append a safety disclaimer
        disclaimer = Document(
            page_content=(
                "IMPORTANT: All clinical information provided is for educational purposes only. "
                "Always verify drug information with current references (Lexicomp, Micromedex) "
                "before making clinical decisions."
            ),
            # Tagged so downstream code can tell it apart from retrieved content.
            metadata={"source": "safety_disclaimer", "type": "system"},
        )
        return docs + [disclaimer]
clinical_retriever = ClinicalRetriever(vectorstore=vectorstore, k=3)
docs = clinical_retriever.invoke("warfarin dose")
# Returns 3 relevant docs + 1 mandatory safety disclaimerRetriever Comparison
| Retriever | Mechanism | Best For | Extra Cost |
|---|---|---|---|
| Basic vector | Cosine similarity | General semantic search | None |
| MultiQuery | LLM generates 3 variants | Ambiguous or short queries | LLM calls for query gen |
| Contextual Compression | LLM extracts relevant portion | Long chunks with mixed content | LLM calls per chunk |
| BM25 | Keyword frequency | Exact term matching (drug names, codes) | None |
| Hybrid (Ensemble) | BM25 + vector, RRF | Best of both worlds | BM25 is free |
| ParentDocument | Small embed, large return | Precise search, rich context | Extra docstore |
| SelfQuery | LLM builds metadata filter | Structured filter from natural language | LLM calls |
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.