Vector Stores: Storing and Searching Embeddings
Store and retrieve document embeddings with Chroma, FAISS, and Pinecone. Learn similarity search, metadata filtering, MMR retrieval, and vector store management.
What is a Vector Store?
A vector store embeds documents and stores them so you can later retrieve the most semantically similar chunks to a query.
Text chunks → Embedding model → Float vectors → Stored in vector DB
Query text → Embedding model → Float vector → Search for nearest neighbors
LangChain wraps multiple vector stores with the same interface: add_documents(), similarity_search(), as_retriever().
Chroma (Local, In-Process)
Best for development, prototyping, and small to medium datasets (under 1M vectors).
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# In-memory collection: no persist_directory is given, so it resets on restart.
vectorstore = Chroma(
    embedding_function=embeddings,
    collection_name="drug_formulary",
)

# Sample documents: the text to embed plus metadata used later for filtering.
docs = [
    Document(
        page_content="Warfarin inhibits VKORC1, blocking vitamin K recycling.",
        metadata={
            "drug": "warfarin",
            "category": "anticoagulant",
            "source": "formulary",
        },
    ),
    Document(
        page_content="Metformin activates AMPK and reduces hepatic glucose output.",
        metadata={
            "drug": "metformin",
            "category": "antidiabetic",
            "source": "formulary",
        },
    ),
    Document(
        page_content="Aspirin inhibits COX-1 and COX-2, reducing prostaglandin synthesis.",
        metadata={
            "drug": "aspirin",
            "category": "nsaid",
            "source": "formulary",
        },
    ),
]
vectorstore.add_documents(docs)
# Basic similarity search
results = vectorstore.similarity_search(
    query="How does warfarin work?",
    k=2,  # Return top 2 matches
)
# Print the drug tag and the first 80 characters of each hit.
for doc in results:
    print(f"[{doc.metadata['drug']}] {doc.page_content[:80]}")

# Similarity search with scores (lower distance = more similar)
results_with_scores = vectorstore.similarity_search_with_score(
    query="blood thinning medications",
    k=3,
)
# Each item is a (document, score) pair.
for doc, score in results_with_scores:
    print(f"Score: {score:.4f} | {doc.page_content[:60]}")
# Persistent storage — survives restarts
vectorstore_persistent = Chroma(
    persist_directory="./chroma_db",  # Saved to disk automatically
    collection_name="drug_formulary",
    embedding_function=embeddings,
)
vectorstore_persistent.add_documents(docs)
# Reload on next run — same persist_directory, no re-embedding needed
vectorstore_reloaded = Chroma(
collection_name="drug_formulary",
embedding_function=embeddings,
persist_directory="./chroma_db",
)
Metadata Filtering
Filter results before vector search — this prevents irrelevant results from other categories:
# Restrict the search to documents whose category is "anticoagulant".
results = vectorstore.similarity_search(
    "dosing adjustments",
    k=3,
    filter={"category": "anticoagulant"},
)

# Several conditions at once, combined with Chroma's dict-based "$and" syntax.
results = vectorstore.similarity_search(
    "mechanism of action",
    k=5,
    filter={
        "$and": [
            {"category": {"$eq": "anticoagulant"}},
            {"source": {"$eq": "formulary"}},
        ],
    },
)
# Filter with $in operator
results = vectorstore.similarity_search(
query="renal dosing",
k=4,
filter={"drug": {"$in": ["warfarin", "metformin"]}},
)
FAISS (Fast, In-Memory, CPU-Optimized)
Best for large local datasets where speed matters. Does not persist automatically.
from langchain_community.vectorstores import FAISS
# Build from documents
vectorstore = FAISS.from_documents(docs, embeddings)

# Save to disk
vectorstore.save_local("./faiss_index")

# Load from disk.
# SECURITY: the saved index includes pickled data, and unpickling can execute
# arbitrary code. The flag below is an explicit opt-in — only set it for index
# directories you created yourself or otherwise fully trust.
vectorstore_loaded = FAISS.load_local(
    "./faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,  # Required flag for loading
)

# Merge two FAISS indexes (useful for incremental ingestion).
# The batches are slices of `docs` so the example runs as written; in practice
# they would be separately ingested document sets.
docs_batch_1, docs_batch_2 = docs[:2], docs[2:]
index_a = FAISS.from_documents(docs_batch_1, embeddings)
index_b = FAISS.from_documents(docs_batch_2, embeddings)
index_a.merge_from(index_b)  # Combined index in index_a

# Similarity search (same interface as Chroma)
results = vectorstore.similarity_search("warfarin interactions", k=3)
# Build from raw texts + metadata
texts = ["Warfarin inhibits VKORC1", "Metformin activates AMPK"]
metadatas = [{"drug": "warfarin"}, {"drug": "metformin"}]
vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
Pinecone (Cloud-Managed, Production Scale)
Best for production deployments with millions of vectors, multi-region, or managed infrastructure.
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
import os

# Initialize Pinecone client.
# The key is read from the environment instead of being hard-coded, so it never
# lands in source control; a missing variable fails fast with a KeyError.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create index (one-time setup)
INDEX_NAME = "drug-formulary"
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # text-embedding-3-small output dim
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Connect via LangChain.
# NOTE(review): the store is built from index_name only (the `pc` client is not
# passed in), so it presumably reads PINECONE_API_KEY itself — confirm.
vectorstore = PineconeVectorStore(
    index_name=INDEX_NAME,
    embedding=embeddings,
    namespace="clinical_docs",  # Logical separation within one index
)

# Same interface as Chroma and FAISS
vectorstore.add_documents(docs)
results = vectorstore.similarity_search("anticoagulant mechanism", k=3)

# Metadata filtering uses Pinecone filter syntax
results = vectorstore.similarity_search(
    "dosing in renal failure",
    k=4,
    filter={"category": {"$eq": "anticoagulant"}},
)
# Delete documents by metadata
vectorstore.delete(filter={"source": "outdated_formulary_2024"})
MMR: Maximal Marginal Relevance
Standard similarity search returns the top-k most similar results — which are often redundant (they all say the same thing). MMR balances relevance with diversity.
# Plain similarity search: the top hits may all restate the same warfarin fact.
results_standard = vectorstore.similarity_search("warfarin", k=4)

# MMR: fetch a wider candidate pool, then keep results that are relevant
# but differ from one another.
results_mmr = vectorstore.max_marginal_relevance_search(
    query="warfarin",
    lambda_mult=0.5,  # 0.0 = max diversity, 1.0 = max relevance (0.5 is balanced)
    fetch_k=20,  # Candidates fetched before re-ranking for diversity
    k=4,  # Results returned
)

# The same MMR settings exposed through the retriever interface.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=4, fetch_k=20, lambda_mult=0.5),
)
results = retriever.invoke("warfarin drug interactions")
Adding and Deleting Documents
import uuid
# Add with explicit IDs (allows targeted deletion)
ids = [str(uuid.uuid4()) for _ in docs]
vectorstore.add_documents(docs, ids=ids)

# Delete by ID. Use IDs that were actually stored above rather than placeholder
# strings, so the call removes real entries.
# NOTE(review): some stores may raise on unknown IDs — confirm per backend.
vectorstore.delete(ids=ids[:2])
# Update: delete + re-add
def update_document(vectorstore, doc_id: str, new_doc: Document):
    """Replace the stored entry ``doc_id`` with ``new_doc``.

    The existing entry is deleted and ``new_doc`` is added under the same ID.

    Args:
        vectorstore: Store exposing ``delete(ids=...)`` and
            ``add_documents(docs, ids=...)``.
        doc_id: ID of the entry to replace; reused for the new entry.
        new_doc: Document stored in its place.
    """
    # Two separate calls, not one atomic operation: if add_documents raises
    # after the delete has run, the entry is left removed.
    vectorstore.delete(ids=[doc_id])
    vectorstore.add_documents([new_doc], ids=[doc_id])
# Incremental ingestion: add new documents without rebuilding the index
new_docs = [
Document(
page_content="Apixaban inhibits Factor Xa directly, with predictable dosing.",
metadata={"drug": "apixaban", "category": "anticoagulant", "source": "formulary"},
)
]
vectorstore.add_documents(new_docs)
From Documents in One Step
# Most common pattern: loader → splitter → vector store in three lines
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load the PDF into Document objects.
loader = PyPDFLoader("warfarin_guidelines.pdf")
raw_docs = loader.load()

# Split into overlapping chunks before embedding.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=150, chunk_size=800)
chunks = splitter.split_documents(raw_docs)

# from_documents embeds the chunks and stores them in a single call.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db",
    collection_name="warfarin_guidelines",
)
print(f"Stored {vectorstore._collection.count()} vectors")
Vector Store as Retriever
# Basic retriever — used in RAG chains.
# search_type is one of "similarity", "mmr" or "similarity_score_threshold".
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=4),
)
docs = retriever.invoke("warfarin monitoring requirements")

# Score threshold — only return docs above a relevance cutoff.
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(score_threshold=0.75, k=6),
)

# Metadata filter baked into every query made through this retriever.
filtered_retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=4, filter=dict(category="anticoagulant")),
)
# Use in a RAG chain (same interface regardless of underlying store)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
prompt = ChatPromptTemplate.from_messages([
("system", "Answer using only the retrieved context below.\n\nContext: {context}"),
("human", "{question}"),
])
def format_docs(docs):
    """Join the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(d.page_content for d in docs)
rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| ChatOpenAI(model="gpt-4o-mini", temperature=0)
| StrOutputParser()
)
answer = rag_chain.invoke("What does warfarin inhibit?")
print(answer)
Comparison
| Store | Persistence | Scale | Filtering | Best For |
|---|---|---|---|---|
| Chroma | Optional (disk) | Small-medium | Dict filter syntax | Development, local apps |
| FAISS | Manual save/load | Large (millions) | Limited | High-speed local search |
| Pinecone | Cloud-managed | Unlimited | Full filter API | Production, multi-region |
| Weaviate | Cloud/self-hosted | Large | GraphQL-like | Hybrid search + metadata |
| Qdrant | Cloud/self-hosted | Large | Payload filters | Advanced filtering needs |
Found this helpful?
Leave a comment
Have a question, correction, or just found this helpful? Leave a note below.