LangChain Mastery · Lesson 27 of 33

Vector Stores: Chroma, Pinecone, FAISS

What is a Vector Store?

A vector store embeds documents and stores them so you can later retrieve the most semantically similar chunks to a query.

Text chunks → Embedding model → Float vectors → Stored in vector DB
Query text  → Embedding model → Float vector  → Search for nearest neighbors

LangChain wraps multiple vector stores with the same interface: add_documents(), similarity_search(), as_retriever().

Chroma (Local, In-Process)

Best for development, prototyping, and small to medium datasets (under 1M vectors).

Python

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

# One embedding model is shared by every vector store in this lesson.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# No persist_directory is given, so this collection is in-memory only
# (resets on restart).
vectorstore = Chroma(collection_name="drug_formulary", embedding_function=embeddings)

# Sample corpus as (drug, category, text) rows; every row shares the
# "formulary" source.
formulary_rows = [
    ("warfarin", "anticoagulant", "Warfarin inhibits VKORC1, blocking vitamin K recycling."),
    ("metformin", "antidiabetic", "Metformin activates AMPK and reduces hepatic glucose output."),
    ("aspirin", "nsaid", "Aspirin inhibits COX-1 and COX-2, reducing prostaglandin synthesis."),
]
docs = [
    Document(
        page_content=text,
        metadata={"drug": drug, "category": category, "source": "formulary"},
    )
    for drug, category, text in formulary_rows
]

# Embed the documents and add them to the collection.
vectorstore.add_documents(docs)

# Basic similarity search: the 2 chunks closest to the question
results = vectorstore.similarity_search("How does warfarin work?", k=2)
for hit in results:
    drug_name = hit.metadata["drug"]
    preview = hit.page_content[:80]   # First 80 characters only
    print(f"[{drug_name}] {preview}")


# Same kind of search, but every hit is paired with its score
# (lower distance = more similar)
results_with_scores = vectorstore.similarity_search_with_score(
    "blood thinning medications", k=3
)
for hit, distance in results_with_scores:
    print(f"Score: {distance:.4f} | {hit.page_content[:60]}")


# Persistent storage — survives restarts.
# The writer and the reader below use identical settings; persist_directory
# is what makes Chroma save the collection to disk.
persistent_settings = {
    "collection_name": "drug_formulary",
    "embedding_function": embeddings,
    "persist_directory": "./chroma_db",
}

vectorstore_persistent = Chroma(**persistent_settings)
vectorstore_persistent.add_documents(docs)

# Reload on next run — same collection name and persist_directory,
# no re-embedding needed
vectorstore_reloaded = Chroma(**persistent_settings)

Metadata Filtering

Apply a metadata filter so that only documents with matching metadata are considered by the vector search — this keeps results from unrelated categories out:

Python

# Single condition: only documents from the anticoagulant category
anticoagulant_only = {"category": "anticoagulant"}
results = vectorstore.similarity_search(
    "dosing adjustments", k=3, filter=anticoagulant_only
)

# Multiple conditions combined with $and (Chroma uses dict syntax)
anticoagulant_from_formulary = {
    "$and": [
        {"category": {"$eq": "anticoagulant"}},
        {"source": {"$eq": "formulary"}},
    ]
}
results = vectorstore.similarity_search(
    "mechanism of action", k=5, filter=anticoagulant_from_formulary
)

# $in operator: the field may equal any value in the list
warfarin_or_metformin = {"drug": {"$in": ["warfarin", "metformin"]}}
results = vectorstore.similarity_search(
    "renal dosing", k=4, filter=warfarin_or_metformin
)

FAISS (Fast, In-Memory, CPU-Optimized)

Best for large local datasets where speed matters. Does not persist automatically.

Python

from langchain_community.vectorstores import FAISS

# Build from documents: embed every document and index the vectors
vectorstore = FAISS.from_documents(docs, embeddings)

# Save to disk — FAISS does not persist automatically, so call this explicitly
vectorstore.save_local("./faiss_index")

# Load from disk, passing the embeddings object again for use with the loaded index.
# SECURITY: per the LangChain docs, loading involves deserializing a pickle
# file, and unpickling can execute arbitrary code. Only load an index that
# you created yourself or that comes from a source you trust.
vectorstore_loaded = FAISS.load_local(
    "./faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,   # Explicit opt-in to unpickling; trusted files only
)

# Merge two FAISS indexes (useful for incremental ingestion).
# The two batches here are just slices of the sample corpus; in a real
# pipeline they would be documents ingested at different times.
docs_batch_1 = docs[:2]
docs_batch_2 = docs[2:]
index_a = FAISS.from_documents(docs_batch_1, embeddings)
index_b = FAISS.from_documents(docs_batch_2, embeddings)
index_a.merge_from(index_b)   # Combined index in index_a

# Similarity search (same interface as Chroma)
results = vectorstore.similarity_search("warfarin interactions", k=3)


# Build from raw texts + metadata (texts and metadatas are parallel lists)
texts = ["Warfarin inhibits VKORC1", "Metformin activates AMPK"]
metadatas = [{"drug": "warfarin"}, {"drug": "metformin"}]
vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

Pinecone (Cloud-Managed, Production Scale)

Best for production deployments with millions of vectors, multi-region, or managed infrastructure.

Python

import os

from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# Read the key from the environment rather than hard-coding a secret in
# source. Raises KeyError immediately if PINECONE_API_KEY is not set.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

# Initialize Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create index (one-time setup)
INDEX_NAME = "drug-formulary"
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,   # text-embedding-3-small output dim
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Connect via LangChain. The key is passed explicitly so the vector store
# uses the same credentials as the client above.
vectorstore = PineconeVectorStore(
    index_name=INDEX_NAME,
    embedding=embeddings,
    namespace="clinical_docs",   # Logical separation within one index
    pinecone_api_key=PINECONE_API_KEY,
)

# Same interface as Chroma and FAISS
vectorstore.add_documents(docs)
results = vectorstore.similarity_search(query="anticoagulant mechanism", k=3)

# Metadata filtering uses Pinecone filter syntax
anticoagulant_filter = {"category": {"$eq": "anticoagulant"}}
results = vectorstore.similarity_search(
    query="dosing in renal failure", k=4, filter=anticoagulant_filter
)

# Delete documents by metadata
# NOTE(review): whether delete-by-filter is available may depend on the
# Pinecone index type — confirm against the Pinecone docs for your index.
outdated_source = {"source": "outdated_formulary_2024"}
vectorstore.delete(filter=outdated_source)

MMR: Maximal Marginal Relevance

Standard similarity search returns the top-k most similar results — which are often redundant (all say the same thing). MMR balances relevance with diversity.

Python

# Standard search: all results might discuss the same warfarin fact
results_standard = vectorstore.similarity_search("warfarin", k=4)

# MMR settings, shared by the direct call and the retriever below
mmr_settings = {
    "k": 4,              # Return 4 results
    "fetch_k": 20,       # Fetch 20 candidates first, then re-rank for diversity
    "lambda_mult": 0.5,  # 0.0 = max diversity, 1.0 = max relevance (0.5 is balanced)
}

# MMR: returns relevant but diverse results
results_mmr = vectorstore.max_marginal_relevance_search(query="warfarin", **mmr_settings)

# As a retriever with MMR (given its own copy of the settings)
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(mmr_settings),
)
results = retriever.invoke("warfarin drug interactions")

Adding and Deleting Documents

Python

import uuid

# Add with explicit IDs (allows targeted deletion): one random UUID string
# per document, in the same order as docs
ids = [str(uuid.uuid4()) for _doc in docs]
vectorstore.add_documents(documents=docs, ids=ids)

# Delete by ID (placeholder IDs — substitute the IDs you actually stored)
ids_to_delete = ["id-to-delete-1", "id-to-delete-2"]
vectorstore.delete(ids=ids_to_delete)

# Update: delete + re-add
def update_document(vectorstore, doc_id: str, new_doc: Document):
    """Replace the document stored under ``doc_id`` with ``new_doc``.

    The update is a delete followed by an add that reuses the same ID.
    The two calls are separate, so the store is briefly without the
    document between them.

    Args:
        vectorstore: Store exposing ``delete(ids=...)`` and
            ``add_documents(documents, ids=...)``.
        doc_id: ID of the document to replace.
        new_doc: Replacement document, stored under ``doc_id``.
    """
    replacement = [new_doc]
    vectorstore.delete(ids=[doc_id])
    vectorstore.add_documents(replacement, ids=[doc_id])


# Incremental ingestion: add new documents without rebuilding the index
apixaban_doc = Document(
    page_content="Apixaban inhibits Factor Xa directly, with predictable dosing.",
    metadata={"drug": "apixaban", "category": "anticoagulant", "source": "formulary"},
)
new_docs = [apixaban_doc]
vectorstore.add_documents(new_docs)

From Documents in One Step

Python

# Most common pattern: loader → splitter → vector store in three steps
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure both pipeline stages up front
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
loader = PyPDFLoader("warfarin_guidelines.pdf")

# Load the PDF, then split what was loaded into overlapping chunks
raw_docs = loader.load()
chunks = splitter.split_documents(raw_docs)

# from_documents embeds and stores in one call
vectorstore = Chroma.from_documents(
    chunks,
    embedding=embeddings,
    collection_name="warfarin_guidelines",
    persist_directory="./chroma_db",
)

# _collection is a private attribute of the Chroma wrapper, read here only
# to report how many vectors were stored
vector_count = vectorstore._collection.count()
print(f"Stored {vector_count} vectors")

Vector Store as Retriever

Python

# Basic retriever — used in RAG chains
retriever = vectorstore.as_retriever(
    search_type="similarity",       # "similarity" | "mmr" | "similarity_score_threshold"
    search_kwargs={"k": 4},
)

# Bound to its own name so the `docs` sample corpus defined earlier in the
# lesson is not overwritten by the retrieved documents.
retrieved_docs = retriever.invoke("warfarin monitoring requirements")


# Score threshold — only return docs above a relevance cutoff
threshold_kwargs = {"score_threshold": 0.75, "k": 6}
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=threshold_kwargs,
)


# With metadata filter baked in: every query through this retriever is
# restricted to the anticoagulant category
filtered_kwargs = {"k": 4, "filter": {"category": "anticoagulant"}}
filtered_retriever = vectorstore.as_retriever(search_kwargs=filtered_kwargs)


# Use in a RAG chain (same interface regardless of underlying store)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# {context} receives the formatted retrieved chunks and {question} the user's
# query; both are filled in when the chain runs.
system_message = "Answer using only the retrieved context below.\n\nContext: {context}"
human_message = "{question}"
prompt = ChatPromptTemplate.from_messages(
    [("system", system_message), ("human", human_message)]
)

def format_docs(docs):
    """Join the ``page_content`` of each document into one context string.

    Args:
        docs: Iterable of objects with a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by a blank line. An
        empty input yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Deterministic chat model (temperature=0) used to write the answer
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# The incoming question is sent to the retriever (whose hits are formatted
# into one context string) and is also passed through unchanged.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# inputs → prompt → model → plain-string output
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

answer = rag_chain.invoke("What does warfarin inhibit?")
print(answer)

Comparison

| Store | Persistence | Scale | Filtering | Best For |
|---|---|---|---|---|
| Chroma | Optional (disk) | Small-medium | Dict filter syntax | Development, local apps |
| FAISS | Manual save/load | Large (millions) | Limited | High-speed local search |
| Pinecone | Cloud-managed | Unlimited | Full filter API | Production, multi-region |
| Weaviate | Cloud/self-hosted | Large | GraphQL-like | Hybrid search + metadata |
| Qdrant | Cloud/self-hosted | Large | Payload filters | Advanced filtering needs |

Text Splitters: Chunk Your Documents Right

Next Lesson

Retrievers: Similarity, MMR, and Multi-Query