Learnixo

RAG Systems · Lesson 7 of 24

Vector Stores: FAISS, Chroma, Pinecone, pgvector

What a Vector Store Does

A vector store (vector database) indexes embeddings for fast approximate nearest-neighbor (ANN) search. At query time, you provide a query embedding and retrieve the K most similar document embeddings from potentially millions of stored vectors.

Without a vector store: Brute-force cosine similarity against all documents costs O(N×D) per query (N stored documents, D embedding dimensions) — feasible for thousands of documents, unusably slow for millions.

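With a vector store: ANN indexes reduce the search cost to roughly O(log N) for graph indexes such as HNSW, or O(n_probe × N/n_clusters) for inverted-file (IVF) indexes that only scan the closest clusters, trading a small accuracy loss for massive speed gains.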


Chroma: Simple Local Development

Python
import chromadb
from chromadb.utils import embedding_functions

# Local persistent client.
# NOTE(review): presumably stores the database under ./chroma_db so it
# survives restarts — confirm against the Chroma documentation.
client = chromadb.PersistentClient(path="./chroma_db")

# Use OpenAI embeddings (text-embedding-3-small).
# NOTE(review): "your-api-key" is a placeholder; real code should load the key
# from configuration or the environment instead of keeping it in source.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key="your-api-key",
    model_name="text-embedding-3-small",
)

# Create the "clinical_documents" collection, or fetch it if it already
# exists. The embedding function is attached here; ingest_to_chroma and
# query_chroma below pass raw text rather than precomputed vectors.
collection = client.get_or_create_collection(
    name="clinical_documents",
    embedding_function=openai_ef,
    metadata={"hnsw:space": "cosine"},  # Cosine distance; query_chroma's "1 - dist" score relies on it
)

# Ingest documents
def ingest_to_chroma(
    documents: list[dict],
    collection,
) -> None:
    """Ingest documents with metadata into a Chroma collection.

    Args:
        documents: Dicts with required ``"id"``, ``"content"`` and ``"title"``
            keys. ``"category"`` and ``"source"`` are optional and default to
            an empty string in the stored metadata.
        collection: Target collection; any object exposing
            ``add(ids=..., documents=..., metadatas=...)``.

    Raises:
        KeyError: If a document lacks one of the required keys.
    """
    if not documents:
        # Chroma rejects ``add`` calls whose id list is empty, so an empty
        # batch is treated as a no-op rather than surfacing a store error.
        print("Ingested 0 documents")
        return

    ids = []
    texts = []
    metadatas = []
    for doc in documents:
        ids.append(doc["id"])
        texts.append(doc["content"])
        metadatas.append(
            {
                "title": doc["title"],
                "category": doc.get("category", ""),
                "source": doc.get("source", ""),
            }
        )

    collection.add(ids=ids, documents=texts, metadatas=metadatas)
    print(f"Ingested {len(documents)} documents")


# Query
def query_chroma(
    query_text: str,
    collection,
    n_results: int = 5,
    where: dict = None,  # Metadata filter
) -> list[dict]:
    """Run a single-text similarity query against a Chroma collection.

    Args:
        query_text: Text to search for.
        collection: Collection object exposing ``query(...)``.
        n_results: Maximum number of hits requested.
        where: Optional metadata filter passed through unchanged.

    Returns:
        One dict per hit with ``"content"``, ``"metadata"`` and ``"score"``,
        where the score is ``1 - distance``.
    """
    response = collection.query(
        query_texts=[query_text],
        n_results=n_results,
        where=where,
        include=["documents", "metadatas", "distances"],
    )

    # Exactly one query text is sent, so its hits sit at index 0 of each field.
    contents = response["documents"][0]
    metadatas = response["metadatas"][0]
    distances = response["distances"][0]

    hits = []
    for content, metadata, distance in zip(contents, metadatas, distances):
        hits.append(
            {
                "content": content,
                "metadata": metadata,
                "score": 1 - distance,  # Convert distance to similarity
            }
        )
    return hits


# Example: restrict the search to documents tagged as drug interactions
results = query_chroma(
    query_text="warfarin drug interactions",
    collection=collection,
    where={"category": "drug_interaction"},  # metadata filter
)

Pinecone: Managed Cloud Vector Database

Python
from pinecone import Pinecone, ServerlessSpec

# Module-level Pinecone client shared by the index helpers below.
# NOTE(review): the API key is a placeholder; real code should load it from
# configuration or the environment instead of keeping it in source.
pc = Pinecone(api_key="your-pinecone-api-key")

# Create index (do once)
def create_pinecone_index(
    index_name: str = "clinical-rag",
    dimension: int = 1536,  # text-embedding-3-small
) -> None:
    """Create a serverless, cosine-metric Pinecone index if it is missing.

    Does nothing when an index named ``index_name`` is already listed by the
    module-level ``pc`` client.

    Args:
        index_name: Name of the index to create.
        dimension: Vector dimensionality of the index.
    """
    existing_names = [idx.name for idx in pc.list_indexes()]
    if index_name in existing_names:
        return

    serverless = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=serverless,
    )
    print(f"Created index: {index_name}")


def get_pinecone_index(index_name: str = "clinical-rag"):
    """Return the index handle for ``index_name`` from the shared ``pc`` client."""
    index = pc.Index(index_name)
    return index


# Upsert documents
def upsert_to_pinecone(
    documents: list[dict],
    index,
    embeddings: list[list[float]],
    batch_size: int = 100,
) -> None:
    """Upsert document embeddings to Pinecone in batches.

    Args:
        documents: Dicts with required ``"id"``, ``"content"`` and ``"title"``
            keys; ``"category"`` is optional and defaults to ``""``.
        index: Index handle; any object exposing ``upsert(vectors=...)``.
        embeddings: One embedding per document, in the same order.
        batch_size: Maximum number of vectors sent per ``upsert`` call.

    Raises:
        ValueError: If ``documents`` and ``embeddings`` differ in length, or
            if ``batch_size`` is not positive.
        KeyError: If a document lacks one of the required keys.
    """
    # zip() would silently drop the unmatched tail, so a mismatch is rejected
    # up front instead of uploading an incomplete corpus.
    if len(documents) != len(embeddings):
        raise ValueError(
            f"documents ({len(documents)}) and embeddings ({len(embeddings)}) "
            "must have the same length"
        )
    # A negative step makes range() empty: nothing would be sent while the
    # success message below is still printed.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    vectors = [
        {
            "id": doc["id"],
            "values": emb,
            "metadata": {
                # Only the first 1000 characters are stored, to keep the
                # per-vector metadata small.
                "content": doc["content"][:1000],
                "title": doc["title"],
                "category": doc.get("category", ""),
            },
        }
        for doc, emb in zip(documents, embeddings)
    ]

    for start in range(0, len(vectors), batch_size):
        index.upsert(vectors=vectors[start:start + batch_size])

    print(f"Upserted {len(vectors)} vectors")


# Query Pinecone
def query_pinecone(
    query_embedding: list[float],
    index,
    top_k: int = 5,
    filter: dict = None,
) -> list[dict]:
    """Look up the vectors most similar to ``query_embedding`` in Pinecone.

    Args:
        query_embedding: Embedding of the query.
        index: Index handle exposing ``query(...)``.
        top_k: Maximum number of matches requested.
        filter: Optional metadata filter passed through unchanged.

    Returns:
        One dict per match with ``"id"``, ``"score"``, ``"content"`` (empty
        string when the metadata has no ``"content"`` entry) and ``"metadata"``.
    """
    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        filter=filter,              # Metadata filtering
        include_metadata=True,
    )

    hits = []
    for match in response.matches:
        metadata = match.metadata
        hits.append(
            {
                "id": match.id,
                "score": match.score,
                "content": metadata.get("content", ""),
                "metadata": metadata,
            }
        )
    return hits

Qdrant: Self-Hosted with Rich Filtering

Python
from qdrant_client import QdrantClient
from qdrant_client.models import (
    Distance,
    VectorParams,
    PointStruct,
    Filter,
    FieldCondition,
    MatchValue,
)

# Initialize client (local or remote); the Qdrant helpers below all use this
# module-level instance.
# NOTE(review): the variable has the same name as the `qdrant_client` package
# the imports above come from, so it shadows that package name in this module
# — consider renaming it if the package itself is ever referenced.
qdrant_client = QdrantClient(url="http://localhost:6333")
# For cloud: QdrantClient(url="https://your-cluster.qdrant.io", api_key="your-key")


def create_qdrant_collection(
    collection_name: str,
    vector_size: int = 1536,
) -> None:
    """Create a cosine-distance Qdrant collection if it does not exist yet.

    No index parameters are passed, so the collection uses Qdrant's default
    index configuration.

    Args:
        collection_name: Name of the collection to create.
        vector_size: Dimensionality of the stored vectors.
    """
    # Creating a collection that already exists is an error, which made
    # re-running setup fail. Skip creation in that case so this helper is
    # idempotent like create_pinecone_index.
    if qdrant_client.collection_exists(collection_name):
        return

    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(
            size=vector_size,
            distance=Distance.COSINE,
        ),
    )


def upsert_to_qdrant(
    collection_name: str,
    documents: list[dict],
    embeddings: list[list[float]],
) -> None:
    """Upsert points to Qdrant using stable, document-derived point IDs.

    Point IDs were previously the position of each document in the batch, so
    every call started again at 0 and overwrote points written by earlier
    calls. IDs are now derived from ``doc["id"]``:

    * a non-negative ``int`` id is used as-is;
    * any other id is mapped to a deterministic UUID (``uuid5``), so
      re-ingesting the same document updates the same point;
    * documents without an ``"id"`` fall back to the batch position, which
      keeps the old behaviour for such input.

    Args:
        collection_name: Target collection.
        documents: Dicts with required ``"content"`` and ``"title"`` keys and
            optional ``"id"``, ``"category"`` and ``"source"``.
        embeddings: One embedding per document, in the same order.

    Raises:
        ValueError: If ``documents`` and ``embeddings`` differ in length.
        KeyError: If a document lacks ``"content"`` or ``"title"``.
    """
    import uuid  # local import keeps this block self-contained

    # zip() would silently drop the unmatched tail of the longer list.
    if len(documents) != len(embeddings):
        raise ValueError(
            f"documents ({len(documents)}) and embeddings ({len(embeddings)}) "
            "must have the same length"
        )

    points = []
    for position, (doc, emb) in enumerate(zip(documents, embeddings)):
        raw_id = doc.get("id")
        if raw_id is None:
            point_id = position
        elif isinstance(raw_id, int) and not isinstance(raw_id, bool) and raw_id >= 0:
            point_id = raw_id
        else:
            # Qdrant point IDs must be unsigned integers or UUIDs, so arbitrary
            # string ids are hashed into a reproducible UUID.
            point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, str(raw_id)))

        points.append(
            PointStruct(
                id=point_id,
                vector=emb,
                payload={
                    "content": doc["content"],
                    "title": doc["title"],
                    "category": doc.get("category", ""),
                    "source": doc.get("source", ""),
                },
            )
        )

    qdrant_client.upsert(collection_name=collection_name, points=points)
    print(f"Upserted {len(points)} points to Qdrant")


def query_qdrant(
    collection_name: str,
    query_embedding: list[float],
    top_k: int = 5,
    category_filter: str = None,
) -> list[dict]:
    """Search a Qdrant collection, optionally restricted to one category.

    Args:
        collection_name: Collection to search.
        query_embedding: Embedding of the query.
        top_k: Maximum number of hits requested.
        category_filter: When truthy, only points whose ``"category"`` payload
            field equals this value are considered.

    Returns:
        One dict per hit with ``"score"``, ``"content"``, ``"title"`` (both
        default to ``""`` when absent from the payload) and ``"metadata"``
        (the full payload).
    """
    # NOTE(review): newer qdrant-client releases appear to deprecate `search`
    # in favour of `query_points` — confirm against the installed version.
    if category_filter:
        category_match = FieldCondition(
            key="category",
            match=MatchValue(value=category_filter),
        )
        filter_condition = Filter(must=[category_match])
    else:
        filter_condition = None

    hits = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=top_k,
        query_filter=filter_condition,
        with_payload=True,
    )

    formatted = []
    for hit in hits:
        payload = hit.payload
        formatted.append(
            {
                "score": hit.score,
                "content": payload.get("content", ""),
                "title": payload.get("title", ""),
                "metadata": payload,
            }
        )
    return formatted

pgvector: Vector Search in PostgreSQL

For applications already using PostgreSQL, pgvector adds vector similarity search without a separate service:

Python
import psycopg2
import numpy as np

def setup_pgvector(connection_string: str, dimension: int = 1536) -> None:
    """Set up the pgvector extension, the documents table and its HNSW index.

    Every statement uses ``IF NOT EXISTS``, so the function can be re-run
    safely. The connection and cursor are always closed, even when a
    statement fails.

    Args:
        connection_string: Connection string passed to ``psycopg2.connect``.
        dimension: Size of the ``embedding`` vector column; must match the
            embedding model (the default 1536 matches the value used for
            text-embedding-3-small elsewhere in this file).

    Raises:
        ValueError: If ``dimension`` is not a positive integer.
    """
    # The column type modifier cannot be sent as a bound parameter, so it is
    # interpolated into the DDL; coercing to int keeps that interpolation safe.
    dimension = int(dimension)
    if dimension < 1:
        raise ValueError("dimension must be a positive integer")

    conn = psycopg2.connect(connection_string)
    try:
        cur = conn.cursor()
        try:
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")

            cur.execute(f"""
                CREATE TABLE IF NOT EXISTS documents (
                    id SERIAL PRIMARY KEY,
                    content TEXT NOT NULL,
                    title TEXT,
                    category TEXT,
                    source TEXT,
                    embedding vector({dimension}),  -- Match your embedding model dimensions
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );
            """)

            # Create HNSW index for fast cosine-distance similarity search
            cur.execute("""
                CREATE INDEX IF NOT EXISTS documents_embedding_idx
                ON documents USING hnsw (embedding vector_cosine_ops)
                WITH (m = 16, ef_construction = 64);
            """)

            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()


def upsert_document_pg(
    conn,
    content: str,
    title: str,
    embedding: list[float],
    category: str = None,
    source: str = None,
) -> int:
    """Insert a document with its embedding into PostgreSQL and commit.

    Despite the name, this runs a plain ``INSERT``: every call adds a new row
    and returns its generated id.

    Args:
        conn: Open DB-API connection (e.g. from ``psycopg2.connect``).
        content: Document text.
        title: Document title.
        embedding: Embedding vector for the document.
        category: Optional category label.
        source: Optional source label.

    Returns:
        The ``id`` of the inserted row.

    Raises:
        Exception: Any database error is re-raised after the transaction has
            been rolled back.
    """
    cur = conn.cursor()
    try:
        cur.execute(
            """
            INSERT INTO documents (content, title, embedding, category, source)
            VALUES (%s, %s, %s, %s, %s)
            RETURNING id;
            """,
            (content, title, embedding, category, source),
        )
        doc_id = cur.fetchone()[0]
        conn.commit()
    except Exception:
        # A failed statement leaves the transaction aborted; roll back so the
        # caller's connection stays usable for further statements.
        conn.rollback()
        raise
    finally:
        # Close the cursor on both the success and the error path.
        cur.close()
    return doc_id


def search_similar_pg(
    conn,
    query_embedding: list[float],
    top_k: int = 5,
    category_filter: str = None,
) -> list[dict]:
    """Find the documents most similar to ``query_embedding`` by cosine similarity.

    Args:
        conn: Open DB-API connection (e.g. from ``psycopg2.connect``).
        query_embedding: Embedding of the query.
        top_k: Maximum number of rows returned.
        category_filter: When truthy, only rows with this ``category`` are
            searched.

    Returns:
        Dicts with ``"id"``, ``"content"``, ``"title"``, ``"category"`` and
        ``"similarity"`` (``1 - cosine distance``), most similar first.
    """
    # Parameters are listed in the order their placeholders appear in the SQL.
    params = [query_embedding]
    where_clause = ""
    if category_filter:
        where_clause = "WHERE category = %s"
        params.append(category_filter)
    params.extend([query_embedding, top_k])

    # pgvector's <=> operator is cosine distance. Ordering directly by the
    # distance in ascending order (not by the derived "similarity"
    # expression) is what lets the planner use the HNSW index; the resulting
    # ranking is the same as "similarity DESC".
    query = f"""
        SELECT id, content, title, category,
               1 - (embedding <=> %s::vector) AS similarity
        FROM documents
        {where_clause}
        ORDER BY embedding <=> %s::vector
        LIMIT %s;
    """

    cur = conn.cursor()
    try:
        cur.execute(query, tuple(params))
        rows = cur.fetchall()
    finally:
        # Close the cursor even when the query fails.
        cur.close()

    return [
        {
            "id": row[0],
            "content": row[1],
            "title": row[2],
            "category": row[3],
            "similarity": float(row[4]),
        }
        for row in rows
    ]

Choosing a Vector Store

| Factor | Chroma | Pinecone | Qdrant | pgvector |
|---|---|---|---|---|
| Deployment | Local/self-hosted | Managed cloud | Self-hosted/cloud | PostgreSQL extension |
| Setup complexity | Low | Very low | Medium | Low (if using PG) |
| Scale | Under 1M vectors | 100M+ | 100M+ | Millions (hardware-dependent) |
| Filtering | Basic | Good | Excellent | Full SQL |
| Cost | Free | Per-vector | Infrastructure | PostgreSQL cost |
| Maintenance | None (local) | None (managed) | You manage | PostgreSQL ops |
| Privacy | Full (local) | Data leaves infra | Full (self-hosted) | Full (if self-hosted) |

Recommendation by scenario:

  • Prototype/dev: Chroma (zero setup, runs in-process)
  • Production, no infra team: Pinecone (fully managed, scales automatically)
  • Production, need full control: Qdrant (feature-rich, excellent filtering)
  • Already using PostgreSQL: pgvector (no extra service, ACID transactions, full SQL filters)
  • Healthcare with PHI: Qdrant or pgvector self-hosted (data stays on-premises)