.NET & C# Development · Lesson 213 of 229
Project: Build a Production RAG Chatbot in .NET
Build a Production RAG Chatbot in .NET
A RAG (Retrieval-Augmented Generation) chatbot answers questions grounded in your own documents — not the model's training data. This guide builds a complete, production-ready chatbot from scratch: ingestion pipeline, vector search, streaming chat, conversation memory, and automated quality evaluation.
What you'll build:
- Document ingestion pipeline (PDF, Word, Markdown)
- pgvector semantic search with chunk overlap
- Streaming chat endpoint with SSE
- Conversation history in Redis
- Faithfulness evaluation in CI
Project Structure
src/
RagChatbot.Api/
Endpoints/
ChatEndpoints.cs
IngestEndpoints.cs
Program.cs
RagChatbot.Core/
Entities/
Document.cs
DocumentChunk.cs
Conversation.cs
Interfaces/
IDocumentRepository.cs
IChunkRepository.cs
RagChatbot.Application/
Ingestion/
IngestDocumentCommand.cs
IngestDocumentCommandHandler.cs
Chat/
ChatQuery.cs
ChatQueryHandler.cs
ConversationStore.cs
RagChatbot.Infrastructure/
Data/
RagDbContext.cs
Search/
PgVectorSearch.cs
Parsing/
PdfParser.cs
WordParser.cs
MarkdownParser.cs
tests/
RagChatbot.Eval/
FaithfulnessEvalTests.cs
Step 1: Infrastructure Setup
XML
<!-- RagChatbot.Api.csproj -->
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" Version="9.*" />
<PackageReference Include="Microsoft.Extensions.AI" Version="9.*" />
<PackageReference Include="Pgvector.EntityFrameworkCore" Version="0.*" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.*" />
<PackageReference Include="MediatR" Version="12.*" />
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="9.*" />
<PackageReference Include="UglyToad.PdfPig" Version="0.*" />
<PackageReference Include="DocumentFormat.OpenXml" Version="3.*" />
C#
// Program.cs
// Composition root: wires the AI clients, database, cache and application services,
// then maps the chat and ingestion endpoints.
var builder = WebApplication.CreateBuilder(args);

// Fail fast at startup with a clear message instead of a null-reference deep inside the OpenAI client.
var openAiApiKey = builder.Configuration["OpenAI:ApiKey"]
    ?? throw new InvalidOperationException("Configuration value 'OpenAI:ApiKey' is missing.");

// AI services
builder.Services.AddChatClient(services =>
        new OpenAIClient(openAiApiKey)
            .AsChatClient("gpt-4o"))
    .UseFunctionInvocation()
    .UseLogging()
    .UseDistributedCache();
builder.Services.AddEmbeddingGenerator<string, Embedding<float>>(services =>
    new OpenAIClient(openAiApiKey)
        .AsEmbeddingGenerator("text-embedding-3-small"));

// Database
// UseVector() (from Pgvector.EntityFrameworkCore) registers the Vector type mapping and the
// distance-function translations; without it the Vector property and CosineDistance cannot be mapped.
builder.Services.AddDbContext<RagDbContext>(opts =>
    opts.UseNpgsql(
        builder.Configuration.GetConnectionString("Postgres"),
        npgsql => npgsql.UseVector()));

// Redis (conversation history + response cache)
builder.Services.AddStackExchangeRedisCache(opts =>
    opts.Configuration = builder.Configuration.GetConnectionString("Redis"));

// Application services
builder.Services.AddMediatR(cfg =>
    cfg.RegisterServicesFromAssemblyContaining<IngestDocumentCommand>());
builder.Services.AddScoped<PgVectorSearch>();
builder.Services.AddScoped<ConversationStore>();
builder.Services.AddScoped<DocumentParser>();
// The ingestion handler takes a SemanticChunker and the chat endpoint takes a ChatQueryHandler;
// both must be resolvable from the container.
builder.Services.AddScoped<SemanticChunker>();
builder.Services.AddScoped<ChatQueryHandler>();

// NOTE(review): the endpoints call RequireAuthorization() / RequireAuthorization("AdminOnly"),
// but no authentication scheme or "AdminOnly" policy is registered in this snippet — add
// AddAuthentication(...) and AddAuthorization(...) to match your identity provider.

var app = builder.Build();
app.MapChatEndpoints();
app.MapIngestEndpoints();
app.Run();
Step 2: Data Model
C#
// src/RagChatbot.Core/Entities/Document.cs
/// <summary>
/// A source file that has been ingested, together with the chunks produced from it.
/// </summary>
public class Document
{
/// <summary>Identifier; the ingestion handler copies it into <c>DocumentChunk.DocumentId</c>.</summary>
public int Id { get; set; }
/// <summary>Display title taken from the ingest command; search results expose it as the citation source.</summary>
public string Title { get; set; } = "";
/// <summary>Path the file was ingested from; the ingestion handler looks up an existing document by this value.</summary>
public string SourcePath { get; set; } = "";
/// <summary>Hex-encoded SHA-256 of the extracted text (UTF-8); compared on re-ingestion to skip unchanged files.</summary>
public string ContentHash { get; set; } = "";
/// <summary>UTC time at which the ingestion handler last processed this document.</summary>
public DateTime LastIngested { get; set; }
/// <summary>Last-write time (UTC) of the source file at ingestion; shown as the "Updated" date in the chat context.</summary>
public DateTime LastModified { get; set; }
/// <summary>Chunks that belong to this document.</summary>
public ICollection<DocumentChunk> Chunks { get; set; } = [];
}
/// <summary>
/// One chunk of a document's text together with its embedding vector.
/// </summary>
public class DocumentChunk
{
/// <summary>Identifier of the chunk row.</summary>
public int Id { get; set; }
/// <summary>Identifier of the owning <see cref="Document"/>.</summary>
public int DocumentId { get; set; }
/// <summary>Zero-based position of the chunk within the document's chunk list, as assigned during ingestion.</summary>
public int ChunkIndex { get; set; }
/// <summary>Chunk text as produced by the chunker (stored without the "Document: title" prefix used for embedding).</summary>
public string Text { get; set; } = "";
/// <summary>Embedding vector; nullable, and the search query only considers rows where it is set. Mapped to <c>vector(1536)</c> by RagDbContext.</summary>
public Vector? Embedding { get; set; }
/// <summary>Navigation to the owning document.</summary>
public Document Document { get; set; } = null!;
}C#
// src/RagChatbot.Infrastructure/Data/RagDbContext.cs
public class RagDbContext(DbContextOptions<RagDbContext> opts) : DbContext(opts)
{
public DbSet<Document> Documents => Set<Document>();
public DbSet<DocumentChunk> DocumentChunks => Set<DocumentChunk>();
/// <summary>
/// Configures the pgvector extension and the embedding column/index of <see cref="DocumentChunk"/>.
/// </summary>
/// <param name="model">Model builder supplied by EF Core.</param>
protected override void OnModelCreating(ModelBuilder model)
{
    // Declare the PostgreSQL "vector" extension as part of the model.
    model.HasPostgresExtension("vector");

    var chunk = model.Entity<DocumentChunk>();

    // Embeddings are stored in a fixed-width 1536-dimension vector column.
    chunk.Property(c => c.Embedding)
        .HasColumnType("vector(1536)");

    // HNSW index with the cosine operator class. The accompanying migration snippet
    // creates the index with suppressTransaction: true.
    chunk.HasIndex(c => c.Embedding)
        .HasMethod("hnsw")
        .HasOperators("vector_cosine_ops");
}
}
C#
// Migration to enable pgvector (add this to the migration's Up method)
migrationBuilder.Sql("CREATE EXTENSION IF NOT EXISTS vector;",
suppressTransaction: true);
// HNSW index creation uses CREATE INDEX CONCURRENTLY, which cannot run inside a transaction, so it also needs suppressTransaction
migrationBuilder.Sql(
"CREATE INDEX CONCURRENTLY idx_chunks_embedding ON document_chunks " +
"USING hnsw (embedding vector_cosine_ops);",
suppressTransaction: true);
Step 3: Document Parsing
C#
// src/RagChatbot.Infrastructure/Parsing/DocumentParser.cs
public class DocumentParser
{
/// <summary>
/// Extracts plain text from a file, choosing the extractor by file extension.
/// </summary>
/// <param name="filePath">Path of the file to read.</param>
/// <returns>The extracted text.</returns>
/// <exception cref="NotSupportedException">The extension is not .pdf, .docx, .md or .txt.</exception>
public string ExtractText(string filePath)
{
    // Extension comparison is case-insensitive via invariant lower-casing.
    var extension = Path.GetExtension(filePath).ToLowerInvariant();
    switch (extension)
    {
        case ".pdf":
            return ExtractPdf(filePath);
        case ".docx":
            return ExtractDocx(filePath);
        case ".md":
        case ".txt":
            return File.ReadAllText(filePath);
        default:
            throw new NotSupportedException($"Unsupported file type: {extension}");
    }
}
/// <summary>
/// Extracts the text of every page of a PDF, separating pages with a blank line.
/// </summary>
/// <param name="path">Path of the PDF file.</param>
/// <returns>Page texts joined by a blank line; empty pages are skipped.</returns>
private static string ExtractPdf(string path)
{
    using var doc = PdfDocument.Open(path);

    // Pages are joined with a blank line (not a single newline) so that a chunker which
    // splits on paragraph boundaries ("\n\n") sees each page as at least one paragraph
    // instead of receiving the whole PDF as a single block.
    // NOTE(review): page.Text may run words together for PDFs without explicit space
    // glyphs; consider building the text from page.GetWords() — confirm on sample files.
    var pageTexts = doc.GetPages()
        .Select(page => page.Text)
        .Where(text => !string.IsNullOrWhiteSpace(text));

    // string.Join enumerates the pages here, while the document is still open.
    return string.Join("\n\n", pageTexts);
}
/// <summary>
/// Extracts the paragraph text of a Word document, separating paragraphs with a blank line.
/// </summary>
/// <param name="path">Path of the .docx file.</param>
/// <returns>Paragraph texts joined by a blank line, or an empty string when the document has no body.</returns>
private static string ExtractDocx(string path)
{
    using var doc = WordprocessingDocument.Open(path, false);
    var body = doc.MainDocumentPart?.Document.Body;
    if (body is null)
    {
        return "";
    }

    // Join with a blank line rather than "\n": a chunker that splits on "\n\n" would
    // otherwise see the entire document as one paragraph and emit a single oversized chunk.
    var paragraphs = body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>()
        .Select(p => p.InnerText)
        .Where(t => !string.IsNullOrWhiteSpace(t));

    return string.Join("\n\n", paragraphs);
}
}
C#
// Semantic chunker — splits on paragraph boundaries, not token count
public class SemanticChunker
{
private const int MaxTokensPerChunk = 500;
private const int OverlapTokens = 80;
/// <summary>
/// Splits text into chunks along paragraph boundaries, carrying a short overlap from the
/// end of each chunk into the next one.
/// </summary>
/// <param name="text">Text to split; null or whitespace yields an empty list.</param>
/// <returns>Trimmed chunks in document order.</returns>
/// <remarks>
/// Token counts are estimated as characters / 4. Paragraphs larger than the budget are
/// split at whitespace so that no chunk grows far beyond <c>MaxTokensPerChunk</c>.
/// </remarks>
public List<string> Chunk(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return [];
    }

    const int charsPerToken = 4;
    var overlapChars = OverlapTokens * charsPerToken;
    // Leave room for the carried-over overlap so "overlap + paragraph" still fits the budget.
    var maxParagraphChars = Math.Max(1, (MaxTokensPerChunk * charsPerToken) - overlapChars);

    var paragraphs = text
        .Split(["\n\n", "\r\n\r\n"], StringSplitOptions.RemoveEmptyEntries)
        .Select(p => p.Trim())
        .Where(p => p.Length > 20)
        .SelectMany(p => SplitOversized(p, maxParagraphChars))
        .ToList();

    var chunks = new List<string>();
    var current = new System.Text.StringBuilder();
    foreach (var para in paragraphs)
    {
        var estimatedTokens = (current.Length + para.Length) / charsPerToken;
        if (estimatedTokens > MaxTokensPerChunk && current.Length > 0)
        {
            var finished = current.ToString().Trim();
            chunks.Add(finished);

            // Start the next chunk with the tail of the one just closed.
            current.Clear();
            var overlap = TakeOverlap(finished, overlapChars);
            if (overlap.Length > 0)
            {
                current.AppendLine(overlap);
            }
        }

        current.AppendLine(para);
    }

    if (current.Length > 0)
    {
        chunks.Add(current.ToString().Trim());
    }

    return chunks;
}

// Breaks a paragraph longer than maxChars into pieces, preferring whitespace boundaries.
private static IEnumerable<string> SplitOversized(string paragraph, int maxChars)
{
    var start = 0;
    while (paragraph.Length - start > maxChars)
    {
        var cut = start + maxChars;

        // Walk back to the last whitespace inside the window, if there is one.
        var probe = cut;
        while (probe > start && !char.IsWhiteSpace(paragraph[probe - 1]))
        {
            probe--;
        }

        if (probe > start)
        {
            cut = probe;
        }
        else if (cut - 1 > start && char.IsHighSurrogate(paragraph[cut - 1]))
        {
            // Hard cut: never separate a surrogate pair.
            cut--;
        }

        var piece = paragraph[start..cut].Trim();
        if (piece.Length > 0)
        {
            yield return piece;
        }

        start = cut;
    }

    var tail = paragraph[start..].Trim();
    if (tail.Length > 0)
    {
        yield return tail;
    }
}

// Returns roughly the last overlapChars characters of a chunk, starting on a word boundary.
private static string TakeOverlap(string chunk, int overlapChars)
{
    if (overlapChars <= 0)
    {
        return "";
    }

    if (chunk.Length <= overlapChars)
    {
        return chunk.Trim();
    }

    var start = chunk.Length - overlapChars;
    if (!char.IsWhiteSpace(chunk[start - 1]))
    {
        // The raw cut lands inside a word: skip forward to the next gap.
        var nextGap = start;
        while (nextGap < chunk.Length && !char.IsWhiteSpace(chunk[nextGap]))
        {
            nextGap++;
        }

        if (nextGap < chunk.Length)
        {
            start = nextGap;
        }
        else if (char.IsLowSurrogate(chunk[start]))
        {
            // No gap in the tail: keep the raw cut but do not split a surrogate pair.
            start++;
        }
    }

    return chunk[start..].Trim();
}
}
Step 4: Ingestion Pipeline
C#
// src/RagChatbot.Application/Ingestion/IngestDocumentCommand.cs
public record IngestDocumentCommand(string FilePath, string Title) : IRequest<int>;
public class IngestDocumentCommandHandler(
RagDbContext db,
DocumentParser parser,
SemanticChunker chunker,
IEmbeddingGenerator<string, Embedding<float>> embedder,
ILogger<IngestDocumentCommandHandler> logger)
: IRequestHandler<IngestDocumentCommand, int>
{
/// <summary>
/// Ingests a document: extracts its text, chunks it, embeds the chunks and stores them.
/// </summary>
/// <param name="cmd">File path and title of the document to ingest.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>Number of chunks stored, or 0 when the document's content hash is unchanged.</returns>
/// <remarks>
/// All embeddings are generated before anything is written, and the document row, the
/// removal of old chunks and the new chunks are persisted in a single SaveChanges call.
/// A failure while embedding therefore cannot leave a document whose stored hash says
/// "up to date" while its chunks are missing or partial.
/// </remarks>
public async Task<int> Handle(IngestDocumentCommand cmd, CancellationToken ct)
{
    // Compute content hash — skip if document unchanged
    var rawText = parser.ExtractText(cmd.FilePath);
    var contentHash = Convert.ToHexString(
        System.Security.Cryptography.SHA256.HashData(
            System.Text.Encoding.UTF8.GetBytes(rawText)));

    var existing = await db.Documents
        .FirstOrDefaultAsync(d => d.SourcePath == cmd.FilePath, ct);
    if (existing?.ContentHash == contentHash)
    {
        logger.LogInformation("Document {Path} unchanged — skipping ingestion", cmd.FilePath);
        return 0;
    }

    // Chunk the text
    var chunks = chunker.Chunk(rawText);
    logger.LogInformation("Document {Title}: {Count} chunks", cmd.Title, chunks.Count);

    var doc = existing ?? new Document();

    // Embed in batches of 50 (API rate limit friendly). Nothing is written to the
    // database until every batch has succeeded.
    const int batchSize = 50;
    var chunkEntities = new List<DocumentChunk>(chunks.Count);
    for (var offset = 0; offset < chunks.Count; offset += batchSize)
    {
        var batch = chunks.GetRange(offset, Math.Min(batchSize, chunks.Count - offset));

        // Prefix each chunk with document metadata for better retrieval
        var textsForEmbedding = batch
            .Select(t => $"Document: {cmd.Title}\n\n{t}")
            .ToList();
        var embeddings = await embedder.GenerateAsync(textsForEmbedding, cancellationToken: ct);
        if (embeddings.Count != batch.Count)
        {
            throw new InvalidOperationException(
                $"Embedding generator returned {embeddings.Count} vectors for {batch.Count} inputs.");
        }

        for (var idx = 0; idx < batch.Count; idx++)
        {
            chunkEntities.Add(new DocumentChunk
            {
                // Linking through the navigation lets EF fill in DocumentId, which is
                // not known yet for a document that has not been inserted.
                Document = doc,
                ChunkIndex = offset + idx,
                Text = batch[idx],
                Embedding = new Vector(embeddings[idx].Vector.ToArray()),
            });
        }
    }

    if (existing is null)
    {
        db.Documents.Add(doc);
    }
    else
    {
        // Replace old chunks when re-ingesting; loaded asynchronously so the request
        // thread is not blocked on a synchronous query.
        var staleChunks = await db.DocumentChunks
            .Where(c => c.DocumentId == existing.Id)
            .ToListAsync(ct);
        db.DocumentChunks.RemoveRange(staleChunks);
    }

    // Create or update document record
    doc.Title = cmd.Title;
    doc.SourcePath = cmd.FilePath;
    doc.ContentHash = contentHash;
    doc.LastIngested = DateTime.UtcNow;
    doc.LastModified = File.GetLastWriteTimeUtc(cmd.FilePath);

    db.DocumentChunks.AddRange(chunkEntities);

    // One SaveChanges: document row, chunk deletions and chunk inserts are stored together.
    await db.SaveChangesAsync(ct);

    logger.LogInformation("Ingested {Count} chunks for document {Title}", chunkEntities.Count, cmd.Title);
    return chunkEntities.Count;
}
}
C#
// Ingestion endpoint
app.MapPost("/api/ingest", async (
    IngestRequest req,
    ISender mediator,
    CancellationToken ct) =>
{
    // Reject requests that point at a file the server cannot see.
    // NOTE(review): the path comes straight from the request body; consider confining
    // it to a configured documents directory — confirm the intended deployment model.
    var fileIsMissing = !File.Exists(req.FilePath);
    if (fileIsMissing)
    {
        return Results.BadRequest($"File not found: {req.FilePath}");
    }

    // Hand the work to the ingestion handler and report how many chunks it stored.
    var command = new IngestDocumentCommand(req.FilePath, req.Title);
    var chunks = await mediator.Send(command, ct);
    var message = $"Ingested {chunks} chunks";
    return Results.Ok(new { chunks, message });
})
.RequireAuthorization("AdminOnly");
public record IngestRequest(string FilePath, string Title);
Step 5: Vector Search
C#
// src/RagChatbot.Infrastructure/Search/PgVectorSearch.cs
/// <summary>
/// Semantic search over stored document chunks using pgvector cosine distance.
/// </summary>
public class PgVectorSearch(
    RagDbContext db,
    IEmbeddingGenerator<string, Embedding<float>> embedder)
{
    private const double SimilarityThreshold = 0.65;
    private const int TopK = 8;

    /// <summary>
    /// Embeds the query and returns up to <c>TopK</c> chunks whose cosine similarity
    /// exceeds <c>SimilarityThreshold</c>.
    /// </summary>
    /// <param name="query">Natural-language query; blank input returns no results.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// Matching chunks, ordered with the best match first and the second-best match last.
    /// </returns>
    public async Task<List<SearchResult>> SearchAsync(
        string query,
        CancellationToken ct = default)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            return [];
        }

        var result = await embedder.GenerateAsync([query], cancellationToken: ct);
        var queryVector = new Vector(result[0].Vector.ToArray());

        // similarity > threshold  <=>  distance < 1 - threshold
        const double maxDistance = 1.0 - SimilarityThreshold;

        // Order by raw distance ascending with a limit: that is the query shape a pgvector
        // index can serve. Ordering by "1 - distance" descending forces a full scan.
        var nearest = await db.DocumentChunks
            .Where(c => c.Embedding != null)
            .Select(c => new
            {
                c.Text,
                c.Document.Title,
                c.Document.LastModified,
                Distance = c.Embedding!.CosineDistance(queryVector)
            })
            .Where(c => c.Distance < maxDistance)
            .OrderBy(c => c.Distance)
            .Take(TopK)
            .ToListAsync(ct);

        var results = nearest
            .Select(c => new SearchResult(c.Text, c.Title, c.LastModified, 1.0 - c.Distance))
            .ToList();

        // Re-order: most relevant first AND last (attention is highest at boundaries)
        return ReorderForAttention(results);
    }

    // Puts the best match first and the second-best match last; the remaining matches
    // stay in descending similarity order in between.
    private static List<SearchResult> ReorderForAttention(List<SearchResult> chunks)
    {
        if (chunks.Count <= 2)
        {
            return chunks;
        }

        var sorted = chunks.OrderByDescending(c => c.Similarity).ToList();
        var result = new List<SearchResult> { sorted[0] };
        result.AddRange(sorted.Skip(2));
        result.Add(sorted[1]);
        return result;
    }
}
public record SearchResult(string Text, string Source, DateTime LastModified, double Similarity);
Step 6: Conversation Store
C#
// src/RagChatbot.Application/Chat/ConversationStore.cs
/// <summary>
/// Stores per-session chat history as JSON in the distributed cache.
/// </summary>
public class ConversationStore(IDistributedCache cache)
{
    private static readonly TimeSpan SessionTtl = TimeSpan.FromHours(2);
    private const int MaxMessages = 20;

    /// <summary>
    /// Loads the stored messages for a session.
    /// </summary>
    /// <param name="sessionId">Session identifier used to build the cache key.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The stored messages in order, or an empty list when nothing is stored.</returns>
    public async Task<List<ChatMessage>> GetHistoryAsync(string sessionId, CancellationToken ct)
    {
        var json = await cache.GetStringAsync(CacheKey(sessionId), ct);
        if (string.IsNullOrEmpty(json))
        {
            return [];
        }

        // A stored JSON "null" deserializes to null; treat it as an empty history
        // instead of dereferencing it.
        var stored = JsonSerializer.Deserialize<List<MessageDto>>(json) ?? [];
        return stored
            .Select(d => new ChatMessage(ParseRole(d.Role), d.Content))
            .ToList();
    }

    /// <summary>
    /// Appends one message to a session, keeps only the most recent <c>MaxMessages</c>
    /// entries, and rewrites the cache entry with a fresh expiry of <c>SessionTtl</c>.
    /// </summary>
    /// <param name="sessionId">Session identifier used to build the cache key.</param>
    /// <param name="role">"assistant" for assistant messages; any other value is stored as a user message.</param>
    /// <param name="content">Message text.</param>
    /// <param name="ct">Cancellation token.</param>
    public async Task AppendAsync(
        string sessionId,
        string role,
        string content,
        CancellationToken ct)
    {
        var history = await GetHistoryAsync(sessionId, ct);
        history.Add(new ChatMessage(ParseRole(role), content));

        // Sliding window — keep last N messages
        if (history.Count > MaxMessages)
        {
            history.RemoveRange(0, history.Count - MaxMessages);
        }

        var payload = JsonSerializer.Serialize(
            history.Select(m => new MessageDto(RoleName(m.Role), m.Text ?? "")));

        await cache.SetStringAsync(
            CacheKey(sessionId),
            payload,
            new DistributedCacheEntryOptions { AbsoluteExpirationRelativeToNow = SessionTtl },
            ct);
    }

    // Single place that defines the cache key format.
    private static string CacheKey(string sessionId) => $"chat:{sessionId}";

    // Stored role name -> ChatRole; anything other than "assistant" is a user message.
    private static ChatRole ParseRole(string role) =>
        role == "assistant" ? ChatRole.Assistant : ChatRole.User;

    // ChatRole -> stored role name.
    private static string RoleName(ChatRole role) =>
        role == ChatRole.Assistant ? "assistant" : "user";
}
public record MessageDto(string Role, string Content);
Step 7: Chat Pipeline
C#
// src/RagChatbot.Application/Chat/ChatQueryHandler.cs
public class ChatQueryHandler(
PgVectorSearch search,
IChatClient chatClient,
ConversationStore conversations,
ILogger<ChatQueryHandler> logger)
{
private const string SystemPrompt = """
You are a helpful assistant that answers questions from company documents.
RULES:
1. Answer ONLY from the provided context — never from general knowledge.
2. If the context does not contain enough information, say:
"I don't have information about this in the available documents."
3. Always cite the document name and date: e.g. (Source: Policy Guide, updated 2026-01-15)
4. If a document is more than 6 months old, add a staleness warning.
5. Do not speculate or infer information not explicitly stated.
""";
/// <summary>
/// Answers a user message from retrieved document chunks and streams the reply token by token.
/// </summary>
/// <param name="sessionId">Conversation identifier used to load and persist history.</param>
/// <param name="userMessage">The user's question.</param>
/// <param name="ct">Cancellation token; also cancels enumeration.</param>
/// <returns>
/// The reply as a stream of text fragments. When retrieval finds nothing, a single fixed
/// "no information" sentence is returned and the turn is not persisted.
/// </returns>
public async IAsyncEnumerable<string> ChatAsync(
    string sessionId,
    string userMessage,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken ct = default)
{
    // 1. Retrieve relevant chunks
    var chunks = await search.SearchAsync(userMessage, ct);
    if (chunks.Count == 0)
    {
        logger.LogInformation("No relevant chunks found for query: {Query}", userMessage);
        yield return "I don't have information about this in the available documents.";
        yield break;
    }

    // 2. Build context block with source citations.
    // Dates are formatted with the invariant culture so the model always sees ISO dates,
    // regardless of the server's regional settings.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var contextBlock = string.Join("\n\n---\n\n", chunks.Select(c =>
        $"[Source: {c.Source} | Updated: {c.LastModified.ToString("yyyy-MM-dd", invariant)}]\n{c.Text}"));

    // 3. Load conversation history
    var history = await conversations.GetHistoryAsync(sessionId, ct);

    // 4. Build message list.
    // The system prompt asks for a staleness warning on documents older than six months;
    // the model can only judge that if it is told today's date.
    var today = DateTime.UtcNow.ToString("yyyy-MM-dd", invariant);
    var messages = new List<ChatMessage>
    {
        new(ChatRole.System, SystemPrompt),
        new(ChatRole.System, $"TODAY: {today}\n\nCONTEXT:\n{contextBlock}")
    };
    messages.AddRange(history);
    messages.Add(new(ChatRole.User, userMessage));

    // 5. Stream the response
    // NOTE(review): CompleteStreamingAsync is the pre-GA Microsoft.Extensions.AI name;
    // later package versions call it GetStreamingResponseAsync — confirm against the
    // package version the project actually restores.
    var accumulated = new System.Text.StringBuilder();
    await foreach (var update in chatClient.CompleteStreamingAsync(messages, cancellationToken: ct))
    {
        if (update.Text is { Length: > 0 } token)
        {
            accumulated.Append(token);
            yield return token;
        }
    }

    // 6. Persist conversation turn
    await conversations.AppendAsync(sessionId, "user", userMessage, ct);
    await conversations.AppendAsync(sessionId, "assistant", accumulated.ToString(), ct);
}
}
C#
// Streaming chat endpoint
app.MapPost("/api/chat", async (
    ChatRequest req,
    ChatQueryHandler handler,
    HttpContext ctx,
    CancellationToken ct) =>
{
    // Validate before the event stream starts, while a normal status code can still be sent.
    if (string.IsNullOrWhiteSpace(req.Message))
    {
        ctx.Response.StatusCode = StatusCodes.Status400BadRequest;
        await ctx.Response.WriteAsync("Message must not be empty.", ct);
        return;
    }

    // A blank session id is treated like a missing one; otherwise every client that
    // sends "" would share the same history entry.
    var sessionId = string.IsNullOrWhiteSpace(req.SessionId)
        ? Guid.NewGuid().ToString()
        : req.SessionId;

    ctx.Response.Headers.ContentType = "text/event-stream";
    ctx.Response.Headers.CacheControl = "no-cache";

    try
    {
        // Send session ID first so client can persist it
        await WriteEventAsync(JsonSerializer.Serialize(new { sessionId }));

        await foreach (var token in handler.ChatAsync(sessionId, req.Message, ct))
        {
            await WriteEventAsync(JsonSerializer.Serialize(new { token }));
        }

        await WriteEventAsync("[DONE]");
    }
    catch (OperationCanceledException)
    {
        // The request was cancelled (typically the client disconnected). Writing the
        // terminator with the cancelled token would only throw again, so stop here.
    }

    // Writes one SSE "data:" event and flushes it to the client immediately.
    async Task WriteEventAsync(string payload)
    {
        await ctx.Response.WriteAsync($"data: {payload}\n\n", ct);
        await ctx.Response.Body.FlushAsync(ct);
    }
})
.RequireAuthorization();
public record ChatRequest(string Message, string? SessionId = null);
Step 8: Faithfulness Evaluation in CI
C#
// tests/RagChatbot.Eval/FaithfulnessEvalTests.cs
/// <summary>
/// Scores a fixed set of question / context / answer triples with an LLM judge and fails
/// when any answer scores below the faithfulness threshold.
/// </summary>
public class FaithfulnessEvalTests
{
    // Lowest judge score that still counts as grounded.
    private const int MinFaithfulnessScore = 7;

    private static readonly List<EvalCase> TestCases =
    [
        new("What is the parental leave policy?",
            "Employees are entitled to 20 weeks of paid parental leave.",
            "The parental leave policy is 20 weeks paid."),
        new("What is the training budget per employee?",
            "Each employee has an annual training budget of £2,000.",
            "The training budget is £2,000 per year per employee."),
        new("Do I need manager approval for expenses under £100?",
            "Expenses under £100 do not require manager approval.",
            "No approval is needed for expenses below £100."),
    ];

    [Fact]
    public async Task All_TestCases_AreGrounded()
    {
        // Fail with an actionable message when the key is not configured.
        var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new InvalidOperationException(
                "OPENAI_API_KEY must be set to run the faithfulness evaluation.");
        }

        var judge = new OpenAIClient(apiKey).AsChatClient("gpt-4o-mini");
        var evaluator = new FaithfulnessEvaluator(judge);

        var failures = new List<string>();
        foreach (var tc in TestCases)
        {
            var score = await evaluator.EvaluateAsync(tc.Question, tc.Context, tc.ExpectedAnswer);
            if (score.Score < MinFaithfulnessScore)
            {
                failures.Add($"Q: {tc.Question} | Score: {score.Score} | Reason: {score.Reason}");
            }
        }

        // The details go in as a format argument: the judge's free-text reason may contain
        // braces, which would be misread as placeholders if it were part of the format string.
        failures.Should().BeEmpty(
            "the following answers failed faithfulness threshold:\n{0}",
            string.Join("\n", failures));
    }
}
public record EvalCase(string Question, string Context, string ExpectedAnswer);
Calling the API
# Ingest a document (admin only)
POST /api/ingest
{ "filePath": "/docs/hr-policy.pdf", "title": "HR Policy 2026" }
# Chat (streaming)
POST /api/chat
{ "message": "What is the annual leave policy?", "sessionId": null }
Response (SSE stream):
data: {"sessionId":"abc-123"}
data: {"token":"Annual"}
data: {"token":" leave"}
data: {"token":" entitlement"}
...
data: [DONE]
# Second turn (same session — has conversation memory)
POST /api/chat
{ "message": "What about parental leave?", "sessionId": "abc-123" }
Production Checklist
Before going live:
[ ] Run faithfulness eval on sample documents — ensure score above 8/10
[ ] Test the retrieval threshold — query with out-of-scope questions
[ ] Verify conversation history is scoped to sessionId (no cross-session leak)
[ ] Add rate limiting to /api/chat (streaming connections held open 10-30s)
[ ] Set up document re-ingestion schedule (daily check for changed files)
[ ] Add cost monitoring — each chat request costs ~$0.01-0.05
[ ] Add /api/chat to your CI eval pipeline (weekly scheduled run)