1:"$Sreact.fragment" 2:I[88754,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],"ClientPageWrapper"] 3:I[79520,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],""] 5:I[73552,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],"AnimatedText"] 6:I[22016,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],""] 7:I[3601,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],"AnimatedSection"] 
14:I[85437,["/_next/static/chunks/0a3f498ef71c618e.js","/_next/static/chunks/61bc785dc6bd3109.js","/_next/static/chunks/a676f0ced11453e9.js","/_next/static/chunks/4b778b2e48718709.js","/_next/static/chunks/95a5e382a1e8200a.js","/_next/static/chunks/06fe5961b16f81d0.js","/_next/static/chunks/8c955ce35598c7b8.js","/_next/static/chunks/15c613f0f258455e.js"],"Image"] 16:I[97367,["/_next/static/chunks/ff1a16fafef87110.js","/_next/static/chunks/247eb132b7f7b574.js"],"OutletBoundary"] 17:"$Sreact.suspense" 4:T487,{"@context":"https://schema.org","@type":"BlogPosting","headline":"Building a Production-Ready AI Chatbot Platform: Lessons from Convot","description":"Deep-dive into the architecture decisions, ingestion pipelines, and hard-won lessons from building Convot — an embeddable AI chatbot that retrieves answers from your own knowledge base.","datePublished":"2025-02-15T00:00:00.000Z","dateModified":"2025-02-15T00:00:00.000Z","author":{"@type":"Person","name":"Hussein Maghrabi","url":"https://h-maghrabi.tech/","sameAs":["https://www.linkedin.com/in/hussein-maghrabi/","https://www.linkedin.com/in/hussein-maghrabi/"]},"publisher":{"@type":"Person","name":"Hussein Maghrabi","url":"https://h-maghrabi.tech/"},"url":"https://h-maghrabi.tech//blogs/building-production-ai-chatbot-convot","mainEntityOfPage":{"@type":"WebPage","@id":"https://h-maghrabi.tech//blogs/building-production-ai-chatbot-convot"},"image":"https://h-maghrabi.tech//projects/convot/logo.png","keywords":"AI, LLM, RAG, FastAPI, Next.js, Vector Search","wordCount":559,"inLanguage":"en-US","isPartOf":{"@type":"Blog","name":"Hussein Maghrabi's 
Blog","url":"https://h-maghrabi.tech//blogs"}}0:{"buildId":"pOVubU_Kt6m3SrfLgqzM-","rsc":["$","$1","c",{"children":[["$","$L2",null,{"children":[["$","$L3",null,{"id":"schema-blog-post","type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"$4"}}],["$","$L3",null,{"id":"schema-breadcrumb-post","type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"{\"@context\":\"https://schema.org\",\"@type\":\"BreadcrumbList\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https://h-maghrabi.tech/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Blogs\",\"item\":\"https://h-maghrabi.tech//blogs\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Building a Production-Ready AI Chatbot Platform: Lessons from Convot\",\"item\":\"https://h-maghrabi.tech//blogs/building-production-ai-chatbot-convot\"}]}"}}],["$","article",null,{"className":"max-w-3xl mx-auto px-4 sm:px-6 py-8","children":[["$","$L5",null,{"delay":0,"children":["$","nav",null,{"aria-label":"Breadcrumb","className":"mb-6","children":["$","ol",null,{"className":"flex items-center gap-1.5 text-sm text-muted-foreground","children":[["$","li",null,{"children":["$","$L6",null,{"href":"/","className":"hover:text-foreground transition-colors","children":"Home"}]}],["$","li",null,{"aria-hidden":"true","children":["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-chevron-right w-3.5 h-3.5","children":[["$","path","mthhwq",{"d":"m9 18 6-6-6-6"}],"$undefined"]}]}],["$","li",null,{"children":["$","$L6",null,{"href":"/blogs","className":"hover:text-foreground transition-colors","children":"Blogs"}]}],["$","li",null,{"aria-hidden":"true","children":["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 
24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-chevron-right w-3.5 h-3.5","children":[["$","path","mthhwq",{"d":"m9 18 6-6-6-6"}],"$undefined"]}]}],["$","li",null,{"className":"text-foreground font-medium truncate max-w-[200px] sm:max-w-[300px]","aria-current":"page","children":"Building a Production-Ready AI Chatbot Platform: Lessons from Convot"}]]}]}]}],["$","$L7",null,{"direction":"up","children":["$","header",null,{"className":"mb-8","children":[["$","div",null,{"className":"flex flex-wrap gap-2 mb-4","children":[["$","span","AI",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"AI"}],["$","span","LLM",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"LLM"}],"$L8","$L9","$La","$Lb"]}],"$Lc","$Ld","$Le"]}]}],"$Lf","$L10","$L11"]}]]}],["$L12"],"$L13"]}],"loading":null,"isPartial":false} 8:["$","span","RAG",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"RAG"}] 9:["$","span","FastAPI",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"FastAPI"}] a:["$","span","Next.js",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"Next.js"}] b:["$","span","Vector Search",{"className":"inline-flex items-center px-2.5 py-1 rounded-md text-xs font-medium bg-primary/10 text-primary border border-primary/20","children":"Vector Search"}] c:["$","h1",null,{"className":"font-heading text-3xl sm:text-4xl md:text-5xl leading-tight text-foreground mb-4","children":"Building a Production-Ready AI Chatbot Platform: Lessons 
from Convot"}] d:["$","p",null,{"className":"text-lg text-muted-foreground leading-relaxed mb-6","children":"Deep-dive into the architecture decisions, ingestion pipelines, and hard-won lessons from building Convot — an embeddable AI chatbot that retrieves answers from your own knowledge base."}] e:["$","div",null,{"className":"flex flex-wrap items-center gap-4 text-sm text-muted-foreground pb-6 border-b border-border","children":[["$","address",null,{"className":"flex items-center gap-1.5 not-italic","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-user w-4 h-4","children":[["$","path","975kel",{"d":"M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2"}],["$","circle","17ys0d",{"cx":"12","cy":"7","r":"4"}],"$undefined"]}],["$","a",null,{"rel":"author","href":"https://h-maghrabi.tech/","className":"hover:text-foreground transition-colors","children":"Hussein Maghrabi"}]]}],["$","time",null,{"dateTime":"2025-02-15T00:00:00.000Z","className":"flex items-center gap-1.5","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-calendar w-4 h-4","children":[["$","path","1cmpym",{"d":"M8 2v4"}],["$","path","4m81vk",{"d":"M16 2v4"}],["$","rect","1hopcy",{"width":"18","height":"18","x":"3","y":"4","rx":"2"}],["$","path","8toen8",{"d":"M3 10h18"}],"$undefined"]}],"February 15, 2025"]}],"$undefined"]}] f:["$","$L7",null,{"direction":"up","delay":0.05,"children":["$","figure",null,{"className":"mb-10","children":["$","$L14",null,{"src":"/projects/convot/logo.png","alt":"Building a Production-Ready AI Chatbot Platform: Lessons from Convot","width":768,"height":400,"className":"w-full h-auto rounded-lg border 
border-border object-cover","priority":true}]}]}] 15:T13d4,

Building a production-ready AI chatbot is very different from building a demo. Over six months of building Convot, I learned that the hard part isn't the LLM — it's everything around it. This post shares the key architectural decisions and lessons that made the difference.

The Core Problem

When businesses want to give their users AI-powered answers, they face a fundamental challenge: generic LLMs don't know your specific data. Fine-tuning is expensive and slow. RAG (Retrieval-Augmented Generation) is the pragmatic middle ground — you retrieve relevant context at query time and inject it into the prompt.

But implementing RAG well is harder than it looks.

Architecture Overview

Convot's architecture has three main layers:

User Query
    │
    ▼
┌─────────────────────────────┐
│     Query Processing        │
│  - Embedding generation     │
│  - Hybrid search (dense +   │
│    sparse)                  │
└────────────┬────────────────┘
             │
    ┌────────▼────────┐
    │  Vector Store   │  ← Pinecone / pgvector
    └────────┬────────┘
             │ Top-K chunks
    ┌────────▼────────────────┐
    │   LLM Generation        │
    │  - Context injection    │
    │  - Source attribution   │
    └─────────────────────────┘

Ingestion Pipeline: The Underrated Part

Most tutorials focus on the query side. The ingestion pipeline is equally critical.

Challenge 1 — Chunking Strategy

Naive chunking (split every N tokens) breaks semantic coherence. We moved to semantic chunking: using embedding similarity to detect natural break points in the text. This dramatically improved retrieval precision.

def semantic_chunk(text: str, similarity_threshold: float = 0.8) -> list[str]:
    """Split text into chunks at points where adjacent sentences diverge.

    Consecutive sentences are grouped together. A new chunk starts whenever
    the score returned by ``cosine_similarity`` for a sentence's embedding and
    the previous sentence's embedding is below ``similarity_threshold``.

    Args:
        text: Raw document text to split.
        similarity_threshold: Minimum similarity score for two adjacent
            sentences to stay in the same chunk.

    Returns:
        Chunks in document order, each the space-joined sentences of one
        group. Empty when ``text`` is blank or yields no sentences.
    """
    # Blank input has nothing to chunk; return before doing any embedding work.
    if not text or not text.strip():
        return []

    sentences = split_into_sentences(text)
    if not sentences:
        # Guard: indexing sentences[0] below would raise IndexError.
        return []

    embeddings = embed_batch(sentences)
    chunks = []
    current_chunk = [sentences[0]]

    for i in range(1, len(sentences)):
        sim = cosine_similarity(embeddings[i - 1], embeddings[i])
        if sim < similarity_threshold:
            # Similarity dropped: close the current chunk and start a new one.
            chunks.append(" ".join(current_chunk))
            current_chunk = []
        current_chunk.append(sentences[i])

    # current_chunk always holds at least the most recent sentence here.
    chunks.append(" ".join(current_chunk))
    return chunks

Challenge 2 — Multi-tenant Isolation

Every customer's knowledge base must be strictly isolated. We used namespace-based isolation in Pinecone, where each tenant gets its own namespace. At query time, the namespace is derived from the authenticated API key — zero chance of cross-tenant leakage.

Challenge 3 — Incremental Sync

Re-ingesting the entire knowledge base on every update is too slow and expensive. We implemented a content-hash-based diff — only changed documents trigger re-embedding.

Structured Prompt Design

This was the single biggest lever for answer quality. Instead of a generic system prompt, we built structured prompts tailored to each knowledge base type:

# System prompt template for the assistant. The {company_name} placeholder
# must be filled in before the prompt is sent (presumably via str.format —
# confirm against the calling code). The rules restrict answers to the
# supplied context and give a fixed fallback reply for when the context
# does not contain the answer.
SYSTEM_PROMPT = """
You are a helpful assistant for {company_name}.
Answer ONLY based on the provided context.
If the answer is not in the context, say: "I don't have that information."

Rules:
- Be concise and direct
- Always cite the source document
- Never fabricate information
- Format code blocks properly
"""

This alone reduced hallucinations by ~60% compared to a generic system prompt.

The Embed Widget

The one-line embed (<script src="convot.js" data-key="..."></script>) required a careful isolation strategy — the widget renders inside an <iframe> with sandbox attributes, so its styles cannot conflict with the host page and its scripts stay restricted.

Key Takeaways

  1. Chunking strategy matters more than the model choice — invest time here first.
  2. Multi-tenancy is a security requirement, not a nice-to-have — design it in from day one.
  3. Structured prompts beat generic ones by a huge margin for domain-specific answers.
  4. Source attribution is a product differentiator — users trust answers that cite sources.
  5. Start with a small, curated knowledge base to validate retrieval quality before scaling.

Building Convot was one of the most technically challenging and rewarding projects I've worked on. Happy to answer questions in the comments or on X.

10:["$","$L7",null,{"direction":"up","delay":0.1,"children":["$","section",null,{"className":"blog-content","dangerouslySetInnerHTML":{"__html":"$15"}}]}] 11:["$","$L7",null,{"direction":"up","delay":0.15,"className":"mt-16 pt-8 border-t border-border","children":["$","footer",null,{"className":"flex items-center justify-between","children":[["$","$L6",null,{"href":"/blogs","className":"inline-flex items-center justify-center text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 border border-input bg-background hover:bg-accent hover:text-accent-foreground h-10 px-4 py-2 rounded-lg gap-2","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-chevron-left w-4 h-4","children":[["$","path","1wnfg3",{"d":"m15 18-6-6 6-6"}],"$undefined"]}],"All posts"]}],["$","div",null,{"className":"text-sm text-muted-foreground","children":["Written by"," ",["$","$L6",null,{"href":"https://www.linkedin.com/in/hussein-maghrabi/","target":"_blank","rel":"noopener noreferrer","className":"font-medium text-foreground hover:text-primary transition-colors","children":"Hussein Maghrabi"}]]}]]}]}] 12:["$","script","script-0",{"src":"/_next/static/chunks/15c613f0f258455e.js","async":true}] 13:["$","$L16",null,{"children":["$","$17",null,{"name":"Next.MetadataOutlet","children":"$@18"}]}] 18:null