Building a RAG Chatbot with LangChain and Pinecone

Introduction

Retrieval-Augmented Generation (RAG) grounds a language model's answers in your own content: relevant passages are retrieved from a vector store and passed to the model as context, so it can answer questions about documents it was never trained on. This guide builds a RAG chatbot using LangChain, Pinecone, and Next.js.
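At its core, the flow has three steps: embed the question, retrieve the most similar document chunks from the vector store, and prompt the model with those chunks as context. A minimal sketch of that loop, using hypothetical embed/search/generate helpers (the concrete versions, OpenAIEmbeddings, PineconeStore, and ChatOpenAI, are built in the steps below):

// Hypothetical stand-ins for the concrete pieces built later in this guide.
declare function embed(text: string): Promise<number[]>;            // e.g. OpenAIEmbeddings
declare function search(v: number[], k: number): Promise<string[]>; // e.g. a Pinecone similarity query
declare function generate(prompt: string): Promise<string>;         // e.g. ChatOpenAI

async function answer(question: string): Promise<string> {
  const queryVector = await embed(question);   // 1. embed the question
  const chunks = await search(queryVector, 4); // 2. retrieve the top-k similar chunks
  // 3. generate an answer grounded in the retrieved context
  return generate(`Context:\n${chunks.join('\n\n')}\n\nQuestion: ${question}`);
}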

Prerequisites

- Node.js and npm installed
- A Next.js project using the App Router
- An OpenAI API key
- A Pinecone account and API key

Step 1: Install Dependencies

npm install langchain @langchain/openai @langchain/pinecone @pinecone-database/pinecone

Step 2: Set up Environment Variables

Create .env:

OPENAI_API_KEY=sk-your-key
PINECONE_API_KEY=your-pinecone-key
PINECONE_INDEX=chatbot-docs

Step 3: Initialize Pinecone

Create lib/pinecone.ts:

import { Pinecone } from '@pinecone-database/pinecone';

const pinecone = new Pinecone({
  apiKey: process.env.PINECONE_API_KEY!,
});

export const index = pinecone.index(process.env.PINECONE_INDEX!);
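Note that recent versions of the @pinecone-database/pinecone SDK need only the API key; the old environment setting is no longer used. If the index doesn't exist yet, create it once, for example in a setup script. A sketch, assuming a serverless index and the 1536-dimension vectors produced by OpenAI's text-embedding-ada-002 (adjust cloud and region to your account):

import { Pinecone } from '@pinecone-database/pinecone';

const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });

// One-time setup: a serverless index sized for OpenAI embeddings.
// text-embedding-ada-002 produces 1536-dimensional vectors.
await pinecone.createIndex({
  name: process.env.PINECONE_INDEX!,
  dimension: 1536,
  metric: 'cosine',
  spec: { serverless: { cloud: 'aws', region: 'us-east-1' } },
});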

Step 4: Document Ingestion Script

Create scripts/ingest.ts:

import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PineconeStore } from '@langchain/pinecone';
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
import { TextLoader } from 'langchain/document_loaders/fs/text';
import { index } from '../lib/pinecone';

async function ingestDocs() {
  // Load documents
  const directoryLoader = new DirectoryLoader('./docs', {
    '.txt': (path) => new TextLoader(path),
    '.md': (path) => new TextLoader(path),
  });

  const rawDocs = await directoryLoader.load();
  console.log(`Loaded ${rawDocs.length} documents`);

  // Split documents
  const textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 200,
  });

  const docs = await textSplitter.splitDocuments(rawDocs);
  console.log(`Split into ${docs.length} chunks`);

  // Create embeddings and store in Pinecone
  const embeddings = new OpenAIEmbeddings({
    openAIApiKey: process.env.OPENAI_API_KEY!,
  });

  await PineconeStore.fromDocuments(docs, embeddings, {
    pineconeIndex: index,
  });

  console.log('Documents ingested successfully');
}

ingestDocs().catch(console.error);
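As a sanity check after ingestion, you can ask Pinecone for the index stats; the record count should roughly match the number of chunks the splitter reported. A sketch using the SDK's describeIndexStats:

import { index } from '../lib/pinecone';

// The reported record count should be close to the chunk count logged above.
const stats = await index.describeIndexStats();
console.log(stats);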

Step 5: Create RAG Chain

Create lib/chain.ts:

import { ChatOpenAI } from '@langchain/openai';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PineconeStore } from '@langchain/pinecone';
import { RetrievalQAChain } from 'langchain/chains';
import { PromptTemplate } from '@langchain/core/prompts';
import { index } from './pinecone';

const QA_PROMPT = PromptTemplate.fromTemplate(
  `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:`
);

export async function makeChain() {
  // Initialize the LLM
  const model = new ChatOpenAI({
    temperature: 0,
    modelName: 'gpt-4',
    openAIApiKey: process.env.OPENAI_API_KEY!,
  });

  // Initialize embeddings
  const embeddings = new OpenAIEmbeddings({
    openAIApiKey: process.env.OPENAI_API_KEY!,
  });

  // Initialize vector store
  const vectorStore = await PineconeStore.fromExistingIndex(embeddings, {
    pineconeIndex: index,
  });

  // Create the chain
  const chain = RetrievalQAChain.fromLLM(model, vectorStore.asRetriever(), {
    prompt: QA_PROMPT,
    returnSourceDocuments: true,
  });

  return chain;
}
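RetrievalQAChain is one of LangChain's older-style chains (newer releases lean on LCEL), but it works fine for this setup. You can exercise the chain directly before wiring up the API route; note that makeChain builds a fresh chain on every call, so under load you may want to memoize it. A quick usage sketch with a made-up question:

import { makeChain } from './lib/chain';

const chain = await makeChain();
const response = await chain.call({ query: 'What does the onboarding doc cover?' });

console.log(response.text);            // the generated answer
console.log(response.sourceDocuments); // the retrieved chunks behind it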

Step 6: Create API Route

Create app/api/chat/route.ts:

import { NextRequest, NextResponse } from 'next/server';
import { makeChain } from '@/lib/chain';

export async function POST(req: NextRequest) {
  try {
    const { question } = await req.json();

    if (!question) {
      return NextResponse.json(
        { error: 'No question provided' },
        { status: 400 }
      );
    }

    const chain = await makeChain();
    const response = await chain.call({
      query: question,
    });

    return NextResponse.json({
      answer: response.text,
      sourceDocuments: response.sourceDocuments?.map((doc: any) => ({
        pageContent: doc.pageContent.slice(0, 200) + '...',
        source: doc.metadata.source,
      })),
    });
  } catch (error) {
    console.error('Error:', error);
    return NextResponse.json(
      { error: 'Internal server error' },
      { status: 500 }
    );
  }
}
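With the route in place, you can smoke-test it from the terminal (assuming the Next.js dev server is running on the default port 3000; the question is just an example):

curl -X POST http://localhost:3000/api/chat \
  -H 'Content-Type: application/json' \
  -d '{"question": "What do the docs say about deployment?"}'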

Step 7: Create Chat Interface

Create components/RAGChat.tsx:

'use client';

import { useState } from 'react';

interface ChatMessage {
  question: string;
  answer: string;
  sources?: { source: string; pageContent: string }[];
}

export default function RAGChat() {
  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [question, setQuestion] = useState('');
  const [loading, setLoading] = useState(false);

  const askQuestion = async () => {
    if (!question.trim()) return;

    setLoading(true);
    const currentQuestion = question;
    setQuestion('');

    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ question: currentQuestion }),
      });

      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      setMessages(prev => [...prev, {
        question: currentQuestion,
        answer: data.answer,
        sources: data.sourceDocuments,
      }]);
    } catch (error) {
      console.error('Error:', error);
      setMessages(prev => [...prev, {
        question: currentQuestion,
        answer: 'Sorry, I encountered an error. Please try again.',
      }]);
    } finally {
      setLoading(false);
    }
  };

  return (
    <div className="max-w-4xl mx-auto p-6">
      <div className="space-y-6 mb-6">
        {messages.map((msg, idx) => (
          <div key={idx} className="border rounded-lg p-4">
            <div className="font-semibold text-blue-600 mb-2">
              Q: {msg.question}
            </div>
            <div className="text-gray-800 mb-3">
              A: {msg.answer}
            </div>
            {msg.sources && msg.sources.length > 0 && (
              <details className="text-sm text-gray-600">
                <summary className="cursor-pointer">Sources ({msg.sources.length})</summary>
                <div className="mt-2 space-y-2">
                  {msg.sources.map((source, sidx) => (
                    <div key={sidx} className="border-l-2 border-gray-300 pl-3">
                      <div className="font-medium">{source.source}</div>
                      <div className="text-gray-500">{source.pageContent}</div>
                    </div>
                  ))}
                </div>
              </details>
            )}
          </div>
        ))}
      </div>

      <div className="flex gap-3">
        <input
          type="text"
          value={question}
          onChange={(e) => setQuestion(e.target.value)}
          onKeyDown={(e) => e.key === 'Enter' && askQuestion()}
          placeholder="Ask a question about your documents..."
          className="flex-1 p-3 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500"
          disabled={loading}
        />
        <button
          onClick={askQuestion}
          disabled={loading || !question.trim()}
          className="px-6 py-3 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:bg-gray-300"
        >
          {loading ? 'Thinking...' : 'Ask'}
        </button>
      </div>
    </div>
  );
}
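Finally, render the component on a page. A minimal sketch for the App Router, assuming the default @/ import alias:

// app/page.tsx
import RAGChat from '@/components/RAGChat';

export default function Home() {
  return <RAGChat />;
}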

Step 8: Run Document Ingestion

# First, create a docs/ folder with your documents
mkdir docs
# Add .txt or .md files to docs/

# The standalone script runs outside Next.js, so .env is not loaded
# automatically; preload it with dotenv when running the ingestion
npm install dotenv
npx ts-node -r dotenv/config scripts/ingest.ts

Summary

This RAG chatbot uses LangChain for document processing and Pinecone for vector storage, enabling AI to answer questions about your specific documents with source citations.

