Creating AI Embeddings with OpenAI for Semantic Search
Introduction
OpenAI embeddings convert text into high-dimensional vectors that capture semantic meaning. This guide implements semantic search using embeddings for intelligent document retrieval.
Prerequisites
- Next.js 14+
- OpenAI API key
- PostgreSQL with pgvector extension
Step 1: Install Dependencies
# Runtime dependencies
npm install openai pg
# Type declarations are only needed at build time
npm install --save-dev @types/pg
Step 2: Setup Database with Vector Extension
Enable the pgvector extension in PostgreSQL (pgvector itself must already be installed on the database server):
-- Connect to your database and run.
-- Every statement uses IF NOT EXISTS so the script can be re-run safely.
CREATE EXTENSION IF NOT EXISTS vector;

-- Create table for storing documents and embeddings
CREATE TABLE IF NOT EXISTS documents (
  id SERIAL PRIMARY KEY,
  title TEXT NOT NULL,
  content TEXT NOT NULL,
  embedding VECTOR(1536), -- OpenAI ada-002 produces 1536-dimensional vectors
  created_at TIMESTAMP DEFAULT NOW()
);

-- Create index for vector similarity search (cosine distance, used by the <=> operator).
-- NOTE: pgvector recommends building an IVFFlat index after the table holds
-- representative data and tuning `lists` to the row count; 100 is the default.
CREATE INDEX IF NOT EXISTS documents_embedding_idx
  ON documents USING ivfflat (embedding vector_cosine_ops)
  WITH (lists = 100);
Step 3: Create Embedding Utilities
Create lib/embeddings.ts:
import OpenAI from 'openai';
/** Module-wide OpenAI client; the API key is read from the OPENAI_API_KEY environment variable. */
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY! });
/**
 * Requests an embedding vector for a single piece of text.
 *
 * @param text - The text to embed.
 * @returns The embedding of the first (and only) item in the API response.
 * @throws Error with the message "Failed to create embedding" when the request
 *   fails or the response cannot be read; the underlying error is logged.
 */
export async function createEmbedding(text: string): Promise<number[]> {
  try {
    const { data } = await openai.embeddings.create({
      input: text,
      model: 'text-embedding-ada-002',
    });
    const firstItem = data[0];
    return firstItem.embedding;
  } catch (cause) {
    console.error('Error creating embedding:', cause);
    throw new Error('Failed to create embedding');
  }
}
/**
 * Requests embedding vectors for several texts in a single API call.
 *
 * @param texts - The texts to embed. An empty array yields an empty result
 *   without contacting the API.
 * @returns One embedding per input text, in the same order as `texts`.
 * @throws Error with the message "Failed to create embeddings" when the request
 *   fails or the response cannot be read; the underlying error is logged.
 */
export async function createEmbeddings(texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip the network round trip entirely.
  if (texts.length === 0) {
    return [];
  }

  try {
    const response = await openai.embeddings.create({
      model: 'text-embedding-ada-002',
      input: texts,
    });
    // Each returned item carries the index of the input it belongs to; order by
    // it explicitly instead of relying on the response order.
    return [...response.data]
      .sort((left, right) => left.index - right.index)
      .map((item) => item.embedding);
  } catch (error) {
    console.error('Error creating embeddings:', error);
    throw new Error('Failed to create embeddings');
  }
}
/**
 * Calculates the cosine similarity between two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns The cosine of the angle between `a` and `b`, in [-1, 1]. Returns 0
 *   when either vector has zero magnitude (including two empty vectors), where
 *   the similarity is otherwise undefined.
 * @throws RangeError when the vectors have different lengths.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) {
    throw new RangeError(
      `Vector length mismatch: ${a.length} vs ${b.length}`
    );
  }

  // Accumulate the dot product and both squared norms in a single pass.
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    squaredNormA += a[i] * a[i];
    squaredNormB += b[i] * b[i];
  }

  const denominator = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  // Avoid dividing by zero, which would produce NaN.
  return denominator === 0 ? 0 : dotProduct / denominator;
}
Step 4: Database Operations
Create lib/database.ts:
import { Pool } from 'pg';
/** Shared PostgreSQL connection pool, configured from the DATABASE_URL environment variable. */
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
/**
 * A row of the `documents` table as returned by the query helpers in this module.
 */
export interface Document {
id: number;
title: string;
content: string;
// Optional: the list and search queries in this module do not select this column.
embedding?: number[];
// Only set by searchSimilarDocuments, computed as 1 - (embedding <=> query vector).
similarity?: number;
created_at: Date;
}
/**
 * Inserts a document together with its embedding and returns the stored row.
 *
 * The embedding is sent as a bracketed, comma-separated text literal
 * (for example `[0.1,0.2]`) bound to the third query parameter.
 *
 * @param title - Document title.
 * @param content - Document body text.
 * @param embedding - Embedding vector for the document.
 * @returns The first row produced by the statement's `RETURNING *` clause.
 */
export async function insertDocument(
  title: string,
  content: string,
  embedding: number[]
): Promise<Document> {
  const insertSql =
    'INSERT INTO documents (title, content, embedding) VALUES ($1, $2, $3) RETURNING *';
  const connection = await pool.connect();
  try {
    const vectorLiteral = `[${embedding.join(',')}]`;
    const { rows } = await connection.query(insertSql, [
      title,
      content,
      vectorLiteral,
    ]);
    return rows[0];
  } finally {
    // Always hand the connection back to the pool, even when the query throws.
    connection.release();
  }
}
/**
 * Finds the documents whose embeddings are most similar to a query embedding.
 *
 * Similarity is computed in SQL as `1 - (embedding <=> query)`. Only rows whose
 * similarity is strictly greater than `threshold` are returned, ordered by
 * ascending `<=>` distance.
 *
 * @param queryEmbedding - Embedding of the search query.
 * @param limit - Maximum number of rows to return (default 10).
 * @param threshold - Minimum similarity, exclusive (default 0.7).
 * @returns Matching rows with `id`, `title`, `content`, `created_at` and `similarity`.
 */
export async function searchSimilarDocuments(
  queryEmbedding: number[],
  limit: number = 10,
  threshold: number = 0.7
): Promise<Document[]> {
  const searchSql = [
    'SELECT',
    'id, title, content, created_at,',
    '1 - (embedding <=> $1) AS similarity',
    'FROM documents',
    'WHERE 1 - (embedding <=> $1) > $2',
    'ORDER BY embedding <=> $1',
    'LIMIT $3',
  ].join('\n');

  const connection = await pool.connect();
  try {
    const vectorLiteral = `[${queryEmbedding.join(',')}]`;
    const { rows } = await connection.query(searchSql, [
      vectorLiteral,
      threshold,
      limit,
    ]);
    return rows;
  } finally {
    // Always hand the connection back to the pool, even when the query throws.
    connection.release();
  }
}
/**
 * Lists every stored document, newest first.
 *
 * @returns Rows with `id`, `title`, `content` and `created_at`; the embedding
 *   column is not selected.
 */
export async function getAllDocuments(): Promise<Document[]> {
  const listSql =
    'SELECT id, title, content, created_at FROM documents ORDER BY created_at DESC';
  const connection = await pool.connect();
  try {
    const { rows } = await connection.query(listSql);
    return rows;
  } finally {
    // Always hand the connection back to the pool, even when the query throws.
    connection.release();
  }
}
Step 5: Create API Routes
Create app/api/documents/route.ts:
import { NextRequest, NextResponse } from 'next/server';
import { insertDocument, getAllDocuments } from '@/lib/database';
import { createEmbedding } from '@/lib/embeddings';
/**
 * POST /api/documents — embeds a document's content and stores it.
 *
 * Expects a JSON body of the form `{ title: string, content: string }`.
 *
 * Responses:
 * - 200 `{ id, message }` when the document was stored.
 * - 400 when the body is not valid JSON, or `title`/`content` is missing,
 *   not a string, or blank.
 * - 500 when creating the embedding or inserting the row fails.
 */
export async function POST(req: NextRequest) {
  // Parse separately so a malformed body is reported as a client error (400)
  // rather than falling through to the generic 500 handler.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON body' },
      { status: 400 }
    );
  }

  const { title, content } = (body ?? {}) as {
    title?: unknown;
    content?: unknown;
  };
  if (
    typeof title !== 'string' ||
    typeof content !== 'string' ||
    !title.trim() ||
    !content.trim()
  ) {
    return NextResponse.json(
      { error: 'Title and content are required' },
      { status: 400 }
    );
  }

  try {
    // Create embedding for the document content
    const embedding = await createEmbedding(content);
    // Store document with embedding
    const document = await insertDocument(title, content, embedding);
    return NextResponse.json({
      id: document.id,
      message: 'Document added successfully'
    });
  } catch (error) {
    console.error('Error adding document:', error);
    return NextResponse.json(
      { error: 'Failed to add document' },
      { status: 500 }
    );
  }
}
/**
 * GET /api/documents — returns every stored document as `{ documents }`.
 *
 * Responds with status 500 and `{ error }` when loading the documents fails;
 * the underlying error is logged.
 */
export async function GET() {
  try {
    const payload = { documents: await getAllDocuments() };
    return NextResponse.json(payload);
  } catch (cause) {
    console.error('Error fetching documents:', cause);
    const failure = { error: 'Failed to fetch documents' };
    return NextResponse.json(failure, { status: 500 });
  }
}
Create app/api/search/route.ts:
import { NextRequest, NextResponse } from 'next/server';
import { searchSimilarDocuments } from '@/lib/database';
import { createEmbedding } from '@/lib/embeddings';
/**
 * POST /api/search — embeds a query and returns the most similar documents.
 *
 * Expects a JSON body of the form
 * `{ query: string, limit?: number, threshold?: number }`.
 * `limit` defaults to 10 and is capped at 100; `threshold` defaults to 0.7.
 *
 * Responses:
 * - 200 `{ results, query, count }` on success.
 * - 400 when the body is not valid JSON, `query` is missing or blank, `limit`
 *   is not a positive integer, or `threshold` is not a number in [-1, 1].
 * - 500 when creating the embedding or running the search fails.
 */
export async function POST(req: NextRequest) {
  // Upper bound on rows a single request may ask for; the value comes from
  // untrusted input and is passed straight to the SQL LIMIT clause.
  const MAX_LIMIT = 100;

  // Parse separately so a malformed body is reported as a client error (400)
  // rather than falling through to the generic 500 handler.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON body' },
      { status: 400 }
    );
  }

  const { query, limit = 10, threshold = 0.7 } = (body ?? {}) as {
    query?: unknown;
    limit?: unknown;
    threshold?: unknown;
  };

  if (typeof query !== 'string' || !query.trim()) {
    return NextResponse.json(
      { error: 'Search query is required' },
      { status: 400 }
    );
  }
  if (typeof limit !== 'number' || !Number.isInteger(limit) || limit < 1) {
    return NextResponse.json(
      { error: 'limit must be a positive integer' },
      { status: 400 }
    );
  }
  // Cosine similarity always lies in [-1, 1]; anything else is a caller mistake.
  if (
    typeof threshold !== 'number' ||
    !Number.isFinite(threshold) ||
    threshold < -1 ||
    threshold > 1
  ) {
    return NextResponse.json(
      { error: 'threshold must be a number between -1 and 1' },
      { status: 400 }
    );
  }

  try {
    // Create embedding for the search query
    const queryEmbedding = await createEmbedding(query);
    // Search for similar documents
    const results = await searchSimilarDocuments(
      queryEmbedding,
      Math.min(limit, MAX_LIMIT),
      threshold
    );
    return NextResponse.json({
      results,
      query,
      count: results.length
    });
  } catch (error) {
    console.error('Error searching documents:', error);
    return NextResponse.json(
      { error: 'Failed to search documents' },
      { status: 500 }
    );
  }
}
Step 6: Create Search Interface
Create components/SemanticSearch.tsx:
'use client';
import { useEffect, useState } from 'react';
/** One entry of the `results` array read from the /api/search JSON response. */
interface SearchResult {
id: number;
title: string;
content: string;
// Rendered as a percentage ("% match") by multiplying by 100.
similarity: number;
// Arrives as a string in the JSON payload; converted with `new Date(...)` for display.
created_at: string;
}
/** One entry of the `documents` array read from the /api/documents JSON response. */
interface Document {
id: number;
title: string;
content: string;
// Arrives as a string in the JSON payload; converted with `new Date(...)` for display.
created_at: string;
}
/**
 * Client component that lets the user add documents, run a semantic search
 * through `/api/search`, and browse every document returned by `/api/documents`.
 */
export default function SemanticSearch() {
  const [query, setQuery] = useState('');
  const [results, setResults] = useState<SearchResult[]>([]);
  const [documents, setDocuments] = useState<Document[]>([]);
  const [isSearching, setIsSearching] = useState(false);
  const [isAdding, setIsAdding] = useState(false);
  const [newTitle, setNewTitle] = useState('');
  const [newContent, setNewContent] = useState('');
  const [showAddForm, setShowAddForm] = useState(false);
  // Text of the most recent search; null until the first search is started.
  // Keeps the "no results" message from appearing while the user is still typing.
  const [searchedQuery, setSearchedQuery] = useState<string | null>(null);

  /** Loads the full document list from the API into component state. */
  const fetchDocuments = async () => {
    try {
      const response = await fetch('/api/documents');
      if (!response.ok) {
        throw new Error(`Request failed with status ${response.status}`);
      }
      const data = await response.json();
      setDocuments(data.documents || []);
    } catch (error) {
      console.error('Failed to fetch documents:', error);
    }
  };

  /** Sends the current query to the search API and stores the results. */
  const handleSearch = async () => {
    if (!query.trim() || isSearching) return;
    setIsSearching(true);
    setSearchedQuery(query);
    try {
      const response = await fetch('/api/search', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query, limit: 10, threshold: 0.6 }),
      });
      if (!response.ok) {
        throw new Error(`Request failed with status ${response.status}`);
      }
      const data = await response.json();
      setResults(data.results || []);
    } catch (error) {
      console.error('Search failed:', error);
      setResults([]);
    } finally {
      setIsSearching(false);
    }
  };

  /** Submits the add-document form, then resets it and refreshes the list. */
  const handleAddDocument = async () => {
    if (!newTitle.trim() || !newContent.trim() || isAdding) return;
    setIsAdding(true);
    try {
      const response = await fetch('/api/documents', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ title: newTitle, content: newContent }),
      });
      if (response.ok) {
        setNewTitle('');
        setNewContent('');
        setShowAddForm(false);
        await fetchDocuments(); // Refresh the documents list
      } else {
        console.error('Failed to add document: status', response.status);
      }
    } catch (error) {
      console.error('Failed to add document:', error);
    } finally {
      setIsAdding(false);
    }
  };

  /**
   * Wraps case-insensitive occurrences of `query` inside `text` in <mark> elements.
   *
   * Returns React nodes rather than an HTML string, so document content is
   * always rendered as text and can never inject markup into the page.
   */
  const highlightText = (text: string, query: string) => {
    if (!query.trim()) return text;
    // Escape regex metacharacters so the query is matched literally.
    const escapedQuery = query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Splitting on a single capturing group puts every match at an odd index.
    const parts = text.split(new RegExp(`(${escapedQuery})`, 'i'));
    return parts.map((part, index) =>
      index % 2 === 1 ? (
        <mark key={index} className="bg-yellow-200">
          {part}
        </mark>
      ) : (
        part
      )
    );
  };

  // Fetch documents once, after the component has mounted in the browser.
  useEffect(() => {
    void fetchDocuments();
    // fetchDocuments only touches a state setter, so running once is sufficient.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  return (
    <div className="max-w-6xl mx-auto p-6">
      <div className="mb-8">
        <h1 className="text-3xl font-bold mb-4">Semantic Search</h1>
        {/* Add Document Form */}
        <div className="mb-6">
          <button
            onClick={() => setShowAddForm(!showAddForm)}
            className="px-4 py-2 bg-green-500 text-white rounded hover:bg-green-600"
          >
            {showAddForm ? 'Cancel' : 'Add Document'}
          </button>
          {showAddForm && (
            <div className="mt-4 p-4 border rounded-lg bg-gray-50">
              <div className="space-y-3">
                <input
                  type="text"
                  placeholder="Document title..."
                  value={newTitle}
                  onChange={(e) => setNewTitle(e.target.value)}
                  className="w-full p-2 border rounded focus:outline-none focus:ring-2 focus:ring-green-500"
                />
                <textarea
                  placeholder="Document content..."
                  value={newContent}
                  onChange={(e) => setNewContent(e.target.value)}
                  className="w-full p-2 border rounded focus:outline-none focus:ring-2 focus:ring-green-500 min-h-[100px]"
                />
                <button
                  onClick={handleAddDocument}
                  disabled={isAdding || !newTitle.trim() || !newContent.trim()}
                  className="px-4 py-2 bg-green-500 text-white rounded hover:bg-green-600 disabled:bg-gray-300"
                >
                  {isAdding ? 'Adding...' : 'Add Document'}
                </button>
              </div>
            </div>
          )}
        </div>
        {/* Search Interface */}
        <div className="flex gap-3 mb-6">
          <input
            type="text"
            value={query}
            onChange={(e) => setQuery(e.target.value)}
            onKeyDown={(e) => e.key === 'Enter' && handleSearch()}
            placeholder="Search for documents using semantic meaning..."
            className="flex-1 p-3 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500"
            disabled={isSearching}
          />
          <button
            onClick={handleSearch}
            disabled={isSearching || !query.trim()}
            className="px-6 py-3 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:bg-gray-300"
          >
            {isSearching ? 'Searching...' : 'Search'}
          </button>
        </div>
      </div>
      <div className="grid md:grid-cols-2 gap-8">
        {/* Search Results */}
        <div>
          <h2 className="text-xl font-semibold mb-4">
            Search Results {results.length > 0 && `(${results.length})`}
          </h2>
          {results.length === 0 && searchedQuery !== null && !isSearching && (
            <p className="text-gray-500">
              No results found for &quot;{searchedQuery}&quot;
            </p>
          )}
          <div className="space-y-4">
            {results.map((result) => (
              <div key={result.id} className="border rounded-lg p-4">
                <div className="flex justify-between items-start mb-2">
                  <h3 className="font-semibold">{result.title}</h3>
                  <span className="text-sm bg-blue-100 px-2 py-1 rounded">
                    {(result.similarity * 100).toFixed(1)}% match
                  </span>
                </div>
                <div className="text-gray-700 text-sm mb-2">
                  {highlightText(
                    result.content.slice(0, 200) +
                      (result.content.length > 200 ? '...' : ''),
                    query
                  )}
                </div>
                <div className="text-xs text-gray-500">
                  {new Date(result.created_at).toLocaleDateString()}
                </div>
              </div>
            ))}
          </div>
        </div>
        {/* All Documents */}
        <div>
          <h2 className="text-xl font-semibold mb-4">
            All Documents ({documents.length})
          </h2>
          <div className="space-y-4 max-h-96 overflow-y-auto">
            {documents.map((doc) => (
              <div key={doc.id} className="border rounded-lg p-3">
                <h4 className="font-medium mb-1">{doc.title}</h4>
                <p className="text-gray-600 text-sm mb-2">
                  {doc.content.slice(0, 150)}
                  {/* Only show an ellipsis when the preview was actually cut short. */}
                  {doc.content.length > 150 ? '...' : ''}
                </p>
                <div className="text-xs text-gray-500">
                  {new Date(doc.created_at).toLocaleDateString()}
                </div>
              </div>
            ))}
          </div>
        </div>
      </div>
    </div>
  );
}
Step 7: Use in App
import SemanticSearch from '@/components/SemanticSearch';
/** Page component that renders the semantic search UI inside a full-height gray container. */
export default function Home() {
  const searchUi = <SemanticSearch />;
  return <main className="min-h-screen bg-gray-50">{searchUi}</main>;
}
Summary
OpenAI embeddings enable powerful semantic search capabilities that understand meaning beyond keyword matching. Combined with vector databases like PostgreSQL with pgvector, you can build intelligent document retrieval systems.