diff --git a/llm-server/config/pinecone.ts b/llm-server/config/pinecone.ts index 596845ed..1b21c2e2 100644 --- a/llm-server/config/pinecone.ts +++ b/llm-server/config/pinecone.ts @@ -3,11 +3,11 @@ */ if (!process.env.PINECONE_INDEX_NAME) { - throw new Error('Missing Pinecone index name in .env file'); + throw new Error('Missing Pinecone index name in .env file'); } const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? ''; const PINECONE_NAME_SPACE = 'bot-test'; //namespace is optional for your vectors -export { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE }; +export {PINECONE_INDEX_NAME, PINECONE_NAME_SPACE}; diff --git a/llm-server/data-sources/codebaseHandler.ts b/llm-server/data-sources/codebaseHandler.ts index 5fe8a01d..c05849c2 100644 --- a/llm-server/data-sources/codebaseHandler.ts +++ b/llm-server/data-sources/codebaseHandler.ts @@ -9,25 +9,18 @@ import {GithubRepoLoader} from "langchain/document_loaders/web/github"; export default async function codebaseHandler(req: NextApiRequest, res: NextApiResponse) { try { const {repo, namespace} = req.body; - const loader = new GithubRepoLoader(repo, // @ts-ignore { - branch: "main", - recursive: true, - unknown: "warn", - // @ts-ignore + branch: "main", recursive: true, unknown: "warn", // @ts-ignore // ignorePaths: ['node_modules', 'vendor', 'bower_components', '__pycache__', '.venv', 'target', 'build', 'bin', 'obj', 'tmp', 'dist', 'public', '.git', '.svn', 'CVS', 'out', 'logs', '.idea', '.vscode', '.gradle', '.classpath', '.project', '.settings', '.DS_Store', 'venv', 'env', 'migrations', 'db', 'log', 'logs', 'backup', 'cache', 'temp', 'tmp', 'docs', 'doc', 'test', 'tests', 'spec', 'specs'] }); - const rawDocs = await loader.load(); console.log('Loaded documents') const textSplitter = new RecursiveCharacterTextSplitter({ - chunkSize: 1000, - chunkOverlap: 200, + chunkSize: 1000, chunkOverlap: 200, }); - const docs = await textSplitter.splitDocuments(rawDocs); console.log('Split documents') @@ -36,17 +29,14 @@ export default async function codebaseHandler(req: NextApiRequest, res: NextApiR const index = pinecone.Index(PINECONE_INDEX_NAME); await PineconeStore.fromDocuments(docs, embeddings, { - pineconeIndex: index, - namespace: namespace, - textKey: 'text', + pineconeIndex: index, namespace: namespace, textKey: 'text', }); console.log('Indexed documents. all done!') - return res.status(200).json({message: 'Success'}); } catch (e) { console.error(e); // @ts-ignore - res.status(500).json({error: e.message, line: e.lineNumber}); + return res.status(500).json({error: e.message, line: e.lineNumber}); } } \ No newline at end of file diff --git a/llm-server/data-sources/pdfHandler.ts b/llm-server/data-sources/pdfHandler.ts index 7e92cb19..ec9dc7ac 100644 --- a/llm-server/data-sources/pdfHandler.ts +++ b/llm-server/data-sources/pdfHandler.ts @@ -28,9 +28,7 @@ export default async function pdfHandler(req: NextApiRequest, res: NextApiRespon const index = pinecone.Index(PINECONE_INDEX_NAME); await PineconeStore.fromDocuments(docs, embeddings, { - pineconeIndex: index, - namespace: namespace, - textKey: 'text', + pineconeIndex: index, namespace: namespace, textKey: 'text', }); console.log('All is done, folder deleted'); @@ -38,6 +36,6 @@ export default async function pdfHandler(req: NextApiRequest, res: NextApiRespon } catch (e) { console.error(e); // @ts-ignore - res.status(500).json({error: e.message, line: e.lineNumber}); + return res.status(500).json({error: e.message, line: e.lineNumber}); } } \ No newline at end of file diff --git a/llm-server/data-sources/websiteHandler.ts b/llm-server/data-sources/websiteHandler.ts index 949ce1ad..3e59b389 100644 --- a/llm-server/data-sources/websiteHandler.ts +++ b/llm-server/data-sources/websiteHandler.ts @@ -19,8 +19,7 @@ export default async function websiteHandler(req: NextApiRequest, res: NextApiRe const rawDocs = await directoryLoader.load(); const textSplitter = new RecursiveCharacterTextSplitter({ - chunkSize: 1000, - chunkOverlap: 200, + chunkSize: 1000, chunkOverlap: 200, }); const docs = await textSplitter.splitDocuments(rawDocs); @@ -29,15 +28,13 @@ export default async function websiteHandler(req: NextApiRequest, res: NextApiRe const index = pinecone.Index(PINECONE_INDEX_NAME); await PineconeStore.fromDocuments(docs, embeddings, { - pineconeIndex: index, - namespace: namespace, - textKey: 'text', + pineconeIndex: index, namespace: namespace, textKey: 'text', }); console.log('All is done, folder deleted'); return res.status(200).json({message: 'Success'}); } catch (e) { console.error(e); // @ts-ignore - res.status(500).json({error: e.message, line: e.lineNumber}); + return res.status(500).json({error: e.message, line: e.lineNumber}); } } \ No newline at end of file diff --git a/llm-server/declarations/pdf-parse.d.ts b/llm-server/declarations/pdf-parse.d.ts index 5b2ab502..7d258d2b 100644 --- a/llm-server/declarations/pdf-parse.d.ts +++ b/llm-server/declarations/pdf-parse.d.ts @@ -1,5 +1,5 @@ declare module 'pdf-parse/lib/pdf-parse.js' { - import pdf from 'pdf-parse'; + import pdf from 'pdf-parse'; - export default pdf; + export default pdf; } diff --git a/llm-server/pages/api/chat.ts b/llm-server/pages/api/chat.ts index 6f309270..bf0c228b 100644 --- a/llm-server/pages/api/chat.ts +++ b/llm-server/pages/api/chat.ts @@ -1,55 +1,45 @@ -import type { NextApiRequest, NextApiResponse } from 'next'; -import { OpenAIEmbeddings } from 'langchain/embeddings/openai'; -import { PineconeStore } from 'langchain/vectorstores/pinecone'; -import { makeChain } from '@/utils/makechain'; -import { pinecone } from '@/utils/pinecone-client'; -import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone'; +import type {NextApiRequest, NextApiResponse} from 'next'; +import {OpenAIEmbeddings} from 'langchain/embeddings/openai'; +import {PineconeStore} from 'langchain/vectorstores/pinecone'; +import {makeChain} from '@/utils/makechain'; +import {pinecone} from '@/utils/pinecone-client'; +import {PINECONE_INDEX_NAME, PINECONE_NAME_SPACE} from '@/config/pinecone'; -export default async function handler( - req: NextApiRequest, - res: NextApiResponse, -) { - const { question, history, namespace, mode, initial_prompt } = req.body; +export default async function handler(req: NextApiRequest, res: NextApiResponse,) { + const {question, history, namespace, mode, initial_prompt} = req.body; - console.log('req.body', req.body); - console.log({ question, history, namespace, mode, initial_prompt }); - //only accept post requests - if (req.method !== 'POST') { - res.status(405).json({ error: 'Method not allowed' }); - return; - } + console.log('req.body', req.body); + console.log({question, history, namespace, mode, initial_prompt}); + //only accept post requests + if (req.method !== 'POST') { + return res.status(405).json({error: 'Method not allowed'}); + } - if (!question) { - return res.status(400).json({ message: 'No question in the request' }); - } - // OpenAI recommends replacing newlines with spaces for best results - const sanitizedQuestion = question.trim().replaceAll('\n', ' '); + if (!question) { + return res.status(400).json({message: 'No question in the request'}); + } + // OpenAI recommends replacing newlines with spaces for best results + const sanitizedQuestion = question.trim().replaceAll('\n', ' '); - try { - const index = pinecone.Index(PINECONE_INDEX_NAME); + try { + const index = pinecone.Index(PINECONE_INDEX_NAME); - /* create vectorstore*/ - const vectorStore = await PineconeStore.fromExistingIndex( - new OpenAIEmbeddings({}), - { - pineconeIndex: index, - textKey: 'text', - namespace: namespace, //namespace comes from your config folder - }, - ); + /* create vectorstore*/ + const vectorStore = await PineconeStore.fromExistingIndex(new OpenAIEmbeddings({}), { + pineconeIndex: index, textKey: 'text', namespace: namespace, //namespace comes from your config folder + },); - //create chain - const chain = makeChain(vectorStore, mode, initial_prompt); - //Ask a question using chat history - const response = await chain.call({ - question: sanitizedQuestion, - chat_history: history || [], - }); + //create chain + const chain = makeChain(vectorStore, mode, initial_prompt); + //Ask a question using chat history + const response = await chain.call({ + question: sanitizedQuestion, chat_history: history || [], + }); - console.log('response', response); - res.status(200).json(response); - } catch (error: any) { - console.log('error', error); - res.status(500).json({ error: error.message || 'Something went wrong' }); - } + console.log('response', response); + return res.status(200).json(response); + } catch (error: any) { + console.log('error', error); + return res.status(500).json({error: error.message || 'Something went wrong'}); + } } \ No newline at end of file diff --git a/llm-server/pages/api/ingest.ts b/llm-server/pages/api/ingest.ts index 0644f901..230b6b8c 100644 --- a/llm-server/pages/api/ingest.ts +++ b/llm-server/pages/api/ingest.ts @@ -29,10 +29,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) } else { return res.status(400).json({message: 'Not supported type'}); } + } catch (e) { console.error(e); // Return error message and line number // @ts-ignore - res.status(500).json({error: e.message, line: e.lineNumber}); + return res.status(500).json({error: e.message, line: e.lineNumber}); } } \ No newline at end of file diff --git a/llm-server/types/chat.ts b/llm-server/types/chat.ts index f1a8fa63..acf32726 100644 --- a/llm-server/types/chat.ts +++ b/llm-server/types/chat.ts @@ -1,8 +1,5 @@ -import { Document } from 'langchain/document'; +import {Document} from 'langchain/document'; export type Message = { - type: 'apiMessage' | 'userMessage'; - message: string; - isStreaming?: boolean; - sourceDocs?: Document[]; + type: 'apiMessage' | 'userMessage'; message: string; isStreaming?: boolean; sourceDocs?: Document[]; }; diff --git a/llm-server/utils/cn.ts b/llm-server/utils/cn.ts index e57f9802..4212541b 100644 --- a/llm-server/utils/cn.ts +++ b/llm-server/utils/cn.ts @@ -1,6 +1,6 @@ -import { ClassValue, clsx } from 'clsx'; -import { twMerge } from 'tailwind-merge'; +import {ClassValue, clsx} from 'clsx'; +import {twMerge} from 'tailwind-merge'; export function cn(...inputs: ClassValue[]) { - return twMerge(clsx(inputs)); + return twMerge(clsx(inputs)); } diff --git a/llm-server/utils/customPDFLoader.ts b/llm-server/utils/customPDFLoader.ts index dba71148..9781455c 100644 --- a/llm-server/utils/customPDFLoader.ts +++ b/llm-server/utils/customPDFLoader.ts @@ -1,61 +1,49 @@ -import { Document } from 'langchain/document'; -import { readFile } from 'fs/promises'; -import { BaseDocumentLoader } from 'langchain/document_loaders'; +import {Document} from 'langchain/document'; +import {readFile} from 'fs/promises'; +import {BaseDocumentLoader} from 'langchain/document_loaders'; export abstract class BufferLoader extends BaseDocumentLoader { - constructor(public filePathOrBlob: string | Blob) { - super(); - } - - protected abstract parse( - raw: Buffer, - metadata: Document['metadata'], - ): Promise; + constructor(public filePathOrBlob: string | Blob) { + super(); + } - public async load(): Promise { - let buffer: Buffer; - let metadata: Record; - if (typeof this.filePathOrBlob === 'string') { - buffer = await readFile(this.filePathOrBlob); - metadata = { source: this.filePathOrBlob }; - } else { - buffer = await this.filePathOrBlob - .arrayBuffer() - .then((ab) => Buffer.from(ab)); - metadata = { source: 'blob', blobType: this.filePathOrBlob.type }; + public async load(): Promise { + let buffer: Buffer; + let metadata: Record; + if (typeof this.filePathOrBlob === 'string') { + buffer = await readFile(this.filePathOrBlob); + metadata = {source: this.filePathOrBlob}; + } else { + buffer = await this.filePathOrBlob + .arrayBuffer() + .then((ab) => Buffer.from(ab)); + metadata = {source: 'blob', blobType: this.filePathOrBlob.type}; + } + return this.parse(buffer, metadata); } - return this.parse(buffer, metadata); - } + + protected abstract parse(raw: Buffer, metadata: Document['metadata'],): Promise; } export class CustomPDFLoader extends BufferLoader { - public async parse( - raw: Buffer, - metadata: Document['metadata'], - ): Promise { - const { pdf } = await PDFLoaderImports(); - const parsed = await pdf(raw); - return [ - new Document({ - pageContent: parsed.text, - metadata: { - ...metadata, - pdf_numpages: parsed.numpages, - }, - }), - ]; - } + public async parse(raw: Buffer, metadata: Document['metadata'],): Promise { + const {pdf} = await PDFLoaderImports(); + const parsed = await pdf(raw); + return [new Document({ + pageContent: parsed.text, metadata: { + ...metadata, pdf_numpages: parsed.numpages, + }, + }),]; + } } async function PDFLoaderImports() { - try { - // the main entrypoint has some debug code that we don't want to import - const { default: pdf } = await import('pdf-parse/lib/pdf-parse.js'); - return { pdf }; - } catch (e) { - console.error(e); - throw new Error( - 'Failed to load pdf-parse. Please install it with eg. `npm install pdf-parse`.', - ); - } + try { + // the main entrypoint has some debug code that we don't want to import + const {default: pdf} = await import('pdf-parse/lib/pdf-parse.js'); + return {pdf}; + } catch (e) { + console.error(e); + throw new Error('Failed to load pdf-parse. Please install it with eg. `npm install pdf-parse`.',); + } } diff --git a/llm-server/utils/makechain.ts b/llm-server/utils/makechain.ts index f111decb..6911184c 100644 --- a/llm-server/utils/makechain.ts +++ b/llm-server/utils/makechain.ts @@ -13,11 +13,13 @@ export const makeChain = (vectorstore: PineconeStore, mode: string, initial_prom let enableSourceDocuments = false; - if(mode === 'pair_programmer') { + if (mode === 'pair_programmer') { enableSourceDocuments = true; } return ConversationalRetrievalQAChain.fromLLM(model, vectorstore.asRetriever(), { - qaTemplate: prompts.qa_prompt, questionGeneratorTemplate: prompts.condense_prompt, returnSourceDocuments: enableSourceDocuments, //The number of source documents returned is 4 by default + qaTemplate: prompts.qa_prompt, + questionGeneratorTemplate: prompts.condense_prompt, + returnSourceDocuments: enableSourceDocuments, //The number of source documents returned is 4 by default },); }; diff --git a/llm-server/utils/pinecone-client.ts b/llm-server/utils/pinecone-client.ts index 56b87341..493fc0e8 100644 --- a/llm-server/utils/pinecone-client.ts +++ b/llm-server/utils/pinecone-client.ts @@ -1,23 +1,23 @@ -import { PineconeClient } from '@pinecone-database/pinecone'; +import {PineconeClient} from '@pinecone-database/pinecone'; if (!process.env.PINECONE_ENVIRONMENT || !process.env.PINECONE_API_KEY) { - throw new Error('Pinecone environment or api key vars missing'); + throw new Error('Pinecone environment or api key vars missing'); } async function initPinecone() { - try { - const pinecone = new PineconeClient(); + try { + const pinecone = new PineconeClient(); - await pinecone.init({ - environment: process.env.PINECONE_ENVIRONMENT ?? '', //this is in the dashboard - apiKey: process.env.PINECONE_API_KEY ?? '', - }); + await pinecone.init({ + environment: process.env.PINECONE_ENVIRONMENT ?? '', //this is in the dashboard + apiKey: process.env.PINECONE_API_KEY ?? '', + }); - return pinecone; - } catch (error) { - console.log('error', error); - throw new Error('Failed to initialize Pinecone Client, please make sure you have the correct environment and api keys'); - } + return pinecone; + } catch (error) { + console.log('error', error); + throw new Error('Failed to initialize Pinecone Client, please make sure you have the correct environment and api keys'); + } } export const pinecone = await initPinecone();