import gradio as gr from embedchain import App, OpenSourceApp, CustomApp from embedchain.config import CustomAppConfig from embedchain.models import Providers, EmbeddingFunctions import chromadb import os import time import subprocess #HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"] class ContextCreator: def __init__(self, app): self.app = app def create_context(self, pdf_urls="", docx_urls="", youtube_urls="", web_urls="", sitemap_url="", upload_files=None): if pdf_urls != "": for x in pdf_urls.split(","): self.app.add(x, data_type='pdf_file') if docx_urls != "": for x in docx_urls.split(","): self.app.add(x, data_type='docx_file') if youtube_urls != "": for x in youtube_urls.split(","): self.app.add(x, data_type='youtube_video') if web_urls != "": for x in web_urls.split(","): self.app.add(x, data_type='web_page') if sitemap_url != "": self.app.add(x, data_type='sitemap') if upload_files is not None: for idx, file in enumerate(upload_files): if file.name.endswith('.pdf'): self.app.add(file.name, data_type='pdf_file') if file.name.endswith('.docx'): self.app.add(file.name, data_type='docx_file') def environ_api_key(api_key): global app os.environ["OPENAI_API_KEY"] = api_key config = CustomAppConfig(embedding_fn=EmbeddingFunctions.OPENAI, provider=Providers.OPENAI, embedding_fn_model="text-embedding-ada-002") app = CustomApp(config) return "OpenAI API key set !" def build_context(pdf_urls, docx_urls, youtube_urls, web_urls, sitemap_url, upload_files): context_creator = ContextCreator(app) context_creator.create_context(pdf_urls, docx_urls, youtube_urls, web_urls, sitemap_url, upload_files) return "loaded" def llm_respond(query, chat_history): result = app.query(query) chat_history.append((query, result)) time.sleep(2) return "", chat_history def loading(): return "Loading..." def clear_chromadb(): subprocess.call('rm -rf ./db', shell=True) with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.HTML("
Made with the embedchain Framework
The framework is built to be able to use multiple free or paid LLMs (OpenAI GPT, GPT4All, Llama 2...); however, open models like GPT4All or Llama are very slow on CPU, which is why OpenAI is preferred here (the default embeddings model is text-embedding-ada-002 and the chat model is gpt-3.5-turbo)
") with gr.Row(): openai_key = gr.Textbox(label="OpenAI API Key") out = gr.Textbox(interactive=False) openai_key.change(environ_api_key, openai_key, out) with gr.Row(): with gr.Column(scale=1): gr.HTML("