# Imports
import os

import gradio as gr
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationTokenBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Load a local .env file, if present, so OPENAI_API_KEY can also come from there
load_dotenv()
# Store the OpenAI API key entered in the UI
def process_key(api_key):
    os.environ['OPENAI_API_KEY'] = api_key
def load_pdf(file):
    print(file.name)
    # Load the uploaded PDF into one document per page
    loader = PyPDFLoader(file.name)
    documents = loader.load()

    # Split the text into chunks of 1024 characters with a 64-character overlap
    text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    texts = text_splitter.split_documents(documents)

    # Create the embeddings object
    # Note: these embeddings are free, unlike OpenAI's
    embeddings = HuggingFaceEmbeddings()

    # Define the language model
    llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.0, max_tokens=1000)

    # Build the vector store (in-memory; rebuilt on every upload)
    global vectorstore
    vectorstore = Chroma.from_documents(texts, embeddings)

    # Define the memory
    global memory
    # Configuring the memory is not trivial: memory_key, input_key and
    # output_key must all match the chain's keys, or the chain raises errors
    memory = ConversationTokenBufferMemory(llm=llm,
                                           memory_key='chat_history',
                                           input_key='question',
                                           output_key='answer',
                                           max_token_limit=1000,
                                           return_messages=False)

    # Define the QA chain
    global qa
    qa = ConversationalRetrievalChain.from_llm(
        llm,
        vectorstore.as_retriever(search_kwargs={'k': 3}),  # number of chunks to retrieve
        return_source_documents=True,
        verbose=True,
        chain_type='stuff',
        memory=memory,
        max_tokens_limit=2500,
        get_chat_history=lambda h: h)
    return 'Done'
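
# A minimal sketch of using the chain directly, outside Gradio, assuming
# load_pdf() has already run; 'example.pdf' is a hypothetical filename:
#   load_pdf(open('example.pdf'))
#   result = qa({'question': 'What is this document about?'})
#   print(result['answer'])
#   print([doc.metadata['page'] for doc in result['source_documents']])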
# Run the LLM over a question (standalone helper; not wired into the UI below)
def answer_question(question):
    result = qa({'question': question})
    pages = [doc.metadata['page'] for doc in result['source_documents']]
    return result['answer'], pages
# Generate the answer for the latest user message in the Chatbot history
def bot(history):
    # The attached memory supplies 'chat_history' (memory values override
    # anything passed in the inputs), so only the new question is needed here
    res = qa({'question': history[-1][0]})
    history[-1][1] = res['answer']
    return history
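
# Note: 'history' is the Chatbot's list of (user_message, bot_message) tuples;
# add_text() below appends (text, None) and bot() fills in the None answer.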
# Append the user's text to the chat history and clear the input box
def add_text(history, text):
    history = history + [(text, None)]
    return history, ""
# TODO: check how equations in the PDF are parsed
with gr.Blocks() as demo:
    with gr.Tab(label='Load PDF'):
        with gr.Row():
            with gr.Column():
                open_ai_key = gr.Textbox(label='Enter your OpenAI API key', type='password')
        with gr.Row():
            with gr.Column(scale=0.4):
                api_key_button = gr.Button('Send', variant='primary')
        with gr.Row():
            pdf_file = gr.File(label='PDF file')
            # This textbox shows whether the embedding computation has finished
            emb = gr.Textbox(label='Computing embeddings, please wait...')
        # send_pdf = gr.Button('Load PDF').style(full_width=False)
        with gr.Row():
            with gr.Column(scale=0.50):
                send_pdf = gr.Button('Load PDF')
                send_pdf.click(load_pdf, pdf_file, emb)
    with gr.Tab(label='Galicia QA Demo'):
        chatbot = gr.Chatbot([],
                             elem_id="chatbot",
                             label='Document GPT').style(height=500)
        with gr.Row():
            with gr.Column(scale=0.80):
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter",
                ).style(container=False)
            with gr.Column(scale=0.10):
                submit_btn = gr.Button(
                    'Submit',
                    variant='primary'
                )
            with gr.Column(scale=0.10):
                clear_btn = gr.Button(
                    'Clear',
                    variant='stop'
                )
    # Pressing Enter in the textbox and clicking Submit trigger the same action
    txt.submit(fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]  # first append the user's text
               ).then(fn=bot, inputs=chatbot, outputs=chatbot)             # then generate the answer
    submit_btn.click(fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]
                     ).then(fn=bot, inputs=chatbot, outputs=chatbot)
    clear_btn.click(lambda: None, None, chatbot, queue=False)
    api_key_button.click(fn=process_key, inputs=[open_ai_key], outputs=None)

demo.launch(inline=False)
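
# Assumed dependencies for this Space (not pinned in the source): gradio<4
# (the .style() calls are Gradio 3.x API), langchain, openai, chromadb,
# pypdf, sentence-transformers, python-dotenv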