import os
import gradio as gr

# Chatbot / RAG dependencies
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from textwrap import fill
DATA_PATH = 'data/'
DB_FAISS_PATH = 'vectorstore/db_faiss'
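
# DATA_PATH is not referenced anywhere below, and the FAISS index at DB_FAISS_PATH is only
# loaded, never built, in this file. The hypothetical helper below is a minimal sketch
# (an assumption, not part of the original Space) of how such an index could be built from
# PDF files in DATA_PATH. It is defined here for reference only and is never called.
def build_vectorstore(data_path=DATA_PATH, db_path=DB_FAISS_PATH):
    from langchain.document_loaders import PyPDFDirectoryLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    # Load every PDF under data_path, split it into overlapping chunks,
    # embed the chunks and persist the FAISS index to db_path.
    documents = PyPDFDirectoryLoader(data_path).load()
    chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(documents)
    vectorstore = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
    vectorstore.save_local(db_path)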
# Load the quantized Llama-2 chat model and its tokenizer
model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.max_new_tokens = 512
gen_cfg.temperature = 0.0000001  # effectively 0.0; must stay strictly positive while do_sample=True
gen_cfg.return_full_text = True
gen_cfg.do_sample = True
gen_cfg.repetition_penalty = 1.11
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg
)
# This block is skipped by Gradio's auto-reloader: build the LangChain pipeline,
# the embeddings and the FAISS index only once.
if gr.NO_RELOAD:
    llm = HuggingFacePipeline(pipeline=pipe)
    embeddings = HuggingFaceEmbeddings()
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    print('LLM pipeline and vector store loaded')
# st.title('Flint, your FinanceBot')
Description = """
## Finance Bot: get instant insights from your finance documents
This chatbot is built using the Retrieval-Augmented Generation (RAG) framework.
"""
# DB_FAISS_PATH = os.path.join(local_path, 'vectorstore_docs/db_faiss')
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else. Keep it short, with a maximum of 500 words.
Helpful answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Similarity search is the default way to retrieve documents relevant to a query;
    # MMR can be used instead by setting search_type="mmr".
    # k sets how many documents are returned (defaults to 4), and score_threshold sets a minimum
    # relevance score when the "similarity_score_threshold" search type is used.
    # Alternative configurations are sketched just after this chain definition.
    # retriever=db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.8}),
    # return_source_documents=True,  # optionally return the source documents used to answer the question
    retriever=db.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
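
# The retriever options mentioned in the comments above, written out as an assumed sketch
# for reference (hypothetical names; neither retriever is wired into Chain_pdf):
# mmr_retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 5})
# threshold_retriever = db.as_retriever(
#     search_type="similarity_score_threshold",
#     search_kwargs={'k': 5, 'score_threshold': 0.8},
# )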
# Quick smoke test of the chain:
# query = "When was the solar system formed?"
# result = Chain_pdf.invoke(query)
# print(fill(result['result'].strip(), width=100))
def final_result(query, history):
    # history is supplied by gr.ChatInterface, but the chain only needs the query
    result = Chain_pdf.invoke(query)
    answer = result['result'].strip()
    print(fill(answer, width=100))
    return answer
with gr.Blocks() as demo:
    gr.Markdown(Description)
    # Left over from the Gradio ChatInterface example; neither component is wired to final_result
    system_prompt = gr.Textbox("You are a helpful AI.", label="System Prompt")
    slider = gr.Slider(10, 100, render=False)
    # Chain_pdf is not a Gradio component, so it cannot be passed via additional_inputs;
    # the chain is referenced directly inside final_result instead.
    gr.ChatInterface(final_result)

demo.launch()