Update app.py
app.py CHANGED
@@ -20,8 +20,7 @@ import torch
 import tqdm
 import accelerate
 
-
-default_persist_directory = './chromaDB/'
+default_persist_directory = './ChromaDB'
 
 llm_name0 = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
@@ -52,10 +51,9 @@ def create_db(splits, collection_name):
     vectordb = Chroma.from_documents(
         documents=splits,
         embedding=embedding,
+        persist_directory="./chroma_db",
         client=new_client,
-        collection_name=collection_name,
-        persist_directory=default_persist_directory
-    )
+        collection_name=collection_name)
     return vectordb
 
 
@@ -78,30 +76,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         llm = HuggingFaceHub(
             repo_id=llm_model,
             model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-    )
-    elif llm_model == "microsoft/phi-2":
-        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-        )
-    else:
-        llm = HuggingFaceHub(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-        )
+        )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
@@ -164,7 +139,7 @@ def format_chat_history(message, chat_history):
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     #print("formatted_chat_history",formatted_chat_history)
-
+    formatted_chat_history = ""
     # Generate response using QA chain
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
     response_answer = response["answer"]
@@ -199,12 +174,7 @@ def demo():
         collection_name = gr.State()
 
         gr.Markdown(
-        """<center><h2>
-        <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
-        <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
-        When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
-        <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
-        """)
+        """<center><h2>ChatPDF</center></h2>""")
         with gr.Tab("Step 1 - Document pre-processing"):
             with gr.Row():
                 document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
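For reference, a minimal sketch of how the updated `create_db` would read as a standalone function. The diff only shows the `Chroma.from_documents` call and the `return`, so the imports, the `HuggingFaceEmbeddings` embedding, and the `chromadb.PersistentClient` setup below are assumptions, not part of the commit:

```python
# Sketch only: the embedding and client setup are assumed; the commit itself
# only touches the Chroma.from_documents call.
import chromadb
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma


def create_db(splits, collection_name):
    embedding = HuggingFaceEmbeddings()  # assumed embedding backend
    new_client = chromadb.PersistentClient(path="./chroma_db")  # assumed client setup
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory="./chroma_db",  # added by this commit
        client=new_client,
        collection_name=collection_name)
    return vectordb
```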
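After this change, every model selection goes through the single `HuggingFaceHub` call that remains in `initialize_llmchain`. A minimal sketch of that call in isolation, with illustrative parameter values (in app.py they come from the function's `temperature`, `max_tokens`, and `top_k` arguments):

```python
# Sketch only: parameter values are illustrative; app.py passes them in from
# initialize_llmchain's arguments. Requires HUGGINGFACEHUB_API_TOKEN to be set.
from langchain.llms import HuggingFaceHub

llm = HuggingFaceHub(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",  # llm_name0 in app.py
    model_kwargs={
        "temperature": 0.7,
        "max_new_tokens": 1024,
        "top_k": 3,
        "load_in_8bit": True,
    },
)
```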