Update app.py
app.py CHANGED
@@ -21,9 +21,18 @@ import tqdm
 import accelerate
 
 
+# default_persist_directory = './chroma_HF/'
 
 llm_name0 = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-list_llm = [llm_name0]
+llm_name1 = "mistralai/Mistral-7B-Instruct-v0.2"
+llm_name2 = "mistralai/Mistral-7B-Instruct-v0.1"
+llm_name3 = "meta-llama/Llama-2-7b-chat-hf"
+llm_name4 = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+llm_name5 = "microsoft/phi-2"
+llm_name6 = "mosaicml/mpt-7b-instruct"
+llm_name7 = "tiiuae/falcon-7b-instruct"
+llm_name8 = "google/flan-t5-xxl"
+list_llm = [llm_name0, llm_name1, llm_name2, llm_name3, llm_name4, llm_name5, llm_name6, llm_name7, llm_name8]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 # Load PDF document and create doc splits
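Aside: the list_llm_simple line kept by this hunk derives short model names from the full repo ids via os.path.basename, presumably for display in the app's model selector. A minimal sketch of the effect, using the first two repo ids from the hunk above:

import os

# Repo ids taken from the hunk above; basename keeps only the part
# after the last "/", giving a compact display label.
list_llm = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.2",
]
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
print(list_llm_simple)  # ['Mixtral-8x7B-Instruct-v0.1', 'Mistral-7B-Instruct-v0.2']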
@@ -69,13 +78,59 @@ def load_db():
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
+    # HuggingFacePipeline uses local model
+    # Note: it will download model locally...
+    # tokenizer=AutoTokenizer.from_pretrained(llm_model)
+    # progress(0.5, desc="Initializing HF pipeline...")
+    # pipeline=transformers.pipeline(
+    #     "text-generation",
+    #     model=llm_model,
+    #     tokenizer=tokenizer,
+    #     torch_dtype=torch.bfloat16,
+    #     trust_remote_code=True,
+    #     device_map="auto",
+    #     # max_length=1024,
+    #     max_new_tokens=max_tokens,
+    #     do_sample=True,
+    #     top_k=top_k,
+    #     num_return_sequences=1,
+    #     eos_token_id=tokenizer.eos_token_id
+    # )
+    # llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
+
+    # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
+    # Use of trust_remote_code as model_kwargs
+    # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
     if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
         llm = HuggingFaceHub(
             repo_id=llm_model,
             model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
         )
+    elif llm_model == "microsoft/phi-2":
+        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+        )
+    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+        )
+    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
+        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+        )
+    else:
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+        )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
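This hunk replaces the single Mixtral branch with a per-model dispatch: microsoft/phi-2 and meta-llama/Llama-2-7b-chat-hf raise a gr.Error up front (phi-2 needs trust_remote_code=True, which langchain's HuggingFaceHub does not pass through, and Llama-2 requires a Pro subscription), so the HuggingFaceHub calls below those raises are dead code kept for reference; TinyLlama gets max_new_tokens pinned to 250; every other model falls through to the generic call. A minimal standalone sketch of that fallback branch, assuming HUGGINGFACEHUB_API_TOKEN is set in the environment; the sampling values here are illustrative, not taken from the diff:

from langchain.llms import HuggingFaceHub

# Generic branch of the dispatch above: a plain HF Inference API call.
# The API token is read from the HUGGINGFACEHUB_API_TOKEN env var.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs={"temperature": 0.7, "max_new_tokens": 1024, "top_k": 3},
)
print(llm("Summarize retrieval-augmented generation in one sentence."))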
@@ -256,4 +311,5 @@ def demo():
 
 
 if __name__ == "__main__":
-    demo()
+    demo()
+
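The last hunk only re-touches the entry point (the -/+ pair on demo() is most likely a whitespace-only change, plus an added trailing newline). For orientation, a hypothetical skeleton of what such a Gradio entry point typically looks like; the real demo() in app.py builds the full document-QA interface and is not shown in this diff:

import gradio as gr

def demo():
    # Hypothetical skeleton only; the real demo() assembles the chatbot UI
    # around initialize_llmchain() and the vector database.
    with gr.Blocks() as app:
        gr.Markdown("PDF chatbot")
    app.queue().launch()

if __name__ == "__main__":
    demo()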
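One last note on the initialize_llmchain hunk: it ends where the conversation memory is built, with the ConversationBufferMemory( call cut off by the hunk boundary. For reference, a sketch of how such a buffer memory is conventionally constructed in langchain; the argument values below are the usual ones for a conversational retrieval setup, not taken from this diff:

from langchain.memory import ConversationBufferMemory

# Conventional setup for a conversational retrieval chain: turns are
# stored under "chat_history" and the chain's "answer" field is recorded.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)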