AFischer1985 committed
Commit 6c61ab3 · verified · 1 Parent(s): 76d8e0b

Update run.py

Files changed (1)
  1. run.py +57 -56
run.py CHANGED
@@ -1,8 +1,8 @@
  #########################################################################################
- # Title: Gradio Interface to LLM-chatbot with memory RAG on premises
  # Author: Andreas Fischer
  # Date: October 15th, 2023
- # Last update: February 25th, 2024
  ##########################################################################################

  #https://github.com/abetlen/llama-cpp-python/issues/306
@@ -30,7 +30,7 @@ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
  onPrem = True if(os.path.exists(dbPath)) else False
  if(onPrem==False): dbPath="/home/user/app/db"

- #onPrem=False # override automatic detection
  print(dbPath)

  #client = chromadb.Client()
@@ -68,8 +68,8 @@ print(collection.count())

  x=collection.get(include=[])["ids"]
  if(len(x)==0):
- message="Ich bin der User."
- response="Hallo User, wie kann ich dienen?"
  x=collection.get(include=[])["ids"]
  collection.add(
  documents=[message,response],
@@ -86,8 +86,7 @@ if(len(x)==0):
  )
  RAGResults["metadatas"][0][0]["dialog"]

- print(collection.count())
- #collection.get()["ids","documents"]
  x=collection.get(include=[])["ids"]
  x

@@ -116,11 +115,11 @@ else:
  #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
  #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
- modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
- modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
- modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
  #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
- #modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
  if(os.path.exists(modelPath)==False):
@@ -149,16 +148,12 @@ else:
  # Gradio-GUI
  #------------

- def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
  startOfString=""
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST] {system} [/INST] </s>"
  template1=" [INST] {message} [/INST]"
  template2=" {response}</s>"
- if("gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
- template0="<start_of_turn>user{system}</end_of_turn>"
- template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
- template2="{response}</end_of_turn>"
  if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
  startOfString="<s>"
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST]{system}\n [/INST] </s>"
@@ -169,19 +164,23 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
  template0="[INST]{system}\n [/INST]</s>" if onPrem else "<s>[INST]{system}\n [/INST]</s>"
  template1="[INST] {message} [/INST]"
  template2=" {response}</s>"
- if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
- startOfString="<s>"
- template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
- template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
- template2="{response}<|end_of_turn|>"
- if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
- if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
  if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
  template0="{system} " #<s>
  template1="USER: {message} ASSISTANT: "
@@ -194,16 +193,17 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
  if RAGAddon is not None:
  system += RAGAddon
  if system is not None:
- prompt += template0.format(system=system) #"<s>"
  if history is not None:
  for user_message, bot_response in history[-historylimit:]:
- if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
- if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
- if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
  if system2 is not None:
- prompt += system2
  return startOfString+prompt

  import gradio as gr
  import requests
  import json
@@ -213,16 +213,18 @@ import re

  def response(message, history,customSysPrompt,settings):
  #print(str(history)) # print history
- #system="Du bist ein KI-basierter Assistent."
- system="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch." if customSysPrompt is None else customSysPrompt
  message=message.replace("[INST]","")
  message=message.replace("[/INST]","")
  message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
- if (settings=="Permanent"):
  if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
  x=collection.get(include=[])["ids"]
  rag=None # RAG is turned off until history gets too long
- historylimit=4
  if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
  RAGResults=collection.query(
  query_texts=[message],
@@ -232,9 +234,9 @@ def response(message, history,customSysPrompt,settings):
  bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
  #print("Message: "+message+"\n\nBest Match: "+bestMatch)
  rag="\n\n"
- rag += "Mit Blick auf den aktuellen Stand der Session erinnerst du dich insb. an folgende Episode:\n"
  rag += bestMatch
- rag += "\n\nIm Folgenden siehst du den aktuellen Stand der Session."
  #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
  #else:
  #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
@@ -249,19 +251,19 @@ def response(message, history,customSysPrompt,settings):

  print("AI running on prem!" if(onPrem) else "AI running HFHub!")
  if(onPrem==False):
- generate_kwargs = dict( #https://github.com/huggingface/chat-ui/blob/main/.env.template
- temperature=0.6,
- top_p=0.95,
- repetition_penalty=1.2,
- top_k=50,
- truncate=24576,
- max_new_tokens=8192
- #temperature=temperature,
- #max_new_tokens=max_new_tokens,
- #top_p=top_p,
- #repetition_penalty=repetition_penalty,
- #do_sample=True,
- #seed=42,
  )
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
  response = ""
@@ -272,8 +274,8 @@ def response(message, history,customSysPrompt,settings):
  response += part
  yield response
  history.append((message, response)) # add current dialog to history
- # Store current state in DB if settings=="Permanent"
- if (settings=="Permanent"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
@@ -288,8 +290,8 @@ def response(message, history,customSysPrompt,settings):
  if(onPrem==True):
  # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
  url="http://0.0.0.0:2600/v1/completions"
- body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
- if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
  response="" #+"("+myType+")\n"
  buffer=""
@@ -319,8 +321,8 @@ def response(message, history,customSysPrompt,settings):
  pass
  yield response
  history.append((message, response)) # add current dialog to history
- # Store current state in DB if settings=="Permanent"
- if (settings=="Permanent"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
@@ -337,12 +339,11 @@ gr.ChatInterface(
  chatbot=gr.Chatbot(render_markdown=True),
  title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
  additional_inputs=[
- gr.Textbox(value="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch.",label="System Prompt"),
- gr.Dropdown(["Permanent","Temporär"],value="Temporär",label="Dialog speichern?")
  ]
  ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
  print("Interface up and running!")

-
run.py (resulting file, changed sections):

  #########################################################################################
+ # Title: Gradio Interface to LLM-chatbot with Memory-RAG
  # Author: Andreas Fischer
  # Date: October 15th, 2023
+ # Last update: February 26th, 2024
  ##########################################################################################

  #https://github.com/abetlen/llama-cpp-python/issues/306
 
  onPrem = True if(os.path.exists(dbPath)) else False
  if(onPrem==False): dbPath="/home/user/app/db"

+ #onPrem=True # uncomment to override automatic detection
  print(dbPath)

  #client = chromadb.Client()
 

  x=collection.get(include=[])["ids"]
  if(len(x)==0):
+ message="Wer hat dich gemacht?" #"Ich bin ein User."
+ response="Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
  x=collection.get(include=[])["ids"]
  collection.add(
  documents=[message,response],
 
  )
  RAGResults["metadatas"][0][0]["dialog"]

+ collection.get(include=["documents"]) # inspect ids and documents
  x=collection.get(include=[])["ids"]
  x

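Note: the calls above are the whole memory mechanism: seed the collection when it is empty, read ids with include=[] as a cheap size check, and query by embedding similarity. A minimal self-contained sketch of the same chromadb calls (path and collection name are hypothetical; a PersistentClient is assumed):

    import chromadb

    client = chromadb.PersistentClient(path="/tmp/db_demo")        # hypothetical path
    demo = client.get_or_create_collection(name="demo")            # hypothetical name
    demo.add(documents=["Hello", "Hi there"], ids=["0", "1"],
             metadatas=[{"dialog": "user: Hello"}, {"dialog": "bot: Hi there"}])
    print(demo.get(include=[])["ids"])                             # ids only: cheap size check
    hit = demo.query(query_texts=["Hello"], n_results=1)           # nearest-neighbour lookup
    print(hit["metadatas"][0][0]["dialog"])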
 
  #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
  #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
+ #modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
+ #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
+ #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
  #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
+ modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
  if(os.path.exists(modelPath)==False):
 
  # Gradio-GUI
  #------------

+ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
  startOfString=""
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST] {system} [/INST] </s>"
  template1=" [INST] {message} [/INST]"
  template2=" {response}</s>"
  if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
  startOfString="<s>"
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST]{system}\n [/INST] </s>"

  template0="[INST]{system}\n [/INST]</s>" if onPrem else "<s>[INST]{system}\n [/INST]</s>"
  template1="[INST] {message} [/INST]"
  template2=" {response}</s>"
+ if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
+ if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
+ if("gemma-" in modelPath): # https://huggingface.co/google/gemma-7b-it
+ template0="<start_of_turn>user{system}</end_of_turn>"
+ template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
+ template2="{response}</end_of_turn>"
+ if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
+ startOfString="<s>"
+ template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
+ template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
+ template2="{response}<|end_of_turn|>"
  if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
  template0="{system} " #<s>
  template1="USER: {message} ASSISTANT: "
 
  if RAGAddon is not None:
  system += RAGAddon
  if system is not None:
+ prompt += template0.format(system=system.strip())
  if history is not None:
  for user_message, bot_response in history[-historylimit:]:
+ if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit].strip())
+ if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit].strip())
+ if message is not None: prompt += template1.format(message=message[:zeichenlimit].strip())
  if system2 is not None:
+ prompt += system2.strip()
  return startOfString+prompt

+
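Note: extend_prompt only concatenates the per-model templates around the system prompt, the recent history, and the current message. A quick trace of the Mixtral branch (template strings copied from above, sample strings invented):

    startOfString="<s>"
    template0=" [INST]{system}\n [/INST] </s>"
    template1=" [INST] {message} [/INST]"
    template2=" {response}</s>"

    prompt  = template0.format(system="You are helpful.")
    prompt += template1.format(message="Hi")              # one past user turn
    prompt += template2.format(response="Hello!")         # one past bot turn
    prompt += template1.format(message="How are you?")    # current message
    print(startOfString + prompt)
    # <s> [INST]You are helpful.
    #  [/INST] </s> [INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]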
  import gradio as gr
  import requests
  import json
 

  def response(message, history,customSysPrompt,settings):
  #print(str(history)) # print history
+ noAdditions=False
+ system=customSysPrompt
+ #if (system!="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter."): noAdditions=True
  message=message.replace("[INST]","")
  message=message.replace("[/INST]","")
+ message=message.replace("</s>","")
  message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
+ if (settings=="Memory On"):
  if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
  x=collection.get(include=[])["ids"]
  rag=None # RAG is turned off until history gets too long
+ historylimit=0 #4
  if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
  RAGResults=collection.query(
  query_texts=[message],
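Note on historylimit=0: if this value is forwarded to extend_prompt, it does not shorten the history, because history[-0:] equals history[0:] (the full list; -0 == 0 in Python), and len(x)>(historylimit*2) now enables RAG for any non-empty collection. If "no history in the prompt" is the intent, an explicit guard is needed:

    history = [("a", "b"), ("c", "d")]
    historylimit = 0
    print(history[-historylimit:])                              # full list: [('a', 'b'), ('c', 'd')]
    print(history[-historylimit:] if historylimit > 0 else [])  # guarded variant: []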
 
  bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
  #print("Message: "+message+"\n\nBest Match: "+bestMatch)
  rag="\n\n"
+ rag += "Mit Blick auf den aktuellen Stand des Dialogs erinnerst du dich insb. an folgende Episode:\n"
  rag += bestMatch
+ rag += "\n\nIm Folgenden siehst du den aktuellen Stand des Dialogs."
  #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
  #else:
  #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
 

  print("AI running on prem!" if(onPrem) else "AI running HFHub!")
  if(onPrem==False):
+ temperature=float(0.9)
+ max_new_tokens=500
+ top_p=0.95
+ repetition_penalty=1.0
+ if temperature < 1e-2: temperature = 1e-2
+ top_p = float(top_p)
+ generate_kwargs = dict(
+ temperature=temperature,
+ max_new_tokens=max_new_tokens,
+ top_p=top_p,
+ repetition_penalty=repetition_penalty,
+ do_sample=True,
+ seed=42,
  )
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
  response = ""
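Note: a standalone sketch of the HFHub streaming call above (huggingface_hub's InferenceClient; the model id is an assumption, the loop mirrors the token handling that follows):

    from huggingface_hub import InferenceClient

    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")  # assumed model id
    stream = client.text_generation("<s> [INST] Hi [/INST]", max_new_tokens=64,
                                    stream=True, details=True, return_full_text=False)
    text = ""
    for chunk in stream:              # with details=True each chunk carries one token
        if not chunk.token.special:   # skip EOS and other special tokens
            text += chunk.token.text
    print(text)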
 
  response += part
  yield response
  history.append((message, response)) # add current dialog to history
+ # Store current state in DB if memory is turned on
+ if (settings=="Memory On"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
 
  if(onPrem==True):
  # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
  url="http://0.0.0.0:2600/v1/completions"
+ body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+ if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
  response="" #+"("+myType+")\n"
  buffer=""
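Note: "echo":"False" and "stream":"True" above are JSON strings rather than booleans; the server may coerce them, but plain booleans are safer. A minimal sketch of consuming the SSE stream from the llama-cpp-python completions endpoint (URL copied from above; the data:/[DONE] framing follows the OpenAI-compatible protocol):

    import json, requests

    body = {"prompt": "Hi", "max_tokens": 64, "echo": False, "stream": True}
    text = ""
    with requests.post("http://0.0.0.0:2600/v1/completions", json=body, stream=True) as r:
        for line in r.iter_lines():
            if not line.startswith(b"data: "): continue   # skip blanks and keep-alives
            payload = line[len(b"data: "):]
            if payload == b"[DONE]": break                # end-of-stream marker
            text += json.loads(payload)["choices"][0]["text"]
    print(text)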
 
  pass
  yield response
  history.append((message, response)) # add current dialog to history
+ # Store current state in DB if memory is turned on
+ if (settings=="Memory On"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
 
  chatbot=gr.Chatbot(render_markdown=True),
  title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
  additional_inputs=[
+ gr.Textbox(value=None,label="System Prompt"),
+ gr.Dropdown(["Memory On","Memory Off"],value="Memory Off",label="Memory")
  ]
  ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
  print("Interface up and running!")

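Note: gr.ChatInterface passes each additional_inputs component to the callback as an extra positional argument, which is how customSysPrompt and settings reach response() above. A minimal runnable skeleton with the same inputs (the echo body is a placeholder for the real streaming logic):

    import gradio as gr

    def respond(message, history, system_prompt, memory):
        # placeholder generator; the real app streams model tokens instead
        yield ("[" + memory + "] " + (system_prompt or "") + " You said: " + message).strip()

    gr.ChatInterface(
        respond,
        chatbot=gr.Chatbot(render_markdown=True),
        additional_inputs=[
            gr.Textbox(value=None, label="System Prompt"),
            gr.Dropdown(["Memory On", "Memory Off"], value="Memory Off", label="Memory"),
        ],
    ).queue().launch()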