AFischer1985 committed
Commit 58c0109 · verified · 1 Parent(s): 54fdb1a

Update run.py

Files changed (1)
  1. run.py +38 -36
run.py CHANGED
@@ -1,5 +1,5 @@
 #########################################################################################
-# Title: Gradio Interface to LLM-chatbot with Memory-RAG
+# Title: Gradio AI-Interface with Memory-RAG
 # Author: Andreas Fischer
 # Date: October 15th, 2023
 # Last update: February 26th, 2024
@@ -68,8 +68,8 @@ print(collection.count())
 
 x=collection.get(include=[])["ids"]
 if(len(x)==0):
-  message="Wer hat dich gemacht?" #"Ich bin ein User."
-  response="Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
+  message="Ich bin der User."
+  response="Hallo User, wie kann ich dienen?"
   x=collection.get(include=[])["ids"]
   collection.add(
     documents=[message,response],
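
Note: the collection these seed messages go into is a ChromaDB collection whose setup lies outside this hunk. A minimal sketch of the assumed pattern; the collection name, metadata, and ids here are hypothetical:

import chromadb

client = chromadb.Client()
collection = client.get_or_create_collection("memory")  # hypothetical name

x = collection.get(include=[])["ids"]   # ids of everything stored so far
if len(x) == 0:                         # empty memory: seed a first dialog
    message = "Ich bin der User."
    response = "Hallo User, wie kann ich dienen?"
    collection.add(
        documents=[message, response],
        metadatas=[{"role": "user"}, {"role": "assistant"}],  # hypothetical metadata
        ids=["0", "1"],                                       # hypothetical ids
    )
print(collection.count())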
@@ -104,24 +104,11 @@ else:
 import os
 import requests
 import subprocess
-##modelPath="/home/af/gguf/models/phi-2.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/openchat-3.5-0106.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/decilm-7b-uniform-gqa-q8_0.gguf"
-#modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
-#modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
-#modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
-#modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
-#modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
-modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
+#modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
 if(os.path.exists(modelPath)==False):
-  #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
-  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
-  #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
   #url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
   response = requests.get(url)
   with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
     file.write(response.content)
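
The download keeps the whole GGUF in RAM via response.content before writing it out; for a multi-gigabyte Mixtral file a streamed download is gentler. A sketch of that alternative (not part of this commit):

import requests

url = "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
        for chunk in r.iter_content(chunk_size=1 << 20):  # write in 1-MiB chunks
            file.write(chunk)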
@@ -143,8 +130,10 @@ else:
 # Gradio-GUI
 #------------
 
-
-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
+# Gradio-GUI
+#------------
+import re
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
   startOfString=""
   if zeichenlimit is None: zeichenlimit=1000000000 # :-)
   template0=" [INST]{system}\n [/INST] </s>"
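
For orientation: template0 above wraps the system prompt, and the loop in the next hunk appends the past turns with template1/template2. Their exact strings are not visible in the diff, so the versions below are reconstructed from the inline comments ("[INST] {user_prompt} [/INST] " and "{bot_response}</s> ") and should be treated as assumptions:

template0 = " [INST]{system}\n [/INST] </s>"
template1 = "[INST] {message} [/INST] "  # reconstructed from the inline comment
template2 = "{response}</s> "            # reconstructed from the inline comment

prompt = template0.format(system="Du bist ein hilfreicher Assistent.")
for user_message, bot_response in [("Hallo!", "Hallo, wie kann ich helfen?")]:
    prompt += template1.format(message=user_message)
    prompt += template2.format(response=bot_response)
prompt += template1.format(message="Wer bist du?")
print(repr(prompt))
# ' [INST]Du bist ein hilfreicher Assistent.\n [/INST] </s>[INST] Hallo! [/INST] Hallo, wie kann ich helfen?</s> [INST] Wer bist du? [/INST] '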
@@ -187,15 +176,18 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
   prompt += template0.format(system=system) #"<s>"
   if history is not None:
     for user_message, bot_response in history[-historylimit:]:
-      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
-      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
-  if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
+      if user_message is None: user_message = ""
+      if bot_response is None: bot_response = ""
+      bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG-components
+      if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit])
   if system2 is not None:
     prompt += system2
   return startOfString+prompt
 
 
-
 import gradio as gr
 import requests
 import json
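
The two re.sub calls added in this hunk do different jobs: the first drops the <details> blocks in which, judging by the comment, this app wraps its RAG sources; the second flattens any remaining HTML tags to newlines. A quick illustration with a made-up bot response:

import re

bot_response = "Gerne!\n\n<details>RAG-Quelle: Episode 3</details><b>fertig</b>"
bot_response = re.sub("\n\n<details>((.|\n)*?)</details>", "", bot_response)  # drop the RAG block
bot_response = re.sub("<(.*?)>", "\n", bot_response)                          # strip remaining tags
print(repr(bot_response))  # 'Gerne!\nfertig\n'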
@@ -204,10 +196,9 @@ import os
 import re
 
 def response(message, history,customSysPrompt,settings):
-  #print(str(history)) # print history
-  noAdditions=False
+  #settings="Memory Off"
+  removeHTML=True
   system=customSysPrompt
-  #if (system!="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter."): noAdditions=True
   message=message.replace("[INST]","")
   message=message.replace("[/INST]","")
   message=message.replace("</s>","")
@@ -229,14 +220,24 @@ def response(message, history,customSysPrompt,settings):
     rag += "Mit Blick auf den aktuellen Stand des Dialogs erinnerst du dich insb. an folgende Episode:\n"
     rag += bestMatch
     rag += "\n\nIm Folgenden siehst du den aktuellen Stand des Dialogs."
-    #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
-    #else:
-    #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
   system2=None # system2 can be used as fictive first words of the AI, which are not displayed or stored
   #print("RAG: "+rag)
   #print("System: "+system+"\n\nMessage: "+message)
-  prompt=extend_prompt(message,history,system,rag,system2,historylimit=historylimit)
-  print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
+  prompt=extend_prompt(
+    message,                   # current message of the user
+    history,                   # complete history
+    system,                    # system prompt
+    rag,                       # RAG-component added to the system prompt
+    system2,                   # fictive first words of the AI (neither displayed nor stored)
+    historylimit=historylimit, # number of past messages to consider for response to current message
+    removeHTML=removeHTML      # remove HTML-components from history (to prevent bugs with Markdown)
+  )
+  #print("\n\nMESSAGE:"+str(message))
+  #print("\n\nHISTORY:"+str(history))
+  #print("\n\nSYSTEM:"+str(system))
+  #print("\n\nRAG:"+str(rag))
+  #print("\n\nSYSTEM2:"+str(system2))
+  #print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
 
 ## Request response from model
 #------------------------------
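
bestMatch, which the RAG addon quotes above, is retrieved from the ChromaDB collection before this hunk; the query itself is not visible in the diff. A sketch of the assumed retrieval (n_results and the empty-result fallback are guesses):

results = collection.query(
    query_texts=[message],  # embed the current user message
    n_results=1             # nearest stored dialog snippet
)
docs = results["documents"][0]
bestMatch = docs[0] if docs else ""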
@@ -264,8 +265,8 @@ def response(message, history,customSysPrompt,settings):
       part=text.token.text
       #print(part, end="", flush=True)
       response += part
+      if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
       yield response
-    history.append((message, response)) # add current dialog to history
     # Store current state in DB if memory is turned on
     if (settings=="Memory On"):
       x=collection.get(include=[])["ids"] # add current dialog to db
@@ -283,8 +284,8 @@ def response(message, history,customSysPrompt,settings):
   # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
   url="http://0.0.0.0:2600/v1/completions"
   body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
-  if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
-  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
+  if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+  if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
   response="" #+"("+myType+")\n"
   buffer=""
   #print("URL: "+url)
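
The body above apparently targets an OpenAI-compatible /v1/completions endpoint served locally; the stop lists make DiscoLM and Gemma terminate on their own chat-end tokens, which differ from Mixtral's. The streaming loop that consumes the answer sits below this hunk; a simplified sketch of the usual pattern (the "data: ..." SSE line format is an assumption, error handling is omitted):

import json, requests

with requests.post(url, json=body, stream=True) as r:
    for line in r.iter_lines():
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            part = json.loads(line[len(b"data: "):])["choices"][0]["text"]
            print(part, end="", flush=True)  # next streamed text fragment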
@@ -311,6 +312,7 @@ def response(message, history,customSysPrompt,settings):
   except Exception as e:
     print("Exception:"+str(e))
     pass
+  if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
   yield response
   history.append((message, response)) # add current dialog to history
   # Store current state in DB if memory is turned on
 