AFischer1985 committed
Commit 58c0109 · verified · 1 Parent(s): 54fdb1a

Update run.py

Files changed (1)
  1. run.py +38 -36
run.py CHANGED
@@ -1,5 +1,5 @@
 #########################################################################################
-# Title: Gradio Interface to LLM-chatbot with Memory-RAG
+# Title: Gradio AI-Interface with Memory-RAG
 # Author: Andreas Fischer
 # Date: October 15th, 2023
 # Last update: February 26th, 2024
@@ -68,8 +68,8 @@ print(collection.count())
 
 x=collection.get(include=[])["ids"]
 if(len(x)==0):
-  message="Wer hat dich gemacht?" #"Ich bin ein User."
-  response="Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
+  message="Ich bin der User."
+  response="Hallo User, wie kann ich dienen?"
   x=collection.get(include=[])["ids"]
   collection.add(
     documents=[message,response],
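
Note: the collection these seed messages go into is a ChromaDB collection whose setup lies outside this hunk. A minimal sketch of the assumed pattern; the collection name, metadata, and ids here are hypothetical:

import chromadb

client = chromadb.Client()
collection = client.get_or_create_collection("memory")  # hypothetical name

x = collection.get(include=[])["ids"]   # ids of everything stored so far
if len(x) == 0:                         # empty memory: seed a first dialog
    message = "Ich bin der User."
    response = "Hallo User, wie kann ich dienen?"
    collection.add(
        documents=[message, response],
        metadatas=[{"role": "user"}, {"role": "assistant"}],  # hypothetical metadata
        ids=["0", "1"],                                       # hypothetical ids
    )
print(collection.count())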
@@ -104,24 +104,11 @@ else:
 import os
 import requests
 import subprocess
-##modelPath="/home/af/gguf/models/phi-2.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/openchat-3.5-0106.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/decilm-7b-uniform-gqa-q8_0.gguf"
-#modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
-#modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
-#modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
-#modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
-#modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
-modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
-#modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
+#modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
 if(os.path.exists(modelPath)==False):
-  #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
-  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
-  #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
   #url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
   response = requests.get(url)
   with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
     file.write(response.content)
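
The download keeps the whole GGUF in RAM via response.content before writing it out; for a multi-gigabyte Mixtral file a streamed download is gentler. A sketch of that alternative (not part of this commit):

import requests

url = "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
        for chunk in r.iter_content(chunk_size=1 << 20):  # write in 1-MiB chunks
            file.write(chunk)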
@@ -143,8 +130,10 @@ else:
 # Gradio-GUI
 #------------
 
-
-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
+# Gradio-GUI
+#------------
+import re
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
   startOfString=""
   if zeichenlimit is None: zeichenlimit=1000000000 # :-)
   template0=" [INST]{system}\n [/INST] </s>"
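
For orientation: template0 above wraps the system prompt, and the loop in the next hunk appends the past turns with template1/template2. Their exact strings are not visible in the diff, so the versions below are reconstructed from the inline comments ("[INST] {user_prompt} [/INST] " and "{bot_response}</s> ") and should be treated as assumptions:

template0 = " [INST]{system}\n [/INST] </s>"
template1 = "[INST] {message} [/INST] "  # reconstructed from the inline comment
template2 = "{response}</s> "            # reconstructed from the inline comment

prompt = template0.format(system="Du bist ein hilfreicher Assistent.")
for user_message, bot_response in [("Hallo!", "Hallo, wie kann ich helfen?")]:
    prompt += template1.format(message=user_message)
    prompt += template2.format(response=bot_response)
prompt += template1.format(message="Wer bist du?")
print(repr(prompt))
# ' [INST]Du bist ein hilfreicher Assistent.\n [/INST] </s>[INST] Hallo! [/INST] Hallo, wie kann ich helfen?</s> [INST] Wer bist du? [/INST] '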
@@ -187,15 +176,18 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
   prompt += template0.format(system=system) #"<s>"
   if history is not None:
     for user_message, bot_response in history[-historylimit:]:
-      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
-      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
-  if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
+      if user_message is None: user_message = ""
+      if bot_response is None: bot_response = ""
+      bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG-components
+      if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit])
   if system2 is not None:
     prompt += system2
   return startOfString+prompt
 
 
-
 import gradio as gr
 import requests
 import json
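
The two re.sub calls added in this hunk do different jobs: the first drops the <details> blocks in which, judging by the comment, this app wraps its RAG sources; the second flattens any remaining HTML tags to newlines. A quick illustration with a made-up bot response:

import re

bot_response = "Gerne!\n\n<details>RAG-Quelle: Episode 3</details><b>fertig</b>"
bot_response = re.sub("\n\n<details>((.|\n)*?)</details>", "", bot_response)  # drop the RAG block
bot_response = re.sub("<(.*?)>", "\n", bot_response)                          # strip remaining tags
print(repr(bot_response))  # 'Gerne!\nfertig\n'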
@@ -204,10 +196,9 @@ import os
 import re
 
 def response(message, history,customSysPrompt,settings):
-  #print(str(history)) # print history
-  noAdditions=False
+  #settings="Memory Off"
+  removeHTML=True
   system=customSysPrompt
-  #if (system!="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter."): noAdditions=True
   message=message.replace("[INST]","")
   message=message.replace("[/INST]","")
   message=message.replace("</s>","")
@@ -229,14 +220,24 @@ def response(message, history,customSysPrompt,settings):
     rag += "Mit Blick auf den aktuellen Stand des Dialogs erinnerst du dich insb. an folgende Episode:\n"
     rag += bestMatch
     rag += "\n\nIm Folgenden siehst du den aktuellen Stand des Dialogs."
-    #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
-    #else:
-    #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
   system2=None # system2 can be used as fictive first words of the AI, which are not displayed or stored
   #print("RAG: "+rag)
   #print("System: "+system+"\n\nMessage: "+message)
-  prompt=extend_prompt(message,history,system,rag,system2,historylimit=historylimit)
-  print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
+  prompt=extend_prompt(
+    message,                   # current message of the user
+    history,                   # complete history
+    system,                    # system prompt
+    rag,                       # RAG-component added to the system prompt
+    system2,                   # fictive first words of the AI (neither displayed nor stored)
+    historylimit=historylimit, # number of past messages to consider for response to current message
+    removeHTML=removeHTML      # remove HTML-components from history (to prevent bugs with Markdown)
+  )
+  #print("\n\nMESSAGE:"+str(message))
+  #print("\n\nHISTORY:"+str(history))
+  #print("\n\nSYSTEM:"+str(system))
+  #print("\n\nRAG:"+str(rag))
+  #print("\n\nSYSTEM2:"+str(system2))
+  #print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
 
 ## Request response from model
 #------------------------------
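
bestMatch, which the RAG addon quotes above, is retrieved from the ChromaDB collection before this hunk; the query itself is not visible in the diff. A sketch of the assumed retrieval (n_results and the empty-result fallback are guesses):

results = collection.query(
    query_texts=[message],  # embed the current user message
    n_results=1             # nearest stored dialog snippet
)
docs = results["documents"][0]
bestMatch = docs[0] if docs else ""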
@@ -264,8 +265,8 @@ def response(message, history,customSysPrompt,settings):
       part=text.token.text
       #print(part, end="", flush=True)
       response += part
+      if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
       yield response
-    history.append((message, response)) # add current dialog to history
     # Store current state in DB if memory is turned on
     if (settings=="Memory On"):
       x=collection.get(include=[])["ids"] # add current dialog to db
@@ -283,8 +284,8 @@ def response(message, history,customSysPrompt,settings):
   # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
   url="http://0.0.0.0:2600/v1/completions"
   body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
-  if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
-  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
+  if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+  if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
   response="" #+"("+myType+")\n"
   buffer=""
   #print("URL: "+url)
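
The body above apparently targets an OpenAI-compatible /v1/completions endpoint served locally; the stop lists make DiscoLM and Gemma terminate on their own chat-end tokens, which differ from Mixtral's. The streaming loop that consumes the answer sits below this hunk; a simplified sketch of the usual pattern (the "data: ..." SSE line format is an assumption, error handling is omitted):

import json, requests

with requests.post(url, json=body, stream=True) as r:
    for line in r.iter_lines():
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            part = json.loads(line[len(b"data: "):])["choices"][0]["text"]
            print(part, end="", flush=True)  # next streamed text fragment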
@@ -311,6 +312,7 @@ def response(message, history,customSysPrompt,settings):
   except Exception as e:
     print("Exception:"+str(e))
     pass
+  if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
   yield response
   history.append((message, response)) # add current dialog to history
   # Store current state in DB if memory is turned on
 