AFischer1985 committed
Commit 6c61ab3 · verified · 1 Parent(s): 76d8e0b

Update run.py

Files changed (1)
  1. run.py +57 -56
run.py CHANGED
@@ -1,8 +1,8 @@
  #########################################################################################
- # Title: Gradio Interface to LLM-chatbot with memory RAG on premises
  # Author: Andreas Fischer
  # Date: October 15th, 2023
- # Last update: February 25th, 2024
  ##########################################################################################

  #https://github.com/abetlen/llama-cpp-python/issues/306
@@ -30,7 +30,7 @@ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
  onPrem = True if(os.path.exists(dbPath)) else False
  if(onPrem==False): dbPath="/home/user/app/db"

- #onPrem=False # override automatic detection
  print(dbPath)

  #client = chromadb.Client()
@@ -68,8 +68,8 @@ print(collection.count())

  x=collection.get(include=[])["ids"]
  if(len(x)==0):
- message="Ich bin der User."
- response="Hallo User, wie kann ich dienen?"
  x=collection.get(include=[])["ids"]
  collection.add(
  documents=[message,response],
@@ -86,8 +86,7 @@ if(len(x)==0):
  )
  RAGResults["metadatas"][0][0]["dialog"]

- print(collection.count())
- #collection.get()["ids","documents"]
  x=collection.get(include=[])["ids"]
  x

@@ -116,11 +115,11 @@ else:
  #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
  #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
- modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
- modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
- modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
  #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
- #modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
  if(os.path.exists(modelPath)==False):
@@ -149,16 +148,12 @@ else:
  # Gradio-GUI
  #------------

- def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
  startOfString=""
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST] {system} [/INST] </s>"
  template1=" [INST] {message} [/INST]"
  template2=" {response}</s>"
- if("gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
- template0="<start_of_turn>user{system}</end_of_turn>"
- template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
- template2="{response}</end_of_turn>"
  if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
  startOfString="<s>"
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST]{system}\n [/INST] </s>"
@@ -169,19 +164,23 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
  template0="[INST]{system}\n [/INST]</s>" if onPrem else "<s>[INST]{system}\n [/INST]</s>"
  template1="[INST] {message} [/INST]"
  template2=" {response}</s>"
- if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
- startOfString="<s>"
- template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
- template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
- template2="{response}<|end_of_turn|>"
- if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
- if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
  if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
  template0="{system} " #<s>
  template1="USER: {message} ASSISTANT: "
@@ -194,16 +193,17 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
  if RAGAddon is not None:
  system += RAGAddon
  if system is not None:
- prompt += template0.format(system=system) #"<s>"
  if history is not None:
  for user_message, bot_response in history[-historylimit:]:
- if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
- if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
- if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
  if system2 is not None:
- prompt += system2
  return startOfString+prompt

  import gradio as gr
  import requests
  import json
@@ -213,16 +213,18 @@ import re

  def response(message, history,customSysPrompt,settings):
  #print(str(history)) # print history
- #system="Du bist ein KI-basierter Assistent."
- system="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch." if customSysPrompt is None else customSysPrompt
  message=message.replace("[INST]","")
  message=message.replace("[/INST]","")
  message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
- if (settings=="Permanent"):
  if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
  x=collection.get(include=[])["ids"]
  rag=None # RAG is turned off until history gets too long
- historylimit=4
  if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
  RAGResults=collection.query(
  query_texts=[message],
@@ -232,9 +234,9 @@ def response(message, history,customSysPrompt,settings):
  bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
  #print("Message: "+message+"\n\nBest Match: "+bestMatch)
  rag="\n\n"
- rag += "Mit Blick auf den aktuellen Stand der Session erinnerst du dich insb. an folgende Episode:\n"
  rag += bestMatch
- rag += "\n\nIm Folgenden siehst du den aktuellen Stand der Session."
  #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
  #else:
  #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
@@ -249,19 +251,19 @@ def response(message, history,customSysPrompt,settings):

  print("AI running on prem!" if(onPrem) else "AI running HFHub!")
  if(onPrem==False):
- generate_kwargs = dict( #https://github.com/huggingface/chat-ui/blob/main/.env.template
- temperature=0.6,
- top_p=0.95,
- repetition_penalty=1.2,
- top_k=50,
- truncate=24576,
- max_new_tokens=8192
- #temperature=temperature,
- #max_new_tokens=max_new_tokens,
- #top_p=top_p,
- #repetition_penalty=repetition_penalty,
- #do_sample=True,
- #seed=42,
  )
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
  response = ""
@@ -272,8 +274,8 @@ def response(message, history,customSysPrompt,settings):
  response += part
  yield response
  history.append((message, response)) # add current dialog to history
- # Store current state in DB if settings=="Permanent"
- if (settings=="Permanent"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
@@ -288,8 +290,8 @@ def response(message, history,customSysPrompt,settings):
  if(onPrem==True):
  # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
  url="http://0.0.0.0:2600/v1/completions"
- body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
- if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
  response="" #+"("+myType+")\n"
  buffer=""
@@ -319,8 +321,8 @@ def response(message, history,customSysPrompt,settings):
  pass
  yield response
  history.append((message, response)) # add current dialog to history
- # Store current state in DB if settings=="Permanent"
- if (settings=="Permanent"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
@@ -337,12 +339,11 @@ gr.ChatInterface(
  chatbot=gr.Chatbot(render_markdown=True),
  title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
  additional_inputs=[
- gr.Textbox(value="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch.",label="System Prompt"),
- gr.Dropdown(["Permanent","Temporär"],value="Temporär",label="Dialog speichern?")
  ]
  ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
  print("Interface up and running!")

-
run.py (resulting file, changed sections):

  #########################################################################################
+ # Title: Gradio Interface to LLM-chatbot with Memory-RAG
  # Author: Andreas Fischer
  # Date: October 15th, 2023
+ # Last update: February 26th, 2024
  ##########################################################################################

  #https://github.com/abetlen/llama-cpp-python/issues/306
 
  onPrem = True if(os.path.exists(dbPath)) else False
  if(onPrem==False): dbPath="/home/user/app/db"

+ #onPrem=True # uncomment to override automatic detection
  print(dbPath)

  #client = chromadb.Client()
 

  x=collection.get(include=[])["ids"]
  if(len(x)==0):
+ message="Wer hat dich gemacht?" #"Ich bin ein User."
+ response="Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
  x=collection.get(include=[])["ids"]
  collection.add(
  documents=[message,response],
 
  )
  RAGResults["metadatas"][0][0]["dialog"]

+ collection.get(include=["documents"]) # inspect ids and documents
  x=collection.get(include=[])["ids"]
  x

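Note: the calls above are the whole memory mechanism: seed the collection when it is empty, read ids with include=[] as a cheap size check, and query by embedding similarity. A minimal self-contained sketch of the same chromadb calls (path and collection name are hypothetical; a PersistentClient is assumed):

    import chromadb

    client = chromadb.PersistentClient(path="/tmp/db_demo")        # hypothetical path
    demo = client.get_or_create_collection(name="demo")            # hypothetical name
    demo.add(documents=["Hello", "Hi there"], ids=["0", "1"],
             metadatas=[{"dialog": "user: Hello"}, {"dialog": "bot: Hi there"}])
    print(demo.get(include=[])["ids"])                             # ids only: cheap size check
    hit = demo.query(query_texts=["Hello"], n_results=1)           # nearest-neighbour lookup
    print(hit["metadatas"][0][0]["dialog"])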
 
  #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
  #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
+ #modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
+ #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
+ #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
  #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
+ modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
  #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
  if(os.path.exists(modelPath)==False):
 
  # Gradio-GUI
  #------------

+ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
  startOfString=""
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST] {system} [/INST] </s>"
  template1=" [INST] {message} [/INST]"
  template2=" {response}</s>"
  if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
  startOfString="<s>"
  template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST]{system}\n [/INST] </s>"

  template0="[INST]{system}\n [/INST]</s>" if onPrem else "<s>[INST]{system}\n [/INST]</s>"
  template1="[INST] {message} [/INST]"
  template2=" {response}</s>"
+ if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
+ if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
  template0="<|im_start|>system\n{system}<|im_end|>\n"
  template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
  template2="{response}<|im_end|>\n"
+ if("gemma-" in modelPath): # https://huggingface.co/google/gemma-7b-it
+ template0="<start_of_turn>user{system}</end_of_turn>"
+ template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
+ template2="{response}</end_of_turn>"
+ if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
+ startOfString="<s>"
+ template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
+ template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
+ template2="{response}<|end_of_turn|>"
  if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
  template0="{system} " #<s>
  template1="USER: {message} ASSISTANT: "
 
  if RAGAddon is not None:
  system += RAGAddon
  if system is not None:
+ prompt += template0.format(system=system.strip())
  if history is not None:
  for user_message, bot_response in history[-historylimit:]:
+ if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit].strip())
+ if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit].strip())
+ if message is not None: prompt += template1.format(message=message[:zeichenlimit].strip())
  if system2 is not None:
+ prompt += system2.strip()
  return startOfString+prompt

+
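Note: extend_prompt only concatenates the per-model templates around the system prompt, the recent history, and the current message. A quick trace of the Mixtral branch (template strings copied from above, sample strings invented):

    startOfString="<s>"
    template0=" [INST]{system}\n [/INST] </s>"
    template1=" [INST] {message} [/INST]"
    template2=" {response}</s>"

    prompt  = template0.format(system="You are helpful.")
    prompt += template1.format(message="Hi")              # one past user turn
    prompt += template2.format(response="Hello!")         # one past bot turn
    prompt += template1.format(message="How are you?")    # current message
    print(startOfString + prompt)
    # <s> [INST]You are helpful.
    #  [/INST] </s> [INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]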
  import gradio as gr
  import requests
  import json
 

  def response(message, history,customSysPrompt,settings):
  #print(str(history)) # print history
+ noAdditions=False
+ system=customSysPrompt
+ #if (system!="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter."): noAdditions=True
  message=message.replace("[INST]","")
  message=message.replace("[/INST]","")
+ message=message.replace("</s>","")
  message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
+ if (settings=="Memory On"):
  if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
  x=collection.get(include=[])["ids"]
  rag=None # RAG is turned off until history gets too long
+ historylimit=0 #4
  if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
  RAGResults=collection.query(
  query_texts=[message],
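Note on historylimit=0: if this value is forwarded to extend_prompt, it does not shorten the history, because history[-0:] equals history[0:] (the full list; -0 == 0 in Python), and len(x)>(historylimit*2) now enables RAG for any non-empty collection. If "no history in the prompt" is the intent, an explicit guard is needed:

    history = [("a", "b"), ("c", "d")]
    historylimit = 0
    print(history[-historylimit:])                              # full list: [('a', 'b'), ('c', 'd')]
    print(history[-historylimit:] if historylimit > 0 else [])  # guarded variant: []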
 
  bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
  #print("Message: "+message+"\n\nBest Match: "+bestMatch)
  rag="\n\n"
+ rag += "Mit Blick auf den aktuellen Stand des Dialogs erinnerst du dich insb. an folgende Episode:\n"
  rag += bestMatch
+ rag += "\n\nIm Folgenden siehst du den aktuellen Stand des Dialogs."
  #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
  #else:
  #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
 

  print("AI running on prem!" if(onPrem) else "AI running HFHub!")
  if(onPrem==False):
+ temperature=float(0.9)
+ max_new_tokens=500
+ top_p=0.95
+ repetition_penalty=1.0
+ if temperature < 1e-2: temperature = 1e-2
+ top_p = float(top_p)
+ generate_kwargs = dict(
+ temperature=temperature,
+ max_new_tokens=max_new_tokens,
+ top_p=top_p,
+ repetition_penalty=repetition_penalty,
+ do_sample=True,
+ seed=42,
  )
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
  response = ""
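Note: a standalone sketch of the HFHub streaming call above (huggingface_hub's InferenceClient; the model id is an assumption, the loop mirrors the token handling that follows):

    from huggingface_hub import InferenceClient

    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")  # assumed model id
    stream = client.text_generation("<s> [INST] Hi [/INST]", max_new_tokens=64,
                                    stream=True, details=True, return_full_text=False)
    text = ""
    for chunk in stream:              # with details=True each chunk carries one token
        if not chunk.token.special:   # skip EOS and other special tokens
            text += chunk.token.text
    print(text)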
 
  response += part
  yield response
  history.append((message, response)) # add current dialog to history
+ # Store current state in DB if memory is turned on
+ if (settings=="Memory On"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
 
  if(onPrem==True):
  # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
  url="http://0.0.0.0:2600/v1/completions"
+ body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+ if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
  if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
  response="" #+"("+myType+")\n"
  buffer=""
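Note: "echo":"False" and "stream":"True" above are JSON strings rather than booleans; the server may coerce them, but plain booleans are safer. A minimal sketch of consuming the SSE stream from the llama-cpp-python completions endpoint (URL copied from above; the data:/[DONE] framing follows the OpenAI-compatible protocol):

    import json, requests

    body = {"prompt": "Hi", "max_tokens": 64, "echo": False, "stream": True}
    text = ""
    with requests.post("http://0.0.0.0:2600/v1/completions", json=body, stream=True) as r:
        for line in r.iter_lines():
            if not line.startswith(b"data: "): continue   # skip blanks and keep-alives
            payload = line[len(b"data: "):]
            if payload == b"[DONE]": break                # end-of-stream marker
            text += json.loads(payload)["choices"][0]["text"]
    print(text)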
 
  pass
  yield response
  history.append((message, response)) # add current dialog to history
+ # Store current state in DB if memory is turned on
+ if (settings=="Memory On"):
  x=collection.get(include=[])["ids"] # add current dialog to db
  collection.add(
  documents=[message,response],
 
  chatbot=gr.Chatbot(render_markdown=True),
  title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
  additional_inputs=[
+ gr.Textbox(value=None,label="System Prompt"),
+ gr.Dropdown(["Memory On","Memory Off"],value="Memory Off",label="Memory")
  ]
  ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
  print("Interface up and running!")

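Note: gr.ChatInterface passes each additional_inputs component to the callback as an extra positional argument, which is how customSysPrompt and settings reach response() above. A minimal runnable skeleton with the same inputs (the echo body is a placeholder for the real streaming logic):

    import gradio as gr

    def respond(message, history, system_prompt, memory):
        # placeholder generator; the real app streams model tokens instead
        yield ("[" + memory + "] " + (system_prompt or "") + " You said: " + message).strip()

    gr.ChatInterface(
        respond,
        chatbot=gr.Chatbot(render_markdown=True),
        additional_inputs=[
            gr.Textbox(value=None, label="System Prompt"),
            gr.Dropdown(["Memory On", "Memory Off"], value="Memory Off", label="Memory"),
        ],
    ).queue().launch()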