Spaces: Runtime error
Update run.py
run.py (CHANGED)
@@ -1,8 +1,8 @@
 #########################################################################################
-# Title: Gradio Interface to LLM-chatbot with
+# Title: Gradio Interface to LLM-chatbot with Memory-RAG
 # Author: Andreas Fischer
 # Date: October 15th, 2023
-# Last update:
+# Last update: February 26th, 2024
 ##########################################################################################
 
 #https://github.com/abetlen/llama-cpp-python/issues/306
@@ -30,7 +30,7 @@ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
 onPrem = True if(os.path.exists(dbPath)) else False
 if(onPrem==False): dbPath="/home/user/app/db"
 
-#onPrem=
+#onPrem=True # uncomment to override automatic detection
 print(dbPath)
 
 #client = chromadb.Client()
@@ -68,8 +68,8 @@ print(collection.count())
 
 x=collection.get(include=[])["ids"]
 if(len(x)==0):
-  message="Ich bin
-  response="
+  message="Wer hat dich gemacht?"#"Ich bin ein User."
+  response="Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
   x=collection.get(include=[])["ids"]
   collection.add(
     documents=[message,response],
@@ -86,8 +86,7 @@ if(len(x)==0):
   )
   RAGResults["metadatas"][0][0]["dialog"]
 
-
-#collection.get()["ids","documents"]
+collection.get()["ids","documents"]
 x=collection.get(include=[])["ids"]
 x
 
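Note on the Chroma calls in the two hunks above: the collection itself is created outside the shown hunks, so the following is only a minimal sketch of how such a memory collection is typically set up, seeded and queried. The client type, the collection name and the exact "dialog" metadata format are assumptions, not the script's actual code.

    import chromadb

    client = chromadb.PersistentClient(path="/home/user/app/db")   # assumed persistent client at the Space's db path
    collection = client.get_or_create_collection(name="memory")    # hypothetical collection name

    message = "Wer hat dich gemacht?"
    response = "Dr. Andreas Fischer hat mich auf Basis von open-source Software programmiert."
    next_id = len(collection.get(include=[])["ids"])               # same id-counting trick as in the script
    collection.add(
        documents=[message, response],
        metadatas=[{"dialog": "User: " + message + "\nAI: " + response}] * 2,  # assumed "dialog" format
        ids=[str(next_id), str(next_id + 1)],
    )

    # later: fetch the stored dialog that best matches a new message
    hit = collection.query(query_texts=["Wer ist dein Autor?"], n_results=1)
    print(hit["metadatas"][0][0]["dialog"])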
@@ -116,11 +115,11 @@ else:
   #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
   #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
   #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
-  modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
-  modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
-  modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
+  #modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
+  #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
+  #modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
   #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
-
+  modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
   #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
   #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
   if(os.path.exists(modelPath)==False):
@@ -149,16 +148,12 @@ else:
 # Gradio-GUI
 #------------
 
-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
   startOfString=""
   if zeichenlimit is None: zeichenlimit=1000000000 # :-)
   template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST] {system} [/INST] </s>"
   template1=" [INST] {message} [/INST]"
   template2=" {response}</s>"
-  if("gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
-    template0="<start_of_turn>user{system}</end_of_turn>"
-    template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
-    template2="{response}</end_of_turn>"
   if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
     startOfString="<s>"
     template0=" [INST]{system}\n [/INST] </s>" if onPrem else "<s> [INST]{system}\n [/INST] </s>"
@@ -169,19 +164,23 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
     template0="[INST]{system}\n [/INST]</s>" if onPrem else "<s>[INST]{system}\n [/INST]</s>"
     template1="[INST] {message} [/INST]"
     template2=" {response}</s>"
-  if("
-    startOfString="<s>"
-    template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
-    template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
-    template2="{response}<|end_of_turn|>"
-  if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
+  if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
     template0="<|im_start|>system\n{system}<|im_end|>\n"
     template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
     template2="{response}<|im_end|>\n"
-  if("
+  if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
     template0="<|im_start|>system\n{system}<|im_end|>\n"
     template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
     template2="{response}<|im_end|>\n"
+  if("gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+    template0="<start_of_turn>user{system}</end_of_turn>"
+    template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
+    template2="{response}</end_of_turn>"
+  if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
+    startOfString="<s>"
+    template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
+    template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
+    template2="{response}<|end_of_turn|>"
   if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
     template0="{system} " #<s>
     template1="USER: {message} ASSISTANT: "
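To see what one of these template triplets produces, here is the ChatML-style variant (used for DiscoLM and SauerkrautLM above) rendered with made-up strings:

    template0 = "<|im_start|>system\n{system}<|im_end|>\n"
    template1 = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    template2 = "{response}<|im_end|>\n"

    prompt  = template0.format(system="Du bist ein hilfreicher Assistent.")
    prompt += template1.format(message="Hallo!")   # current user turn, ends with the open assistant tag
    print(prompt)
    # a finished turn would later be appended with:
    # prompt += template2.format(response="Hallo, wie kann ich helfen?")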
@@ -194,16 +193,17 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
   if RAGAddon is not None:
     system += RAGAddon
   if system is not None:
-    prompt += template0.format(system=system)
+    prompt += template0.format(system=system.strip())
   if history is not None:
     for user_message, bot_response in history[-historylimit:]:
-      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
-      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
-  if message is not None: prompt += template1.format(message=message[:zeichenlimit])
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit].strip())
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit].strip())
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit].strip())
   if system2 is not None:
-    prompt += system2
+    prompt += system2.strip()
   return startOfString+prompt
 
+
 import gradio as gr
 import requests
 import json
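A minimal call of the finished helper could then look like this (the history uses Gradio's list of (user, bot) tuples; the strings are made up):

    prompt = extend_prompt(
        message="Wie heißt du?",
        history=[("Hallo!", "Hallo, wie kann ich helfen?")],
        system="Du bist ein hilfreicher Assistent.",
    )
    print(prompt)   # system template + recent turns + the current user message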
@@ -213,16 +213,18 @@ import re
 
 def response(message, history,customSysPrompt,settings):
   #print(str(history)) # print history
-
-  system=
+  noAdditions=False
+  system=customSysPrompt
+  #if (system!="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter."): noAdditions=True
   message=message.replace("[INST]","")
   message=message.replace("[/INST]","")
+  message=message.replace("</s>","")
   message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
-  if (settings=="
+  if (settings=="Memory On"):
     if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
   x=collection.get(include=[])["ids"]
   rag=None # RAG is turned off until history gets too long
-  historylimit=4
+  historylimit=0 #4
   if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
     RAGResults=collection.query(
       query_texts=[message],
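The scrubbing of model-control tokens from the user input can be tried in isolation; a quick sanity check (not part of the script):

    import re

    msg = "Hi [INST]x[/INST] <|im_end|> there</s>"
    msg = msg.replace("[INST]", "").replace("[/INST]", "").replace("</s>", "")
    msg = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", "", msg)
    print(repr(msg))   # control markers are gone, plain text remains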
@@ -232,9 +234,9 @@ def response(message, history,customSysPrompt,settings):
     bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
     #print("Message: "+message+"\n\nBest Match: "+bestMatch)
     rag="\n\n"
-    rag += "Mit Blick auf den aktuellen Stand
+    rag += "Mit Blick auf den aktuellen Stand des Dialogs erinnerst du dich insb. an folgende Episode:\n"
     rag += bestMatch
-    rag += "\n\nIm Folgenden siehst du den aktuellen Stand
+    rag += "\n\nIm Folgenden siehst du den aktuellen Stand des Dialogs."
     #if (noAdditions==False): rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
     #else:
     #if (noAdditions==False): system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
@@ -249,19 +251,19 @@ def response(message, history,customSysPrompt,settings):
 
   print("AI running on prem!" if(onPrem) else "AI running HFHub!")
   if(onPrem==False):
-
-
-
-
-
-
-
-
-
-
-
-
-
+    temperature=float(0.9)
+    max_new_tokens=500
+    top_p=0.95
+    repetition_penalty=1.0
+    if temperature < 1e-2: temperature = 1e-2
+    top_p = float(top_p)
+    generate_kwargs = dict(
+      temperature=temperature,
+      max_new_tokens=max_new_tokens,
+      top_p=top_p,
+      repetition_penalty=repetition_penalty,
+      do_sample=True,
+      seed=42,
     )
     stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     response = ""
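The client used in this HFHub branch is presumably a huggingface_hub.InferenceClient; its construction is outside the shown hunks, so the following is only a sketch under that assumption (the model id is a placeholder):

    from huggingface_hub import InferenceClient

    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")   # placeholder model id
    prompt = "<s> [INST] Hallo! [/INST]"
    generate_kwargs = dict(temperature=0.9, max_new_tokens=500, top_p=0.95,
                           repetition_penalty=1.0, do_sample=True, seed=42)
    stream = client.text_generation(prompt, **generate_kwargs,
                                    stream=True, details=True, return_full_text=False)
    response = ""
    for chunk in stream:
        response += chunk.token.text   # assumption: the loop body hidden in the diff collects tokens like this
    print(response)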
@@ -272,8 +274,8 @@ def response(message, history,customSysPrompt,settings):
       response += part
       yield response
    history.append((message, response)) # add current dialog to history
-    # Store current state in DB if
-    if (settings=="
+    # Store current state in DB if memory is turned on
+    if (settings=="Memory On"):
      x=collection.get(include=[])["ids"] # add current dialog to db
      collection.add(
        documents=[message,response],
@@ -288,8 +290,8 @@ def response(message, history,customSysPrompt,settings):
   if(onPrem==True):
     # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
     url="http://0.0.0.0:2600/v1/completions"
-    body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"}
-    if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]})
+    body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+    if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
     if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
     response="" #+"("+myType+")\n"
     buffer=""
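The on-prem branch streams from a llama-cpp-python server's OpenAI-style /v1/completions endpoint. The parsing loop is not part of the shown hunks; a typical pattern for such a stream looks like this (endpoint and stop tokens mirror the hunk, the rest is an assumption):

    import json
    import requests

    url = "http://0.0.0.0:2600/v1/completions"
    body = {"prompt": "Hallo!", "max_tokens": 500, "stream": True,
            "stop": ["<|im_end|>", "</end_of_turn>"]}          # stop tokens as in the Gemma fix above
    response = ""
    with requests.post(url, json=body, stream=True) as r:
        for line in r.iter_lines():
            if not line or not line.startswith(b"data: "):     # server sends Server-Sent Events
                continue
            payload = line[len(b"data: "):]
            if payload == b"[DONE]":
                break
            response += json.loads(payload)["choices"][0]["text"]
    print(response)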
@@ -319,8 +321,8 @@ def response(message, history,customSysPrompt,settings):
         pass
       yield response
    history.append((message, response)) # add current dialog to history
-    # Store current state in DB if
-    if (settings=="
+    # Store current state in DB if memory is turned on
+    if (settings=="Memory On"):
      x=collection.get(include=[])["ids"] # add current dialog to db
      collection.add(
        documents=[message,response],
@@ -337,12 +339,11 @@ gr.ChatInterface(
   chatbot=gr.Chatbot(render_markdown=True),
   title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
   additional_inputs=[
-    gr.Textbox(value=
-    gr.Dropdown(["
+    gr.Textbox(value=None,label="System Prompt"),
+    gr.Dropdown(["Memory On","Memory Off"],value="Memory Off",label="Memory")
   ]
 ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
 print("Interface up and running!")
 
 
 
-
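Finally, the interface wiring can be tried standalone; the echo generator below is only a stand-in for the script's response() function:

    import gradio as gr

    def demo_response(message, history, system_prompt, settings):
        # stand-in for response(): just echoes what the real handler would receive
        yield "[" + settings + "] " + (system_prompt or "") + " " + message

    gr.ChatInterface(
        demo_response,
        chatbot=gr.Chatbot(render_markdown=True),
        title="AI-Interface (demo)",
        additional_inputs=[
            gr.Textbox(value=None, label="System Prompt"),
            gr.Dropdown(["Memory On", "Memory Off"], value="Memory Off", label="Memory"),
        ],
    ).queue().launch()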