|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import requests |
|
|
|
# Path to a locally available quantized GGUF model; if it is missing, a
# smaller Q4_0 variant is downloaded from Hugging Face instead.
modelPath = "/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"

if not os.path.exists(modelPath):
    url = "https://huggingface.co/TheBloke/SauerkrautLM-7B-HerO-GGUF/resolve/main/sauerkrautlm-7b-hero.Q4_0.gguf?download=true"
    # Stream the download in chunks: the file is several GB, so buffering
    # response.content entirely in memory could exhaust RAM.
    with requests.get(url, stream=True) as response:
        # Fail loudly on HTTP errors instead of silently writing an
        # HTML error page to disk as if it were the model file.
        response.raise_for_status()
        with open("./model.gguf", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                file.write(chunk)
    print("Model downloaded")
    modelPath = "./model.gguf"

print(modelPath)
|
|
|
|
|
|
|
|
|
|
|
import subprocess

# Launch the llama.cpp OpenAI-compatible API server as a background process.
# NOTE(review): Popen returns immediately — the model may still be loading
# when the message below prints, so "ready" is optimistic; confirm callers
# tolerate a brief startup window.
server_cmd = [
    "python3", "-m", "llama_cpp.server",
    "--model", modelPath,
    "--host", "0.0.0.0",
    "--port", "2600",
    "--n_threads", "4",
]
subprocess.Popen(server_cmd)
print("Server ready!")
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import requests |
|
import json |
|
def response(message, history):
    """Stream a chat completion for *message* from the local llama.cpp server.

    Generator used by gr.ChatInterface: yields the accumulated assistant
    reply after each streamed token. *history* is supplied by Gradio but is
    intentionally ignored here (each request is sent single-turn).
    """
    addon = ""
    url = "http://localhost:2600/v1/completions"
    system = "Du bist ein KI-basiertes Assistenzsystem." + addon + "\n\nUser-Anliegen:"

    # ChatML prompt format expected by the SauerkrautLM-HerO model.
    body = {
        "prompt": f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n",
        "max_tokens": 500,
        # Real booleans, not the strings "False"/"True": string values are
        # not reliably interpreted as booleans by the server's request parser.
        "echo": False,
        "stream": True,
    }

    response = ""
    print("URL: " + url)
    print(str(body))
    print("User: " + message + "\nAI: ")

    # The server answers with server-sent events: one 'data: {...}' JSON line
    # per generated token, interleaved with ': ping' keep-alive comments.
    # iter_lines() reassembles HTTP chunks into complete lines, replacing the
    # fragile manual buffering on partial chunks; the with-block ensures the
    # connection is closed even if the consumer abandons the generator.
    with requests.post(url, json=body, stream=True) as resp:
        for raw_line in resp.iter_lines():
            if not raw_line:
                continue  # blank SSE separator line
            line = raw_line.decode("utf-8")
            if line.startswith(": ping"):
                continue  # SSE keep-alive comment
            if line.startswith("data: "):
                line = line[len("data: "):]
            if line.strip() == "[DONE]":
                break  # end-of-stream sentinel
            try:
                part = str(json.loads(line)["choices"][0]["text"])
            except Exception as e:
                # Tolerate odd lines (e.g. the final chunk without text)
                # rather than killing the stream mid-reply.
                print("Exception:" + str(e))
                continue
            print(part, end="", flush=True)
            response = response + part
            yield response
|
|
|
# Build and launch the chat UI. queue() is required for streaming generator
# responses; share=True additionally exposes a public Gradio link.
chat_ui = gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(render_markdown=True),
    title="German SauerkrautLM-7B-HerO-GGUF Chat",
)
chat_ui.queue().launch(share=True)
print("Interface up and running!")
|
|