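# app.py for a Hugging Face Space: a small Gradio chat UI backed by a
# locally downloaded and served Ollama model (tinydolphin).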
import os
import threading
import time
import subprocess
# from transformers import pipeline  # only needed by the unused predict_t below
import ollama
import gradio
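
# Download the Ollama binary on first run and make it executable.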
OLLAMA = os.path.expanduser("~/ollama")
if not os.path.exists(OLLAMA):
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)
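
# Shared chat history as (user_message, assistant_reply) tuples.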
history = []
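
# Run "ollama serve" in a background thread so the Gradio app can start
# alongside it in the same container.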
def ollama_service_thread():
    subprocess.run("~/ollama serve", shell=True)
OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()
print("Giving ollama serve a moment")
time.sleep(10)
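
# Pull the model weights once the server has had time to come up.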
subprocess.run("~/ollama pull tinydolphin:latest", shell=True)
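
# Convert the (user, assistant) history tuples into the role/content
# message dicts that ollama.chat expects.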
def get_history_messages():
    messages = []
    for user, assist in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assist})
    return messages
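
# Streaming chat handler: sends the full history plus the new prompt to
# Ollama and yields partial transcripts so the Chatbot updates live.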
def predict(prompt):
    response = ollama.chat(
        model="tinydolphin",
        messages=[
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
        stream=True
    )
    history.append((prompt, ""))
    message = ""
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history
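
# Unused alternative handler that runs the model through a transformers
# "conversational" pipeline instead of Ollama; it requires the
# commented-out transformers import above.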
def predict_t(prompt):
    print("Predict:", prompt)
    print("Loading model")
    pipe = pipeline("conversational", model="cognitivecomputations/TinyDolphin-2.8-1.1b")
    print("Running pipeline")
    response = pipe(
        [
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
    )
    history.append((prompt, response.messages[-1]["content"]))
    print("Predict done")
    return "", history
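
# Build the UI: a chatbot pane over a compact prompt row; both the button
# click and pressing Enter in the textbox trigger the same streaming handler.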
with gradio.Blocks(fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        button = gradio.Button(scale=1)
    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt], outputs=[prompt, chat])

if __name__ == '__main__':
    demo.launch()