import os
import threading
import time
import subprocess

# from transformers import pipeline  # only needed by the unused predict_t() below
import ollama
import gradio

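# Download a standalone ollama binary to the home directory on first run.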
OLLAMA = os.path.expanduser("~/ollama")

if not os.path.exists(OLLAMA):
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)


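# Conversation history as (user, assistant) pairs, also used to render the Chatbot.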
history = []

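# Run "ollama serve" in a background thread so it does not block this script.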
def ollama_service_thread():
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()

# Give the server a moment to start, then pull the chat model.
print("Giving ollama serve a moment")
time.sleep(10)
subprocess.run("~/ollama pull tinydolphin:latest", shell=True)

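# Convert the (user, assistant) history pairs into the message-dict format ollama.chat() expects.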
def get_history_messages():
    messages = []
    for user, assist in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assist})
    return messages


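# Stream a reply from ollama, growing the last history entry chunk by chunk so the
# Chatbot updates live; the empty string in each yield clears the prompt textbox.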
def predict(prompt):
    response = ollama.chat(
        model="tinydolphin",
        messages=[
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
        stream=True
    )
    history.append((prompt, ""))
    message = ""
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history


def predict_t(prompt):
    # Unused alternative to predict(): runs the model through the transformers
    # "conversational" pipeline instead of ollama. To use it, uncomment the
    # "from transformers import pipeline" import above and point the Gradio
    # handlers at this function instead of predict().
    print("Predict:", prompt)
    print("Loading model")
    pipe = pipeline("conversational", model="cognitivecomputations/TinyDolphin-2.8-1.1b")
    print("Running pipeline")
    response = pipe(
        [
            *get_history_messages(),
            {"role": "user", "content": prompt}
        ],
    )
    history.append((prompt, response.messages[-1]["content"]))
    print("Predict done")
    return "", history

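# Build the UI: a chatbot pane with a prompt textbox and a send button below it.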
with gradio.Blocks(fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        button = gradio.Button(scale=1)

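    # Send on either the button click or pressing Enter in the textbox.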
    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt], outputs=[prompt, chat])


if __name__ == '__main__':
    demo.launch()