File size: 4,227 Bytes
7831eba
 
9d49e57
76a7d46
7831eba
 
 
a7d91d4
 
37a3c87
a7d91d4
b752df1
b035ea0
f071706
7665893
cfb5462
 
6fafd7a
7831eba
555ac42
7831eba
 
c7fd9ac
7831eba
 
 
b5fab19
8baca64
7831eba
0cd27a0
 
 
7831eba
 
 
 
 
 
 
 
 
 
 
555ac42
8baca64
408d3e1
7831eba
 
408d3e1
 
 
8baca64
408d3e1
 
 
890c8a8
408d3e1
890c8a8
7831eba
32ff87b
 
f73d42f
7831eba
32ff87b
 
97f173f
32ff87b
 
f73d42f
7831eba
 
 
 
 
 
cfb5462
 
d5b1c0a
70ac69e
9436706
051148e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3910665
 
 
 
 
7831eba
 
 
d5b1c0a
6fafd7a
03ba387
4323b57
7831eba
 
 
 
 
 
 
793da93
7831eba
3910665
555ac42
7831eba
 
 
 
d8d19ad
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
from huggingface_hub import InferenceClient
import os

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import requests

from openai import OpenAI, AsyncOpenAI

# API key shared by every OpenAI-compatible endpoint registered below.
token = os.getenv('API_KEY')

# Registry keyed by the model label shown in the UI.
# Each value is a two-item list: [client instance, model identifier sent in requests].
clients = {
    '32B-QWQ': [
        OpenAI(api_key=token, base_url=os.getenv('RUADAPT_UNIVERSAL_URL')),
        'RefalMachine/RuadaptQwen2.5-32B-QWQ-Beta',
    ],
}

def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty
):
    """Stream a chat completion and yield the accumulated reply text.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs;
            falsy entries in a pair are skipped.
        model_name: Key into the module-level ``clients`` registry, whose
            values are ``[client, model_id]`` pairs.
        system_message: Optional system prompt; ignored when empty or
            whitespace-only.
        max_tokens: Forwarded to the completion request as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        repetition_penalty: Sent to the server through ``extra_body``.

    Yields:
        The full reply accumulated so far, once per streamed chunk, with
        ``<think>`` / ``</think>`` tags backslash-escaped.

    Raises:
        KeyError: If ``model_name`` is not present in ``clients``.
    """
    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    client, model_id = clients[model_name][0], clients[model_name][1]
    stream = client.chat.completions.create(
        model=model_id,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stream=True,
        extra_body={
            "repetition_penalty": repetition_penalty,
            "add_generation_prompt": True,
        }
    )

    response = ""
    for chunk in stream:
        # A chunk may carry no choices at all, or a delta whose content is
        # None (e.g. the role-only or finish chunk); neither adds text.
        piece = chunk.choices[0].delta.content if chunk.choices else None
        if piece:
            response += piece
            # Escape on the accumulated text so a tag split across chunks is
            # still caught. Presumably this keeps the tags visible in the
            # rendered chat instead of being treated as markup.
            response = response.replace('<think>', '\\<think\\>')
            response = response.replace('</think>', '\\</think\\>')
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Model labels offered in the UI; only the first entry is exposed.
options = ['32B-QWQ'][:1]

# Candidate system prompts kept for reference (English and two Russian variants).
system_old = (
    "You are a helpful and harmless assistant. "
    "You should think step-by-step. "
    "First, reason (the user does not see your reasoning), "
    "then give your final answer."
)
system_new = (
    "Ты Руадапт - полезный и дружелюбный интеллектуальный ассистент "
    "для помощи пользователям в их вопросах."
)
system_new2 = (
    "Ты — Руадапт, русскоязычный автоматический ассистент. "
    "Ты разговариваешь с людьми и помогаешь им."
)
# Delimiter pairs handed to the chatbot for LaTeX rendering. Each entry has
# the opening string, the closing string, and whether the math is shown in
# display mode. Delimiters are literal text, so the braces of
# \begin{...} / \end{...} must not be backslash-escaped ("\{" is also an
# invalid Python escape sequence).
latex_delimiters = [
    {"left": "\\(", "right": "\\)", "display": True},
    {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
    {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
    {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
    {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
    {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "$$", "right": "$$", "display": True},
]
# Chat display widget; renders LaTeX using the delimiter pairs defined above.
chatbot = gr.Chatbot(
    label="Chatbot",
    scale=1,
    height=400,
    latex_delimiters=latex_delimiters,
)

# Chat UI wired to `respond`. The additional inputs are passed to `respond`
# positionally after (message, history), so their order must match its
# signature: model_name, system_message, max_tokens, temperature, top_p,
# repetition_penalty.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=options, label="Model:", value=options[0]),
        gr.Textbox(value="", label="System message"),
        # step=1: with minimum=1 a step of 2 only allows odd values, which
        # puts the default of 4096 off the slider's grid.
        gr.Slider(minimum=1, maximum=4096*6, value=4096, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
    ],
    chatbot=chatbot,
    concurrency_limit=10
)


# Launch the app only when this file is executed as a script (not on import).
# share=True asks Gradio to also create a public share link.
if __name__ == "__main__":
    demo.launch(share=True)