Spaces:
Running
Running
File size: 4,227 Bytes
7831eba 9d49e57 76a7d46 7831eba a7d91d4 37a3c87 a7d91d4 b752df1 b035ea0 f071706 7665893 cfb5462 6fafd7a 7831eba 555ac42 7831eba c7fd9ac 7831eba b5fab19 8baca64 7831eba 0cd27a0 7831eba 555ac42 8baca64 408d3e1 7831eba 408d3e1 8baca64 408d3e1 890c8a8 408d3e1 890c8a8 7831eba 32ff87b f73d42f 7831eba 32ff87b 97f173f 32ff87b f73d42f 7831eba cfb5462 d5b1c0a 70ac69e 9436706 051148e 3910665 7831eba d5b1c0a 6fafd7a 03ba387 4323b57 7831eba 793da93 7831eba 3910665 555ac42 7831eba d8d19ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import gradio as gr
from huggingface_hub import InferenceClient
import os
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import requests
from openai import OpenAI, AsyncOpenAI
# API key shared by every client below, read from the API_KEY environment variable.
token = os.getenv('API_KEY')

# Registry keyed by the model name shown in the UI. Each value is a two-item
# list: [OpenAI client bound to the RUADAPT_UNIVERSAL_URL endpoint,
#        model identifier sent in the `model` field of each request].
clients = {
    '32B-QWQ': [
        OpenAI(api_key=token, base_url=os.getenv('RUADAPT_UNIVERSAL_URL')),
        'RefalMachine/RuadaptQwen2.5-32B-QWQ-Beta',
    ],
}
def _build_messages(message, history, system_message):
    """Return the OpenAI-style message list for one chat turn.

    The optional system prompt comes first, then the non-empty user and
    assistant entries of every history pair, then the new user message.
    """
    messages = []
    # A blank (or missing) system prompt is omitted instead of being sent empty.
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})
    return messages


def _escape_think_tags(text):
    """Backslash-escape the <think> / </think> markers contained in *text*."""
    # Presumably needed so the chat widget shows the tags as text instead of
    # treating them as markup -- confirm against the rendered output.
    return text.replace('<think>', '\\<think\\>').replace('</think>', '\\</think\\>')


def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty
):
    """Stream the model's reply to *message*, yielding the text accumulated so far.

    Args:
        message: The new user message.
        history: Earlier turns as (user_text, assistant_text) pairs; empty
            entries of a pair are skipped.
        model_name: Key into the module-level ``clients`` registry.
        system_message: Optional system prompt; ignored when blank.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        repetition_penalty: Forwarded in ``extra_body``.

    Yields:
        The reply accumulated so far, with ``<think>`` / ``</think>``
        backslash-escaped, once per chunk that carries new text.

    Raises:
        KeyError: If *model_name* is not a key of ``clients``.
    """
    client, model_id = clients[model_name]
    stream = client.chat.completions.create(
        model=model_id,
        messages=_build_messages(message, history, system_message),
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stream=True,
        extra_body={
            "repetition_penalty": repetition_penalty,
            "add_generation_prompt": True,
        }
    )

    response = ""
    for chunk in stream:
        # Some streamed chunks carry no choices at all; nothing to append.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        # Role-only and final chunks have content None (or ""); concatenating
        # None would raise TypeError and abort the stream.
        if not piece:
            continue
        # Escape on the accumulated text so a tag split across two chunks is
        # still caught once it is complete.
        response = _escape_think_tags(response + piece)
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Model keys offered in the UI; the slice keeps only the first entry.
options = ["32B-QWQ"][:1]

# Alternative system prompts (one English, two Russian). None of them is
# referenced by the code visible in this file.
system_old = (
    "You are a helpful and harmless assistant. You should think step-by-step. "
    "First, reason (the user does not see your reasoning), then give your final answer."
)
system_new = (
    "Ты Руадапт - полезный и дружелюбный интеллектуальный ассистент для помощи пользователям в их вопросах."
)
system_new2 = (
    "Ты — Руадапт, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
)
# LaTeX environments whose \begin{...} / \end{...} pairs are rendered as math.
_LATEX_ENVIRONMENTS = ("equation", "align", "alignat", "gather", "CD")

# Delimiter pairs handed to the chatbot for math rendering. The delimiters are
# literal strings, so braces must not be backslash-escaped: "\{" is an invalid
# Python escape that leaves a stray backslash in the value (and triggers a
# SyntaxWarning on recent Python versions), which stops the delimiter from
# matching "\begin{equation}" in model output.
# NOTE(review): "\(" ... "\)" is conventionally inline math, yet it is marked
# display=True here; left as-is -- confirm this is intended.
latex_delimiters = [
    {"left": "\\(", "right": "\\)", "display": True},
    *(
        {
            "left": "\\begin{%s}" % env,
            "right": "\\end{%s}" % env,
            "display": True,
        }
        for env in _LATEX_ENVIRONMENTS
    ),
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "$$", "right": "$$", "display": True},
]
# Chat display widget, configured with the math delimiters defined above.
chatbot = gr.Chatbot(
    label="Chatbot",
    scale=1,
    height=400,
    latex_delimiters=latex_delimiters,
)
# Chat UI around `respond`. The additional inputs are listed in the same order
# as respond's extra parameters: model_name, system_message, max_tokens,
# temperature, top_p, repetition_penalty.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=options, label="Model:", value=options[0]),
        gr.Textbox(value="", label="System message"),
        # step=1: with minimum=1 a step of 2 only lands on odd values, which
        # put both the default (4096) and the maximum (24576) off the grid.
        gr.Slider(minimum=1, maximum=4096 * 6, value=4096, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
    ],
    chatbot=chatbot,
    concurrency_limit=10,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)
|