Update app.py
app.py CHANGED
@@ -1,13 +1,15 @@
 import gradio as gr
-from …
-from …
+from transformers import AutoTokenizer, pipeline
+from auto_gptq import AutoGPTQForCausalLM
 
 # ---------------- CONFIG ----------------
-…
+MODEL_NAME = "TheBloke/vicuna-13b-v1.3.0-GPTQ"
+MODEL_BASENAME = "vicuna-13b-v1.3.0-GPTQ-4bit-128g.no-act.order"
+
 TRANSLATOR_MODEL = "facebook/m2m100_418M"
 
 SYSTEM_PROMPT_DEFAULT = (
-    "You are Vicuna, a formal and polite AI assistant. "
+    "You are Vicuna-13B, a formal and polite AI assistant. "
     "Always respond formally and answer appropriately depending on the selected explanation style."
 )
 
@@ -16,8 +18,27 @@ MAX_NEW_TOKENS_DEFAULT = 300
 TEMP_DEFAULT = 0.3
 TOP_P_DEFAULT = 0.9
 
-# …
-…
+# ---------------- LOAD MODELS ----------------
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(
+    MODEL_NAME,
+    model_basename=MODEL_BASENAME,
+    use_safetensors=True,
+    trust_remote_code=True,
+    device="cuda:0",  # GPU
+    use_triton=False,
+    quantize_config=None
+)
+generator = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=MAX_NEW_TOKENS_DEFAULT,
+    temperature=TEMP_DEFAULT,
+    top_p=TOP_P_DEFAULT,
+    repetition_penalty=1.15
+)
+
 translator = pipeline("translation", model=TRANSLATOR_MODEL)
 
 # ---------------- HELPERS ----------------
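Note: the LOAD MODELS block added above follows the standard auto_gptq loading pattern. A minimal sketch of how the same pipeline could be smoke-tested outside the Space (the test prompt is invented; a CUDA GPU is assumed):

    from transformers import AutoTokenizer, pipeline
    from auto_gptq import AutoGPTQForCausalLM

    MODEL_NAME = "TheBloke/vicuna-13b-v1.3.0-GPTQ"
    MODEL_BASENAME = "vicuna-13b-v1.3.0-GPTQ-4bit-128g.no-act.order"

    # Load the 4-bit GPTQ checkpoint the same way the Space does.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
    model = AutoGPTQForCausalLM.from_quantized(
        MODEL_NAME,
        model_basename=MODEL_BASENAME,
        use_safetensors=True,
        device="cuda:0",
        use_triton=False,
        quantize_config=None,
    )
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # The pipeline returns prompt + completion under "generated_text".
    out = generator("User: What is GPTQ?\nAssistant:", max_new_tokens=64)
    print(out[0]["generated_text"])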
@@ -29,19 +50,18 @@ def is_translation_request(message: str) -> bool:
     return non_ascii_ratio > 0.4
 
 # ---------------- CHAT FUNCTION ----------------
-def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+def chat_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+    # Translation
     if is_translation_request(message):
         try:
             translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
             chat_history.append({"role": "user", "content": message})
             chat_history.append({"role": "assistant", "content": translated})
-            …
-            return
+            return "", chat_history
         except Exception as e:
             chat_history.append({"role": "user", "content": message})
             chat_history.append({"role": "assistant", "content": f"⚠️ Translation failed: {str(e)}"})
-            …
-            return
+            return "", chat_history
 
     # Apply response style
     if response_style == "No explanation":
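The is_translation_request helper is only partly visible in this hunk (its final line, return non_ascii_ratio > 0.4, appears as context). A plausible reconstruction of the heuristic it implies, assuming the body is the usual character-ratio check:

    def is_translation_request(message: str) -> bool:
        # Heuristic: treat mostly non-ASCII input as text to translate into English.
        if not message:
            return False
        non_ascii = sum(1 for ch in message if ord(ch) > 127)
        non_ascii_ratio = non_ascii / len(message)
        return non_ascii_ratio > 0.4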
@@ -51,48 +71,33 @@ def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
     elif response_style == "Detailed explanation":
         system_message += " Provide a thorough and detailed answer with reasoning and examples."
 
-    # Format …
-    …
+    # Format prompt
+    prompt = system_message + "\n\n"
     for turn in chat_history:
-        …
-        …
-        formatted_prompt += f"{role.capitalize()}: {content}\n"
-    formatted_prompt += f"User: {message}\nAssistant:"
+        prompt += f"{turn['role'].capitalize()}: {turn['content']}\n"
+    prompt += f"User: {message}\nAssistant:"
 
     # Append user turn first
     chat_history.append({"role": "user", "content": message})
     response = ""
-    chat_history.append({"role": "assistant", "content": ""})  # placeholder
 
     try:
-        …
-            prompt …
+        output = generator(
+            prompt,
             max_new_tokens=max_tokens,
-            stream=True,
             temperature=temperature,
-            top_p=top_p
-        )
-        for token in …:
-            if isinstance(token, str):
-                piece = token
-            elif isinstance(token, dict):
-                if "token" in token and "text" in token["token"]:
-                    piece = token["token"]["text"]
-                elif "generated_text" in token:
-                    piece = token["generated_text"]
-
-            response += piece
-            chat_history[-1]["content"] = response
-            yield "", chat_history
+            top_p=top_p
+        )
+        response_text = output[0]["generated_text"].split("Assistant:")[-1].strip()
+        chat_history.append({"role": "assistant", "content": response_text})
     except Exception as e:
-        chat_history…
-        yield "", chat_history
+        chat_history.append({"role": "assistant", "content": f"⚠️ Error generating response: {str(e)}"})
 
-
+    return "", chat_history
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# …
+    gr.Markdown("# Vicuna-13B Chat + 🌍 Translator")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")
 
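The parsing step in the new code relies on the text-generation pipeline echoing the prompt: splitting on the last "Assistant:" marker isolates the fresh completion. A self-contained illustration (the strings here are invented):

    # Mimic a pipeline result: the prompt comes back verbatim, followed by the reply.
    prompt = "You are a polite assistant.\n\nUser: Hi\nAssistant:"
    generated_text = prompt + " Hello! How can I help you today?"

    # Everything after the final "Assistant:" marker is the new reply.
    response_text = generated_text.split("Assistant:")[-1].strip()
    assert response_text == "Hello! How can I help you today?"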
@@ -113,12 +118,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
     max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
 
     # Events
-    send_btn.click(…
-    msg.submit(…
+    send_btn.click(chat_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
+    msg.submit(chat_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
     clear_btn.click(lambda: [], None, chatbot, queue=False)
 
     gr.Markdown("---")
-    gr.Markdown("🔗 Built with ❤️ using [Vicuna-13B](https://huggingface.co/TheBloke/vicuna-13b-…
+    gr.Markdown("🔗 Built with ❤️ using [Vicuna-13B](https://huggingface.co/TheBloke/vicuna-13b-v1.3.0-GPTQ) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")
 
 if __name__ == "__main__":
     demo.launch()
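On the event wiring: both send_btn.click and msg.submit pass the listed components positionally into chat_response, and its ("", chat_history) return value is written back into [msg, chatbot], which clears the textbox and refreshes the chat. A stripped-down sketch of the same pattern, with an echo function standing in for the model:

    import gradio as gr

    def echo(message, chat_history):
        # Stand-in for chat_response: record the turn, then clear the textbox.
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": message})
        return "", chat_history

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        msg = gr.Textbox()
        msg.submit(echo, [msg, chatbot], [msg, chatbot])

    demo.launch()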