Asilbek14 committed · Commit 17da298 · verified · 1 Parent(s): 275a6bc

Update app.py

Files changed (1):
  app.py  +47 -42
app.py CHANGED
@@ -1,13 +1,15 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-from transformers import pipeline
+from transformers import AutoTokenizer, pipeline
+from auto_gptq import AutoGPTQForCausalLM
 
 # ---------------- CONFIG ----------------
-MODEL_REPO = "TheBloke/vicuna-13b-1.1-HF"
+MODEL_NAME = "TheBloke/vicuna-13b-v1.3.0-GPTQ"
+MODEL_BASENAME = "vicuna-13b-v1.3.0-GPTQ-4bit-128g.no-act.order"
+
 TRANSLATOR_MODEL = "facebook/m2m100_418M"
 
 SYSTEM_PROMPT_DEFAULT = (
-    "You are Vicuna, a formal and polite AI assistant. "
+    "You are Vicuna-13B, a formal and polite AI assistant. "
     "Always respond formally and answer appropriately depending on the selected explanation style."
 )
 
@@ -16,8 +18,27 @@ MAX_NEW_TOKENS_DEFAULT = 300
 TEMP_DEFAULT = 0.3
 TOP_P_DEFAULT = 0.9
 
-# Clients
-client = InferenceClient(MODEL_REPO)
+# ---------------- LOAD MODELS ----------------
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(
+    MODEL_NAME,
+    model_basename=MODEL_BASENAME,
+    use_safetensors=True,
+    trust_remote_code=True,
+    device="cuda:0",  # GPU
+    use_triton=False,
+    quantize_config=None
+)
+generator = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=MAX_NEW_TOKENS_DEFAULT,
+    temperature=TEMP_DEFAULT,
+    top_p=TOP_P_DEFAULT,
+    repetition_penalty=1.15
+)
+
 translator = pipeline("translation", model=TRANSLATOR_MODEL)
 
 # ---------------- HELPERS ----------------
@@ -29,19 +50,18 @@ def is_translation_request(message: str) -> bool:
     return non_ascii_ratio > 0.4
 
 # ---------------- CHAT FUNCTION ----------------
-def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+def chat_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+    # Translation
     if is_translation_request(message):
        try:
            translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": translated})
-           yield "", chat_history
-           return
+           return "", chat_history
        except Exception as e:
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": f"⚠️ Translation failed: {str(e)}"})
-           yield "", chat_history
-           return
+           return "", chat_history
 
     # Apply response style
     if response_style == "No explanation":
@@ -51,48 +71,33 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
     elif response_style == "Detailed explanation":
         system_message += " Provide a thorough and detailed answer with reasoning and examples."
 
-    # Format chat for Vicuna (text-generation)
-    formatted_prompt = system_message + "\n\n"
+    # Format prompt
+    prompt = system_message + "\n\n"
     for turn in chat_history:
-        role = turn["role"]
-        content = turn["content"]
-        formatted_prompt += f"{role.capitalize()}: {content}\n"
-    formatted_prompt += f"User: {message}\nAssistant:"
+        prompt += f"{turn['role'].capitalize()}: {turn['content']}\n"
+    prompt += f"User: {message}\nAssistant:"
 
     # Append user turn first
     chat_history.append({"role": "user", "content": message})
     response = ""
-    chat_history.append({"role": "assistant", "content": ""})  # placeholder
 
     try:
-        for token in client.text_generation(
-            prompt=formatted_prompt,
+        output = generator(
+            prompt,
             max_new_tokens=max_tokens,
-            stream=True,
             temperature=temperature,
-            top_p=top_p,
-        ):
-            piece = ""
-            if isinstance(token, str):
-                piece = token
-            elif isinstance(token, dict):
-                if "token" in token and "text" in token["token"]:
-                    piece = token["token"]["text"]
-                elif "generated_text" in token:
-                    piece = token["generated_text"]
-
-            response += piece
-            chat_history[-1]["content"] = response
-            yield "", chat_history
+            top_p=top_p
+        )
+        response_text = output[0]["generated_text"].split("Assistant:")[-1].strip()
+        chat_history.append({"role": "assistant", "content": response_text})
    except Exception as e:
-        chat_history[-1]["content"] = f"⚠️ Error generating response: {str(e)}"
-        yield "", chat_history
+        chat_history.append({"role": "assistant", "content": f"⚠️ Error generating response: {str(e)}"})
 
-    yield "", chat_history
+    return "", chat_history
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown("# 🦙 Vicuna-13B Chat + 🌍 Translator")
+    gr.Markdown("# Vicuna-13B Chat + 🌍 Translator")
 
     chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")
 
@@ -113,12 +118,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink"))
     max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
 
     # Events
-    send_btn.click(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
-    msg.submit(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
+    send_btn.click(chat_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
+    msg.submit(chat_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
     clear_btn.click(lambda: [], None, chatbot, queue=False)
 
     gr.Markdown("---")
-    gr.Markdown("🔗 Built with ❤️ using [Vicuna-13B](https://huggingface.co/TheBloke/vicuna-13b-1.1-HF) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")
+    gr.Markdown("🔗 Built with ❤️ using [Vicuna-13B](https://huggingface.co/TheBloke/vicuna-13b-v1.3.0-GPTQ) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")
 
 if __name__ == "__main__":
     demo.launch()
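A note on the prompt format: the builder above tags turns as `User:`/`Assistant:` (the result of `.capitalize()`), while Vicuna v1.x checkpoints were fine-tuned on FastChat's upper-case `USER:`/`ASSISTANT:` template; a mismatched template tends to degrade answers and makes the `split("Assistant:")` extraction brittle. A minimal sketch of a template-conformant builder, assuming FastChat's v1.1 conversation format applies to this GPTQ checkpoint (`build_vicuna_prompt` is a hypothetical helper, not part of this commit):

```python
# Sketch only: Vicuna v1.x expects upper-case role tags on single-line turns.
# build_vicuna_prompt is a hypothetical helper; the template is FastChat's
# published v1.1 format, assumed here to match this GPTQ checkpoint.
def build_vicuna_prompt(system_message: str, chat_history: list, message: str) -> str:
    prompt = system_message + "\n\n"
    for turn in chat_history:
        role = "USER" if turn["role"] == "user" else "ASSISTANT"
        prompt += f"{role}: {turn['content']}\n"
    prompt += f"USER: {message}\nASSISTANT:"
    return prompt
```

If this template is adopted, the answer extraction in `chat_response` would need to split on `"ASSISTANT:"` rather than `"Assistant:"`.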
 
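The translation branch is carried over unchanged, including `src_lang="auto"`. M2M100's tokenizer expects a concrete source-language code; `"auto"` is not among its language codes, so this call is likely to raise at runtime rather than auto-detect. A minimal sketch of one way to supply a real code, assuming the third-party `langdetect` package (an assumption; nothing in this Space installs or uses it):

```python
# Sketch only: detect the source language before calling the M2M100 pipeline.
# langdetect is an assumed extra dependency (pip install langdetect); M2M100
# itself has no "auto" source-language mode.
from langdetect import detect

def translate_to_english(translator, message: str) -> str:
    src = detect(message)  # ISO-639-1 code, e.g. "fr"; may fail on very short input
    return translator(message, src_lang=src, tgt_lang="en")[0]["translation_text"]
```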
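The switch from `InferenceClient.text_generation(stream=True)` to a blocking `pipeline(...)` call also drops token streaming, which is why `chat_response` returns once instead of yielding partial histories. If streaming is wanted back with the locally loaded model, transformers' `TextIteratorStreamer` is the usual route; a minimal sketch, assuming `model` and `tokenizer` are the GPTQ objects loaded above (`stream_generate` is a hypothetical helper):

```python
# Sketch only: stream decoded text chunks from a local model.
# `model` and `tokenizer` are assumed to be the objects loaded in app.py.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(prompt: str, max_new_tokens: int = 300):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)).start()
    for piece in streamer:  # each iteration yields newly generated text
        yield piece
```

A Gradio handler could then accumulate these pieces and yield an updated chat history per chunk, restoring the previous streaming UI behavior.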