ruslanmv committed · verified
Commit 54fe9a3 · 1 Parent(s): 1472595

Update app.py

Files changed (1): app.py +16 -22
app.py CHANGED
@@ -4,18 +4,13 @@ from functools import lru_cache
 # Cache model loading to optimize performance
 @lru_cache(maxsize=3)
 def load_hf_model(model_name):
-    # Use the Gradio-built huggingface loader instead of transformers_gradio
-    return gr.load(
-        name=f"deepseek-ai/{model_name}",
-        src="huggingface",  # Changed from transformers_gradio.registry
-        api_name="/chat"
-    )
+    return gr.load(f"models/{model_name}", src="huggingface")
 
 # Load all models at startup
 MODELS = {
-    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
-    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
-    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero")
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": load_hf_model("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"),
+    "deepseek-ai/DeepSeek-R1": load_hf_model("deepseek-ai/DeepSeek-R1"),
+    "deepseek-ai/DeepSeek-R1-Zero": load_hf_model("deepseek-ai/DeepSeek-R1-Zero")
 }
 
 # --- Chatbot function ---
@@ -27,22 +22,21 @@ def chatbot(input_text, history, model_choice, system_message, max_new_tokens, t
 
     # Create payload for the model
     payload = {
-        "messages": [{"role": "user", "content": input_text}],
-        "system": system_message,
-        "max_tokens": max_new_tokens,
-        "temperature": temperature,
-        "top_p": top_p
+        "inputs": input_text,  # Directly pass the input text
+        "parameters": {
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "return_full_text": False  # Only return the generated text
+        }
     }
 
     # Run inference using the selected model
     try:
-        response = model_component(payload)  # The response is likely a dictionary
-        if isinstance(response, dict) and "choices" in response:
-            # Assuming the response structure is similar to OpenAI's API
-            assistant_response = response["choices"][0]["message"]["content"]
-        elif isinstance(response, dict) and "generated_text" in response:
-            # If the response is in a different format, adjust accordingly
-            assistant_response = response["generated_text"]
+        response = model_component(**payload)  # Pass payload as keyword arguments
+        if isinstance(response, list) and len(response) > 0:
+            # Extract the generated text from the response
+            assistant_response = response[0].get("generated_text", "No response generated.")
         else:
             assistant_response = "Unexpected model response format."
     except Exception as e:
@@ -77,7 +71,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
         model_choice = gr.Radio(
            choices=list(MODELS.keys()),
            label="Choose a Model",
-           value="DeepSeek-R1"
+           value="deepseek-ai/DeepSeek-R1"
         )
         with gr.Accordion("Optional Parameters", open=False):
            system_message = gr.Textbox(
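
For context on the new loading path: interfaces returned by gr.load can be invoked like plain functions. A minimal sketch of the updated load_hf_model in isolation (assuming gradio is installed and the Hugging Face Inference API is reachable; the prompt string here is hypothetical, not from the commit):

import gradio as gr

# Mirrors the call introduced in this commit: the "models/..." prefix routes
# requests through the Hugging Face Inference API instead of loading weights locally.
demo = gr.load("models/deepseek-ai/DeepSeek-R1", src="huggingface")

# Interfaces returned by gr.load are callable; one call performs one
# inference round-trip against the hosted model.
print(demo("Hello, who are you?"))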
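The reshaped inputs/parameters payload follows the Inference API's text-generation format. As a hedged point of comparison, the same parameters can be sent directly via huggingface_hub.InferenceClient, bypassing Gradio entirely (the prompt and sampling values below are placeholders, not values from the commit):

from huggingface_hub import InferenceClient

# Hypothetical direct call carrying the same parameters as the new payload.
client = InferenceClient("deepseek-ai/DeepSeek-R1")
text = client.text_generation(
    "Hello, who are you?",   # stands in for input_text
    max_new_tokens=256,      # stands in for the max_new_tokens setting
    temperature=0.7,
    top_p=0.9,
    return_full_text=False,  # matches "return_full_text": False in the payload
)
print(text)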