Armando Medina committed on
Commit 86e8c59 · 1 Parent(s): 16db460
Files changed (1)
  1. app.py +8 -16
app.py CHANGED
@@ -1,24 +1,16 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, check:
-https://huggingface.co/docs/huggingface_hub/en/guides/inference
-"""
-
-# Initialize the Inference API Client with your model
-client = InferenceClient("one1cat/FineTunes_LLM_CFR_49")
-
+# 🔹 Initialize Hugging Face Inference Client
+client = InferenceClient(model="one1cat/FineTunes_LLM_CFR_49")
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """
-    Generates responses using the fine-tuned CFR 49 model.
+    Generates responses using the fine-tuned CFR 49 model hosted on Hugging Face.
     """
 
-    # Format prompt
     prompt = f"{system_message}\n\nUser: {message}\n\nAssistant:"
 
-    # Generate response
     response = ""
 
     try:
@@ -27,20 +19,20 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
             max_new_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-            stream=True,  # Enables token streaming
+            stream=True,  # Enables real-time streaming output
         ):
             response += token
-            yield response
+            yield response  # Stream response token by token
 
     except Exception as e:
-        yield f"Error: {str(e)}"
+        yield f"Error: {str(e)}"  # Handle potential API errors
 
 
-# Gradio Chat Interface
+# 🔹 Gradio Chat Interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are an AI trained on CFR 49 regulations.", label="System message"),
+        gr.Textbox(value="You are a CFR 49 regulatory compliance assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
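Both hunks elide the lines that open the streaming call (old 25-26 / new 17-18) and everything after the sliders, so for context here is a minimal sketch of how the post-commit respond() likely reads. The client.text_generation(...) wrapper, the list/call closers, and the demo.launch() guard are assumptions filled in from the surrounding context, not lines shown in this commit:

# Sketch only: the text_generation(...) call and the launch guard are
# assumptions; this commit's hunks do not show those lines.
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient(model="one1cat/FineTunes_LLM_CFR_49")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    prompt = f"{system_message}\n\nUser: {message}\n\nAssistant:"
    response = ""
    try:
        # With stream=True and the default details=False, text_generation
        # yields plain token strings as the endpoint produces them.
        for token in client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            response += token
            yield response  # ChatInterface re-renders the growing reply
    except Exception as e:
        yield f"Error: {str(e)}"


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a CFR 49 regulatory compliance assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Note that the generator yields the accumulated response rather than each token: gr.ChatInterface treats every yield as the full assistant message so far, which is what makes the reply appear to grow in place.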