akmalmzkki committed
Commit 5cbf26d · verified · Parent: 2a08607

Update app.py

Files changed (1)
  1. app.py +37 -91
app.py CHANGED
@@ -1,108 +1,54 @@
- # import gradio as gr
- # from huggingface_hub import InferenceClient
- # import os
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ import os

- # client = InferenceClient(
- #     model="mistralai/Mistral-Small-24B-Instruct-2501",
- #     token=os.getenv('HF_TOKEN')
- # )
+ client = InferenceClient(
+     model="mistralai/Mistral-7B-Instruct-v0.3",
+     token=os.getenv('HF_TOKEN')
+ )

- # def chat_fn(message, system_message, history_str, max_tokens, temperature, top_p):
- #     # Convert history string (optional) to message list
- #     messages = [{"role": "system", "content": system_message}]
+ def chat_fn(message, system_message, history_str, max_tokens, temperature, top_p):
+     # Convert history string (optional) to message list
+     messages = [{"role": "system", "content": system_message}]

- #     if history_str:
- #         # Format: user1||assistant1\nuser2||assistant2
- #         for pair in history_str.split("\n"):
- #             if "||" in pair:
- #                 user_msg, assistant_msg = pair.split("||", 1)
- #                 messages.append({"role": "user", "content": user_msg})
- #                 messages.append({"role": "assistant", "content": assistant_msg})
+     if history_str:
+         # Format: user1||assistant1\nuser2||assistant2
+         for pair in history_str.split("\n"):
+             if "||" in pair:
+                 user_msg, assistant_msg = pair.split("||", 1)
+                 messages.append({"role": "user", "content": user_msg})
+                 messages.append({"role": "assistant", "content": assistant_msg})

- #     messages.append({"role": "user", "content": message})
-
- #     # Get response from HF
- #     response = ""
- #     for chunk in client.chat_completion(
- #         messages=messages,
- #         stream=True,
- #         max_tokens=max_tokens,
- #         temperature=temperature,
- #         top_p=top_p,
- #     ):
- #         response += chunk.choices[0].delta.content or ""
+     messages.append({"role": "user", "content": message})
+
+     # Get response from HF
+     response = ""
+     for chunk in client.chat_completion(
+         messages=messages,
+         stream=True,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         response += chunk.choices[0].delta.content or ""

- #     return response
-
- # demo = gr.Interface(
- #     fn=chat_fn,
- #     inputs=[
- #         gr.Textbox(lines=2, label="User Message"),
- #         gr.Textbox(value="You are a friendly Chatbot.", label="System Prompt"),
- #         gr.Textbox(lines=4, placeholder="user||bot\nuser2||bot2", label="Conversation History (optional)"),
- #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
- #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
- #     ],
- #     outputs="text",
- #     allow_flagging="never",
- #     title="LLM Budaya",
- #     description="Chatbot using the HuggingFace Zephyr-7B model"
- # )
-
- # if __name__ == "__main__":
- #     demo.launch()
-
- import gradio as gr
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- # Load model & tokenizer
- model_id = "mistralai/Mistral-Small-24B-Instruct-2501"
- tokenizer = AutoTokenizer.from_pretrained(model_id)
+     return response

- # Load the model on CPU
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     torch_dtype=torch.float32,
-     device_map={"": "cpu"}
- )
-
- # Inference function
- def chat_fn(message, system_prompt, max_tokens, temperature, top_p):
-     prompt = f"<s>[INST] {system_prompt.strip()}\n{message.strip()} [/INST]"
-
-     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-
-     with torch.no_grad():
-         output = model.generate(
-             **inputs,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-     return decoded.split("[/INST]")[-1].strip()
-
- # Gradio UI
  demo = gr.Interface(
      fn=chat_fn,
      inputs=[
          gr.Textbox(lines=2, label="User Message"),
-         gr.Textbox(value="You are a helpful and concise assistant.", label="System Prompt"),
-         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
+         gr.Textbox(value="You are a friendly Chatbot.", label="System Prompt"),
+         gr.Textbox(lines=4, placeholder="user||bot\nuser2||bot2", label="Conversation History (optional)"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
      ],
      outputs="text",
-     title="Mistral-Small-24B CPU Chat",
-     description="Chatbot using the Mistral-Small-24B-Instruct-2501 model, run locally on CPU. This will be slow.",
-     flagging_mode="never",
+     allow_flagging="never",
+     title="LLM Budaya",
+     description="Chatbot using the HuggingFace Zephyr-7B model"
  )

  if __name__ == "__main__":
-     demo.launch()
-
+     demo.launch()
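
For reference, a minimal sketch of how the new Inference-API-backed chat_fn can be smoke-tested outside the Gradio UI. The sample conversation strings and the 128-token budget are hypothetical, and HF_TOKEN must be set in the environment; this is not part of the commit, only an illustration of the user||assistant history format the function parses.

# Hypothetical smoke test for the committed chat_fn.
# History format: one "user||assistant" pair per line.
reply = chat_fn(
    message="Can you recommend a traditional dish?",
    system_message="You are a friendly Chatbot.",
    history_str="Hi||Hello! How can I help you today?",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
)
print(reply)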