HAMMALE committed
Commit bb8930c · verified · 1 Parent(s): 34b49b6

Update app.py

Files changed (1):
  1. app.py +164 -56
app.py CHANGED
@@ -1,64 +1,172 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
  if __name__ == "__main__":
-     demo.launch()
+ # app.py - SmallLM Gradio Demo
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # Global variables for model and tokenizer
+ model = None
+ tokenizer = None
+
+ def load_model():
+     """Load the SmallLM model and tokenizer"""
+     global model, tokenizer
+
+     try:
+         print("Loading SmallLM model...")
+         model_name = "XsoraS/SmallLM"
+
+         # Load tokenizer
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+         # Add padding token if it doesn't exist
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+
+         # Load model (half precision on GPU, full precision on CPU)
+         model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+             device_map="auto" if torch.cuda.is_available() else None,
+             trust_remote_code=True
+         )
+
+         print("Model loaded successfully!")
+         return "Model loaded successfully!"
+
+     except Exception as e:
+         error_msg = f"Error loading model: {str(e)}"
+         print(error_msg)
+         return error_msg
+
+ def generate_text(prompt, max_length=100, temperature=0.7, top_p=0.9):
+     """Generate text using the loaded model"""
+     global model, tokenizer
+
+     if model is None or tokenizer is None:
+         return "Please load the model first!"
+
+     try:
+         # Tokenize input
+         inputs = tokenizer.encode(prompt, return_tensors="pt")
+
+         # Move to same device as model
+         if torch.cuda.is_available():
+             inputs = inputs.to(model.device)
+
+         # Generate
+         with torch.no_grad():
+             outputs = model.generate(
+                 inputs,
+                 max_length=max_length,
+                 temperature=temperature,
+                 top_p=top_p,
+                 do_sample=True,
+                 pad_token_id=tokenizer.eos_token_id,
+                 num_return_sequences=1
+             )
+
+         # Decode only the newly generated tokens; slicing the decoded
+         # string by len(prompt) can mis-cut when the tokenizer
+         # re-normalizes whitespace or special characters on decode
+         new_tokens = outputs[0][inputs.shape[-1]:]
+         return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+
+     except Exception as e:
+         return f"Error generating text: {str(e)}"
+
+ def clear_text():
+     """Clear the input and output"""
+     return "", ""
+
+ # Create Gradio interface
+ with gr.Blocks(title="SmallLM Demo", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🤖 SmallLM Inference Demo")
+     gr.Markdown("Simple demo for XsoraS/SmallLM text generation")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             load_btn = gr.Button("🔄 Load Model", variant="primary")
+             status = gr.Textbox(
+                 label="Status",
+                 value="Click 'Load Model' to start",
+                 interactive=False
+             )
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             prompt_input = gr.Textbox(
+                 label="Enter your prompt:",
+                 placeholder="Once upon a time...",
+                 lines=3
+             )
+
+             with gr.Row():
+                 max_length = gr.Slider(
+                     label="Max Length",
+                     minimum=10,
+                     maximum=500,
+                     value=100,
+                     step=10
+                 )
+                 temperature = gr.Slider(
+                     label="Temperature",
+                     minimum=0.1,
+                     maximum=2.0,
+                     value=0.7,
+                     step=0.1
+                 )
+                 top_p = gr.Slider(
+                     label="Top P",
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.9,
+                     step=0.05
+                 )
+
+             with gr.Row():
+                 generate_btn = gr.Button("✨ Generate", variant="primary")
+                 clear_btn = gr.Button("🗑️ Clear")
+
+         with gr.Column(scale=2):
+             output = gr.Textbox(
+                 label="Generated Text:",
+                 lines=10,
+                 interactive=False
+             )
+
+     # Event handlers
+     load_btn.click(
+         fn=load_model,
+         outputs=status
+     )
+
+     generate_btn.click(
+         fn=generate_text,
+         inputs=[prompt_input, max_length, temperature, top_p],
+         outputs=output
+     )
+
+     clear_btn.click(
+         fn=clear_text,
+         outputs=[prompt_input, output]
+     )
+
+     # Examples
+     gr.Examples(
+         examples=[
+             ["The future of artificial intelligence is"],
+             ["In a world where technology and nature coexist"],
+             ["Write a short story about a robot who"],
+             ["Explain quantum computing in simple terms:"],
+         ],
+         inputs=prompt_input
+     )
+
  if __name__ == "__main__":
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=True
+     )
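
For reference, a minimal local smoke test of the updated app might look like the hypothetical sketch below (not part of the commit). It assumes the new app.py is importable from the working directory and that the XsoraS/SmallLM weights can be downloaded:

# smoke_test.py - hypothetical sanity check for the new inference path
from app import load_model, generate_text

print(load_model())  # downloads XsoraS/SmallLM and returns a status string
print(generate_text("Once upon a time", max_length=60, temperature=0.7, top_p=0.9))

Note that importing app builds the Blocks UI at import time but does not start a server, since demo.launch() is guarded by the __main__ check.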