arad1367 commited on
Commit
076ae66
·
verified ·
1 Parent(s): 857cc54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -142
app.py CHANGED
@@ -1,148 +1,66 @@
1
import gradio as gr
import numpy as np
import random

from transformers import AutoModelForCausalLM, AutoTokenizer

# import spaces  # [uncomment to use ZeroGPU]
from diffusers import DiffusionPipeline
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): this repo id is a causal-LM checkpoint, not a diffusion model —
# DiffusionPipeline.from_pretrained will not load it.  Replace with a real
# text-to-image checkpoint (e.g. "stabilityai/sdxl-turbo") before deploying.
model_repo_id = "Qwen/Qwen2.5-3B-Instruct"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

# Prompts shown as clickable examples under the input box.  The original code
# passed `examples` to gr.Examples without ever defining it, which raised
# NameError while the UI was being built.
examples = [
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]


# @spaces.GPU  # [uncomment to use ZeroGPU]
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the diffusion pipeline once and return (PIL image, seed used).

    When `randomize_seed` is true the incoming `seed` is replaced with a
    fresh random one so the UI can display the value actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    generator = torch.Generator().manual_seed(seed)

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    ).images[0]

    return image, seed


css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image Gradio Template")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,  # Replace with defaults that work for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,  # Replace with defaults that work for your model
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=0.0,  # Replace with defaults that work for your model
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=2,  # Replace with defaults that work for your model
                )

        gr.Examples(examples=examples, inputs=[prompt])
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )

if __name__ == "__main__":
    demo.launch()
 
1
# app.py

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Checkpoint served by this Space.
model_name = "Qwen/Qwen2.5-3B-Instruct"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # bf16 weights: smaller footprint, faster inference
    device_map="auto",           # let accelerate pick GPU(s) when available
    trust_remote_code=True       # permit repo-shipped modeling code, if any
)
19
+
20
+ # Define chat function
21
+ def respond(message, history):
22
+ messages = [{"role": "user", "content": message}]
23
+
24
+ # Apply chat template
25
+ text = tokenizer.apply_chat_template(
26
+ messages,
27
+ tokenize=False,
28
+ add_generation_prompt=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
+
31
+ # Tokenize input
32
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
33
+
34
+ # Generate response
35
+ generated_ids = model.generate(
36
+ **model_inputs,
37
+ max_new_tokens=512,
38
+ do_sample=True,
39
+ temperature=0.7,
40
+ top_p=0.9,
41
+ repetition_penalty=1.1
42
+ )
43
+
44
+ # Extract only the new tokens
45
+ generated_ids = generated_ids[0][model_inputs.input_ids.shape[-1]:]
46
+ response = tokenizer.decode(generated_ids, skip_special_tokens=True)
47
+
48
+ return response
49
+
50
# Create Gradio chat interface
import inspect

_chat_kwargs = dict(
    fn=respond,
    title="Qwen2.5-3B Chatbot",
    description="Chat with Qwen2.5-3B-Instruct, a powerful 3-billion-parameter LLM by Alibaba Cloud.",
    examples=[
        "Explain quantum computing in simple terms.",
        "Write a Python function to calculate Fibonacci numbers.",
        "Tell me a joke about AI."
    ],
)
# `retry_btn`/`undo_btn` were removed from gr.ChatInterface in Gradio 5 and
# passing them there raises TypeError; forward them only when the installed
# version still accepts them.
if "retry_btn" in inspect.signature(gr.ChatInterface.__init__).parameters:
    _chat_kwargs["retry_btn"] = None
    _chat_kwargs["undo_btn"] = None

demo = gr.ChatInterface(**_chat_kwargs)

# Launch the app only when run as a script (not on import)
if __name__ == "__main__":
    demo.launch()