Groundlight committed on
Commit
2d40a27
·
1 Parent(s): 687b4a7

UI improvements

Browse files
Files changed (1) hide show
  1. app.py +126 -64
app.py CHANGED
@@ -1,8 +1,8 @@
1
- import spaces
2
  import random
3
  from threading import Thread
4
 
5
  import gradio as gr
 
6
  import torch # Need this for torch.no_grad()
7
  from datasets import load_dataset
8
  from qwen_vl_utils import process_vision_info
@@ -13,12 +13,11 @@ from transformers import (
13
  )
14
  from trl import ModelConfig
15
 
16
- # run with:
17
- # CUDA_VISIBLE_DEVICES=0 uv run gradio demo/demo.py
18
-
19
 
20
  def get_eval_dataset():
21
- full_dataset = load_dataset("sunildkumar/message-decoding-words-and-sequences")["train"]
 
 
22
  full_dataset = full_dataset.shuffle(seed=42)
23
 
24
  # split the dataset with the same seed as used in the training script
@@ -95,7 +94,7 @@ def prepare_model_input(image, mapping, processor, submitted_word):
95
  coded_message = " ".join(coded_message)
96
 
97
  instruction = (
98
- f'Use the decoder in the image to decode this coded message: "{coded_message}". '
99
  "The decoded message will be one or more words. Underscore characters "
100
  '("_") in the coded message should be mapped to a space (" ") when decoding.'
101
  )
@@ -105,8 +104,9 @@ def prepare_model_input(image, mapping, processor, submitted_word):
105
  "While thinking, you must include a section with the decoded characters using <chars></chars> tags. "
106
  "The <chars> section should include the decoded characters in the order they are decoded. It should include the "
107
  "underscore character wherever there is a space in the decoded message. For example, if the coded message is "
108
- "a b c _ d e f, the <chars> section might be <chars> c a t _ d o g </chars>. Once you are done thinking, "
109
- "provide your answer in the <answer> section, e.g. <answer> cat dog </answer>."
 
110
  )
111
  instruction = f"{instruction} {ending}"
112
 
@@ -161,7 +161,7 @@ def encode_word(word, mapping):
161
  """
162
  if not word or not mapping:
163
  return ""
164
-
165
  word = word.lower()
166
  # reverse the decoder to encode the word
167
  encoder = {v: k for k, v in mapping.items()}
@@ -173,25 +173,52 @@ def encode_word(word, mapping):
173
  def validate_and_submit(word, mapping):
174
  # Check if input contains only letters
175
  if not word.replace(" ", "").isalpha():
 
 
 
176
  return (
177
  gr.update(), # word input
178
  gr.update(), # submit button
179
  gr.update(interactive=False), # run button - disable but keep visible
180
- gr.update(visible=False) # encoded word display
 
 
 
 
 
 
 
 
 
181
  )
182
 
183
  word = word.lower()
184
  encoded_word = encode_word(word, mapping)
185
-
186
  # Only enable run button if we have a valid encoded word
187
  has_valid_encoded_word = bool(encoded_word.strip())
188
-
 
 
 
 
 
 
 
 
 
 
 
189
  # Return updates for input, submit button, run button, and encoded word display
190
  return (
191
  gr.update(value=word, interactive=False, label="Submitted Word"),
192
  gr.update(interactive=False), # Disable submit button
193
- gr.update(interactive=has_valid_encoded_word), # Enable run button only if valid, but always visible
194
- gr.update(value=f"Encoded word: {encoded_word}", visible=has_valid_encoded_word) # Show encoded word
 
 
 
 
195
  )
196
 
197
 
@@ -253,56 +280,87 @@ with gr.Blocks() as demo:
253
  # Load resources when the app starts
254
  load_resources()
255
 
256
- gr.Markdown("# Message Decoding Demo")
257
  current_mapping = gr.State()
258
  current_image = gr.State()
259
 
260
  with gr.Row():
261
- # Image display component
262
- image_output = gr.Image(label="Decoder")
263
-
264
- # Button to load new random example
265
- next_button = gr.Button("Generate Random Decoder")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  next_button.click(
267
  fn=show_random_example, outputs=[image_output, current_mapping, current_image]
268
  )
269
 
270
- # Text input for the word
271
- word_input = gr.Textbox(
272
- label="Enter a single word",
273
- placeholder="Enter word here...",
274
- max_lines=1,
275
- show_copy_button=False,
276
- )
277
-
278
- # Add encoded word display
279
- encoded_word_display = gr.Textbox(
280
- label="Encoded Word",
281
- interactive=False,
282
- visible=False,
283
- max_lines=1,
284
- show_copy_button=True,
285
- )
286
-
287
- # Group submit and run buttons vertically
288
- with gr.Column(): # Use Column instead of Row for vertical layout
289
- submit_button = gr.Button("Submit Word")
290
- run_button = gr.Button("Run Model", interactive=False) # Initialize as visible but disabled
291
-
292
- # Output area for model response
293
- model_output = gr.Textbox(
294
- label="Model Output",
295
- interactive=False,
296
- visible=False,
297
- max_lines=10,
298
- container=True,
299
- show_copy_button=True,
300
- )
301
-
302
- # Add loading indicator
303
- with gr.Row():
304
- loading_indicator = gr.HTML(visible=False)
305
-
306
  # Validate word on submit and update interface
307
  submit_button.click(
308
  fn=validate_and_submit,
@@ -310,7 +368,6 @@ with gr.Blocks() as demo:
310
  outputs=[word_input, submit_button, run_button, encoded_word_display],
311
  )
312
 
313
- # Run inference when run button is clicked
314
  run_button.click(
315
  fn=prepare_for_inference,
316
  outputs=[model_output, run_button, loading_indicator],
@@ -320,16 +377,21 @@ with gr.Blocks() as demo:
320
  outputs=model_output,
321
  api_name=False,
322
  ).then(
323
- # Reset interface after generation
324
  lambda: (
325
- gr.update(interactive=False), # Disable run button but keep visible
326
- gr.update(visible=False), # Hide loading indicator
327
- gr.update(interactive=True, label="Enter a single word"), # Re-enable word input
328
- gr.update(interactive=True), # Re-enable submit button
329
- gr.update(visible=False), # Hide encoded word display
330
  ),
331
  None,
332
- [run_button, loading_indicator, word_input, submit_button, encoded_word_display],
 
 
 
 
 
 
333
  )
334
 
335
  if __name__ == "__main__":
 
 
1
  import random
2
  from threading import Thread
3
 
4
  import gradio as gr
5
+ import spaces
6
  import torch # Need this for torch.no_grad()
7
  from datasets import load_dataset
8
  from qwen_vl_utils import process_vision_info
 
13
  )
14
  from trl import ModelConfig
15
 
 
 
 
16
 
17
  def get_eval_dataset():
18
+ full_dataset = load_dataset("sunildkumar/message-decoding-words-and-sequences")[
19
+ "train"
20
+ ]
21
  full_dataset = full_dataset.shuffle(seed=42)
22
 
23
  # split the dataset with the same seed as used in the training script
 
94
  coded_message = " ".join(coded_message)
95
 
96
  instruction = (
97
+ "Use the decoder in the image to decode a coded message."
98
  "The decoded message will be one or more words. Underscore characters "
99
  '("_") in the coded message should be mapped to a space (" ") when decoding.'
100
  )
 
104
  "While thinking, you must include a section with the decoded characters using <chars></chars> tags. "
105
  "The <chars> section should include the decoded characters in the order they are decoded. It should include the "
106
  "underscore character wherever there is a space in the decoded message. For example, if the coded message is "
107
+ "a b c _ d e f, the chars section might be <chars> c a t _ d o g </chars>. You can think about the problem for "
108
+ "as long as you'd like. While thinking, you should robustly verify your solution. Once you are done thinking, "
109
+ f"provide your answer in the <answer> section, e.g. <answer> cat dog </answer>. The coded message is: {coded_message}."
110
  )
111
  instruction = f"{instruction} {ending}"
112
 
 
161
  """
162
  if not word or not mapping:
163
  return ""
164
+
165
  word = word.lower()
166
  # reverse the decoder to encode the word
167
  encoder = {v: k for k, v in mapping.items()}
 
173
  def validate_and_submit(word, mapping):
174
  # Check if input contains only letters
175
  if not word.replace(" ", "").isalpha():
176
+ gr.Warning(
177
+ "Invalid input! Please enter only English letters and spaces. No numbers or punctuation allowed."
178
+ )
179
  return (
180
  gr.update(), # word input
181
  gr.update(), # submit button
182
  gr.update(interactive=False), # run button - disable but keep visible
183
+ gr.update(visible=False), # encoded word display
184
+ )
185
+
186
+ if not mapping:
187
+ gr.Warning("Please generate a decoder first")
188
+ return (
189
+ gr.update(), # word input
190
+ gr.update(), # submit button
191
+ gr.update(interactive=False), # run button - disable but keep visible
192
+ gr.update(visible=False), # encoded word display
193
  )
194
 
195
  word = word.lower()
196
  encoded_word = encode_word(word, mapping)
197
+
198
  # Only enable run button if we have a valid encoded word
199
  has_valid_encoded_word = bool(encoded_word.strip())
200
+
201
+ if not has_valid_encoded_word:
202
+ gr.Warning(
203
+ "Invalid input! The word contains characters that cannot be encoded with the current decoder."
204
+ )
205
+ return (
206
+ gr.update(), # word input
207
+ gr.update(), # submit button
208
+ gr.update(interactive=False), # run button - disable but keep visible
209
+ gr.update(visible=False), # encoded word display
210
+ )
211
+
212
  # Return updates for input, submit button, run button, and encoded word display
213
  return (
214
  gr.update(value=word, interactive=False, label="Submitted Word"),
215
  gr.update(interactive=False), # Disable submit button
216
+ gr.update(
217
+ interactive=has_valid_encoded_word
218
+ ), # Enable run button only if valid, but always visible
219
+ gr.update(
220
+ value=f"Encoded message: {encoded_word}", visible=has_valid_encoded_word
221
+ ), # Show encoded message
222
  )
223
 
224
 
 
280
  # Load resources when the app starts
281
  load_resources()
282
 
283
+ gr.Markdown("# Groundlight's VLM Reasoning Model - Cryptogram Decoder")
284
  current_mapping = gr.State()
285
  current_image = gr.State()
286
 
287
  with gr.Row():
288
+ # Left column - Inputs
289
+ with gr.Column(scale=1):
290
+ # Instructions at the top
291
+ instructions = """
292
+ Welcome! This demos Groundlight's VLM reasoning model trained to decode cryptograms. To use the model:
293
+ 1. Generate a decoder image. This will be provided to the model to decode your message.
294
+ 2. Enter your message in the text box below. Your message should only contain English letters and spaces.
295
+
296
+ Some examples:
297
+ • hello world
298
+ • i love reinforcement learning
299
+ • groundlight makes computer vision easy
300
+
301
+ 3. Encode your message. Just click the "Encode Message" button, and we'll handle encoding for you.
302
+ 4. Run the model. You will see the model's reasoning process and the decoded message in <answer></answer> tags.
303
+ """
304
+
305
+ gr.Textbox(
306
+ value=instructions,
307
+ label="Instructions",
308
+ interactive=False,
309
+ lines=4,
310
+ )
311
+
312
+ # Image display component
313
+ image_output = gr.Image(label="Decoder")
314
+
315
+ # Button to load new random example
316
+ next_button = gr.Button("Generate Random Decoder")
317
+
318
+ # Text input for the word
319
+ word_input = gr.Textbox(
320
+ label="Enter your message",
321
+ placeholder="Enter message here...",
322
+ max_lines=1,
323
+ show_copy_button=False,
324
+ )
325
+ gr.Markdown(
326
+ "Note: Only English letters and spaces are allowed. Please do not enter any numbers or punctuation."
327
+ )
328
+
329
+ # Add encoded word display
330
+ encoded_word_display = gr.Textbox(
331
+ label="Encoded Message",
332
+ interactive=False,
333
+ visible=False,
334
+ max_lines=1,
335
+ show_copy_button=True,
336
+ )
337
+
338
+ # Group submit and run buttons vertically
339
+ with gr.Column():
340
+ submit_button = gr.Button("Encode Message")
341
+ run_button = gr.Button("Run Model", interactive=False)
342
+
343
+ # Right column - Outputs
344
+ with gr.Column(scale=1):
345
+ # Output area for model response
346
+ model_output = gr.Textbox(
347
+ label="Model Output",
348
+ interactive=False,
349
+ lines=40,
350
+ max_lines=80,
351
+ container=True,
352
+ show_copy_button=True,
353
+ visible=True,
354
+ )
355
+
356
+ # Add loading indicator
357
+ loading_indicator = gr.HTML(visible=False)
358
+
359
+ # Event handlers
360
  next_button.click(
361
  fn=show_random_example, outputs=[image_output, current_mapping, current_image]
362
  )
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  # Validate word on submit and update interface
365
  submit_button.click(
366
  fn=validate_and_submit,
 
368
  outputs=[word_input, submit_button, run_button, encoded_word_display],
369
  )
370
 
 
371
  run_button.click(
372
  fn=prepare_for_inference,
373
  outputs=[model_output, run_button, loading_indicator],
 
377
  outputs=model_output,
378
  api_name=False,
379
  ).then(
 
380
  lambda: (
381
+ gr.update(interactive=False),
382
+ gr.update(visible=False),
383
+ gr.update(interactive=True, label="Enter your message"),
384
+ gr.update(interactive=True),
385
+ gr.update(visible=False),
386
  ),
387
  None,
388
+ [
389
+ run_button,
390
+ loading_indicator,
391
+ word_input,
392
+ submit_button,
393
+ encoded_word_display,
394
+ ],
395
  )
396
 
397
  if __name__ == "__main__":