TorchTransformers-CV-SFT

Sleeping

App Files Community

awacke1 committed on Mar 21

Commit

d0e6378

verified ·

1 Parent(s): 415515c

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -13

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import os
-import glob  # Added missing import
 import streamlit as st
 from PIL import Image
 import torch
@@ -163,49 +164,57 @@ with tab2:
     st.header("Test OCR 🔍")
     captured_images = get_gallery_files(["png"])
     if captured_images:
-        selected_image = st.selectbox("Select Image", captured_images)
         image = Image.open(selected_image)
         st.image(image, caption="Input Image", use_container_width=True)
-        ocr_model = st.selectbox("Select OCR Model", ["Qwen2-VL-OCR-2B", "GOT-OCR2_0"])
-        prompt = st.text_area("Prompt", "Extract text from the image")
-        if st.button("Run OCR 🚀"):
             if ocr_model == "Qwen2-VL-OCR-2B":
                 processor, model = load_ocr_qwen2vl()
-                inputs = processor(text=[prompt], images=[image], return_tensors="pt").to("cpu")
                 outputs = model.generate(**inputs, max_new_tokens=1024)
-                text = processor.decode(outputs[0], skip_special_tokens=True)
             else:  # GOT-OCR2_0
                 tokenizer, model = load_ocr_got()
                 with open(selected_image, "rb") as f:
                     img_bytes = f.read()
                 img = Image.open(BytesIO(img_bytes))
                 text = model.chat(tokenizer, img, ocr_type='ocr')
-            st.text_area("OCR Result", text, height=200)
 with tab3:
     st.header("Test Image Gen 🎨")
     captured_images = get_gallery_files(["png"])
     if captured_images:
-        selected_image = st.selectbox("Select Image", captured_images)
         image = Image.open(selected_image)
         st.image(image, caption="Reference Image", use_container_width=True)
-        prompt = st.text_area("Prompt", "Generate a similar superhero image")
-        if st.button("Run Image Gen 🚀"):
             pipeline = load_image_gen()
             gen_image = pipeline(prompt, num_inference_steps=50).images[0]
             st.image(gen_image, caption="Generated Image", use_container_width=True)
 with tab4:
     st.header("Test Line Drawings ✏️")
     captured_images = get_gallery_files(["png"])
     if captured_images:
-        selected_image = st.selectbox("Select Image", captured_images)
         image = Image.open(selected_image)
         st.image(image, caption="Input Image", use_container_width=True)
-        if st.button("Run Line Drawing 🚀"):
             edge_fn = load_line_drawer()
             line_drawing = edge_fn(image)
             st.image(line_drawing, caption="Line Drawing", use_container_width=True)
 # Initial Gallery Update
 update_gallery()

 #!/usr/bin/env python3
 import os
+import glob
+import time  # Added missing import
 import streamlit as st
 from PIL import Image
 import torch
     st.header("Test OCR 🔍")
     captured_images = get_gallery_files(["png"])
     if captured_images:
+        selected_image = st.selectbox("Select Image", captured_images, key="ocr_select")
         image = Image.open(selected_image)
         st.image(image, caption="Input Image", use_container_width=True)
+        ocr_model = st.selectbox("Select OCR Model", ["Qwen2-VL-OCR-2B", "GOT-OCR2_0"], key="ocr_model_select")
+        prompt = st.text_area("Prompt", "Extract text from the image", key="ocr_prompt")
+        if st.button("Run OCR 🚀", key="ocr_run"):
             if ocr_model == "Qwen2-VL-OCR-2B":
                 processor, model = load_ocr_qwen2vl()
+                # Prepare inputs correctly for Qwen2-VL
+                messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]
+                inputs = processor(messages, return_tensors="pt").to("cpu")
                 outputs = model.generate(**inputs, max_new_tokens=1024)
+                text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
             else:  # GOT-OCR2_0
                 tokenizer, model = load_ocr_got()
                 with open(selected_image, "rb") as f:
                     img_bytes = f.read()
                 img = Image.open(BytesIO(img_bytes))
                 text = model.chat(tokenizer, img, ocr_type='ocr')
+            st.text_area("OCR Result", text, height=200, key="ocr_result")
+    else:
+        st.warning("No images captured yet. Use Camera Snap first!")
 with tab3:
     st.header("Test Image Gen 🎨")
     captured_images = get_gallery_files(["png"])
     if captured_images:
+        selected_image = st.selectbox("Select Image", captured_images, key="gen_select")
         image = Image.open(selected_image)
         st.image(image, caption="Reference Image", use_container_width=True)
+        prompt = st.text_area("Prompt", "Generate a similar superhero image", key="gen_prompt")
+        if st.button("Run Image Gen 🚀", key="gen_run"):
             pipeline = load_image_gen()
             gen_image = pipeline(prompt, num_inference_steps=50).images[0]
             st.image(gen_image, caption="Generated Image", use_container_width=True)
+    else:
+        st.warning("No images captured yet. Use Camera Snap first!")
 with tab4:
     st.header("Test Line Drawings ✏️")
     captured_images = get_gallery_files(["png"])
     if captured_images:
+        selected_image = st.selectbox("Select Image", captured_images, key="line_select")
         image = Image.open(selected_image)
         st.image(image, caption="Input Image", use_container_width=True)
+        if st.button("Run Line Drawing 🚀", key="line_run"):
             edge_fn = load_line_drawer()
             line_drawing = edge_fn(image)
             st.image(line_drawing, caption="Line Drawing", use_container_width=True)
+    else:
+        st.warning("No images captured yet. Use Camera Snap first!")
 # Initial Gallery Update
 update_gallery()