Spaces:

Akshayram1
/

smol_vlm_ocr

Running

Akshayram1 committed on Dec 17, 2024

Commit

5037d5c

verified ·

1 Parent(s): cea10a0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,26 +4,28 @@ from PIL import Image
 import torch
 # Load model and processor
-@st.cache_resource  # Cache model to avoid reloading
 def load_model():
     processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
     model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
     return processor, model
-# Extract text from image using SmolVLM
 def extract_text(image, processor, model):
     # Preprocess image
-    inputs = processor(images=image, text="What is the text in this image? extract all data in JSON format", return_tensors="pt")
     with torch.no_grad():
         outputs = model.generate(**inputs)
     result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
     return result
 # Streamlit UI
 def main():
-    st.title("🖼️ OCR App using SmolVLM")
     st.write("Upload an image, and I will extract the text for you!")
     # Load the model and processor
@@ -39,11 +41,12 @@ def main():
         # Extract text
         with st.spinner("Extracting text..."):
-            extracted_text = extract_text(image, processor, model)
-        # Display result
-        st.subheader("📝 Extracted Text:")
-        st.write(extracted_text)
 if __name__ == "__main__":
     main()

 import torch
 # Load model and processor
+@st.cache_resource
 def load_model():
     processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
     model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
     return processor, model
+# Extract text from image
 def extract_text(image, processor, model):
     # Preprocess image
+    inputs = processor(images=image, return_tensors="pt")
+    # Perform generation
     with torch.no_grad():
         outputs = model.generate(**inputs)
+    # Decode outputs
     result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
     return result
 # Streamlit UI
 def main():
+    st.title("🖼️ OCR App using SmolVLM-Instruct")
     st.write("Upload an image, and I will extract the text for you!")
     # Load the model and processor
         # Extract text
         with st.spinner("Extracting text..."):
+            try:
+                extracted_text = extract_text(image, processor, model)
+                st.subheader("📝 Extracted Text:")
+                st.write(extracted_text)
+            except Exception as e:
+                st.error(f"An error occurred: {str(e)}")
 if __name__ == "__main__":
     main()