Spaces: Running on Zero
Commit · 864e5c4
1 Parent(s): c2a6750
Refactor OCR processing by introducing a GPU-accelerated predict function and updating the run_hf_ocr method to utilize it
app.py CHANGED
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
 import os
 import torch
 from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
-
+import spaces
 # --- Global Model and Processor Initialization ---
 # Load the OCR model and processor once when the app starts
 try:

@@ -85,11 +85,7 @@ def run_hf_ocr(image_path):
         # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
         # This suggests the pipeline is returning a conversational style output.
         # We will try to call the pipeline with the image and prompt directly.
-        ocr_results = HF_PIPE(
-            pil_image,
-            prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
-            # The pipeline should handle formatting this into messages if needed by the model.
-        )
+        ocr_results = predict(pil_image)
 
         # Parse the output based on the user's example structure
         if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:

@@ -129,6 +125,15 @@ def run_hf_ocr(image_path):
     except Exception as e:
         print(f"Error during Hugging Face OCR: {e}")
         return f"Error during Hugging Face OCR: {str(e)}"
+@spaces.GPU
+def predict(pil_image):
+    ocr_results = HF_PIPE(
+        pil_image,
+        prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
+        # The pipeline should handle formatting this into messages if needed by the model.
+    )
+
+    return ocr_results
 
 # --- Gradio Interface Function ---
 
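Note on the change: this Space runs on ZeroGPU ("Running on Zero" above), where a GPU is attached only while a function decorated with @spaces.GPU is executing. Moving the HF_PIPE(...) call out of run_hf_ocr into a top-level decorated predict function is what makes the OCR step GPU-accelerated. A minimal sketch of the pattern follows; the task string and checkpoint are placeholders, since the Space's real HF_PIPE is built in startup code not shown in this diff:

import spaces
from transformers import pipeline

# Built once at import time. On ZeroGPU, the spaces package defers real CUDA
# initialization, so declaring a CUDA device at startup is the documented pattern.
HF_PIPE = pipeline(
    "image-to-text",                 # assumed task string; not shown in this diff
    model="placeholder/ocr-model",   # hypothetical checkpoint, not the Space's model
    device="cuda",
)

@spaces.GPU  # a GPU is attached only while this function runs
def predict(pil_image):
    # Same call shape as the diff: a PIL image plus a plain-text OCR prompt.
    return HF_PIPE(
        pil_image,
        prompt="Return the plain text representation of this document as if you were reading it naturally.\n",
    )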
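The comments kept at old lines 85-87 record the output shape run_hf_ocr expects back from the pipeline: a list whose first element carries a conversational generated_text (a list of role/content messages). Purely as an illustration of unpacking that shape (extract_text is a hypothetical helper, not code from this Space):

def extract_text(ocr_results):
    # Expected shape, per the diff's comments:
    # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
    if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:
        generated = ocr_results[0]['generated_text']
        if isinstance(generated, str):
            # Some pipelines return the text directly.
            return generated
        if isinstance(generated, list):
            # Conversational form: take the last assistant message.
            for message in reversed(generated):
                if isinstance(message, dict) and message.get('role') == 'assistant':
                    return message.get('content', '')
    return None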