Spaces: Running on Zero
Commit · 864e5c4
1 Parent(s): c2a6750
Refactor OCR processing by introducing a GPU-accelerated predict function and updating the run_hf_ocr method to utilize it
app.py CHANGED
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
 import os
 import torch
 from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
-
+import spaces
 # --- Global Model and Processor Initialization ---
 # Load the OCR model and processor once when the app starts
 try:

@@ -85,11 +85,7 @@ def run_hf_ocr(image_path):
         # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
         # This suggests the pipeline is returning a conversational style output.
         # We will try to call the pipeline with the image and prompt directly.
-        ocr_results = HF_PIPE(
-            pil_image,
-            prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
-            # The pipeline should handle formatting this into messages if needed by the model.
-        )
+        ocr_results = predict(pil_image)
 
         # Parse the output based on the user's example structure
         if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:

@@ -129,6 +125,15 @@ def run_hf_ocr(image_path):
     except Exception as e:
         print(f"Error during Hugging Face OCR: {e}")
         return f"Error during Hugging Face OCR: {str(e)}"
+@spaces.GPU
+def predict(pil_image):
+    ocr_results = HF_PIPE(
+        pil_image,
+        prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
+        # The pipeline should handle formatting this into messages if needed by the model.
+    )
+
+    return ocr_results
 
 # --- Gradio Interface Function ---
 
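Note on the change: this Space runs on ZeroGPU ("Running on Zero" above), where a GPU is attached only while a function decorated with @spaces.GPU is executing. Moving the HF_PIPE(...) call out of run_hf_ocr into a top-level decorated predict function is what makes the OCR step GPU-accelerated. A minimal sketch of the pattern follows; the task string and checkpoint are placeholders, since the Space's real HF_PIPE is built in startup code not shown in this diff:

import spaces
from transformers import pipeline

# Built once at import time. On ZeroGPU, the spaces package defers real CUDA
# initialization, so declaring a CUDA device at startup is the documented pattern.
HF_PIPE = pipeline(
    "image-to-text",                 # assumed task string; not shown in this diff
    model="placeholder/ocr-model",   # hypothetical checkpoint, not the Space's model
    device="cuda",
)

@spaces.GPU  # a GPU is attached only while this function runs
def predict(pil_image):
    # Same call shape as the diff: a PIL image plus a plain-text OCR prompt.
    return HF_PIPE(
        pil_image,
        prompt="Return the plain text representation of this document as if you were reading it naturally.\n",
    )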
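The comments kept at old lines 85-87 record the output shape run_hf_ocr expects back from the pipeline: a list whose first element carries a conversational generated_text (a list of role/content messages). Purely as an illustration of unpacking that shape (extract_text is a hypothetical helper, not code from this Space):

def extract_text(ocr_results):
    # Expected shape, per the diff's comments:
    # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
    if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:
        generated = ocr_results[0]['generated_text']
        if isinstance(generated, str):
            # Some pipelines return the text directly.
            return generated
        if isinstance(generated, list):
            # Conversational form: take the last assistant message.
            for message in reversed(generated):
                if isinstance(message, dict) and message.get('role') == 'assistant':
                    return message.get('content', '')
    return None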