Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -710,6 +710,9 @@
|
|
| 710 |
# -*- coding: utf-8 -*-
|
| 711 |
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
| 712 |
|
|
|
|
|
|
|
|
|
|
| 713 |
import os
|
| 714 |
import io
|
| 715 |
import tempfile
|
|
@@ -718,7 +721,7 @@ from typing import List
|
|
| 718 |
|
| 719 |
import numpy as np
|
| 720 |
import cv2
|
| 721 |
-
from PIL import Image
|
| 722 |
import pypdfium2
|
| 723 |
import pytesseract
|
| 724 |
|
|
@@ -742,7 +745,6 @@ os.environ.setdefault("HF_HOME", "/tmp/hf_home")
|
|
| 742 |
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_home")
|
| 743 |
# جلوگیری از sdpa backend که با Surya ordering ممکن است ناسازگار باشد
|
| 744 |
os.environ.setdefault("TRANSFORMERS_ATTENTION_BACKEND", "eager")
|
| 745 |
-
|
| 746 |
# مسیرهای استاتیک/کش به /tmp برای جلوگیری از Permission denied
|
| 747 |
os.environ.setdefault("STREAMLIT_STATIC_DIR", "/tmp/streamlit_static")
|
| 748 |
os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
|
|
@@ -800,8 +802,9 @@ from surya.model.ordering.processor import load_processor as load_order_processo
|
|
| 800 |
from surya.ordering import batch_ordering
|
| 801 |
|
| 802 |
from surya.ocr import run_ocr
|
| 803 |
-
|
| 804 |
-
from surya.postprocessing.
|
|
|
|
| 805 |
from surya.languages import CODE_TO_LANGUAGE
|
| 806 |
from surya.input.langs import replace_lang_with_code
|
| 807 |
from surya.schema import OCRResult, TextDetectionResult, LayoutResult, OrderResult
|
|
@@ -855,6 +858,26 @@ def page_count(pdf_file) -> int:
|
|
| 855 |
doc = open_pdf(pdf_file)
|
| 856 |
return len(doc)
|
| 857 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
# ===================== Streamlit UI =====================
|
| 859 |
st.set_page_config(page_title="TRUST OCR DEMO", layout="wide")
|
| 860 |
st.markdown("# TRUST OCR DEMO")
|
|
@@ -891,7 +914,6 @@ def load_layout_cached():
|
|
| 891 |
|
| 892 |
@st.cache_resource(show_spinner=True)
def load_order_cached():
    """Load the Surya reading-order model together with its processor.

    The function is decorated with ``st.cache_resource`` (spinner enabled).

    Returns:
        A ``(model, processor)`` tuple, both loaded from the
        ``vikp/surya_order`` checkpoint.
    """
    # If the sdpa error still shows up, this can be temporarily disabled.
    checkpoint = "vikp/surya_order"
    order_net = load_order_model(checkpoint=checkpoint)
    order_proc = load_order_processor(checkpoint=checkpoint)
    return order_net, order_proc
|
| 896 |
|
| 897 |
# ---------- PERSONAL RECOGNITION ONLY ----------
|
|
@@ -902,7 +924,7 @@ PERSONAL_HF_REPO = os.environ.get("TRUSTOCR_REPO") # ریپوی مدل HF
|
|
| 902 |
def load_rec_personal():
|
| 903 |
"""
|
| 904 |
اولویت با مدل شخصی است. اگر تنظیم نبود، به یک مدل عمومی Surya فالبک میشود.
|
| 905 |
-
اگر
|
| 906 |
"""
|
| 907 |
if PERSONAL_MODEL_PATH and os.path.isdir(PERSONAL_MODEL_PATH):
|
| 908 |
m = load_rec_model(checkpoint=PERSONAL_MODEL_PATH)
|
|
@@ -953,14 +975,15 @@ def _apply_auto_rotate(pil_img: Image.Image) -> Image.Image:
|
|
| 953 |
def text_detection(pil_img: Image.Image):
|
| 954 |
pred: TextDetectionResult = batch_text_detection([pil_img], det_model, det_processor)[0]
|
| 955 |
polygons = [p.polygon for p in pred.bboxes]
|
| 956 |
-
det_img =
|
| 957 |
return det_img, pred
|
| 958 |
|
| 959 |
def layout_detection(pil_img: Image.Image):
|
| 960 |
_, det_pred = text_detection(pil_img)
|
| 961 |
pred: LayoutResult = batch_layout_detection([pil_img], layout_model, layout_processor, [det_pred])[0]
|
| 962 |
-
polygons = [p.polygon for p in pred.bboxes]
|
| 963 |
-
|
|
|
|
| 964 |
return layout_img, pred
|
| 965 |
|
| 966 |
def order_detection(pil_img: Image.Image):
|
|
@@ -969,20 +992,21 @@ def order_detection(pil_img: Image.Image):
|
|
| 969 |
_, layout_pred = layout_detection(pil_img)
|
| 970 |
bboxes = [l.bbox for l in layout_pred.bboxes]
|
| 971 |
pred: OrderResult = batch_ordering([pil_img], [bboxes], order_model, order_processor)[0]
|
| 972 |
-
polys = [l.polygon for l in pred.bboxes]
|
| 973 |
-
|
|
|
|
| 974 |
return order_img, pred
|
| 975 |
|
| 976 |
def ocr_page(pil_img: Image.Image, langs: List[str]):
|
| 977 |
langs = list(langs) if langs else ["Persian"]
|
| 978 |
replace_lang_with_code(langs)
|
|
|
|
| 979 |
if det_model and det_processor and rec_model and rec_processor:
|
| 980 |
img_pred: OCRResult = run_ocr([pil_img], [langs], det_model, det_processor, rec_model, rec_processor)[0]
|
| 981 |
else:
|
| 982 |
img_pred: OCRResult = run_ocr([pil_img], [langs], rec_model=rec_model, rec_processor=rec_processor)[0]
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
return rec_img, img_pred
|
| 986 |
|
| 987 |
# ===================== Input Handling =====================
|
| 988 |
|
|
@@ -997,8 +1021,7 @@ else:
|
|
| 997 |
bytes_data = in_file.getvalue()
|
| 998 |
temp_dir = os.path.join(tempfile.gettempdir(), "trustocr_temp"); os.makedirs(temp_dir, exist_ok=True)
|
| 999 |
file_path = os.path.join(temp_dir, in_file.name)
|
| 1000 |
-
with open(file_path, "wb") as f:
|
| 1001 |
-
f.write(bytes_data)
|
| 1002 |
out_file = os.path.splitext(file_path)[0] + "-1.JPG"
|
| 1003 |
try:
|
| 1004 |
if auto_border:
|
|
@@ -1041,8 +1064,10 @@ with col1:
|
|
| 1041 |
try:
|
| 1042 |
rec_img, ocr_pred = ocr_page(pil_image, languages)
|
| 1043 |
text_tab, json_tab = st.tabs(["متن صفحه | Page Text", "JSON"])
|
| 1044 |
-
with text_tab:
|
| 1045 |
-
|
|
|
|
|
|
|
| 1046 |
except Exception as e:
|
| 1047 |
st.error(f"خطا در بازشناسی متن (Recognition): {e}")
|
| 1048 |
|
|
|
|
| 710 |
# -*- coding: utf-8 -*-
|
| 711 |
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
| 712 |
|
| 713 |
+
# -*- coding: utf-8 -*-
|
| 714 |
+
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
| 715 |
+
|
| 716 |
import os
|
| 717 |
import io
|
| 718 |
import tempfile
|
|
|
|
| 721 |
|
| 722 |
import numpy as np
|
| 723 |
import cv2
|
| 724 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 725 |
import pypdfium2
|
| 726 |
import pytesseract
|
| 727 |
|
|
|
|
| 745 |
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_home")
|
| 746 |
# جلوگیری از sdpa backend که با Surya ordering ممکن است ناسازگار باشد
|
| 747 |
os.environ.setdefault("TRANSFORMERS_ATTENTION_BACKEND", "eager")
|
|
|
|
| 748 |
# مسیرهای استاتیک/کش به /tmp برای جلوگیری از Permission denied
|
| 749 |
os.environ.setdefault("STREAMLIT_STATIC_DIR", "/tmp/streamlit_static")
|
| 750 |
os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
|
|
|
|
| 802 |
from surya.ordering import batch_ordering
|
| 803 |
|
| 804 |
from surya.ocr import run_ocr
|
| 805 |
+
# مهم: دیگر از surya.postprocessing.* استفاده نمیکنیم تا چیزی در site-packages ننویسد
|
| 806 |
+
# from surya.postprocessing.heatmap import draw_polys_on_image
|
| 807 |
+
# from surya.postprocessing.text import draw_text_on_image
|
| 808 |
from surya.languages import CODE_TO_LANGUAGE
|
| 809 |
from surya.input.langs import replace_lang_with_code
|
| 810 |
from surya.schema import OCRResult, TextDetectionResult, LayoutResult, OrderResult
|
|
|
|
| 858 |
doc = open_pdf(pdf_file)
|
| 859 |
return len(doc)
|
| 860 |
|
| 861 |
+
# ----- رسم سبک خودمان (بدون وابستگی به surya.postprocessing) -----
|
| 862 |
+
def _norm_poly(polygon) -> list[tuple[int, int]]:
|
| 863 |
+
arr = np.array(polygon).reshape(-1, 2)
|
| 864 |
+
return [(int(x), int(y)) for x, y in arr]
|
| 865 |
+
|
| 866 |
+
def draw_polys_simple(pil_img: Image.Image, polygons, labels=None) -> Image.Image:
    """Return a copy of ``pil_img`` with polygon outlines and optional labels.

    Drawing is done with Pillow only and nothing is written to disk.

    Args:
        pil_img: Source image. It is never modified; drawing happens on a copy.
        polygons: Iterable of polygons, each in a form ``_norm_poly`` accepts.
        labels: Optional sequence of labels. ``labels[i]`` is rendered (via
            ``str``) just above the first vertex of ``polygons[i]``; polygons
            without a matching label are drawn unlabeled.

    Returns:
        A new image with green outlines and red label text. Inputs that are
        neither ``RGB`` nor ``RGBA`` are returned converted to ``RGB``.
    """
    # RGB colour tuples are only valid ink for multi-band images, so
    # single-band / palette inputs are converted first. convert() already
    # returns a new image, so the caller's image stays untouched either way.
    if pil_img.mode in ("RGB", "RGBA"):
        img = pil_img.copy()
    else:
        img = pil_img.convert("RGB")
    draw = ImageDraw.Draw(img)
    # The font is only needed when there is label text to render.
    font = ImageFont.load_default() if labels is not None else None
    for i, poly in enumerate(polygons):
        pts = _norm_poly(poly)
        if len(pts) < 2:
            # Pillow cannot draw a polygon from fewer than two points, and an
            # empty polygon would also fail on pts[0] below.
            continue
        # Polygon outline.
        draw.polygon(pts, outline=(0, 255, 0))
        # Optional label, placed 12 px above the first vertex and clamped to
        # the top edge of the image.
        if labels is not None and i < len(labels):
            x, y = pts[0]
            draw.text((x, max(0, y - 12)), str(labels[i]), fill=(255, 0, 0), font=font)
    return img
|
| 880 |
+
|
| 881 |
# ===================== Streamlit UI =====================
|
| 882 |
st.set_page_config(page_title="TRUST OCR DEMO", layout="wide")
|
| 883 |
st.markdown("# TRUST OCR DEMO")
|
|
|
|
| 914 |
|
| 915 |
@st.cache_resource(show_spinner=True)
def load_order_cached():
    """Load the Surya reading-order model together with its processor.

    The function is decorated with ``st.cache_resource`` (spinner enabled).

    Returns:
        A ``(model, processor)`` tuple, both loaded from the
        ``vikp/surya_order`` checkpoint.
    """
    checkpoint = "vikp/surya_order"
    order_net = load_order_model(checkpoint=checkpoint)
    order_proc = load_order_processor(checkpoint=checkpoint)
    return order_net, order_proc
|
| 918 |
|
| 919 |
# ---------- PERSONAL RECOGNITION ONLY ----------
|
|
|
|
| 924 |
def load_rec_personal():
|
| 925 |
"""
|
| 926 |
اولویت با مدل شخصی است. اگر تنظیم نبود، به یک مدل عمومی Surya فالبک میشود.
|
| 927 |
+
اگر فالبک نمیخواهی، بخش آخر را حذف کن و بهجایش RuntimeError بده.
|
| 928 |
"""
|
| 929 |
if PERSONAL_MODEL_PATH and os.path.isdir(PERSONAL_MODEL_PATH):
|
| 930 |
m = load_rec_model(checkpoint=PERSONAL_MODEL_PATH)
|
|
|
|
| 975 |
def text_detection(pil_img: Image.Image):
    """Run text detection on a single page image.

    Args:
        pil_img: Page image passed to ``batch_text_detection`` as a
            one-element batch.

    Returns:
        ``(det_img, pred)``: a copy of the page with every detected polygon
        outlined by ``draw_polys_simple``, and the ``TextDetectionResult``
        for that page.
    """
    batch = batch_text_detection([pil_img], det_model, det_processor)
    pred: TextDetectionResult = batch[0]
    outlines = [box.polygon for box in pred.bboxes]
    # Lightweight in-file renderer (no surya.postprocessing dependency).
    return draw_polys_simple(pil_img, outlines), pred
|
| 980 |
|
| 981 |
def layout_detection(pil_img: Image.Image):
    """Run layout analysis on a single page image.

    Text detection is run first because ``batch_layout_detection`` takes the
    detection result as an input.

    Args:
        pil_img: Page image, processed as a one-element batch.

    Returns:
        ``(layout_img, pred)``: a copy of the page with every layout region
        outlined and tagged with its label, and the ``LayoutResult`` for
        that page.
    """
    # Call the detector directly rather than text_detection(): that helper
    # also renders an overlay image, which would simply be discarded here.
    det_pred: TextDetectionResult = batch_text_detection([pil_img], det_model, det_processor)[0]
    pred: LayoutResult = batch_layout_detection([pil_img], layout_model, layout_processor, [det_pred])[0]
    polygons = [p.polygon for p in pred.bboxes]
    labels = [p.label for p in pred.bboxes]
    # Lightweight in-file renderer (no surya.postprocessing dependency).
    layout_img = draw_polys_simple(pil_img, polygons, labels=labels)
    return layout_img, pred
|
| 988 |
|
| 989 |
def order_detection(pil_img: Image.Image):
|
|
|
|
| 992 |
_, layout_pred = layout_detection(pil_img)
|
| 993 |
bboxes = [l.bbox for l in layout_pred.bboxes]
|
| 994 |
pred: OrderResult = batch_ordering([pil_img], [bboxes], order_model, order_processor)[0]
|
| 995 |
+
polys = [l.polygon for l in pred.bboxes]
|
| 996 |
+
positions = [str(l.position) for l in pred.bboxes]
|
| 997 |
+
order_img = draw_polys_simple(pil_img, polys, labels=positions) # ← نسخه سبک خودمان
|
| 998 |
return order_img, pred
|
| 999 |
|
| 1000 |
def ocr_page(pil_img: Image.Image, langs: List[str]):
    """Run OCR on a single page image.

    Args:
        pil_img: Page image, processed as a one-element batch.
        langs: Languages to recognise. When empty or ``None`` it defaults to
            ``["Persian"]``. The caller's list is copied before being handed
            to ``replace_lang_with_code``.

    Returns:
        ``(None, img_pred)``: the first slot is always ``None`` because no
        rendered text image is produced (this avoids any font dependency);
        ``img_pred`` is the ``OCRResult`` for the page.
    """
    selected = ["Persian"] if not langs else list(langs)
    # Return value is ignored; presumably rewrites the list in place
    # (language names -> codes) — confirm against surya.input.langs.
    replace_lang_with_code(selected)
    if det_model and det_processor and rec_model and rec_processor:
        batch = run_ocr([pil_img], [selected], det_model, det_processor, rec_model, rec_processor)
    else:
        # Detection components missing: only the recognition pair is passed.
        batch = run_ocr([pil_img], [selected], rec_model=rec_model, rec_processor=rec_processor)
    img_pred: OCRResult = batch[0]
    # Only the text is shown in the UI, so no overlay image is built.
    return None, img_pred
|
|
|
|
| 1010 |
|
| 1011 |
# ===================== Input Handling =====================
|
| 1012 |
|
|
|
|
| 1021 |
bytes_data = in_file.getvalue()
|
| 1022 |
temp_dir = os.path.join(tempfile.gettempdir(), "trustocr_temp"); os.makedirs(temp_dir, exist_ok=True)
|
| 1023 |
file_path = os.path.join(temp_dir, in_file.name)
|
| 1024 |
+
with open(file_path, "wb") as f: f.write(bytes_data)
|
|
|
|
| 1025 |
out_file = os.path.splitext(file_path)[0] + "-1.JPG"
|
| 1026 |
try:
|
| 1027 |
if auto_border:
|
|
|
|
| 1064 |
try:
|
| 1065 |
rec_img, ocr_pred = ocr_page(pil_image, languages)
|
| 1066 |
text_tab, json_tab = st.tabs(["متن صفحه | Page Text", "JSON"])
|
| 1067 |
+
with text_tab:
|
| 1068 |
+
st.text("\n".join([p.text for p in ocr_pred.text_lines]))
|
| 1069 |
+
with json_tab:
|
| 1070 |
+
st.json(ocr_pred.model_dump(), expanded=False)
|
| 1071 |
except Exception as e:
|
| 1072 |
st.error(f"خطا در بازشناسی متن (Recognition): {e}")
|
| 1073 |
|