Update app.py
Browse files
app.py
CHANGED
@@ -710,6 +710,9 @@
|
|
710 |
# -*- coding: utf-8 -*-
|
711 |
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
712 |
|
|
|
|
|
|
|
713 |
import os
|
714 |
import io
|
715 |
import tempfile
|
@@ -718,7 +721,7 @@ from typing import List
|
|
718 |
|
719 |
import numpy as np
|
720 |
import cv2
|
721 |
-
from PIL import Image
|
722 |
import pypdfium2
|
723 |
import pytesseract
|
724 |
|
@@ -742,7 +745,6 @@ os.environ.setdefault("HF_HOME", "/tmp/hf_home")
|
|
742 |
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_home")
|
743 |
# جلوگیری از sdpa backend که با Surya ordering ممکن است ناسازگار باشد
|
744 |
os.environ.setdefault("TRANSFORMERS_ATTENTION_BACKEND", "eager")
|
745 |
-
|
746 |
# مسیرهای استاتیک/کش به /tmp برای جلوگیری از Permission denied
|
747 |
os.environ.setdefault("STREAMLIT_STATIC_DIR", "/tmp/streamlit_static")
|
748 |
os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
|
@@ -800,8 +802,9 @@ from surya.model.ordering.processor import load_processor as load_order_processo
|
|
800 |
from surya.ordering import batch_ordering
|
801 |
|
802 |
from surya.ocr import run_ocr
|
803 |
-
|
804 |
-
from surya.postprocessing.
|
|
|
805 |
from surya.languages import CODE_TO_LANGUAGE
|
806 |
from surya.input.langs import replace_lang_with_code
|
807 |
from surya.schema import OCRResult, TextDetectionResult, LayoutResult, OrderResult
|
@@ -855,6 +858,26 @@ def page_count(pdf_file) -> int:
|
|
855 |
doc = open_pdf(pdf_file)
|
856 |
return len(doc)
|
857 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
858 |
# ===================== Streamlit UI =====================
|
859 |
st.set_page_config(page_title="TRUST OCR DEMO", layout="wide")
|
860 |
st.markdown("# TRUST OCR DEMO")
|
@@ -891,7 +914,6 @@ def load_layout_cached():
|
|
891 |
|
892 |
@st.cache_resource(show_spinner=True)
def load_order_cached():
    """Load the Surya reading-order model and its processor.

    Wrapped in ``st.cache_resource`` so the pair is created once and reused.

    Returns:
        A ``(model, processor)`` tuple, both loaded from the
        ``vikp/surya_order`` checkpoint.
    """
    # If the sdpa error still shows up, this loader can be disabled temporarily.
    checkpoint = "vikp/surya_order"
    return load_order_model(checkpoint=checkpoint), load_order_processor(checkpoint=checkpoint)
|
896 |
|
897 |
# ---------- PERSONAL RECOGNITION ONLY ----------
|
@@ -902,7 +924,7 @@ PERSONAL_HF_REPO = os.environ.get("TRUSTOCR_REPO") # ریپوی مدل HF
|
|
902 |
def load_rec_personal():
|
903 |
"""
|
904 |
اولویت با مدل شخصی است. اگر تنظیم نبود، به یک مدل عمومی Surya فالبک میشود.
|
905 |
-
اگر
|
906 |
"""
|
907 |
if PERSONAL_MODEL_PATH and os.path.isdir(PERSONAL_MODEL_PATH):
|
908 |
m = load_rec_model(checkpoint=PERSONAL_MODEL_PATH)
|
@@ -953,14 +975,15 @@ def _apply_auto_rotate(pil_img: Image.Image) -> Image.Image:
|
|
953 |
def text_detection(pil_img: Image.Image):
|
954 |
pred: TextDetectionResult = batch_text_detection([pil_img], det_model, det_processor)[0]
|
955 |
polygons = [p.polygon for p in pred.bboxes]
|
956 |
-
det_img =
|
957 |
return det_img, pred
|
958 |
|
959 |
def layout_detection(pil_img: Image.Image):
|
960 |
_, det_pred = text_detection(pil_img)
|
961 |
pred: LayoutResult = batch_layout_detection([pil_img], layout_model, layout_processor, [det_pred])[0]
|
962 |
-
polygons = [p.polygon for p in pred.bboxes]
|
963 |
-
|
|
|
964 |
return layout_img, pred
|
965 |
|
966 |
def order_detection(pil_img: Image.Image):
|
@@ -969,20 +992,21 @@ def order_detection(pil_img: Image.Image):
|
|
969 |
_, layout_pred = layout_detection(pil_img)
|
970 |
bboxes = [l.bbox for l in layout_pred.bboxes]
|
971 |
pred: OrderResult = batch_ordering([pil_img], [bboxes], order_model, order_processor)[0]
|
972 |
-
polys = [l.polygon for l in pred.bboxes]
|
973 |
-
|
|
|
974 |
return order_img, pred
|
975 |
|
976 |
def ocr_page(pil_img: Image.Image, langs: List[str]):
|
977 |
langs = list(langs) if langs else ["Persian"]
|
978 |
replace_lang_with_code(langs)
|
|
|
979 |
if det_model and det_processor and rec_model and rec_processor:
|
980 |
img_pred: OCRResult = run_ocr([pil_img], [langs], det_model, det_processor, rec_model, rec_processor)[0]
|
981 |
else:
|
982 |
img_pred: OCRResult = run_ocr([pil_img], [langs], rec_model=rec_model, rec_processor=rec_processor)[0]
|
983 |
-
|
984 |
-
|
985 |
-
return rec_img, img_pred
|
986 |
|
987 |
# ===================== Input Handling =====================
|
988 |
|
@@ -997,8 +1021,7 @@ else:
|
|
997 |
bytes_data = in_file.getvalue()
|
998 |
temp_dir = os.path.join(tempfile.gettempdir(), "trustocr_temp"); os.makedirs(temp_dir, exist_ok=True)
|
999 |
file_path = os.path.join(temp_dir, in_file.name)
|
1000 |
-
with open(file_path, "wb") as f:
|
1001 |
-
f.write(bytes_data)
|
1002 |
out_file = os.path.splitext(file_path)[0] + "-1.JPG"
|
1003 |
try:
|
1004 |
if auto_border:
|
@@ -1041,8 +1064,10 @@ with col1:
|
|
1041 |
try:
|
1042 |
rec_img, ocr_pred = ocr_page(pil_image, languages)
|
1043 |
text_tab, json_tab = st.tabs(["متن صفحه | Page Text", "JSON"])
|
1044 |
-
with text_tab:
|
1045 |
-
|
|
|
|
|
1046 |
except Exception as e:
|
1047 |
st.error(f"خطا در بازشناسی متن (Recognition): {e}")
|
1048 |
|
|
|
710 |
# -*- coding: utf-8 -*-
|
711 |
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
712 |
|
713 |
+
# -*- coding: utf-8 -*-
|
714 |
+
# TRUST OCR DEMO – Streamlit app (Surya OCR + مدل شخصی)
|
715 |
+
|
716 |
import os
|
717 |
import io
|
718 |
import tempfile
|
|
|
721 |
|
722 |
import numpy as np
|
723 |
import cv2
|
724 |
+
from PIL import Image, ImageDraw, ImageFont
|
725 |
import pypdfium2
|
726 |
import pytesseract
|
727 |
|
|
|
745 |
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_home")
|
746 |
# جلوگیری از sdpa backend که با Surya ordering ممکن است ناسازگار باشد
|
747 |
os.environ.setdefault("TRANSFORMERS_ATTENTION_BACKEND", "eager")
|
|
|
748 |
# مسیرهای استاتیک/کش به /tmp برای جلوگیری از Permission denied
|
749 |
os.environ.setdefault("STREAMLIT_STATIC_DIR", "/tmp/streamlit_static")
|
750 |
os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
|
|
|
802 |
from surya.ordering import batch_ordering
|
803 |
|
804 |
from surya.ocr import run_ocr
|
805 |
+
# مهم: دیگر از surya.postprocessing.* استفاده نمیکنیم تا چیزی در site-packages ننویسد
|
806 |
+
# from surya.postprocessing.heatmap import draw_polys_on_image
|
807 |
+
# from surya.postprocessing.text import draw_text_on_image
|
808 |
from surya.languages import CODE_TO_LANGUAGE
|
809 |
from surya.input.langs import replace_lang_with_code
|
810 |
from surya.schema import OCRResult, TextDetectionResult, LayoutResult, OrderResult
|
|
|
858 |
doc = open_pdf(pdf_file)
|
859 |
return len(doc)
|
860 |
|
861 |
+
# ----- رسم سبک خودمان (بدون وابستگی به surya.postprocessing) -----
|
862 |
+
def _norm_poly(polygon) -> list[tuple[int, int]]:
|
863 |
+
arr = np.array(polygon).reshape(-1, 2)
|
864 |
+
return [(int(x), int(y)) for x, y in arr]
|
865 |
+
|
866 |
+
def draw_polys_simple(pil_img: Image.Image, polygons, labels=None) -> Image.Image:
    """Draw polygon outlines (and optional labels) using Pillow only.

    Works on a copy of the input image and writes nothing to disk.

    Args:
        pil_img: Source image; it is copied, never modified.
        polygons: Iterable of polygons, each in any form ``_norm_poly`` accepts.
        labels: Optional sequence of labels. ``labels[i]`` is drawn next to
            ``polygons[i]``; polygons beyond ``len(labels)`` get no label.

    Returns:
        A new image with every polygon outlined in green and each available
        label written in red just above the polygon's first vertex.
    """
    canvas = pil_img.copy()
    painter = ImageDraw.Draw(canvas)
    font = ImageFont.load_default()
    for idx, polygon in enumerate(polygons):
        vertices = _norm_poly(polygon)
        # A polygon needs at least two vertices to be drawn; skip degenerate
        # ones instead of letting a single bad box abort the whole overlay.
        if len(vertices) < 2:
            continue
        # Polygon outline.
        painter.polygon(vertices, outline=(0, 255, 0))
        # Optional label, placed 12 px above the first vertex and clamped to
        # the top edge of the image.
        if labels is not None and idx < len(labels):
            x, y = vertices[0]
            painter.text((x, max(0, y - 12)), str(labels[idx]), fill=(255, 0, 0), font=font)
    return canvas
|
880 |
+
|
881 |
# ===================== Streamlit UI =====================
|
882 |
st.set_page_config(page_title="TRUST OCR DEMO", layout="wide")
|
883 |
st.markdown("# TRUST OCR DEMO")
|
|
|
914 |
|
915 |
@st.cache_resource(show_spinner=True)
def load_order_cached():
    """Load the Surya reading-order model and its processor.

    Wrapped in ``st.cache_resource`` so the pair is created once and reused.

    Returns:
        A ``(model, processor)`` tuple, both loaded from the
        ``vikp/surya_order`` checkpoint.
    """
    checkpoint = "vikp/surya_order"
    return load_order_model(checkpoint=checkpoint), load_order_processor(checkpoint=checkpoint)
|
918 |
|
919 |
# ---------- PERSONAL RECOGNITION ONLY ----------
|
|
|
924 |
def load_rec_personal():
|
925 |
"""
|
926 |
اولویت با مدل شخصی است. اگر تنظیم نبود، به یک مدل عمومی Surya فالبک میشود.
|
927 |
+
اگر فالبک نمیخواهی، بخش آخر را حذف کن و بهجایش RuntimeError بده.
|
928 |
"""
|
929 |
if PERSONAL_MODEL_PATH and os.path.isdir(PERSONAL_MODEL_PATH):
|
930 |
m = load_rec_model(checkpoint=PERSONAL_MODEL_PATH)
|
|
|
975 |
def text_detection(pil_img: Image.Image):
    """Run text detection on a single image and render the detected polygons.

    Uses the module-level ``det_model`` and ``det_processor``.

    Args:
        pil_img: Page image to analyse.

    Returns:
        ``(det_img, pred)``: a copy of the image with the detected polygons
        outlined, and the detection result for this image (the first item of
        the ``batch_text_detection`` output).
    """
    pred: TextDetectionResult = batch_text_detection([pil_img], det_model, det_processor)[0]
    polygons = [box.polygon for box in pred.bboxes]
    # Rendered with the in-file lightweight drawer rather than surya.postprocessing.
    det_img = draw_polys_simple(pil_img, polygons)
    return det_img, pred
|
980 |
|
981 |
def layout_detection(pil_img: Image.Image):
    """Run layout detection on a single image and render the labelled regions.

    Text detection runs first because its result is passed to
    ``batch_layout_detection``. Uses the module-level ``layout_model`` and
    ``layout_processor``.

    Args:
        pil_img: Page image to analyse.

    Returns:
        ``(layout_img, pred)``: a copy of the image with each layout polygon
        outlined and tagged with its label, and the layout result for this
        image (the first item of the ``batch_layout_detection`` output).
    """
    # Only the detection result is needed here; the rendered overlay is discarded.
    _, det_pred = text_detection(pil_img)
    pred: LayoutResult = batch_layout_detection([pil_img], layout_model, layout_processor, [det_pred])[0]
    polygons = [box.polygon for box in pred.bboxes]
    labels = [box.label for box in pred.bboxes]
    # Rendered with the in-file lightweight drawer rather than surya.postprocessing.
    layout_img = draw_polys_simple(pil_img, polygons, labels=labels)
    return layout_img, pred
|
988 |
|
989 |
def order_detection(pil_img: Image.Image):
|
|
|
992 |
_, layout_pred = layout_detection(pil_img)
|
993 |
bboxes = [l.bbox for l in layout_pred.bboxes]
|
994 |
pred: OrderResult = batch_ordering([pil_img], [bboxes], order_model, order_processor)[0]
|
995 |
+
polys = [l.polygon for l in pred.bboxes]
|
996 |
+
positions = [str(l.position) for l in pred.bboxes]
|
997 |
+
order_img = draw_polys_simple(pil_img, polys, labels=positions) # ← نسخه سبک خودمان
|
998 |
return order_img, pred
|
999 |
|
1000 |
def ocr_page(pil_img: Image.Image, langs: List[str]):
    """Run OCR on a single page image.

    Args:
        pil_img: Page image to recognise.
        langs: Language names for recognition. An empty or ``None`` value
            falls back to ``["Persian"]``.

    Returns:
        ``(None, img_pred)``. The first slot used to hold a rendered text
        image; it is kept as ``None`` so callers that unpack two values keep
        working. ``img_pred`` is the OCR result for this image (the first item
        of the ``run_ocr`` output).
    """
    # Work on a copy so the caller's list is left alone.
    langs = list(langs) if langs else ["Persian"]
    # Return value is ignored, so this presumably rewrites the list in place
    # (language names -> codes) - TODO confirm against surya.input.langs.
    replace_lang_with_code(langs)
    # No draw_text_on_image rendering any more, so no font/static assets are needed.
    if det_model and det_processor and rec_model and rec_processor:
        img_pred: OCRResult = run_ocr([pil_img], [langs], det_model, det_processor, rec_model, rec_processor)[0]
    else:
        # NOTE(review): this branch calls run_ocr without detection model/processor;
        # verify that the installed surya version accepts that call shape.
        img_pred: OCRResult = run_ocr([pil_img], [langs], rec_model=rec_model, rec_processor=rec_processor)[0]
    # Only the text result is returned for display; no image is produced, to
    # avoid any dependency on fonts.
    return None, img_pred
|
|
|
1010 |
|
1011 |
# ===================== Input Handling =====================
|
1012 |
|
|
|
1021 |
bytes_data = in_file.getvalue()
|
1022 |
temp_dir = os.path.join(tempfile.gettempdir(), "trustocr_temp"); os.makedirs(temp_dir, exist_ok=True)
|
1023 |
file_path = os.path.join(temp_dir, in_file.name)
|
1024 |
+
with open(file_path, "wb") as f: f.write(bytes_data)
|
|
|
1025 |
out_file = os.path.splitext(file_path)[0] + "-1.JPG"
|
1026 |
try:
|
1027 |
if auto_border:
|
|
|
1064 |
try:
|
1065 |
rec_img, ocr_pred = ocr_page(pil_image, languages)
|
1066 |
text_tab, json_tab = st.tabs(["متن صفحه | Page Text", "JSON"])
|
1067 |
+
with text_tab:
|
1068 |
+
st.text("\n".join([p.text for p in ocr_pred.text_lines]))
|
1069 |
+
with json_tab:
|
1070 |
+
st.json(ocr_pred.model_dump(), expanded=False)
|
1071 |
except Exception as e:
|
1072 |
st.error(f"خطا در بازشناسی متن (Recognition): {e}")
|
1073 |
|