MohammadReza-Halakoo committed on
Commit
22e7155
·
verified ·
1 Parent(s): 8c30eb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -45
app.py CHANGED
@@ -1,12 +1,14 @@
1
-
2
  import os
 
 
 
3
  import argparse
4
  import io
5
  from typing import List
6
 
7
  import pypdfium2
8
  import streamlit as st
9
- from surya.detection import batch_text_detection
10
  from surya.layout import batch_layout_detection
11
  from surya.model.detection.segformer import load_model, load_processor
12
  from surya.model.recognition.model import load_model as load_rec_model
@@ -25,53 +27,18 @@ import pytesseract
25
  import cv2
26
  import numpy as np
27
 
28
- from huggingface_hub import login
29
-
30
- os.environ["STREAMLIT_RUNTIME_DIR"] = "/app/.streamlit"
31
- os.makedirs("/app/.streamlit", exist_ok=True)
32
-
33
-
34
-
35
-
36
- # خواندن توکن از متغیر محیطی
37
-
38
-
39
- if "HUGGINGFACE_HUB_TOKEN" in os.environ:
40
- login(token=os.environ["HUGGINGFACE_HUB_TOKEN"])
41
-
42
-
43
- # -------------------
44
- # Streamlit UI Config
45
- # -------------------
46
- st.set_page_config(layout="wide")
47
- col2, col1 = st.columns([.5, .5])
48
 
49
  # -------------------
50
- # Load Models
51
  # -------------------
52
- @st.cache_resource()
53
- def load_det_cached():
54
- return load_model(checkpoint="vikp/surya_det2"), load_processor(checkpoint="vikp/surya_det2")
55
 
56
- @st.cache_resource()
57
- def load_rec_cached():
58
- # 🔥 اینجا مدل خودت که روی HF آپلود کردی
59
- return load_rec_model(checkpoint="MohammadReza-Halakoo/TrustOCR"), \
60
- load_rec_processor(checkpoint="MohammadReza-Halakoo/TrustOCR")
61
-
62
- @st.cache_resource()
63
- def load_layout_cached():
64
- return load_model(checkpoint="vikp/surya_layout2"), load_processor(checkpoint="vikp/surya_layout2")
65
-
66
- @st.cache_resource()
67
- def load_order_cached():
68
- return load_order_model(checkpoint="vikp/surya_order"), load_order_processor(checkpoint="vikp/surya_order")
69
-
70
-
71
- det_model, det_processor = load_det_cached()
72
- rec_model, rec_processor = load_rec_cached()
73
- layout_model, layout_processor = load_layout_cached()
74
- order_model, order_processor = load_order_cached()
75
 
76
 
77
  # -------------------
@@ -169,6 +136,39 @@ def page_count(pdf_file):
169
  return len(doc)
170
 
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  # -------------------
173
  # UI
174
  # -------------------
 
 
1
  import os
2
+ os.environ["STREAMLIT_RUNTIME_DIR"] = "/app/.streamlit"
3
+ os.makedirs("/app/.streamlit", exist_ok=True)
4
+
5
  import argparse
6
  import io
7
  from typing import List
8
 
9
  import pypdfium2
10
  import streamlit as st
11
+ from surya.detection.infer import batch_text_detection # اصلاح مسیر
12
  from surya.layout import batch_layout_detection
13
  from surya.model.detection.segformer import load_model, load_processor
14
  from surya.model.recognition.model import load_model as load_rec_model
 
27
  import cv2
28
  import numpy as np
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # -------------------
32
+ # Args
33
  # -------------------
34
+ parser = argparse.ArgumentParser(description="Run OCR on an image or PDF.")
35
+ parser.add_argument("--math", action="store_true", help="Use math model for detection", default=False)
 
36
 
37
+ try:
38
+ args = parser.parse_args()
39
+ except SystemExit as e:
40
+ print(f"Error parsing arguments: {e}")
41
+ os._exit(e.code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
 
44
  # -------------------
 
136
  return len(doc)
137
 
138
 
139
+ # -------------------
140
+ # Streamlit UI Config
141
+ # -------------------
142
+ st.set_page_config(layout="wide")
143
+ col2, col1 = st.columns([.5, .5])
144
+
145
+ # -------------------
146
+ # Load Models
147
+ # -------------------
148
+ @st.cache_resource()
149
+ def load_det_cached():
150
+ return load_model(checkpoint="vikp/surya_det2"), load_processor(checkpoint="vikp/surya_det2")
151
+
152
+ @st.cache_resource()
153
+ def load_rec_cached():
154
+ return load_rec_model(checkpoint="MohammadReza-Halakoo/TrustOCR"), \
155
+ load_rec_processor(checkpoint="MohammadReza-Halakoo/TrustOCR")
156
+
157
+ @st.cache_resource()
158
+ def load_layout_cached():
159
+ return load_model(checkpoint="vikp/surya_layout2"), load_processor(checkpoint="vikp/surya_layout2")
160
+
161
+ @st.cache_resource()
162
+ def load_order_cached():
163
+ return load_order_model(checkpoint="vikp/surya_order"), load_order_processor(checkpoint="vikp/surya_order")
164
+
165
+
166
+ det_model, det_processor = load_det_cached()
167
+ rec_model, rec_processor = load_rec_cached()
168
+ layout_model, layout_processor = load_layout_cached()
169
+ order_model, order_processor = load_order_cached()
170
+
171
+
172
  # -------------------
173
  # UI
174
  # -------------------