Update app_pages/ocr_comparator.py
Browse files
- app_pages/ocr_comparator.py +25 -23
app_pages/ocr_comparator.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
"""This Streamlit app allows you to compare, from a given image, the results of different solutions:
|
2 |
EasyOcr, PaddleOCR, MMOCR, Tesseract
|
3 |
"""
|
4 |
-
|
5 |
import mim
|
6 |
|
7 |
mim.install(['mmengine>=0.7.1,<1.1.0'])
|
8 |
mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
|
9 |
mim.install(['mmdet>=3.0.rc5,<3.2.0'])
|
10 |
mim.install(['mmocr'])
|
|
|
11 |
|
12 |
import streamlit as st
|
13 |
import plotly.express as px
|
@@ -21,7 +22,7 @@ from PIL import Image, ImageColor
|
|
21 |
import PIL
|
22 |
import easyocr
|
23 |
from paddleocr import PaddleOCR
|
24 |
-
from mmocr.utils.ocr import MMOCR
|
25 |
import pytesseract
|
26 |
from pytesseract import Output
|
27 |
import os
|
@@ -80,9 +81,10 @@ def app():
|
|
80 |
plotly figure : confidence color scale figure
|
81 |
"""
|
82 |
# the readers considered
|
83 |
-
out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
|
84 |
-
out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
|
85 |
-
|
|
|
86 |
# Columns for recognition details results
|
87 |
out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
|
88 |
|
@@ -123,7 +125,7 @@ def app():
|
|
123 |
'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
|
124 |
'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
|
125 |
|
126 |
-
out_dict_lang_mmocr = {'English & Chinese': 'en'}
|
127 |
|
128 |
out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
|
129 |
'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
|
@@ -156,7 +158,8 @@ def app():
|
|
156 |
'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
|
157 |
'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
|
158 |
|
159 |
-
out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr,
|
|
|
160 |
out_dict_lang_tesseract]
|
161 |
|
162 |
# Initialization of detection form
|
@@ -221,19 +224,18 @@ def app():
|
|
221 |
return out_ocr
|
222 |
|
223 |
###
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
###
|
238 |
def init_readers(in_list_params):
|
239 |
"""Initialization of the readers, and return them as list
|
@@ -255,10 +257,10 @@ def app():
|
|
255 |
reader_ppocr = init_ppocr(in_list_params[1])
|
256 |
|
257 |
# - MMOCR
|
258 |
-
with st.spinner("MMOCR reader initialization in progress ..."):
|
259 |
-
|
260 |
|
261 |
-
out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]
|
262 |
|
263 |
return out_list_readers
|
264 |
|
|
|
1 |
"""This Streamlit app allows you to compare, from a given image, the results of different solutions:
|
2 |
EasyOcr, PaddleOCR, MMOCR, Tesseract
|
3 |
"""
|
4 |
+
"""
|
5 |
import mim
|
6 |
|
7 |
mim.install(['mmengine>=0.7.1,<1.1.0'])
|
8 |
mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
|
9 |
mim.install(['mmdet>=3.0.rc5,<3.2.0'])
|
10 |
mim.install(['mmocr'])
|
11 |
+
"""
|
12 |
|
13 |
import streamlit as st
|
14 |
import plotly.express as px
|
|
|
22 |
import PIL
|
23 |
import easyocr
|
24 |
from paddleocr import PaddleOCR
|
25 |
+
#from mmocr.utils.ocr import MMOCR
|
26 |
import pytesseract
|
27 |
from pytesseract import Output
|
28 |
import os
|
|
|
81 |
plotly figure : confidence color scale figure
|
82 |
"""
|
83 |
# the readers considered
|
84 |
+
#out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
|
85 |
+
#out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
|
86 |
+
out_reader_type_list = ['EasyOCR', 'PPOCR', 'Tesseract']
|
87 |
+
out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'Tesseract': 2}
|
88 |
# Columns for recognition details results
|
89 |
out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
|
90 |
|
|
|
125 |
'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
|
126 |
'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
|
127 |
|
128 |
+
#out_dict_lang_mmocr = {'English & Chinese': 'en'}
|
129 |
|
130 |
out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
|
131 |
'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
|
|
|
158 |
'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
|
159 |
'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
|
160 |
|
161 |
+
out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr, \
|
162 |
+
#out_dict_lang_mmocr, \
|
163 |
out_dict_lang_tesseract]
|
164 |
|
165 |
# Initialization of detection form
|
|
|
224 |
return out_ocr
|
225 |
|
226 |
###
|
227 |
+
#@st.experimental_memo(show_spinner=False)
|
228 |
+
#def init_mmocr(in_params):
|
229 |
+
# """Initialization of MMOCR reader
|
230 |
+
#
|
231 |
+
# Args:
|
232 |
+
# in_params (dict): dict with parameters
|
233 |
+
#
|
234 |
+
# Returns:
|
235 |
+
# mmocr reader: the mmocr reader instance
|
236 |
+
# """
|
237 |
+
# out_ocr = MMOCR(recog=None, **in_params[1])
|
238 |
+
# return out_ocr
|
|
|
239 |
###
|
240 |
def init_readers(in_list_params):
|
241 |
"""Initialization of the readers, and return them as list
|
|
|
257 |
reader_ppocr = init_ppocr(in_list_params[1])
|
258 |
|
259 |
# - MMOCR
|
260 |
+
#with st.spinner("MMOCR reader initialization in progress ..."):
|
261 |
+
# reader_mmocr = init_mmocr(in_list_params[2])
|
262 |
|
263 |
+
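#out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]  # NOTE(review): with this assignment commented out, no definition of out_list_readers is visible in this hunk before the return below - confirm a replacement (e.g. [reader_easyocr, reader_ppocr]) is assigned, otherwise the return raises NameError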
|
264 |
|
265 |
return out_list_readers
|
266 |
|