Loren committed on
Commit
54f7d07
·
1 Parent(s): 17d195b

Update app_pages/ocr_comparator.py

Browse files
Files changed (1) hide show
  1. app_pages/ocr_comparator.py +25 -23
app_pages/ocr_comparator.py CHANGED
@@ -1,13 +1,14 @@
1
  """This Streamlit app allows you to compare, from a given image, the results of different solutions:
2
  EasyOcr, PaddleOCR, MMOCR, Tesseract
3
  """
4
-
5
  import mim
6
 
7
  mim.install(['mmengine>=0.7.1,<1.1.0'])
8
  mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
9
  mim.install(['mmdet>=3.0.rc5,<3.2.0'])
10
  mim.install(['mmocr'])
 
11
 
12
  import streamlit as st
13
  import plotly.express as px
@@ -21,7 +22,7 @@ from PIL import Image, ImageColor
21
  import PIL
22
  import easyocr
23
  from paddleocr import PaddleOCR
24
- from mmocr.utils.ocr import MMOCR
25
  import pytesseract
26
  from pytesseract import Output
27
  import os
@@ -80,9 +81,10 @@ def app():
80
  plotly figure : confidence color scale figure
81
  """
82
  # the readers considered
83
- out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
84
- out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
85
-
 
86
  # Columns for recognition details results
87
  out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
88
 
@@ -123,7 +125,7 @@ def app():
123
  'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
124
  'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
125
 
126
- out_dict_lang_mmocr = {'English & Chinese': 'en'}
127
 
128
  out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
129
  'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
@@ -156,7 +158,8 @@ def app():
156
  'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
157
  'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
158
 
159
- out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr, out_dict_lang_mmocr, \
 
160
  out_dict_lang_tesseract]
161
 
162
  # Initialization of detection form
@@ -221,19 +224,18 @@ def app():
221
  return out_ocr
222
 
223
  ###
224
- @st.experimental_memo(show_spinner=False)
225
- def init_mmocr(in_params):
226
- """Initialization of MMOCR reader
227
-
228
- Args:
229
- in_params (dict): dict with parameters
230
-
231
- Returns:
232
- mmocr reader: the ppocr reader instance
233
- """
234
- out_ocr = MMOCR(recog=None, **in_params[1])
235
- return out_ocr
236
-
237
  ###
238
  def init_readers(in_list_params):
239
  """Initialization of the readers, and return them as list
@@ -255,10 +257,10 @@ def app():
255
  reader_ppocr = init_ppocr(in_list_params[1])
256
 
257
  # - MMOCR
258
- with st.spinner("MMOCR reader initialization in progress ..."):
259
- reader_mmocr = init_mmocr(in_list_params[2])
260
 
261
- out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]
262
 
263
  return out_list_readers
264
 
 
1
  """This Streamlit app allows you to compare, from a given image, the results of different solutions:
2
  EasyOcr, PaddleOCR, MMOCR, Tesseract
3
  """
4
+ """
5
  import mim
6
 
7
  mim.install(['mmengine>=0.7.1,<1.1.0'])
8
  mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
9
  mim.install(['mmdet>=3.0.rc5,<3.2.0'])
10
  mim.install(['mmocr'])
11
+ """
12
 
13
  import streamlit as st
14
  import plotly.express as px
 
22
  import PIL
23
  import easyocr
24
  from paddleocr import PaddleOCR
25
+ #from mmocr.utils.ocr import MMOCR
26
  import pytesseract
27
  from pytesseract import Output
28
  import os
 
81
  plotly figure : confidence color scale figure
82
  """
83
  # the readers considered
84
+ #out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
85
+ #out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
86
+ out_reader_type_list = ['EasyOCR', 'PPOCR', 'Tesseract']
87
+ out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'Tesseract': 2}
88
  # Columns for recognition details results
89
  out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
90
 
 
125
  'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
126
  'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
127
 
128
+ #out_dict_lang_mmocr = {'English & Chinese': 'en'}
129
 
130
  out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
131
  'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
 
158
  'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
159
  'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
160
 
161
+ out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr, \
162
+ #out_dict_lang_mmocr, \
163
  out_dict_lang_tesseract]
164
 
165
  # Initialization of detection form
 
224
  return out_ocr
225
 
226
  ###
227
+ #@st.experimental_memo(show_spinner=False)
228
+ #def init_mmocr(in_params):
229
+ # """Initialization of MMOCR reader
230
+ #
231
+ # Args:
232
+ # in_params (dict): dict with parameters
233
+ #
234
+ # Returns:
235
+ # mmocr reader: the mmocr reader instance
236
+ # """
237
+ # out_ocr = MMOCR(recog=None, **in_params[1])
238
+ # return out_ocr
 
239
  ###
240
  def init_readers(in_list_params):
241
  """Initialization of the readers, and return them as list
 
257
  reader_ppocr = init_ppocr(in_list_params[1])
258
 
259
  # - MMOCR
260
+ #with st.spinner("MMOCR reader initialization in progress ..."):
261
+ # reader_mmocr = init_mmocr(in_list_params[2])
262
 
263
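+ #out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]  # NOTE(review): with this assignment commented out, no other assignment to out_list_readers is visible in this hunk, yet it is still returned below — confirm it is defined as [reader_easyocr, reader_ppocr] elsewhere, otherwise this raises NameError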
264
 
265
  return out_list_readers
266