Spaces:

vk888
/

SVTR-OCR-App

Sleeping

App Files Files Community

vk committed on Feb 19

Commit

6c55012

1 Parent(s): 9d5ed96

first commit

Browse files

Files changed (9) hide show

.gitignore +3 -0
.idea/svtr-ocr-gradio.iml +8 -0
app.py +21 -0
dict.txt +185 -0
models/ocr_fp16.bin +3 -0
models/ocr_fp16.xml +0 -0
ocr_inference.py +168 -0
requirements.txt +3 -0
utils.py +115 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.ipynb_checkpoints
+ocr-test/
+# *.py

.idea/svtr-ocr-gradio.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

app.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from ocr_inference import  OCR
+import gradio as gr
+def get_response(input_img):
+    return ocr.predict([input_img])
+if __name__ == "__main__":
+    ocr=OCR('models/ocr_fp16.xml')
+    iface = gr.Interface(
+        fn=get_response,
+        inputs=gr.Image(type="numpy"),  # Accepts image input
+        outputs=gr.Textbox(),
+        title="SVTR-OCR-App",
+        description="Upload cropped text for accurate Latin OCR"
+    )
+    iface.launch(share=True)

dict.txt ADDED Viewed

	@@ -0,0 +1,185 @@

+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+]
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+}
+¡
+£
+§
+ª
+«
+°
+²
+³
+´
+µ
+·
+º
+»
+¿
+À
+Á
+Â
+Ä
+Å
+Ç
+È
+É
+Ê
+Ë
+Ì
+Í
+Î
+Ï
+Ò
+Ó
+Ô
+Õ
+Ö
+Ú
+Ü
+Ý
+ß
+à
+á
+â
+ã
+ä
+å
+æ
+ç
+è
+é
+ê
+ë
+ì
+í
+î
+ï
+ñ
+ò
+ó
+ô
+õ
+ö
+ø
+ù
+ú
+û
+ü
+ý
+ą
+Ć
+ć
+Č
+č
+Đ
+đ
+ę
+ı
+Ł
+ł
+ō
+Ø
+œ
+Š
+š
+Ÿ
+Ž
+ž
+ʒ
+β
+δ
+ε
+з
+Ṡ
+‘
+€
+™

models/ocr_fp16.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ace9c8fd8dba1a0f71a8bef0bd1f2db42af5640728ab6dfcdd4e1e4ae8cbaad
+size 4430022

models/ocr_fp16.xml ADDED Viewed

The diff for this file is too large to render. See raw diff

ocr_inference.py ADDED Viewed

	@@ -0,0 +1,168 @@

+'''
+Copyright 2023 Vignesh(VK)Kotteeswaran <[email protected]>
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+import numpy as np
+from openvino.runtime import Core
+import math
+import cv2
+from utils import CTCLabelDecode
+class OCR():
+    def __init__(self,model_path):
+        ie = Core()
+        print('\n',model_path)
+        model = ie.read_model(model=model_path)
+        self.compiled_model = ie.compile_model(model=model, device_name="CPU")
+        self.input_layer = self.compiled_model.input(0)
+        self.output_layer = self.compiled_model.output(0)
+        self.decoder=CTCLabelDecode('dict.txt',True)
+        self.show_frame=None
+        self.image_shape=None
+        self.dynamic_width=False
+    def img_decode(self,img):
+        img = np.frombuffer(img, dtype='uint8')
+        img=cv2.imdecode(img, 1)
+        #print(img.shape)
+        return img
+    def preprocess_img(self,img):
+        grayscale_image = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
+        # Create an empty array of shape (height, width, 3) for the stacked image
+        stacked_image = np.zeros((grayscale_image.shape[0], grayscale_image.shape[1], 3), dtype=np.uint8)
+        # Assign the grayscale image to each channel of the stacked image
+        stacked_image[:, :, 0] = grayscale_image
+        stacked_image[:, :, 1] = grayscale_image
+        stacked_image[:, :, 2] = grayscale_image
+        return self.resize_norm_img(stacked_image)
+    def resize_norm_img(self,img,
+                    padding=True,
+                    interpolation=cv2.INTER_LINEAR):
+        self.image_shape=[3,48,int(img.shape[1]*2)]
+        imgC,imgH,imgW=self.image_shape
+        # todo: change to 0 and modified image shape
+        max_wh_ratio = imgW * 1.0 / imgH
+        h, w = img.shape[0], img.shape[1]
+        ratio = w * 1.0 / h
+        max_wh_ratio = min(max(max_wh_ratio, ratio), max_wh_ratio)
+        imgW = int(imgH * max_wh_ratio)
+        if math.ceil(imgH * ratio) > imgW:
+            resized_w = imgW
+        else:
+            resized_w = int(math.ceil(imgH * ratio))
+        resized_image = cv2.resize(img, (resized_w, imgH))
+        self.show_frame=resized_image
+        resized_image = resized_image.astype('float32')
+        if self.image_shape[0] == 1:
+            resized_image = resized_image / 255
+            resized_image = resized_image[np.newaxis, :]
+        else:
+            resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        padding_im[:, :, 0:resized_w] = resized_image
+        return padding_im
+    def predict(self,src):
+        imgs=[]
+        show_frames=[]
+        for item in src:
+            if hasattr(item,'shape'):
+                imgs.append(np.expand_dims(self.preprocess_img(item),axis=0))
+            elif isinstance(item,'str'):
+                with open(item, 'rb') as f:
+                    content=f.read()
+                imgs.append(np.expand_dims(self.preprocess_img(self.img_decode(content)),axis=0))
+            else:
+                return "Error: Invalid Input"
+            show_frames.append(self.show_frame)
+        blob=np.concatenate(imgs,axis=0).astype(np.float32)
+        outputs = self.compiled_model([blob])[self.output_layer]
+        texts=[]
+        for output in outputs:
+            output=np.expand_dims(output,axis=0)
+            curr_text=self.decoder(output)[0][0]
+            texts.append(curr_text)
+        return texts[0]

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+opencv-python==4.5.3.56
+gradio
+openvino

utils.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import re
+import numpy as np
+class BaseRecLabelDecode(object):
+    """ Convert between text-label and text-index """
+    def __init__(self, character_dict_path=None, use_space_char=False):
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        self.reverse = False
+        self.character_str = []
+        if character_dict_path is None:
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        else:
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str.append(line)
+            if use_space_char:
+                self.character_str.append(" ")
+            dict_character = list(self.character_str)
+            if 'arabic' in character_dict_path:
+                self.reverse = True
+        dict_character = self.add_special_char(dict_character)
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+    def pred_reverse(self, pred):
+        pred_re = []
+        c_current = ''
+        for c in pred:
+            if not bool(re.search('[a-zA-Z0-9 :*./%+-]', c)):
+                if c_current != '':
+                    pred_re.append(c_current)
+                pred_re.append(c)
+                c_current = ''
+            else:
+                c_current += c
+        if c_current != '':
+            pred_re.append(c_current)
+        return ''.join(pred_re[::-1])
+    def add_special_char(self, dict_character):
+        return dict_character
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """ convert text-index into text-label. """
+        result_list = []
+        ignored_tokens = self.get_ignored_tokens()
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            selection = np.ones(len(text_index[batch_idx]), dtype=bool)
+            if is_remove_duplicate:
+                selection[1:] = text_index[batch_idx][1:] != text_index[
+                                                                 batch_idx][:-1]
+            for ignored_token in ignored_tokens:
+                selection &= text_index[batch_idx] != ignored_token
+            char_list = [
+                self.character[text_id]
+                for text_id in text_index[batch_idx][selection]
+            ]
+            if text_prob is not None:
+                conf_list = text_prob[batch_idx][selection]
+            else:
+                conf_list = [1] * len(selection)
+            if len(conf_list) == 0:
+                conf_list = [0]
+            # print('\n char_list:',char_list)
+            text = ''.join(char_list)
+            if self.reverse:  # for arabic rec
+                text = self.pred_reverse(text)
+            result_list.append((text, np.mean(conf_list).tolist()))
+        return result_list
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+class CTCLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+    def __init__(self, character_dict_path=None, use_space_char=False,
+                 **kwargs):
+        super(CTCLabelDecode, self).__init__(character_dict_path,
+                                             use_space_char)
+        print('\n decoder:', character_dict_path, use_space_char)
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if isinstance(preds, tuple) or isinstance(preds, list):
+            preds = preds[-1]
+        preds_idx = preds.argmax(axis=2)
+        preds_prob = preds.max(axis=2)
+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
+        if label is None:
+            return text
+        label = self.decode(label)
+        return text, label
+    def add_special_char(self, dict_character):
+        dict_character = ['blank'] + dict_character
+        return dict_character