File size: 3,904 Bytes
fc9d076
 
 
 
 
 
 
 
 
 
 
8818cf9
 
fc9d076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea659e0
fc9d076
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import math
import re
import ast
import gradio as gr
import numpy as np
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from PIL import Image, ImageDraw

# Directory that receives the numbered tile images (1.jpg, 2.jpg, ...) cut from the input image.
img_temp = "tp"
# Directory that receives the numbered per-word crops referenced by the result table.
sub_img_temp = "tp1"

def load_model():
    """Create the OCR predictor used by this app.

    The predictor is built with the ``linknet_resnet18_rotation`` detection
    architecture and the ``crnn_vgg16_bn`` recognition architecture, with
    pretrained weights requested for both the models and their backbone.

    Returns:
        Whatever ``ocr_predictor`` returns for the options below.
    """
    predictor_options = {
        "det_arch": 'linknet_resnet18_rotation',
        "reco_arch": 'crnn_vgg16_bn',
        "detect_orientation": True,
        "assume_straight_pages": False,
        "pretrained": True,
        "pretrained_backbone": True,
        "export_as_straight_boxes": True,
        "preserve_aspect_ratio": True,
    }
    return ocr_predictor(**predictor_options)

def convert_coordinates(geometry, page_dim, i, j):
    """Turn a relative word box on one tile into absolute pixel bounds.

    Args:
        geometry: Two ``(x, y)`` pairs, the minimum corner followed by the
            maximum corner, expressed as fractions of the tile size.
        page_dim: Tile dimensions; index 0 scales the y axis and index 1
            scales the x axis.
        i: Tile index along x; shifts the box by ``i`` tile widths.
        j: Tile index along y; shifts the box by ``j`` tile heights.

    Returns:
        ``[x_min, x_max, y_min, y_max]``; minima are rounded down and maxima
        rounded up before the tile offset is added.
    """
    page_h, page_w = page_dim[0], page_dim[1]
    (rel_left, rel_top), (rel_right, rel_bottom) = geometry[0], geometry[1]

    # Offset of this tile's origin inside the full image.
    shift_x = i * page_w
    shift_y = j * page_h

    return [
        math.floor(rel_left * page_w) + shift_x,
        math.ceil(rel_right * page_w) + shift_x,
        math.floor(rel_top * page_h) + shift_y,
        math.ceil(rel_bottom * page_h) + shift_y,
    ]

def get_coordinates(output, x, y):
    """List every word of the first exported page with its pixel bounds.

    Args:
        output: Exported OCR result; only ``output['pages'][0]`` is read,
            using its ``"dimensions"`` and ``"blocks"`` entries.
        x: Tile index along x, forwarded to ``convert_coordinates``.
        y: Tile index along y, forwarded to ``convert_coordinates``.

    Returns:
        A list of strings of the form ``"[x_min, x_max, y_min, y_max]: text"``,
        one per word, in block / line / word order.
    """
    page = output['pages'][0]
    page_dim = page["dimensions"]
    return [
        f"{convert_coordinates(word['geometry'], page_dim, x, y)}: {word['value']}"
        for block in page["blocks"]
        for line in block["lines"]
        for word in line["words"]
    ]

def get_vals(file_path, wh):
    """Run OCR over a ``wh`` x ``wh`` grid of tile images.

    Tiles are read from ``<file_path>/<n>.jpg`` where ``n`` starts at 1 and
    advances with the inner loop first, so tile ``n`` sits at outer index
    ``(n - 1) // wh`` and inner index ``(n - 1) % wh``.

    Args:
        file_path: Directory containing the numbered tile images.
        wh: Number of tiles along each side of the grid.

    Returns:
        The concatenated ``get_coordinates`` records of all tiles.
    """
    predictor = load_model()
    records = []
    for outer in range(wh):
        for inner in range(wh):
            tile_no = outer * wh + inner + 1
            tile_doc = DocumentFile.from_images(f"{file_path}/{tile_no}.jpg")
            exported = predictor(tile_doc).export()
            # The outer index is used as the x tile index, the inner one as y.
            records += get_coordinates(exported, outer, inner)
    return records

def clean_dir(path):
    """Empty *path* of the ``.jpg`` files left behind by a previous run.

    The directory is created when it does not exist yet, so callers can
    write into it right after this call. Only the ``.jpg`` entries actually
    present are removed; other files are left alone. Deleting what is really
    there, rather than assuming the names ``1.jpg`` .. ``N.jpg``, means a
    stray file or a gap in the numbering no longer raises
    ``FileNotFoundError``.

    Args:
        path: Directory to clean.
    """
    os.makedirs(path, exist_ok=True)
    for name in os.listdir(path):
        if name.endswith(".jpg"):
            os.remove(os.path.join(path, name))
      
def html_path(img, counter):
    """Save *img* under ``sub_img_temp`` and return an HTML tag that shows it.

    Args:
        img: Object with a ``save(path)`` method (a PIL image in this file).
        counter: Number used as the file name, giving ``<counter>.jpg``.

    Returns:
        An ``<img>`` element whose ``src`` is ``/file=`` followed by the
        saved file's path.
    """
    target = f"{sub_img_temp}/{counter}.jpg"
    img.save(target)
    return f"<img src='/file={target}'></img>"

def create_box(l):  # l represents the bounds of box
    """Reorder ``(a, b, c, d)`` bounds into the tuple ``(a, c, b, d)``.

    In this file the input is ``[x_min, x_max, y_min, y_max]`` and the result
    is the ``(x_min, y_min, x_max, y_max)`` box handed to ``Image.crop``.
    """
    first, second = l[0], l[1]
    third, fourth = l[2], l[3]
    return (first, third, second, fourth)

def process(filepath, regex, size=(1656,1170)):
    """Run tiled OCR over an image and return the words matching a pattern.

    The image is cut into tiles of ``size``, each tile is written to
    ``img_temp`` and recognised through ``get_vals``. Every recognised word
    is cropped out of the image, saved under ``sub_img_temp`` and listed
    together with its pixel bounds and text.

    Args:
        filepath: Path of the image to analyse.
        regex: Pattern selector. ``'Regex-1'`` keeps values that start with a
            number, optionally followed by a ``.`` or ``,`` and more digits;
            any other value keeps entries containing at least one digit.
        size: ``(width, height)`` of one tile in pixels.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Coordinates``, ``Image``
        and ``Value``, restricted to the rows whose value matches.
    """
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)

    # JPEG cannot store alpha or palette modes, so normalise to RGB before any
    # tile or crop is saved. convert() returns an independent loaded copy,
    # which lets the source file be closed immediately.
    with Image.open(filepath) as source:
        img = source.convert("RGB")
    width, height = img.size
    tile_w, tile_h = size[0], size[1]

    # get_vals() walks a square grid. Deriving its side from the square root
    # of the tile count dropped tiles whenever the layout was rectangular
    # (e.g. one column by two rows), so the grid is padded to a square
    # instead. Tiles lying outside the image are blank and cost only an
    # extra OCR pass.
    grid = max(math.ceil(width / tile_w), math.ceil(height / tile_h))
    counter = 0
    for col in range(grid):
        for row in range(grid):
            counter += 1
            left, top = col * tile_w, row * tile_h
            tile = img.crop((left, top, left + tile_w, top + tile_h))
            tile.save(f"{img_temp}/{counter}.jpg")

    if regex == 'Regex-1':
        # Non-capturing group: pandas warns when a pattern given to
        # str.contains has capture groups. The matched text is unchanged.
        pattern = re.compile(r"^\s\b\d+(?:[\.,]\d+)?")
    else:
        pattern = re.compile(r"\d+")

    data = get_vals(img_temp, wh=grid)
    dimensions, im_, values = [], [], []
    for counter, record in enumerate(data, start=1):
        # Records look like "[x_min, x_max, y_min, y_max]: text". Split on
        # the first colon only, so text containing ':' is kept whole. The
        # space after the separator stays in the value because the
        # 'Regex-1' pattern expects it.
        coords_text, value = record.split(':', 1)
        coords = ast.literal_eval(coords_text)
        dimensions.append(coords)
        im_.append(html_path(img.crop(create_box(coords)), counter=counter))
        values.append(value)

    metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
    if metadata.empty:
        # Nothing was recognised; there is nothing to filter.
        return metadata
    return metadata[metadata['Value'].str.contains(pattern)]

def main():
    """Build the Gradio interface around ``process`` and launch it."""
    image_input = gr.Image(type="filepath", interactive=True)
    pattern_choice = gr.Dropdown(['Regex-1'])
    # The middle column is rendered as markdown so the <img> tags display.
    result_table = gr.DataFrame(
        wrap=True,
        datatype=["str", "markdown", "str"],
        interactive=True,
    )

    app = gr.Interface(
        fn=process,
        inputs=[image_input, pattern_choice],
        outputs=result_table,
        title="OCR",
    )
    app.launch(debug=True, show_error=True)

# Start the Gradio app only when this file is executed as a script.
if __name__=="__main__":
    main()