File size: 6,928 Bytes
bfabfea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import cv2
from ultralytics import YOLO 
import numpy as np
from paddleocr import PaddleOCR


from ObjectDetection.functions import generateMask, cropBlackBackground, enhanceImage
from OCR.rotation_functions import hoffman_transformation, rotate, pytesseractRotate
from NER.ner_inference import inference

import os
from dotenv import load_dotenv
from pathlib import Path
# Load settings from a `.env` file located in the current working directory.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

# Filesystem locations used by the pipeline, keyed by the name of the
# environment variable each one is read from.
# NOTE(review): str(os.getenv(...)) turns an unset variable into the literal
# string 'None' rather than failing here -- confirm that is intended.
path = {
    variable: str(os.getenv(variable))
    for variable in (
        'SEG_MODEL_PATH',
        'MAIN_FLOW_GRAY_IMG_DIR_PATH',
        'MAIN_FLOW_INFERENCE_FOLDER',
    )
}

# The segmentation model is loaded once, at import time.
seg_model = YOLO(path['SEG_MODEL_PATH'])

# Value passed as `conf` to the segmentation model call.
CONF = 0.7




def object_detection(file):
    """Run the segmentation model on a grayscale copy of an image.

    The image at ``file`` is loaded as grayscale, written under its original
    base name into the directory configured by ``MAIN_FLOW_GRAY_IMG_DIR_PATH``
    (created if missing), and that saved copy is passed to ``seg_model``.

    Args:
        file: Path of the input image on disk.

    Returns:
        A ``(result, img_file)`` tuple: the value returned by ``seg_model``
        and the grayscale image that was loaded.

    Raises:
        ValueError: If OpenCV cannot read ``file`` (missing, unreadable or
            not a decodable image).
    """
    print("**************************** PERFORMING_OBJECT_DETECTION **************************** ")
    # Flag 0 makes OpenCV load the image as single-channel grayscale.
    img_file = cv2.imread(file, 0)
    if img_file is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.imwrite.
        raise ValueError(f"Unable to read image file: {file}")
    img_name = os.path.basename(file)

    gray_dir = path['MAIN_FLOW_GRAY_IMG_DIR_PATH']
    gray_img_path = os.path.join(gray_dir, img_name)
    Path(gray_dir).mkdir(parents=True, exist_ok=True)
    cv2.imwrite(gray_img_path, img_file)

    # The model is fed the saved grayscale copy, not the original file.
    result = seg_model(
        gray_img_path,
        conf=CONF,
        save=True,
        name=path['MAIN_FLOW_INFERENCE_FOLDER'],
        exist_ok=True,
    )

    return result, img_file
    
    
def crop_image(seg_result, img_file, img_name):
    """Crop the first usable segmented region out of an image.

    Each entry of ``seg_result`` is handed to ``generateMask`` together with
    ``img_file``. For the first entry that yields a crop, the crop is passed
    through ``cropBlackBackground``, the mask and the crop are written under
    ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/masks`` and ``.../crops_seg``,
    and the crop is returned.

    Args:
        seg_result: Iterable of segmentation results.
        img_file: Image the results refer to.
        img_name: File name used for the saved mask and crop.

    Returns:
        The cropped image, or ``img_file`` unchanged when no entry produced
        a crop.
    """
    print("**************************** CROPPING_IMAGE **************************** ")
    for detection in seg_result:
        cropped, region_mask = generateMask(detection, img_file)
        if cropped is None:
            # This result produced no crop; try the next one.
            continue

        cropped = cropBlackBackground(cropped)

        output_root = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'])
        crops_dir = os.path.join(output_root, 'crops_seg')
        masks_dir = os.path.join(output_root, 'masks')

        # Make sure every output directory exists; a failure is only reported.
        try:
            for directory in ('runs', os.path.join('runs', 'segment'), output_root, crops_dir, masks_dir):
                Path(directory).mkdir(parents=True, exist_ok=True)
        except OSError as error:
            print(error)

        cv2.imwrite(os.path.join(masks_dir, img_name), region_mask)
        cv2.imwrite(os.path.join(crops_dir, img_name), cropped)
        return cropped

    return img_file

def enhance_image(croped_img, img_name):
    """Enhance a cropped image and save the result.

    The crop is passed to ``enhanceImage``. When that yields an image, it is
    written to ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/enhanced/<img_name>``.

    Args:
        croped_img: Cropped image to enhance, or ``None``.
        img_name: File name used for the saved enhanced image.

    Returns:
        The enhanced image, or ``None`` when ``croped_img`` is ``None`` or
        ``enhanceImage`` returned ``None``.
    """
    print("**************************** ENHANCE_IMAGE **************************** ")
    if croped_img is None:
        return None

    image = enhanceImage(croped_img)
    if image is None:
        # Nothing to persist. Previously the write below still ran in this
        # case and handed None to cv2.imwrite.
        return None

    enhanced_dir = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'enhanced')
    try:
        # parents=True creates the intermediate directories as well.
        Path(enhanced_dir).mkdir(parents=True, exist_ok=True)
    except OSError as error:
        print(error)

    cv2.imwrite(os.path.join(enhanced_dir, img_name), image)

    return image

def morphological_transform(image):
    """Upscale, dilate and sharpen an image.

    The image is resized by a factor of 2.7 horizontally and 3 vertically,
    dilated with a 2x2 kernel of ones, and then filtered with a 3x3 kernel
    whose centre is 9 and whose other entries are -1.

    Args:
        image: Image to process.

    Returns:
        The processed image.
    """
    print("**************************** APPLY_MORPHOLOGICAL_TRANSFORM **************************** ")
    upscaled = cv2.resize(image, None, fx=2.7, fy=3)
    dilated = cv2.dilate(upscaled, np.ones((2, 2), np.uint8))

    sharpening_kernel = np.array([
        [-1, -1, -1],
        [-1, 9, -1],
        [-1, -1, -1],
    ])
    # ddepth=-1 keeps the output at the same depth as the input.
    return cv2.filter2D(dilated, -1, sharpening_kernel)

def hoffman_transform(processed_img, original_img):
    """Rotate both images by the angle found on the processed image.

    ``hoffman_transformation`` is run on ``processed_img`` and returns a
    rotated image together with an angle; ``original_img`` is then rotated
    by that same angle with ``rotate``.
    NOTE(review): presumably this is a Hough-transform based skew correction;
    confirm in OCR.rotation_functions.

    Args:
        processed_img: Pre-processed image from which the angle is derived.
        original_img: Image that is rotated by the derived angle.

    Returns:
        A ``(rotated_image, original_img)`` tuple with both rotated images.
    """
    print("**************************** APPLY_HOFFMAN_TRANSFORM **************************** ")
    deskewed, skew_angle = hoffman_transformation(processed_img, True)
    return deskewed, rotate(original_img, skew_angle)

def pytesseract_rotate(rotated_image, original_img, img_name):
    """Apply ``pytesseractRotate`` and save the resulting image.

    When ``pytesseractRotate`` returns an image, it is written to
    ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/rotated_image/<img_name>``.

    Args:
        rotated_image: Image passed as first argument to ``pytesseractRotate``.
        original_img: Image passed as second argument to ``pytesseractRotate``.
        img_name: File name used for the saved image.

    Returns:
        ``img_name``, whether or not an image was written.
    """
    print("**************************** APPLY_PYTESSERACT_ROTATION **************************** ")
    rotated_image = pytesseractRotate(rotated_image, original_img, 1)
    if rotated_image is None:
        # Nothing to save; the name is still handed on to the next stage.
        return img_name

    output_root = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'])
    rotated_dir = os.path.join(output_root, 'rotated_image')

    # Make sure every output directory exists; a failure is only reported.
    try:
        for directory in ('runs', os.path.join('runs', 'segment'), output_root, rotated_dir):
            Path(directory).mkdir(parents=True, exist_ok=True)
    except OSError as error:
        print(error)

    cv2.imwrite(os.path.join(rotated_dir, img_name), rotated_image)

    return img_name

# Shared PaddleOCR engine, created on first use by _get_paddle_ocr().
_PADDLE_OCR_ENGINE = None


def _get_paddle_ocr():
    """Return the shared PaddleOCR engine, building it on the first call."""
    global _PADDLE_OCR_ENGINE
    if _PADDLE_OCR_ENGINE is None:
        # Constructing PaddleOCR downloads/loads the models, so do it once
        # instead of on every ocr() call.
        _PADDLE_OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang='en')
    return _PADDLE_OCR_ENGINE


def ocr(img_name):
    """Run PaddleOCR on the saved rotated image and write the text to disk.

    Reads ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/rotated_image/<img_name>``
    and writes the recognised text, one line per OCR result entry, to
    ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/ocr_label_data/<stem>.txt``.
    The text file is written even when nothing was recognised (it is then
    empty).

    Args:
        img_name: File name of the image inside the ``rotated_image`` folder.

    Returns:
        The name (not the full path) of the text file that was written.
    """
    print("**************************** APPLY_OCR **************************** ")
    output_root = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'])
    engine = _get_paddle_ocr()
    result = engine.ocr(os.path.join(output_root, 'rotated_image', img_name), cls=True)

    ocr_output_paddle = []
    for page in result or []:
        if page is None:
            # NOTE(review): PaddleOCR appears to yield None for an input in
            # which no text was detected; skip it instead of relying on a
            # swallowed TypeError.
            continue
        try:
            # Each detection is indexed as line[1][0] to obtain its text.
            ocr_output_paddle.append(" ".join(line[1][0] for line in page))
        except (TypeError, IndexError) as error:
            # Keep the stage best-effort, but report malformed entries.
            print(error)

    # Always make sure the output directory exists: the file below is written
    # regardless of whether OCR produced a result.
    label_dir = os.path.join(output_root, 'ocr_label_data')
    try:
        Path(label_dir).mkdir(parents=True, exist_ok=True)
    except OSError as error:
        print(error)

    # NOTE(review): everything after the first '.' is dropped, so
    # "a.b.png" becomes "a.txt"; kept as-is for compatibility with consumers.
    file_name = img_name.split('.')[0] + '.txt'
    with open(os.path.join(label_dir, file_name), "w") as f:
        f.write("\n".join(ocr_output_paddle))

    return file_name
def ner(file_name):
    """Run NER inference on the text stored in an OCR output file.

    Reads ``runs/segment/<MAIN_FLOW_INFERENCE_FOLDER>/ocr_label_data/<file_name>``,
    prints its content and passes it to ``inference``.

    Args:
        file_name: Name of the text file inside the ``ocr_label_data`` folder.

    Returns:
        Whatever ``inference`` returns for the file's text.
    """
    print("**************************** APPLY_NER **************************** ")
    ocr_file = os.path.join('runs', 'segment', path['MAIN_FLOW_INFERENCE_FOLDER'], 'ocr_label_data', file_name)
    # The file is only read, so open it read-only; 'r+' would needlessly
    # require write permission on it.
    with open(ocr_file, 'r') as f:
        sent = f.read()
    print(sent)

    return inference(sent)