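"""Gradio demo that extracts the fields of a Colombian ID card (cedula) as JSON.

Pipeline: a YOLOv7 detector ("best.pt") crops the ID card out of the uploaded
image, then a fine-tuned Donut model ("ClipAI/donut-finetuned-crop-cedula")
parses the crop into structured fields.
"""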
import gradio as gr
import re
import os
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
from PIL import Image
from pathlib import Path
import multiprocessing
import gradio_theme

from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
import cv2
# Note: pdf2image requires poppler (e.g., sudo apt-get install poppler-utils)

# Hugging Face access token for the private model repo, read from the `key` env var
key = str(os.environ.get('key'))

# Use every available CPU core for torch ops (helps CPU-only inference)
desired_num_threads = multiprocessing.cpu_count()
torch.set_num_threads(desired_num_threads)

def check_image(image):
    """Rasterize `image` if it is a PDF; otherwise open it as a plain image."""
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:
        return [Image.open(image)]

def crop(files='',              # input images
         weights='yolov7.pt',   # model.pt path(s)
         classes=None,          # filter by class: --class 0, or --class 0 2 3
         imgsz=640,             # inference size (pixels)
         device='',             # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,       # object confidence threshold
         iou_thres=0.45,        # IOU threshold for NMS
         augment=False,         # augmented inference
         agnostic_nms=False):   # class-agnostic NMS

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files = files, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    
    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():   # Calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
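        # (keeps the highest-confidence box among overlapping detections;
        # conf_thres drops weak boxes, iou_thres sets the overlap cutoff)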
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to img0s size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Crop the detection out of the original (BGR) image and convert to RGB
                    cropobj_bgr = img0s[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])
                    
    return list_cropobj
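
# Usage sketch (hypothetical): run the detector directly and keep only the
# ID-card crops (class 0):
#   crops = crop(weights="best.pt", files=[Image.open("examples/cedula1.jpg")])
#   cedulas = [im for im, cls in crops if cls == 0]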


def get_attributes(input_img):
    access_token = key
    # Note: the processor and model are re-loaded on every call; caching them at
    # module level would avoid the repeated initialization.
    processor = DonutProcessor.from_pretrained("ClipAI/donut-finetuned-crop-cedula", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("ClipAI/donut-finetuned-crop-cedula", use_auth_token=access_token)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    model.eval()
    model.to(device)
    print(device)
    
    # Gradio passes the upload as a numpy array; for file/PDF inputs,
    # check_image(input_img) could be used instead
    images = [Image.fromarray(input_img)]
    images = crop(weights="best.pt", files=images, device=device)

    # Class label 0 is "cedula" (ID card); label 1 would select driver's licenses instead.
    # NOTE: this raises IndexError if no ID card is detected.
    image_cedula = [img[0] for img in images if img[1] == 0][0]
    
    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    print(pixel_values.size())
    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"  # task token the decoder is seeded with (CORD-v2 format)
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )
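    # (num_beams=1 -> greedy decoding; generation stops at EOS or max_length)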

    # turn into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)

    return seq
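
# Example (hypothetical): run the pipeline outside Gradio
#   import numpy as np
#   result = get_attributes(np.array(Image.open("examples/cedula1.jpg")))
#   print(result)  # dict of fields extracted from the ID card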

#demo = gr.Interface(get_attributes, "file", "label")
#demo.launch()

def create_model():
    demo = gr.Interface(get_attributes,
                        "image",
                        "json",
                        examples=[["examples/cedula1.jpg"],
                                  ["examples/cedula4.jpg"],
                                  ["examples/cedula6.jpeg"]],
                        enable_queue=False,
                        theme=gradio_theme.theme
                        )
    return demo



def create_model2():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', label='Input Image')
                submit = gr.Button("Submit")
            with gr.Column():
                output_text = gr.JSON(label='Result')

        submit.click(fn=get_attributes,
                         inputs=input_image,
                         outputs=output_text)
            
    return demo


if __name__ == '__main__':
    demo = create_model()
    demo.launch()