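"""Gradio Space that extracts structured attributes from license documents.

Uploads (images or PDFs) are cropped to the document region with a YOLOv7
detector, then parsed into JSON by a Donut model fine-tuned on license crops.
"""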
import gradio as gr
import re
import os
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
from PIL import Image
from pathlib import Path
import multiprocessing
import gradio_theme
from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
import cv2
# sudo apt-get install poppler-utils  # required by pdf2image
# Hugging Face access token for the model repo, read from the environment
key = str(os.environ.get('key'))
# Use all available CPU cores for torch ops
desired_num_threads = multiprocessing.cpu_count()
torch.set_num_threads(desired_num_threads)
def check_image(image):
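    """Return a list of PIL images: the rendered pages if `image` is a PDF,
    otherwise the uploaded image itself."""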
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:
        # not a PDF: open the upload directly as an image
        return [Image.open(image)]
def crop(files='',             # input images
         weights='yolov7.pt',  # model.pt path(s)
         classes=None,         # filter by class: --class 0, or --class 0 2 3
         imgsz=640,            # inference size (pixels)
         device='',            # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,      # object confidence threshold
         iou_thres=0.45,       # IoU threshold for NMS
         augment=False,        # augmented inference
         agnostic_nms=False):  # class-agnostic NMS
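    """Run YOLOv7 inference on `files` and return a list of
    [cropped PIL.Image, class index] pairs, one per detection."""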
    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once to warm up

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to img0s size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                # Crop each detected box out of the original image
                for *xyxy, conf, cls in reversed(det):
                    cropobj_bgr = img0s[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])

    return list_cropobj
def get_attributes(input_img):
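    """Gradio handler: locate the license in `input_img` with the YOLOv7
    weights in "best.pt", then parse the crop into JSON with Donut."""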
    access_token = key
    processor = DonutProcessor.from_pretrained("ClipAI/donut-finetuned-crop-licencia", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("ClipAI/donut-finetuned-crop-licencia", use_auth_token=access_token)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)
    print(device)

    images = [Image.fromarray(input_img)]
    images = crop(weights="best.pt", files=images, device=device)
    # class 0 is the 'cedula' (ID card) label; filtering on class 1 keeps the license crops
    image_cedula = [img[0] for img in images if img[1] == 1][0]

    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    print(pixel_values.size())

    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate the output sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # turn the decoded sequence into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)
    return seq
#demo = gr.Interface(get_attributes, "file", "label")
#demo.launch()
def create_model():
    demo = gr.Interface(get_attributes,
                        "image",
                        "json",
                        examples=[["examples/license1.jpg"], ["examples/licencia2.jpg"]],
                        enable_queue=False,
                        theme=gradio_theme.theme
                        )
    return demo

if __name__ == '__main__':
    demo = create_model()
    demo.launch()