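"""Gradio Space that extracts structured attributes from driver's licenses:
a YOLOv7 detector ("best.pt") crops the document out of the upload, then a
fine-tuned Donut model ("ClipAI/donut-finetuned-crop-licencia") parses the
crop into JSON."""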
import gradio as gr
import re
import os
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
from PIL import Image
from pathlib import Path
import multiprocessing
import gradio_theme
from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
import cv2
# sudo apt-get install poppler-utils  # required by pdf2image
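# On Hugging Face Spaces the same dependency can be installed by listing
# "poppler-utils" in a packages.txt file at the repo root (assumption: this
# Space relies on that mechanism rather than a manual apt-get install).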
key = str(os.environ.get('key'))  # Hugging Face access token, read from the Space's secrets
desired_num_threads = multiprocessing.cpu_count()
torch.set_num_threads(desired_num_threads)  # let torch use every available CPU core

def check_image(image):
    """Return a list of PIL images: the pages if the upload is a PDF, else the image itself."""
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:  # not a PDF (or conversion failed): treat the upload as a plain image
        return [Image.open(image)]
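# Usage sketch (assumption: `uploaded_file` is a Gradio file upload exposing .name):
#   pages = check_image(uploaded_file)  # -> list of PIL.Image, one per PDF page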

def crop(files='',             # input images
         weights='yolov7.pt',  # model.pt path(s)
         classes=None,         # filter by class: --class 0, or --class 0 2 3
         imgsz=640,            # inference size (pixels)
         device='',            # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,      # object confidence threshold
         iou_thres=0.45,       # IoU threshold for NMS
         augment=False,        # augmented inference
         agnostic_nms=False):  # class-agnostic NMS
    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Get class names
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once to warm up
    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to img0s size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                # Collect one crop per detection
                for *xyxy, conf, cls in reversed(det):
                    # crop the original image to the detected bounding box
                    x1, y1, x2, y2 = (int(v) for v in xyxy)
                    cropobj_bgr = img0s[y1:y2, x1:x2]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)  # OpenCV is BGR; PIL expects RGB
                    clase = names[int(cls)]  # human-readable class name (not used further here)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])
    return list_cropobj
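# Usage sketch (illustrative path; label semantics as used below): run the
# detector over one image and keep only the crops labelled 1 (licenses):
#   crops = crop(weights="best.pt", files=[Image.open("scan.jpg")], device="cpu")
#   licenses = [im for im, label in crops if label == 1]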

def get_attributes(input_img):
    access_token = key
    processor = DonutProcessor.from_pretrained("ClipAI/donut-finetuned-crop-licencia", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("ClipAI/donut-finetuned-crop-licencia", use_auth_token=access_token)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)
    print(device)
    images = [Image.fromarray(input_img)]
    images = crop(weights="best.pt", files=images, device=device)
    # label 0 is the cedula (ID card); filtering on label 1, as here, selects the license crop
    image_cedula = [img[0] for img in images if img[1] == 1][0]
    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    print(pixel_values.size())
    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate the output sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # turn the decoded sequence into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)
    return seq
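# token2json turns the decoded token string into a nested dict (the fields the
# Donut model was fine-tuned to emit), which the "json" output component below
# renders directly.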

# Earlier prototype interface, kept for reference:
#demo = gr.Interface(get_attributes, "file", "label")
#demo.launch()

def create_model():
    demo = gr.Interface(get_attributes,
                        "image",
                        "json",
                        examples=[["examples/license1.jpg"], ["examples/licencia2.jpg"]],
                        enable_queue=False,
                        theme=gradio_theme.theme)
    return demo


if __name__ == '__main__':
    demo = create_model()
    demo.launch()
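# To run locally (assumption: the YOLOv7 helper modules models/ and utils/,
# best.pt, gradio_theme.py, and the examples/ folder sit next to this file):
#   python app.py   # then open the local URL Gradio prints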