import io

import gradio as gr
import numpy
import requests
import torch
from PIL import Image
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    DetrForSegmentation,
    DetrImageProcessor,
)
from transformers.models.detr.feature_extraction_detr import rgb_to_id
# Sample COCO val2017 picture used as the input image for the script.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Bound the wait and surface HTTP errors explicitly, so a failed download
# raises here instead of PIL failing later on an error page or a hung socket.
response = requests.get(url, timeout=30)
response.raise_for_status()

# `response.content` is the fully downloaded, content-decoded payload; wrapping
# it in BytesIO gives PIL a seekable in-memory file to read the image from.
image = Image.open(io.BytesIO(response.content))
# --- Disabled experiment: DETR panoptic segmentation (kept for reference) ---
# NOTE(review): "facebook/post_process_panoptic_segmentation" looks like a
# post-processing method name rather than a Hub checkpoint id; the DETR
# panoptic checkpoint is presumably "facebook/detr-resnet-50-panoptic" --
# confirm before re-enabling this code.
#
# feature_extractor = DetrImageProcessor.from_pretrained("facebook/post_process_panoptic_segmentation")
# model = DetrForSegmentation.from_pretrained("facebook/post_process_panoptic_segmentation")
#
# # prepare image for the model
# inputs = feature_extractor(images=image, return_tensors="pt")
#
# # forward pass
# outputs = model(**inputs)
#
# # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
# processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
# result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
#
# # the segmentation is stored in a special-format png
# panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
# panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
# # retrieve the ids corresponding to each mask
# panoptic_seg_id = rgb_to_id(panoptic_seg)
# Checkpoint shared by the image processor and the classifier so the two
# cannot drift apart.
MODEL_ID = "google/mobilenet_v2_1.0_224"

# Load the preprocessing pipeline and the classification model for MODEL_ID.
preprocessor = AutoImageProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageClassification.from_pretrained(MODEL_ID)

# Convert the PIL image into the PyTorch tensors the model expects.
inputs = preprocessor(images=image, return_tensors="pt")

# Inference only: skip autograd bookkeeping so no gradient graph is built.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# Pick the highest-scoring class and map its index to a readable label.
# NOTE(review): the Hugging Face MobileNetV2 docs describe 1001 outputs (the
# 1000 ImageNet classes plus a "background" class at index 0) -- confirm.
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])
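# Disabled: display the image in a Gradio UI.
# NOTE(review): Gradio components such as gr.Image do not appear to expose
# launch(); wrap the component in gr.Interface / gr.Blocks before re-enabling.
# gr.Image(image).launch()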