File size: 1,701 Bytes
93267bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
import os
import torch

from model import create_vit
from timeit import default_timer as timer
from typing import Tuple, Dict

# Scene labels; predict() pairs class_names[i] with the model's i-th output.
class_names = [
    "buildings",
    "forest",
    "glacier",
    "mountain",
    "sea",
    "street",
]

# Build the ViT model together with its preprocessing transforms, passing the
# number of labels as num_classes.
vit_model, vit_transforms = create_vit(
    num_classes=len(class_names),
    seed=42,
)

# Restore the saved weights. map_location remaps every stored tensor to the
# CPU; the loaded state dict is passed straight through rather than bound to a
# module-level name.
vit_model.load_state_dict(
    torch.load(
        f="pretrained_vit_feature_extractor_scene_recognition.pth",
        map_location=torch.device("cpu"),
    )
)

def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify one scene image and report how long the prediction took.

    Args:
        img: Image to classify, as delivered by the Gradio image input
            (configured with ``type="pil"`` in this file). ``None`` is
            rejected; presumably that is what Gradio passes when the user
            submits without an image.

    Returns:
        A pair ``(pred_labels_and_probs, pred_time)``: a dict mapping every
        entry of ``class_names`` to its softmax probability, and the elapsed
        time in seconds rounded to 5 decimal places.

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    if img is None:
        # Fail with a readable message instead of an opaque error raised from
        # inside the transform pipeline.
        raise gr.Error("Please provide an image to classify.")

    start_timer = timer()

    # Preprocess, then add a leading dimension of size 1 before the forward
    # pass.
    batch = vit_transforms(img).unsqueeze(0)

    vit_model.eval()
    with torch.inference_mode():
        pred_prob = torch.softmax(vit_model(batch), dim=1)

    # Pair each label with the probability at the same index of row 0.
    pred_labels_and_probs = {
        name: float(pred_prob[0][i]) for i, name in enumerate(class_names)
    }

    pred_time = round(timer() - start_timer, 5)

    return pred_labels_and_probs, pred_time

# Text passed to the Gradio interface below.
title = "Scene Recognition: Intel Image Classification"
description = "A ViT feature extractor Computer Vision model to classify images of scenes from 1 out of 6 classes."
article = "Access project repository at [GitHub](https://github.com/Ammar2k/intel_image_classification)"

# One single-element row per entry in the examples directory. os.listdir()
# returns names in arbitrary order, so sort them to keep the examples in a
# stable order across runs and machines.
example_list = [
    [os.path.join("examples", example)]
    for example in sorted(os.listdir("examples"))
]

# Wire predict() to one image input and two outputs that match its return
# value: the label/probability dict and the elapsed seconds.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=6, label="Predictions"),
        gr.Number(label="Prediction time(s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

demo.launch()