Spaces:

nampham1106
/

vifoodvision

Sleeping

App Files Files Community

vifoodvision / app.py

nampham1106

deploy huggingface cloud

1d31025 over 1 year ago

raw

history blame contribute delete

1.78 kB

	import gradio as gr
	import os
	import torch

	from pathlib import Path

	from model import create_effnetb3_model
	from timeit import default_timer as timer
	from typing import Tuple, Dict

	# Human-readable dish names; predict() pairs the label at position i with
	# the i-th model output, so the order of this list must not change.
	class_names = [
	    "Banh beo",
	    "Banh bot loc",
	    "Banh can",
	    "Banh canh",
	    "Banh chung",
	    "Banh cuon",
	    "Banh duc",
	    "Banh gio",
	    "Banh khot",
	    "Banh mi",
	    "Banh pia",
	    "Banh tet",
	    "Banh trang nuong",
	    "Banh xeo",
	    "Bun bo Hue",
	    "Bun dau mam tom",
	    "Bun mam",
	    "Bun rieu",
	    "Bun thit nuong",
	    "Ca kho to",
	    "Canh chua",
	    "Cao lau",
	    "Chao long",
	    "Com tam",
	    "Goi cuon",
	    "Hu tieu",
	    "Mi quang",
	    "Nem chua",
	    "Pho",
	    "Xoi xeo",
	]

	# Location of the saved state dict and the device its tensors are mapped to.
	_weights_file = "./models/pretrained_effnetb3_vietnamese_food.pth"
	_cpu_device = torch.device("cpu")

	# create_effnetb3_model returns the network together with the transform
	# callable that predict() applies to each incoming image.
	effnetb3, effnetb3_transforms = create_effnetb3_model(num_classes=30)

	# Restore the saved parameters into the freshly built model.
	effnetb3.load_state_dict(torch.load(f=_weights_file, map_location=_cpu_device))

	def predict(img) -> Tuple[Dict, float]:
	    """Classify one image and report how long the prediction took.

	    Args:
	        img: The image to classify. It is passed directly to
	            ``effnetb3_transforms``; the Gradio interface in this file
	            declares its input as ``gr.Image(type="pil")``.

	    Returns:
	        A ``(scores, seconds)`` tuple. ``scores`` maps every entry of
	        ``class_names`` to its softmax probability as a Python float;
	        ``seconds`` is the elapsed time measured with
	        ``timeit.default_timer`` and rounded to 4 decimal places.

	    Raises:
	        ValueError: If ``img`` is ``None``.
	    """
	    # Fail early with a clear message rather than letting None reach the
	    # transform pipeline.
	    if img is None:
	        raise ValueError("predict() needs an image, but received None")

	    start_time = timer()

	    # unsqueeze(0) adds the leading batch dimension before the forward pass.
	    batch = effnetb3_transforms(img).unsqueeze(0)

	    effnetb3.eval()
	    with torch.inference_mode():
	        pred_probs = torch.softmax(effnetb3(batch), dim=1)

	    # Only one image was submitted, so row 0 holds all class probabilities.
	    first_row = pred_probs[0]
	    pred_labels_and_probs = {
	        label: float(first_row[index])
	        for index, label in enumerate(class_names)
	    }

	    pred_time = round(timer() - start_time, 4)

	    return pred_labels_and_probs, pred_time

	# Heading and blurb passed to the interface.
	title = "Vietnamese food vision"
	description = "An EfficientNetB3 feature extractor computer vision model"

	# One single-element row per entry found in the "examples" directory.
	example_list = [[f"examples/{name}"] for name in os.listdir("examples")]

	# Components are created in the same order as before: input image first,
	# then the label output (top 3 classes) and the timing output.
	_image_input = gr.Image(type="pil")
	_prediction_outputs = [
	    gr.Label(num_top_classes=3, label="Prediction"),
	    gr.Number(label="Prediction time (s)"),
	]

	# Wire predict() to the components; its two return values fill the two
	# outputs in order.
	demo = gr.Interface(
	    fn=predict,
	    inputs=_image_input,
	    outputs=_prediction_outputs,
	    examples=example_list,
	    title=title,
	    description=description,
	)

	# Start the app at import/run time, exactly as the original script did.
	demo.launch(share=True)