Spaces:

nampham1106
/

vifoodvision

Sleeping

App Files Files Community

nampham1106 committed on Dec 14, 2023

Commit

1d31025

1 Parent(s): a49cdc3

deploy huggingface cloud

Browse files

Files changed (7) hide show

app.py +51 -0
examples/breadbread.jpeg +0 -0
examples/bundaumamtombundaumamtom.jpeg +0 -0
examples/pho.jpeg +0 -0
model.py +31 -0
models/pretrained_effnetb3_vietnamese_food.pth +3 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import gradio as gr
+import os
+import torch
+from pathlib import Path
+from model import create_effnetb3_model
+from timeit import default_timer as timer
+from typing import Tuple, Dict
+class_names = ['Banh beo', 'Banh bot loc', 'Banh can', 'Banh canh', 'Banh chung','Banh cuon', 'Banh duc', 'Banh gio','Banh khot',
+ 'Banh mi','Banh pia', 'Banh tet', 'Banh trang nuong', 'Banh xeo', 'Bun bo Hue', 'Bun dau mam tom','Bun mam', 'Bun rieu', 'Bun thit nuong',
+ 'Ca kho to', 'Canh chua', 'Cao lau', 'Chao long', 'Com tam', 'Goi cuon', 'Hu tieu', 'Mi quang', 'Nem chua', 'Pho', 'Xoi xeo']
+effnetb3, effnetb3_transforms = create_effnetb3_model(num_classes=30)
+effnetb3.load_state_dict(
+    torch.load(
+        f= "./models/pretrained_effnetb3_vietnamese_food.pth",
+        map_location=torch.device("cpu")
+    )
+)
+def predict(img) -> Tuple[Dict, float]:
+  start_time = timer()
+  img = effnetb3_transforms(img).unsqueeze(0)
+  effnetb3.eval()
+  with torch.inference_mode():
+    pred_probs = torch.softmax(effnetb3(img), dim = 1)
+  pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}
+  pred_time = round(timer() - start_time, 4)
+  return pred_labels_and_probs, pred_time
+title = "Vietnamese food vision"
+description = "An EfficientNetB3 feature extractor computer vision model"
+example_list = [["examples/" + example] for example in os.listdir("examples")]
+demo = gr.Interface(fn=predict,
+                    inputs=gr.Image(type="pil"),
+                    outputs=[gr.Label(num_top_classes=3, label="Prediction"),
+                             gr.Number(label="Prediction time (s)")],
+                    examples=example_list,
+                    title=title,
+                    description=description)
+demo.launch(share=True)

examples/breadbread.jpeg ADDED Viewed

examples/bundaumamtombundaumamtom.jpeg ADDED Viewed

examples/pho.jpeg ADDED Viewed

model.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import torch
+import torchvision
+from torch import nn
+from torchvision.models._api import WeightsEnum
+from torch.hub import load_state_dict_from_url
+def get_state_dict(self, *args, **kwargs):
+    kwargs.pop("check_hash")
+    return load_state_dict_from_url(self.url, *args, **kwargs)
+WeightsEnum.get_state_dict = get_state_dict
+def create_effnetb3_model(num_classes:int=30,
+                          seed:int=42):
+  weights = torchvision.models.EfficientNet_B3_Weights.DEFAULT
+  transforms = weights.transforms()
+  model = torchvision.models.efficientnet_b3(weights=weights)
+  for param in model.parameters():
+    param.requires_grad = False
+  torch.manual_seed(seed)
+  model.classifier = nn.Sequential(
+      nn.Dropout(p=0.3, inplace=True),
+      nn.Linear(in_features=1536, out_features=128),
+      nn.ReLU(),
+      nn.Linear(in_features=128,
+                out_features=num_classes),
+  )
+  return model, transforms

models/pretrained_effnetb3_vietnamese_food.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31392fa55dc44551073a938d2941d6baf99ae1ce612168b1e73be4ec84ab61f4
+size 44159481

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch==2.1.0+cpu
+torchvision==0.16.0+cpu
+gradio==4.7.1