Videoclassifier-ZEROSHOT

Runtime error

App Files Files Community

Shad0ws

fcakyon committed on Dec 22, 2022

Commit

15343a3

0 Parent(s):

Duplicate from fcakyon/zero-shot-video-classification

Browse files

Co-authored-by: Fatih <[email protected]>

Files changed (5) hide show

.gitattributes +31 -0
README.md +12 -0
app.py +168 -0
requirements.txt +6 -0
utils.py +51 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,31 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Zero Shot Video Classification
+emoji: 🔥
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 3.12.0
+app_file: app.py
+pinned: true
+license: apache-2.0
+duplicated_from: fcakyon/zero-shot-video-classification
+---

app.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import torch
+import gradio as gr
+from transformers import AutoProcessor, AutoModel
+from utils import (
+    convert_frames_to_gif,
+    download_youtube_video,
+    get_num_total_frames,
+    sample_frames_from_video_file,
+)
+FRAME_SAMPLING_RATE = 4
+DEFAULT_MODEL = "microsoft/xclip-base-patch16-zero-shot"
+VALID_ZEROSHOT_VIDEOCLASSIFICATION_MODELS = [
+    "microsoft/xclip-base-patch32",
+    "microsoft/xclip-base-patch16-zero-shot",
+    "microsoft/xclip-base-patch16-kinetics-600",
+    "microsoft/xclip-large-patch14ft/xclip-base-patch32-16-frames",
+    "microsoft/xclip-large-patch14",
+    "microsoft/xclip-base-patch16-hmdb-4-shot",
+    "microsoft/xclip-base-patch16-16-frames",
+    "microsoft/xclip-base-patch16-hmdb-2-shot",
+    "microsoft/xclip-base-patch16-ucf-2-shot",
+    "microsoft/xclip-base-patch16-ucf-8-shot",
+    "microsoft/xclip-base-patch16",
+    "microsoft/xclip-base-patch16-hmdb-8-shot",
+    "microsoft/xclip-base-patch16-hmdb-16-shot",
+    "microsoft/xclip-base-patch16-ucf-16-shot",
+]
+processor = AutoProcessor.from_pretrained(DEFAULT_MODEL)
+model = AutoModel.from_pretrained(DEFAULT_MODEL)
+examples = [
+    [
+        "https://www.youtu.be/l1dBM8ZECao",
+        "sleeping dog,cat fight club,birds of prey",
+    ],
+    [
+        "https://youtu.be/VMj-3S1tku0",
+        "programming course,eating spaghetti,playing football",
+    ],
+    [
+        "https://youtu.be/BRw7rvLdGzU",
+        "game of thrones,the lord of the rings,vikings",
+    ],
+]
+def select_model(model_name):
+    global processor, model
+    processor = AutoProcessor.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+def predict(youtube_url_or_file_path, labels_text):
+    if youtube_url_or_file_path.startswith("http"):
+        video_path = download_youtube_video(youtube_url_or_file_path)
+    else:
+        video_path = youtube_url_or_file_path
+    # rearrange sampling rate based on video length and model input length
+    num_total_frames = get_num_total_frames(video_path)
+    num_model_input_frames = model.config.vision_config.num_frames
+    if num_total_frames < FRAME_SAMPLING_RATE * num_model_input_frames:
+        frame_sampling_rate = num_total_frames // num_model_input_frames
+    else:
+        frame_sampling_rate = FRAME_SAMPLING_RATE
+    labels = labels_text.split(",")
+    frames = sample_frames_from_video_file(
+        video_path, num_model_input_frames, frame_sampling_rate
+    )
+    gif_path = convert_frames_to_gif(frames, save_path="video.gif")
+    inputs = processor(
+        text=labels, videos=list(frames), return_tensors="pt", padding=True
+    )
+    # forward pass
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = outputs.logits_per_video[0].softmax(dim=-1).cpu().numpy()
+    label_to_prob = {}
+    for ind, label in enumerate(labels):
+        label_to_prob[label] = float(probs[ind])
+    return label_to_prob, gif_path
+app = gr.Blocks()  # root container of the demo UI
+with app:  # components created inside this block make up the page
+    gr.Markdown(  # centered page title
+        "# **<p align='center'>Zero-shot Video Classification with 🤗 Transformers</p>**"
+    )
+    gr.Markdown(  # author links, written as raw HTML
+        """
+        <p style='text-align: center'>
+        Follow me for more!
+        <br> <a href='https://twitter.com/fcakyon' target='_blank'>twitter</a> | <a href='https://github.com/fcakyon' target='_blank'>github</a> | <a href='https://www.linkedin.com/in/fcakyon/' target='_blank'>linkedin</a> | <a href='https://fcakyon.medium.com/' target='_blank'>medium</a>
+        </p>
+        """
+    )
+    with gr.Row():  # one row with three columns: inputs, clip preview, predictions
+        with gr.Column():  # column 1: model picker plus the two input tabs
+            model_names_dropdown = gr.Dropdown(
+                choices=VALID_ZEROSHOT_VIDEOCLASSIFICATION_MODELS,
+                label="Model:",
+                show_label=True,
+                value=DEFAULT_MODEL,  # matches the checkpoint loaded at import time
+            )
+            model_names_dropdown.change(fn=select_model, inputs=model_names_dropdown)  # on change, select_model receives the chosen model name
+            with gr.Tab(label="Youtube URL"):  # tab 1: classify a video given by URL
+                gr.Markdown(
+                    "### **Provide a Youtube video URL and a list of labels separated by commas**"
+                )
+                youtube_url = gr.Textbox(label="Youtube URL:", show_label=True)
+                youtube_url_labels_text = gr.Textbox(
+                    label="Labels Text:", show_label=True
+                )
+                youtube_url_predict_btn = gr.Button(value="Predict")
+            with gr.Tab(label="Local File"):  # tab 2: classify an uploaded video
+                gr.Markdown(
+                    "### **Upload a video file and provide a list of labels separated by commas**"
+                )
+                video_file = gr.Video(label="Video File:", show_label=True)  # its value is handed to predict as the first argument (see click wiring below)
+                local_video_labels_text = gr.Textbox(
+                    label="Labels Text:", show_label=True
+                )
+                local_video_predict_btn = gr.Button(value="Predict")
+        with gr.Column():  # column 2: GIF of the frames that were sampled
+            video_gif = gr.Image(
+                label="Input Clip",
+                show_label=True,
+            )
+        with gr.Column():  # column 3: label -> probability output
+            predictions = gr.Label(label="Predictions:", show_label=True)
+    gr.Markdown("**Examples:**")
+    gr.Examples(
+        examples,  # rows of [youtube url, labels text]
+        [youtube_url, youtube_url_labels_text],  # inputs filled from an example row
+        [predictions, video_gif],  # outputs shown for an example row
+        fn=predict,
+        cache_examples=True,  # NOTE(review): presumably runs predict on every example to precompute outputs, which needs network access — confirm against the gradio docs
+    )
+    youtube_url_predict_btn.click(  # Youtube tab: URL + labels -> predict
+        predict,
+        inputs=[youtube_url, youtube_url_labels_text],
+        outputs=[predictions, video_gif],
+    )
+    local_video_predict_btn.click(  # Local File tab: uploaded video + labels -> predict
+        predict,
+        inputs=[video_file, local_video_labels_text],
+        outputs=[predictions, video_gif],
+    )
+    gr.Markdown(  # footer credits
+        """
+        \n Demo created by: <a href=\"https://github.com/fcakyon\">fcakyon</a>.
+        <br> Based on this <a href=\"https://huggingface.co/docs/transformers/main/model_doc/xclip">HuggingFace model</a>.
+        """
+    )
+app.launch()  # runs at module level; there is no __main__ guard

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+torch
+decord
+pytube
+imageio
+transformers @ git+https://github.com/huggingface/transformers.git@799cea64ac1029d66e9e58f18bc6f47892270723

utils.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from pathlib import Path
+from pytube import YouTube
+import numpy as np
+from decord import VideoReader
+import imageio
+def download_youtube_video(url: str):
+    yt = YouTube(url)
+    streams = yt.streams.filter(file_extension="mp4")
+    file_path = streams[0].download()
+    return file_path
+def sample_frames_from_video_file(
+    file_path: str, num_frames: int = 16, frame_sampling_rate=1
+):
+    videoreader = VideoReader(file_path)
+    videoreader.seek(0)
+    # sample frames
+    start_idx = 0
+    end_idx = num_frames * frame_sampling_rate - 1
+    indices = np.linspace(start_idx, end_idx, num=num_frames, dtype=np.int64)
+    frames = videoreader.get_batch(indices).asnumpy()
+    return frames
+def get_num_total_frames(file_path: str):
+    videoreader = VideoReader(file_path)
+    videoreader.seek(0)
+    return len(videoreader)
+def convert_frames_to_gif(frames, save_path: str = "frames.gif"):
+    converted_frames = frames.astype(np.uint8)
+    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
+    imageio.mimsave(save_path, converted_frames, fps=8)
+    return save_path
+def create_gif_from_video_file(
+    file_path: str,
+    num_frames: int = 16,
+    frame_sampling_rate: int = 1,
+    save_path: str = "frames.gif",
+):
+    frames = sample_frames_from_video_file(file_path, num_frames, frame_sampling_rate)
+    return convert_frames_to_gif(frames, save_path)