# VGGSoundDoppler / app.py
# Last commit 46e5961 by bpiyush: "Pushes the app file"
"""Demo to show clips from VGGSound with (possible) Doppler effect."""
import os
from os.path import join, exists, dirname, abspath, basename
import json
from glob import glob
from tqdm import tqdm
import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
# from moviepy.video.io.VideoFileClip import VideoFileClip
import warnings
# Silence every warning category so library warnings are not emitted
# while the app runs.
warnings.simplefilter("ignore")

# Absolute path of this script, and the directory two levels above it.
curr_filepath = os.path.abspath(__file__)
repo_path = os.path.dirname(os.path.dirname(curr_filepath))
def make_grid(cols,rows):
    """Lay out a grid of Streamlit column containers.

    Creates `cols` containers, one after another, and splits each of them
    with `st.columns(rows)`.

    NOTE: the parameter names are the reverse of the visual layout --
    `cols` is the number of stacked bands and `rows` is the number of
    side-by-side cells inside each band.

    Returns:
        A list of length `cols`; element `i` is whatever `st.columns(rows)`
        returned for band `i`, so a cell is addressed as `grid[i][j]`.
    """
    bands = []
    for _ in range(cols):
        # Each band lives in its own container so bands stack vertically.
        with st.container():
            bands.append(st.columns(rows))
    return bands
# Labels of the clips to keep: classes in which a (possible) Doppler effect
# can be heard. Candidate classes that were considered but left out stay
# listed as comments together with the reason, where one was given.
doppler_classes = [
    "airplane",
    "ambulance siren",
    # "race car, auto racing",  # Typically captured from within the car
    "subway, metro, underground",
    "car passing by",
    # "motorboat, speedboat acceleration",  # Typically captured from within the boat
    "railroad car, train wagon",
    # "helicopter",
    # "driving snowmobile",  # Typically captured from within the snowmobile
    "airplane flyby",
]
# Layout of the clip grid; the number of sampled clips follows from it.
GRID_ROWS = 3
GRID_COLS = 3
NUM_CLIPS = GRID_ROWS * GRID_COLS

# Size in pixels of each embedded YouTube player.
VIDEO_WIDTH = 360
VIDEO_HEIGHT = 240

# Every clip is treated as lasting this many seconds from its start time.
CLIP_SECONDS = 10.

CSV_PATH = "./data/vggsound.csv"


def load_doppler_clips(csv_path, classes):
    """Read the clip CSV and keep only the rows whose label is in `classes`.

    Args:
        csv_path: Anything `pandas.read_csv` accepts (path or file-like).
        classes: Collection of label strings to keep.

    Returns:
        A DataFrame with the columns `video_id`, `start_seconds`, `label`,
        `split` and a derived `end_seconds` (`start_seconds` plus
        `CLIP_SECONDS`), restricted to the requested labels.
    """
    df = pd.read_csv(csv_path)
    # NOTE(review): read_csv consumes the first line as a header, which is
    # then overwritten below. If the CSV ships without a header row, the
    # first clip is silently dropped -- confirm, and pass header=None if so.
    df.columns = ["video_id", "start_seconds", "label", "split"]
    df["end_seconds"] = df["start_seconds"] + CLIP_SECONDS
    return df[df["label"].isin(classes)]


def sample_clip_indices(total, count):
    """Draw up to `count` distinct random row positions from `range(total)`.

    Sampling is done without replacement so the same clip never shows up
    twice in the grid, and the request is clamped to `total` so a small (or
    empty) table does not raise.

    Returns:
        A 1-D integer array of length `min(count, total)` (never negative).
    """
    count = max(0, min(count, total))
    if count == 0:
        return np.empty(0, dtype=int)
    return np.random.choice(total, size=count, replace=False)


def build_embed_url(video_id, start, end):
    """Return the YouTube embed URL that plays `video_id` from `start` to `end`.

    `start` and `end` are truncated to whole seconds.
    """
    return f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"


def main():
    """Render the Streamlit page: summary, class histogram and a clip grid."""
    st.set_page_config(layout="wide")
    st.title("Clips from VGGSound (possibly with Doppler effect) 🎬")

    # Load and filter the CSV once per browser session; reruns reuse it.
    if "df" not in st.session_state:
        st.session_state.df = load_doppler_clips(CSV_PATH, doppler_classes)
    df = st.session_state.df

    st.markdown(f"**Total number of relevant clips**: {len(df)}")
    if df.empty:
        # Nothing to chart or sample; stop here instead of crashing below.
        st.warning("No clips with a matching label were found in the CSV.")
        return

    # value_counts() is already sorted by descending count and keeps the
    # labels in the index, so it can be charted directly. This avoids
    # relying on the column names produced by reset_index(), which differ
    # between pandas versions.
    st.markdown("**Distribution of classes**")
    st.bar_chart(df["label"].value_counts(), width=300)

    # Streamlit reruns the whole script on any click, so the button only has
    # to exist: the rerun itself draws a fresh random sample below.
    st.button("Reload")

    indices = sample_clip_indices(len(df), NUM_CLIPS)

    # Create a grid of videos and fill it row by row.
    grid = make_grid(GRID_ROWS, GRID_COLS)
    for slot, index in enumerate(indices):
        row, col = divmod(slot, GRID_COLS)
        cell = grid[row][col]

        sample = df.iloc[index].to_dict()
        start = sample["start_seconds"]
        end = sample["end_seconds"]

        cell.caption(f"Segment duration: {end - start}")
        url = build_embed_url(sample["video_id"], start, end)
        html_code = (
            f'\n<iframe height="{VIDEO_HEIGHT}" width="{VIDEO_WIDTH}" '
            f'src="{url}" frameborder="0" allowfullscreen></iframe>\n'
        )
        # Raw HTML is required here to embed the player iframe.
        cell.markdown(html_code, unsafe_allow_html=True)
        cell.caption(f"{sample['label']}")


if __name__ == "__main__":
    main()