bpiyush committed on
Commit
622f342
·
1 Parent(s): 296bded

Adds basic streamlit app

Browse files
Files changed (4) hide show
  1. app.py +76 -0
  2. data/test.csv +0 -0
  3. data/train.csv +0 -0
  4. data/val.csv +0 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit app"""
2
+ from os.path import join, exists, dirname, abspath
3
+ from glob import glob
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import torch
8
+ import torchvision
9
+ import streamlit as st
10
+
11
+ import warnings
12
+ warnings.simplefilter(action='ignore')
13
+
14
# Absolute path of this script and the directory that contains it; the CSV
# files under "data" are resolved relative to that directory.
curr_filepath = abspath(__file__)
repo_path = dirname(curr_filepath)


# Substrings searched for in each caption; a caption containing any one of
# them is kept as a (possibly) temporal clip.
temporal_terms = [
    "then",
    "before",
    "after",
    "followed by",
    "preceded by",
]
25
+
26
+
27
def make_grid(cols, rows):
    """Build a 2-D layout of Streamlit column objects.

    One ``st.container()`` is opened per outer entry and split into
    side-by-side columns with ``st.columns``.

    NOTE: despite the parameter names, ``cols`` is the number of containers
    created (the length of the returned list) and ``rows`` is the number of
    columns inside each container. Callers index the result as
    ``grid[container_index][column_index]``.

    Args:
        cols: Number of containers to create.
        rows: Value passed to ``st.columns`` for every container.

    Returns:
        A list of length ``cols`` whose i-th entry is the result of
        ``st.columns(rows)`` created inside the i-th container.
    """
    layout = []
    for _ in range(cols):
        with st.container():
            layout.append(st.columns(rows))
    return layout
33
+
34
+
35
if __name__ == "__main__":

    # Streamlit app code
    st.set_page_config(layout="wide")
    st.title("Clips from AudioCaps (possibly of temporal nature) 🎬")

    # Load and filter the captions only once per session: the filtered frame
    # is cached in st.session_state and reused on later runs of this script.
    if "df" not in st.session_state:
        splits = ["train.csv", "val.csv", "test.csv"]
        dfs = [pd.read_csv(join(repo_path, "data", split)) for split in splits]
        df = pd.concat(dfs, axis=0)
        # Keep only rows whose caption contains one of the temporal terms.
        # Non-string captions (e.g. NaN from an empty CSV cell) are dropped
        # instead of raising a TypeError on the `in` test.
        is_temporal = df.caption.apply(
            lambda x: isinstance(x, str) and any(term in x for term in temporal_terms)
        )
        df = df[is_temporal]
        st.session_state.df = df
    else:
        df = st.session_state.df
    st.markdown(f"**Total number of relevant clips**: {len(df)}", unsafe_allow_html=True)

    # A fresh sample is drawn on every run of the script, and pressing the
    # button makes Streamlit run the script again, so no explicit re-sampling
    # branch is needed for the button.
    st.button("Reload")
    NUM = 9
    # Positional indices sampled WITHOUT replacement so that no clip is shown
    # twice. Slicing a permutation also copes with fewer than NUM rows
    # (including zero), where np.random.randint(0, 0, NUM) would raise.
    indices = np.random.permutation(len(df))[:NUM]

    grid = make_grid(3, 3)
    per_video_width = 360
    per_video_height = 240
    for i, idx in enumerate(indices):
        # Fill the 3x3 grid row by row.
        row, col = divmod(i, 3)

        # Look the record up once instead of once per field.
        clip = df.iloc[idx]
        video_id = clip.youtube_id
        start = clip.start_time
        end = start + 10.

        # Embedded YouTube player restricted to the [start, end] second window.
        url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"
        html_code = f"""
        <iframe height="{per_video_height}" width="{per_video_width}" src="{url}" frameborder="0" allowfullscreen></iframe>
        """
        # Raw HTML is required here to render the iframe.
        grid[row][col].markdown(html_code, unsafe_allow_html=True)
        # The caption comes straight from the CSV and needs no raw HTML, so it
        # is rendered with Streamlit's default HTML handling.
        grid[row][col].markdown(f"**Caption**: {clip.caption}")
data/test.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/train.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/val.csv ADDED
The diff for this file is too large to render. See raw diff