ns-devel
committed on
Commit
·
38efda5
1
Parent(s):
8677234
Added youtube url option
Browse files
- __pycache__/settings.cpython-311.pyc +0 -0
- app.py +20 -2
- lib/services/__pycache__/hf_model.cpython-311.pyc +0 -0
- lib/services/hf_model.py +29 -1
- requirements.txt +26 -0
__pycache__/settings.cpython-311.pyc
CHANGED
Binary files a/__pycache__/settings.cpython-311.pyc and b/__pycache__/settings.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
from lib.services.hf_model import get_transcript
|
3 |
from lib.services.gemini import gemini
|
4 |
from lib.services.openai import get_completion
|
@@ -9,14 +11,30 @@ def get_cached_transcript(video_url):
|
|
9 |
return get_transcript(video_url)
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def main():
|
13 |
st.title("VideoClarify")
|
14 |
|
15 |
# Get video URL from user
|
16 |
video_url = st.text_input("Enter Video URL:", key="video_url")
|
17 |
selected_model = st.sidebar.selectbox("Select Model", ["Gemini", "OpenAI"])
|
18 |
-
|
19 |
-
|
|
|
20 |
st.video(video_url)
|
21 |
# Get transcript from the video
|
22 |
transcript = get_cached_transcript(video_url)
|
|
|
1 |
import streamlit as st
|
2 |
+
from pytube import YouTube
|
3 |
+
from settings import DATA_DIR
|
4 |
from lib.services.hf_model import get_transcript
|
5 |
from lib.services.gemini import gemini
|
6 |
from lib.services.openai import get_completion
|
|
|
11 |
return get_transcript(video_url)
|
12 |
|
13 |
|
14 |
+
def download_youtube_video(video_url):
    """Download a YouTube video into ``DATA_DIR`` and return its local path.

    The stream is chosen with pytube's ``get_highest_resolution()``.

    Args:
        video_url: URL of the YouTube video. An empty string or ``None`` is
            rejected up front without contacting YouTube.

    Returns:
        The file path returned by ``Stream.download`` on success, or ``None``
        when the URL is empty, no suitable stream exists, or the download
        fails for any reason.
    """
    # Local import so this function stays self-contained.
    import logging

    log = logging.getLogger(__name__)

    if not video_url:
        return None

    try:
        yt = YouTube(video_url)

        # pytube may return None when no matching stream is available.
        video_stream = yt.streams.get_highest_resolution()
        if video_stream is None:
            log.warning("No downloadable stream found for %s", video_url)
            return None

        # Download the video to the configured data directory.
        return video_stream.download(DATA_DIR)
    except Exception:
        # Best-effort by design: callers treat None as "download failed".
        # Record the traceback instead of discarding it silently.
        log.exception("Failed to download video from %s", video_url)
        return None
|
27 |
+
|
28 |
+
|
29 |
def main():
|
30 |
st.title("VideoClarify")
|
31 |
|
32 |
# Get video URL from user
|
33 |
video_url = st.text_input("Enter Video URL:", key="video_url")
|
34 |
selected_model = st.sidebar.selectbox("Select Model", ["Gemini", "OpenAI"])
|
35 |
+
if len(video_url):
|
36 |
+
video_url = download_youtube_video(video_url)
|
37 |
+
print(video_url)
|
38 |
st.video(video_url)
|
39 |
# Get transcript from the video
|
40 |
transcript = get_cached_transcript(video_url)
|
lib/services/__pycache__/hf_model.cpython-311.pyc
CHANGED
Binary files a/lib/services/__pycache__/hf_model.cpython-311.pyc and b/lib/services/__pycache__/hf_model.cpython-311.pyc differ
|
|
lib/services/hf_model.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os
|
2 |
import requests
|
|
|
|
|
3 |
from settings import HF_API_URL, DATA_DIR
|
4 |
from pathlib import Path
|
5 |
from moviepy.editor import VideoFileClip
|
@@ -19,8 +21,9 @@ def convert_video_to_wav(video_path, output_path):
|
|
19 |
audio_clip = video_clip.audio
|
20 |
audio_clip.write_audiofile(output_path)
|
21 |
|
22 |
-
def
|
23 |
audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
|
|
|
24 |
if not audio_file.exists():
|
25 |
convert_video_to_wav(filepath, audio_file)
|
26 |
headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
|
@@ -28,4 +31,29 @@ def get_transcript(filepath):
|
|
28 |
data = f.read()
|
29 |
response = requests.post(HF_API_URL, headers=headers,
|
30 |
data=data)
|
|
|
31 |
return response.json()["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import requests
|
3 |
+
import torch
|
4 |
+
from transformers import pipeline
|
5 |
from settings import HF_API_URL, DATA_DIR
|
6 |
from pathlib import Path
|
7 |
from moviepy.editor import VideoFileClip
|
|
|
21 |
audio_clip = video_clip.audio
|
22 |
audio_clip.write_audiofile(output_path)
|
23 |
|
24 |
+
def get_transcript1(filepath):
|
25 |
audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
|
26 |
+
print(audio_file)
|
27 |
if not audio_file.exists():
|
28 |
convert_video_to_wav(filepath, audio_file)
|
29 |
headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
|
|
|
31 |
data = f.read()
|
32 |
response = requests.post(HF_API_URL, headers=headers,
|
33 |
data=data)
|
34 |
+
print(response, response.json())
|
35 |
return response.json()["text"]
|
36 |
+
|
37 |
+
def get_transcript(url):
    """Transcribe an audio/video input to plain text with Whisper.

    Builds a Hugging Face ``automatic-speech-recognition`` pipeline for
    ``openai/whisper-base`` (on ``cuda:0`` when available, otherwise CPU),
    runs it on ``url`` in 30-second chunks, and concatenates the text of
    every returned chunk. Timestamps are requested from the pipeline but
    are not included in the returned value.

    Args:
        url: Input handed directly to the pipeline. The caller in ``app.py``
            passes the value returned by ``download_youtube_video``.

    Returns:
        The concatenated transcript text.

    Raises:
        ValueError: If ``url`` is empty or ``None`` (e.g. the download
            failed), raised before the model is loaded.
    """
    if not url:
        raise ValueError("get_transcript requires a non-empty input path or URL")

    # Model used to turn speech into text (with per-chunk timestamps).
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition", model="openai/whisper-base", device=device
    )

    chunks = pipe(
        url,
        max_new_tokens=256,
        generate_kwargs={"task": "transcribe"},
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=True,
    )["chunks"]

    # Join once instead of growing a string with += per chunk.
    return "".join(chunk["text"] for chunk in chunks)
|
requirements.txt
CHANGED
@@ -8,7 +8,9 @@ certifi==2023.11.17
|
|
8 |
charset-normalizer==3.3.2
|
9 |
click==8.1.7
|
10 |
decorator==4.4.2
|
|
|
11 |
frozenlist==1.4.1
|
|
|
12 |
gitdb==4.0.11
|
13 |
GitPython==3.1.41
|
14 |
google-ai-generativelanguage==0.4.0
|
@@ -18,6 +20,7 @@ google-generativeai==0.3.2
|
|
18 |
googleapis-common-protos==1.62.0
|
19 |
grpcio==1.60.0
|
20 |
grpcio-status==1.60.0
|
|
|
21 |
idna==3.6
|
22 |
imageio==2.33.1
|
23 |
imageio-ffmpeg==0.4.9
|
@@ -29,8 +32,22 @@ markdown-it-py==3.0.0
|
|
29 |
MarkupSafe==2.1.4
|
30 |
mdurl==0.1.2
|
31 |
moviepy==1.0.3
|
|
|
32 |
multidict==6.0.4
|
|
|
33 |
numpy==1.26.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
openai==0.28.0
|
35 |
packaging==23.2
|
36 |
pandas==2.2.0
|
@@ -44,20 +61,29 @@ pyasn1-modules==0.3.0
|
|
44 |
pydeck==0.8.1b0
|
45 |
Pygments==2.17.2
|
46 |
python-dateutil==2.8.2
|
|
|
47 |
pytz==2023.3.post1
|
|
|
48 |
referencing==0.32.1
|
|
|
49 |
requests==2.31.0
|
50 |
rich==13.7.0
|
51 |
rpds-py==0.17.1
|
52 |
rsa==4.9
|
|
|
53 |
six==1.16.0
|
54 |
smmap==5.0.1
|
55 |
streamlit==1.30.0
|
|
|
56 |
tenacity==8.2.3
|
|
|
57 |
toml==0.10.2
|
58 |
toolz==0.12.0
|
|
|
59 |
tornado==6.4
|
60 |
tqdm==4.66.1
|
|
|
|
|
61 |
typing_extensions==4.9.0
|
62 |
tzdata==2023.4
|
63 |
tzlocal==5.2
|
|
|
8 |
charset-normalizer==3.3.2
|
9 |
click==8.1.7
|
10 |
decorator==4.4.2
|
11 |
+
filelock==3.13.1
|
12 |
frozenlist==1.4.1
|
13 |
+
fsspec==2023.12.2
|
14 |
gitdb==4.0.11
|
15 |
GitPython==3.1.41
|
16 |
google-ai-generativelanguage==0.4.0
|
|
|
20 |
googleapis-common-protos==1.62.0
|
21 |
grpcio==1.60.0
|
22 |
grpcio-status==1.60.0
|
23 |
+
huggingface-hub==0.20.2
|
24 |
idna==3.6
|
25 |
imageio==2.33.1
|
26 |
imageio-ffmpeg==0.4.9
|
|
|
32 |
MarkupSafe==2.1.4
|
33 |
mdurl==0.1.2
|
34 |
moviepy==1.0.3
|
35 |
+
mpmath==1.3.0
|
36 |
multidict==6.0.4
|
37 |
+
networkx==3.2.1
|
38 |
numpy==1.26.3
|
39 |
+
nvidia-cublas-cu12==12.1.3.1
|
40 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
41 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
42 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
43 |
+
nvidia-cudnn-cu12==8.9.2.26
|
44 |
+
nvidia-cufft-cu12==11.0.2.54
|
45 |
+
nvidia-curand-cu12==10.3.2.106
|
46 |
+
nvidia-cusolver-cu12==11.4.5.107
|
47 |
+
nvidia-cusparse-cu12==12.1.0.106
|
48 |
+
nvidia-nccl-cu12==2.18.1
|
49 |
+
nvidia-nvjitlink-cu12==12.3.101
|
50 |
+
nvidia-nvtx-cu12==12.1.105
|
51 |
openai==0.28.0
|
52 |
packaging==23.2
|
53 |
pandas==2.2.0
|
|
|
61 |
pydeck==0.8.1b0
|
62 |
Pygments==2.17.2
|
63 |
python-dateutil==2.8.2
|
64 |
+
pytube==15.0.0
|
65 |
pytz==2023.3.post1
|
66 |
+
PyYAML==6.0.1
|
67 |
referencing==0.32.1
|
68 |
+
regex==2023.12.25
|
69 |
requests==2.31.0
|
70 |
rich==13.7.0
|
71 |
rpds-py==0.17.1
|
72 |
rsa==4.9
|
73 |
+
safetensors==0.4.1
|
74 |
six==1.16.0
|
75 |
smmap==5.0.1
|
76 |
streamlit==1.30.0
|
77 |
+
sympy==1.12
|
78 |
tenacity==8.2.3
|
79 |
+
tokenizers==0.15.0
|
80 |
toml==0.10.2
|
81 |
toolz==0.12.0
|
82 |
+
torch==2.1.2
|
83 |
tornado==6.4
|
84 |
tqdm==4.66.1
|
85 |
+
transformers==4.36.2
|
86 |
+
triton==2.1.0
|
87 |
typing_extensions==4.9.0
|
88 |
tzdata==2023.4
|
89 |
tzlocal==5.2
|