Spaces:
Sleeping
Sleeping
File size: 4,802 Bytes
89b078e 138ee9c 9d2b048 8d0bba0 89b078e 9d2b048 89b078e 9d2b048 89b078e 27558c9 89b078e 27558c9 16a9748 f52f378 7c3d441 def4cb3 924c916 89b078e df5d2d4 89b078e ade57f6 89b078e 72f25c5 f46450e a052a01 4742bfe a052a01 138ee9c 89b078e a01fcf4 27558c9 d4a3868 c76caf9 5e0aea9 df0a789 023ef49 89b078e 0798764 aa86aaf 89b078e 72670e5 0798764 72670e5 dcef7e3 cc92b76 2285607 cc92b76 9e71294 cc92b76 16a9748 cc92b76 89b078e 16a9748 89b078e ac669c3 89b078e e5255a2 aa86aaf cc92b76 9e71294 cc92b76 16a9748 cc92b76 aa86aaf a01fcf4 89b078e cc92b76 16a9748 9d2b048 cc92b76 16a9748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import html
import os
import time

import streamlit as st
from audio_extract import extract_audio
from dotenv import load_dotenv
from pydub import AudioSegment
from pytube import YouTube
from transformers import pipeline
# Load environment settings through python-dotenv before the page is built.
load_dotenv()

# Page metadata first, then the visible heading and one-line description.
st.set_page_config(page_title="VidText_distilled")
st.title('Vidtext_distilwhisper')
st.write('A web app for video/audio transcription(Youtube, mp4, mp3). Using distilled Whisper')
def youtube_video_downloader(url):
    """Download a YouTube video as a progressive MP4 and report its title.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        A ``(path, title)`` tuple: the value returned by the selected
        stream's ``download()`` call, and the video's title.

    Raises:
        ValueError: If the video offers no progressive MP4 stream.
    """
    video = YouTube(url)
    title = video.title
    # Among the progressive MP4 streams, take the highest resolution one.
    best_stream = (
        video.streams.filter(progressive=True, file_extension="mp4")
        .order_by("resolution")
        .desc()
        .first()
    )
    # first() gives None when the filter matches nothing; report that clearly
    # instead of failing with an AttributeError on None.download().
    if best_stream is None:
        raise ValueError("No progressive MP4 stream is available for this video.")
    return best_stream.download(), title
def audio_extraction(video_file):
    """Write the audio of an MP4 source to a WAV file.

    Args:
        video_file: MP4 source passed straight to ``AudioSegment.from_file``.

    Returns:
        The path of the WAV file that was written, always ``'audio.wav'``.
    """
    audio_path = 'audio.wav'
    track = AudioSegment.from_file(video_file, format="mp4")
    # export() returns the open output file object; close it explicitly
    # rather than relying on garbage collection to release the handle.
    track.export(audio_path, format="wav").close()
    return audio_path
def audio_processing(mp3_audio):
    """Convert an MP3 source to a WAV file.

    Args:
        mp3_audio: MP3 source passed straight to ``AudioSegment.from_file``.

    Returns:
        The path of the WAV file that was written, always
        ``"audio_file.wav"``.
    """
    wav_file = "audio_file.wav"
    segment = AudioSegment.from_file(mp3_audio, format="mp3")
    # export() returns the open output file object; the original kept it in an
    # unused local and never closed it. Close it as soon as the write is done.
    segment.export(wav_file, format="wav").close()
    return wav_file
@st.cache_resource
def load_asr_model():
    """Create the automatic-speech-recognition pipeline used by the app.

    The function is decorated with ``st.cache_resource`` so Streamlit caches
    the returned pipeline object instead of rebuilding it on each script run.

    Returns:
        The ``transformers`` pipeline for the
        ``distil-whisper/distil-large-v3`` model.
    """
    return pipeline(
        task="automatic-speech-recognition",
        model="distil-whisper/distil-large-v3",
    )


# Module-level pipeline instance that transcriber_pass calls.
transcriber_model = load_asr_model()
def transcriber_pass(processed_audio):
    """Transcribe an audio input with the module-level ASR pipeline.

    Args:
        processed_audio: Audio input forwarded unchanged to
            ``transcriber_model``.

    Returns:
        The value stored under the ``'text'`` key of the pipeline result.
    """
    return transcriber_model(processed_audio)['text']
# Streamlit UI: one tab per supported input source.
youtube_url_tab, file_select_tab, audio_file_tab = st.tabs(["Youtube URL","Video file", "Audio file"])

# YouTube URL transcription
with youtube_url_tab:
    url = st.text_input("Enter the Youtube url")
    try:
        # Download only once a URL is present AND the button was pressed.
        # Fetching unconditionally raised an error for the empty initial URL
        # and re-downloaded the video on every script rerun.
        if url and st.button("Transcribe", key="yturl"):
            with st.spinner("Transcribing..."):
                yt_video, title = youtube_video_downloader(url)
                with st.spinner('Extracting audio...'):
                    audio = audio_extraction(yt_video)
                ytvideo_transcript = transcriber_pass(audio)
            st.success("Transcription successful")
            st.write(f'Video title: {title}')
            st.write('___')
            # The transcript is placed inside raw HTML, so escape it first to
            # keep characters such as < and & from being read as markup.
            transcript_html = (
                '\n<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">\n'
                f'<p> -> {html.escape(ytvideo_transcript)}</p>\n'
                '</div>\n'
            )
            st.markdown(transcript_html, unsafe_allow_html=True)
    except Exception as e:
        # Top-level UI boundary: show any failure to the user in the tab.
        st.error(e)
# Video file transcription
with file_select_tab:
    uploaded_video_file = st.file_uploader("Upload video file", type="mp4")
    try:
        # The button is only shown once a file has been uploaded.
        if uploaded_video_file and st.button("Transcribe", key="vidfile"):
            with st.spinner("Transcribing..."):
                with st.spinner('Extracting audio...'):
                    audio = audio_extraction(uploaded_video_file)
                video_transcript = transcriber_pass(audio)
            st.success("Transcription successful")
            # The transcript is placed inside raw HTML, so escape it first to
            # keep characters such as < and & from being read as markup.
            transcript_html = (
                '\n<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">\n'
                f'<p> -> {html.escape(video_transcript)}</p>\n'
                '</div>\n'
            )
            st.markdown(transcript_html, unsafe_allow_html=True)
    except Exception as e:
        # Top-level UI boundary: show any failure to the user in the tab.
        st.error(e)
# Audio transcription
with audio_file_tab:
    audio_file = st.file_uploader("Upload audio file", type="mp3")
    try:
        # The button is only shown once a file has been uploaded.
        if audio_file and st.button("Transcribe", key="audiofile"):
            with st.spinner("Transcribing..."):
                processed_audio = audio_processing(audio_file)
                audio_transcript = transcriber_pass(processed_audio)
            st.success("Transcription successful")
            # The transcript is placed inside raw HTML, so escape it first to
            # keep characters such as < and & from being read as markup.
            transcript_html = (
                '\n<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">\n'
                f'<p> -> {html.escape(audio_transcript)}</p>\n'
                '</div>\n'
            )
            st.markdown(transcript_html, unsafe_allow_html=True)
    except Exception as e:
        # Top-level UI boundary: show any failure to the user in the tab.
        st.error(e)
# Footer
# Three empty writes come first, presumably as vertical spacing above the credit.
for _ in range(3):
    st.write('')

# Centered author credit, rendered as raw HTML.
footer_html = """
<div style="text-align: center; padding: 1rem;">
Project by <a href="https://github.com/kelechi-c" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
tensor_kelechi</a>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)
# Arigato :)