# Hugging Face Spaces app header (the Spaces build log reported: "Runtime error")
import glob
import os
import random
import subprocess

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
def yt2mp3(url, outputMp3F):
    """Download the audio track of a YouTube video and save it as an MP3.

    Parameters
    ----------
    url : str
        YouTube video URL — untrusted user input from the web UI.
    outputMp3F : str
        Path of the MP3 file to write (overwritten if it already exists).

    Raises
    ------
    subprocess.CalledProcessError
        If youtube-dl or ffmpeg exits with a non-zero status.
    FileNotFoundError
        If youtube-dl produced no output file.
    """
    # Random basename for the intermediate download; youtube-dl appends the
    # container extension itself, hence the "/tmp/{base}.*" glob below.
    tmp_base = f"/tmp/{random.random()}"
    # Argument lists with shell=False: the original os.system(f"... " + url)
    # let a crafted URL inject arbitrary shell commands.
    subprocess.run(
        ["./bin/youtube-dl", "-o", tmp_base, "--verbose", url],
        check=True,
    )
    # The original relied on the shell to expand "/tmp/{base}.*" for ffmpeg;
    # expand it in Python instead so no shell is involved.
    sources = glob.glob(tmp_base + ".*")
    if not sources:
        raise FileNotFoundError(f"youtube-dl produced no file at {tmp_base}.*")
    subprocess.run(
        ["ffmpeg", "-y", "-i", sources[0],
         "-vn", "-ar", "44100", "-ac", "2", "-b:a", "192k", outputMp3F],
        check=True,
    )
def speech2text(mp3_file):
    """Transcribe an audio file to text with Distil-Whisper.

    Parameters
    ----------
    mp3_file : str
        Path to the audio file to transcribe.

    Returns
    -------
    str
        The transcribed text.
    """
    # The dtype already branched on CUDA availability, but the device was
    # hard-coded to "cuda:0", which crashed on CPU-only hosts — fall back
    # to CPU consistently.
    has_cuda = torch.cuda.is_available()
    device = "cuda:0" if has_cuda else "cpu"
    torch_dtype = torch.float16 if has_cuda else torch.float32
    model_id = "distil-whisper/distil-large-v2"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
        # Flash-attention 2 requires a CUDA GPU; enabling it unconditionally
        # (as the original did) fails on CPU.
        use_flash_attention_2=has_cuda,
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(model_id)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=15,  # long audio is split into 15 s chunks
        batch_size=16,
        torch_dtype=torch_dtype,
        device=device,
    )
    result = pipe(mp3_file)
    return result["text"]
def chat(system_prompt, text):
    """Run one Llama-2-chat turn and return the model's reply.

    Parameters
    ----------
    system_prompt : str
        System instruction placed in the <<SYS>> section (may be empty).
    text : str
        The user message.

    Returns
    -------
    str
        The generated reply (text after the closing [/INST] tag).

    Raises
    ------
    KeyError
        If the HUGGINGFACE_TOKEN environment variable is not set.
    """
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    token = os.environ['HUGGINGFACE_TOKEN']
    # 8-bit quantization so the 7B model fits in modest GPU memory.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True
    )
    device_map = {"": 0}  # place the entire model on GPU 0
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
        use_auth_token=token
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    llama_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
    # Llama-2 chat prompt template; renamed from `text` to avoid shadowing
    # the parameter it interpolates.
    prompt = f"""
    <s>[INST] <<SYS>>
    {system_prompt}
    <</SYS>>
    {text}[/INST]
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # Llama-2's context window is 4096 tokens; the original max_length of
        # 32000 exceeded it and could fail or silently misbehave at runtime.
        max_length=4096
    )
    generated_text = sequences[0]["generated_text"]
    # Keep only the reply: everything after the closing [/INST] tag.
    generated_text = generated_text[generated_text.find('[/INST]')+len('[/INST]'):]
    return generated_text
def summarize(text):
    """Iteratively condense arbitrarily long text into a single summary.

    Works around the LLM context limit by summarizing the first
    `input_len` characters, folding that summary back in front of the
    unseen remainder, and repeating until everything fits in one window.

    Parameters
    ----------
    text : str
        The text to summarize (any length, including empty).

    Returns
    -------
    str
        The final summary produced by `chat`.
    """
    input_len = 10000  # characters (not tokens) fed to the model per round
    while True:
        summary = chat("", "Summarize the following: " + text[0:input_len])
        # <= rather than <: when the whole remaining text fit into this
        # round, the summary already covers all of it.  The original `<`
        # ran one extra, useless summarization pass when
        # len(text) == input_len exactly.
        if len(text) <= input_len:
            return summary
        # Prepend the head's summary to the tail not yet seen by the model.
        text = summary + " " + text[input_len:]
import gradio as gr


def summarize_from_youtube(url):
    """Fetch a YouTube video's audio, transcribe it, and return a summary."""
    audio_path = "./files/audio.mp3"
    yt2mp3(url=url, outputMp3F=audio_path)
    transcript = speech2text(mp3_file=audio_path)
    return summarize(transcript)
# Build and launch the web UI.  gr.inputs.Textbox / gr.outputs.Textbox were
# removed in Gradio 3.x — components are now top-level classes — so the
# original calls raised AttributeError at startup.
youtube_url = gr.Textbox(lines=1, label="Enter YouTube URL")
output_text = gr.Textbox(label="Summary")
gr.Interface(
    fn=summarize_from_youtube,
    inputs=youtube_url,
    outputs=output_text,
    title="YouTube Summarizer",
    description="Enter a YouTube URL to summarize its content.",
).launch()