Spaces:

anshu-man853
/

YouTubeSummarizer

Runtime error

App Files Files Community

YouTubeSummarizer / app.py

anshu-man853

Update app.py

a8569f1 over 1 year ago

raw

history blame contribute delete

2.66 kB

	import re
	import urllib.request

	import gradio as gr
	import torch
	import transformers
	import youtube_dl
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from transformers import pipeline

	# Load the Falcon-7B checkpoint that backs the text-generation pipeline.
	# NOTE(review): this is the base checkpoint, not 'tiiuae/falcon-7b-instruct';
	# confirm which one is intended for instruction-style summarization prompts.
	model = "tiiuae/falcon-7b"

	tokenizer = AutoTokenizer.from_pretrained(model)

	# Build the pipeline once at import time. The name `pipeline` is kept (it
	# rebinds the `pipeline` imported from transformers) because generate_text()
	# looks the pipeline up under exactly this name.
	pipeline = transformers.pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,  # allow the checkpoint's bundled model code to run
	    device_map="auto",
	)

	# Nothing is generated at import time: the pipeline needs input text, so
	# generation happens per request inside generate_text().

	def generate_text(text, context=None):
	    """Run the module-level text-generation pipeline on a prompt.

	    Args:
	        text: The prompt, or the instruction prefix when ``context`` is given.
	        context: Optional text appended directly after ``text`` to form the
	            final prompt. Omit it to send ``text`` unchanged.

	    Returns:
	        The value returned by the pipeline call.
	        NOTE(review): for a text-generation pipeline this is expected to be
	        a list of dicts keyed by ``"generated_text"`` -- confirm against the
	        installed transformers version.
	    """
	    prompt = text if context is None else f"{text}{context}"
	    return pipeline(
	        prompt,
	        # Budget only the newly generated tokens; a total-length cap would
	        # also count the prompt and leave no room after a long transcript.
	        max_new_tokens=200,
	        do_sample=True,
	        top_k=10,
	        num_return_sequences=1,
	        eos_token_id=tokenizer.eos_token_id,
	    )
	# Locate the English WebVTT subtitle track of a video given its URL
	def _select_subtitle_url(info, language="en", extension="vtt"):
	    """Return the URL of the first matching subtitle track in ``info``, or None."""
	    if not info:
	        # extract_info() can hand back nothing when 'ignoreerrors' is set.
	        return None
	    # 'subtitles' maps a language code to a list of per-format track dicts.
	    tracks = (info.get("subtitles") or {}).get(language) or []
	    for track in tracks:
	        if track.get("ext") == extension:
	            return track.get("url")
	    return None


	def extract_youtube_transcript(url):
	    """Return the URL of the video's English ``vtt`` subtitle track.

	    Args:
	        url: URL of the video page to inspect.

	    Returns:
	        The subtitle track URL as a string, or ``None`` when the metadata
	        cannot be fetched or no English ``vtt`` track is listed.
	    """
	    ydl_opts = {
	        'writesubtitles': True,
	        'subtitleslangs': ['en'],
	        'skip_download': True,
	        'ignoreerrors': True,
	    }
	    try:
	        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	            info = ydl.extract_info(url, download=False)
	    except Exception as e:
	        # Best-effort lookup: report the problem and signal "no transcript".
	        print(f"An error occurred while fetching the transcript: {e}")
	        return None
	    return _select_subtitle_url(info)

	# Gradio callback: turn a video URL into a generated summary of its transcript
	def _vtt_to_text(vtt):
	    """Collapse a WebVTT document into plain caption text.

	    Only cue payloads are kept; the header and any block without a timing
	    line are dropped, inline tags are removed, and a caption line that
	    merely repeats the previous one is skipped.
	    """
	    pieces = []
	    normalized = "\n".join(vtt.splitlines())
	    for block in re.split(r"\n\s*\n", normalized):
	        rows = block.strip().split("\n")
	        timing_index = next(
	            (i for i, row in enumerate(rows) if "-->" in row), None
	        )
	        if timing_index is None:
	            continue  # not a cue (file header, comment or style block)
	        for row in rows[timing_index + 1:]:
	            caption = re.sub(r"<[^>]+>", "", row).strip()
	            if caption and (not pieces or pieces[-1] != caption):
	                pieces.append(caption)
	    return " ".join(pieces)


	def summarize_youtube_transcript(url):
	    """Summarize the English transcript of the video at ``url``.

	    Args:
	        url: URL of the video page.

	    Returns:
	        The generated summary text, or the message
	        "Unable to fetch or process the transcript." when the transcript
	        cannot be located, downloaded, parsed or summarized.
	    """
	    failure_message = "Unable to fetch or process the transcript."

	    transcript_url = extract_youtube_transcript(url)
	    # Only plain web URLs are fetched; anything else is treated as missing.
	    if not transcript_url or not transcript_url.startswith(
	        ("http://", "https://")
	    ):
	        return failure_message

	    try:
	        # Download the subtitle file itself; it is a text document, not a
	        # video page, so it is read directly rather than via youtube_dl.
	        with urllib.request.urlopen(transcript_url, timeout=30) as response:
	            raw_vtt = response.read().decode("utf-8", errors="replace")
	        transcript = _vtt_to_text(raw_vtt)
	        if not transcript:
	            return failure_message

	        prompt = f"Write a summary of: {transcript}"
	        sequences = generate_text(prompt)
	        if not sequences:
	            return failure_message
	        # NOTE(review): text-generation pipelines are expected to key their
	        # output by "generated_text" -- confirm for the installed version.
	        generated = sequences[0]["generated_text"]
	    except Exception as e:
	        # UI boundary: report the problem and show the fallback message.
	        print(f"An error occurred while processing the transcript: {e}")
	        return failure_message

	    # The generated text may echo the prompt; keep only what follows it.
	    if generated.startswith(prompt):
	        generated = generated[len(prompt):]
	    return generated.strip()

	# Create the Gradio interface: one text box in (the video URL), one text
	# box out (the summary). Components come from the top-level `gr` namespace;
	# the legacy `gr.inputs` / `gr.outputs` namespaces are not available in
	# current Gradio releases.
	iface = gr.Interface(
	    fn=summarize_youtube_transcript,
	    inputs=gr.Textbox(label="YouTube URL"),
	    outputs=gr.Textbox(label="Summary"),
	    title="YouTube Transcript Summarizer",
	    description="Enter a YouTube URL and get a summary of the transcript.",
	    theme="huggingface",
	)

	# Launch the interface
	iface.launch(share=True)