Spaces:
Runtime error
Runtime error
"""Simple reader that reads the transcripts of YouTube videos."""
from typing import Any, List | |
from gpt_index.readers.base import BaseReader | |
from gpt_index.readers.schema.base import Document | |
class YoutubeTranscriptReader(BaseReader):
    """Youtube Transcript reader.

    Fetches the transcript of each YouTube link it is given and wraps the
    transcript text in a ``Document``.
    """

    def __init__(self) -> None:
        """Initialize with parameters."""

    @staticmethod
    def _extract_video_id(link: str) -> str:
        """Return the video id referenced by a YouTube link.

        Recognised forms are ``watch?v=ID`` (with ``v`` anywhere in the query
        string), ``youtu.be/ID`` and ``/embed/ID``, ``/shorts/ID``,
        ``/live/ID``, ``/v/ID`` paths. Anything else falls back to the text
        after the last ``?v=``, so a bare video id is returned unchanged.

        Args:
            link (str): A YouTube URL or a bare video id.

        Returns:
            str: The extracted video id.
        """
        from urllib.parse import parse_qs, urlparse

        # Without a scheme, urlparse treats the host as part of the path;
        # a leading "//" makes it parse "youtu.be/ID" style links correctly.
        parsed = urlparse(link if "://" in link else "//" + link)

        # parse_qs isolates ``v`` from sibling parameters such as ``&t=10s``.
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]

        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]
        if segments:
            if host == "youtu.be" or host.endswith(".youtu.be"):
                return segments[0]
            if len(segments) >= 2 and segments[0] in {
                "embed",
                "shorts",
                "live",
                "v",
            }:
                return segments[1]

        # Unrecognised shape: keep the historical behaviour.
        return link.split("?v=")[-1]

    def load_data(self, ytlinks: List[str], **load_kwargs: Any) -> List[Document]:
        """Load transcripts for the given YouTube links.

        Args:
            ytlinks (List[str]): List of youtube links \
                for which transcripts are to be read.
            **load_kwargs (Any): Accepted for interface compatibility;
                not used by this reader.

        Returns:
            List[Document]: One document per link, in input order. Each
                document holds the transcript text with one caption chunk
                per line.

        Raises:
            ImportError: If ``youtube_transcript_api`` is not installed.
        """
        try:
            from youtube_transcript_api import YouTubeTranscriptApi
        except ImportError as err:
            raise ImportError(
                "`youtube_transcript_api` package not found, "
                "please run `pip install youtube-transcript-api`"
            ) from err

        results = []
        for link in ytlinks:
            video_id = self._extract_video_id(link)
            srt = YouTubeTranscriptApi.get_transcript(video_id)
            # Every chunk, including the last, is terminated by a newline.
            transcript = "".join(chunk["text"] + "\n" for chunk in srt)
            results.append(Document(transcript))
        return results