Spaces:
Runtime error
Runtime error
File size: 1,251 Bytes
35b22df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
"""Simple Reader that reads transcript of youtube video."""
from typing import Any, List
from urllib.parse import parse_qs, urlparse

from gpt_index.readers.base import BaseReader
from gpt_index.readers.schema.base import Document
class YoutubeTranscriptReader(BaseReader):
    """Reader that turns YouTube video transcripts into documents."""

    def __init__(self) -> None:
        """Initialize the reader; it takes no configuration."""

    @staticmethod
    def _extract_video_id(link: str) -> str:
        """Return the video id contained in a YouTube link.

        Handles ``watch?v=<id>`` links (with any extra query parameters, in
        any order), ``youtu.be/<id>`` short links and ``/embed/<id>``,
        ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths. Anything else
        (for example a bare video id) falls back to the text after the last
        ``"?v="``, which is the whole input when that marker is absent.

        Args:
            link (str): A YouTube URL or a bare video id.

        Returns:
            str: The extracted video id.
        """
        link = link.strip()
        parsed = urlparse(link)

        # Standard watch URLs carry the id in the ``v`` query parameter;
        # parse_qs keeps it separate from ``&t=``, ``&list=`` and friends.
        v_values = parse_qs(parsed.query).get("v")
        if v_values:
            return v_values[0]

        segments = [part for part in parsed.path.split("/") if part]
        host = (parsed.hostname or "").lower()

        # Short links: https://youtu.be/<id>
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]

        # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        # Legacy behaviour, kept so inputs that worked before still work.
        return link.split("?v=")[-1]

    def load_data(self, ytlinks: List[str], **load_kwargs: Any) -> List[Document]:
        """Load the transcripts of the given YouTube videos.

        Args:
            ytlinks (List[str]): List of youtube links
                for which transcripts are to be read.
            **load_kwargs (Any): Accepted for interface compatibility;
                currently not used by this reader.

        Returns:
            List[Document]: One document per link, in input order, whose text
            is the transcript with one caption chunk per line.

        Raises:
            ImportError: If the `youtube_transcript_api` package is not
                installed.
        """
        try:
            from youtube_transcript_api import YouTubeTranscriptApi
        except ImportError as err:
            raise ImportError(
                "`youtube_transcript_api` package not found, "
                "please run `pip install youtube-transcript-api`"
            ) from err

        results: List[Document] = []
        for link in ytlinks:
            video_id = self._extract_video_id(link)
            srt = YouTubeTranscriptApi.get_transcript(video_id)
            # Each chunk contributes its text followed by a newline.
            transcript = "".join(chunk["text"] + "\n" for chunk in srt)
            results.append(Document(transcript))
        return results
|