File size: 1,251 Bytes
35b22df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""Simple Reader that reads transcript of youtube video."""
from typing import Any, List

from gpt_index.readers.base import BaseReader
from gpt_index.readers.schema.base import Document


class YoutubeTranscriptReader(BaseReader):
    """Youtube Transcript reader."""

    def __init__(self) -> None:
        """Initialize with parameters."""

    def load_data(self, ytlinks: List[str], **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory.

        Args:
            pages (List[str]): List of youtube links \
                for which transcripts are to be read.

        """
        try:
            from youtube_transcript_api import YouTubeTranscriptApi
        except ImportError:
            raise ImportError(
                "`youtube_transcript_api` package not found, \
                    please run `pip install youtube-transcript-api`"
            )

        results = []
        for link in ytlinks:
            video_id = link.split("?v=")[-1]
            srt = YouTubeTranscriptApi.get_transcript(video_id)
            transcript = ""
            for chunk in srt:
                transcript = transcript + chunk["text"] + "\n"
            results.append(Document(transcript))
        return results