File size: 2,166 Bytes
f315fdc
 
 
82f7076
f315fdc
 
 
 
82f7076
 
f315fdc
82f7076
f315fdc
 
 
82f7076
 
e51e296
82f7076
e51e296
82f7076
e51e296
82f7076
f315fdc
82f7076
f315fdc
 
82f7076
f315fdc
82f7076
f315fdc
82f7076
f315fdc
82f7076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f315fdc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -------------------------------
# 1. Collector
# -------------------------------
from typing import List, Dict
from googleapiclient.discovery import build

from modules.youtube_utils import get_channel_id

from googleapiclient.discovery import build


def fetch_all_channel_videos(api_key: str, channel_url: str, max_results_per_call=50):
    """Stream a channel's videos batch by batch together with a progress message.

    This is a generator. The channel ID is obtained by passing a YouTube
    Data API v3 client and ``channel_url`` to ``get_channel_id``; the
    batches themselves come from ``fetch_channel_videos_by_id``.

    Args:
        api_key: Developer key used to build the API client.
        channel_url: Channel reference handed to ``get_channel_id``.
        max_results_per_call: Page size forwarded to
            ``fetch_channel_videos_by_id``.

    Yields:
        ``(message, videos)`` tuples. ``message`` is ``"Fetched <total>"``
        with the cumulative number of videos so far, and ``videos`` holds
        only the newly fetched batch. After the last batch a final
        summary tuple with an empty list is yielded.

    Side effects:
        Prints the cumulative count to stdout after every batch.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    channel_id = get_channel_id(youtube, channel_url)

    # Only the running total is ever reported, so keep a counter rather than
    # retaining every video dict in memory for the lifetime of the generator.
    total_fetched = 0
    for videos in fetch_channel_videos_by_id(api_key, channel_id, max_results_per_call):
        total_fetched += len(videos)
        print("Fetched", total_fetched)
        yield (f"Fetched {total_fetched}", videos)  # only the *new* batch

    yield (f"Fetched {total_fetched}", [])  # final "summary"


def fetch_channel_videos_by_id(api_key: str, channel_id: str, max_results=50):
    """Yield the videos of a channel's uploads playlist, one API page at a time.

    This is a generator. It first looks up the channel to read its title
    and uploads playlist ID, then walks that playlist page by page,
    following ``nextPageToken`` until the response no longer carries one.

    Args:
        api_key: Developer key used to build the YouTube Data API v3 client.
        channel_id: ID passed to ``channels().list``.
        max_results: ``maxResults`` value sent with every playlist request.

    Yields:
        A list of dicts per page, each with the keys ``video_id``,
        ``title``, ``description``, ``channel_id`` and ``channel_title``.
        A page without ``items`` yields an empty list.

    Raises:
        KeyError / IndexError: if the channel response has no first entry
            under ``items`` or lacks the expected fields.
    """
    client = build("youtube", "v3", developerKey=api_key)

    # Resolve the channel's title and its uploads playlist.
    channel_lookup = client.channels().list(
        part="contentDetails,snippet", id=channel_id
    )
    channel_entry = channel_lookup.execute()["items"][0]
    channel_title = channel_entry["snippet"]["title"]
    uploads_id = channel_entry["contentDetails"]["relatedPlaylists"]["uploads"]

    page_token = None
    has_more_pages = True
    while has_more_pages:
        page = client.playlistItems().list(
            part="snippet",
            playlistId=uploads_id,
            maxResults=max_results,
            pageToken=page_token,
        ).execute()

        # One page worth of videos, flattened to the fields we care about.
        yield [
            {
                "video_id": entry["snippet"]["resourceId"]["videoId"],
                "title": entry["snippet"]["title"],
                "description": entry["snippet"].get("description", ""),
                "channel_id": channel_id,
                "channel_title": channel_title,
            }
            for entry in page.get("items", [])
        ]

        # A missing or empty token means this was the last page.
        page_token = page.get("nextPageToken")
        has_more_pages = bool(page_token)