Upload folder using huggingface_hub
Browse files
app.py
CHANGED
@@ -4,19 +4,17 @@ import gradio as gr
|
|
4 |
from gradio_modal import Modal
|
5 |
import chromadb
|
6 |
from modules.collector import fetch_channel_videos_from_url
|
7 |
-
from modules.db import
|
|
|
|
|
|
|
|
|
8 |
from modules.indexer import index_videos
|
9 |
from modules.answerer import answer_query, LLMAnswer, VideoItem, build_video_html
|
10 |
from dotenv import load_dotenv
|
11 |
|
12 |
load_dotenv()
|
13 |
|
14 |
-
# -------------------------------
|
15 |
-
# Setup Chroma
|
16 |
-
# -------------------------------
|
17 |
-
client = chromadb.PersistentClient(path="./youtube_db")
|
18 |
-
collection = client.get_or_create_collection("yt_metadata", embedding_function=None)
|
19 |
-
|
20 |
|
21 |
# -------------------------------
|
22 |
# Utility functions
|
@@ -67,7 +65,7 @@ def refresh_channel(api_key, channel_url: str):
|
|
67 |
videos = fetch_channel_videos_from_url(api_key, channel_url)
|
68 |
for v in videos:
|
69 |
v["channel_url"] = channel_url
|
70 |
-
index_videos(videos,
|
71 |
return len(videos)
|
72 |
|
73 |
|
@@ -76,13 +74,13 @@ def index_channels(channel_urls: str):
|
|
76 |
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
77 |
urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
|
78 |
total_videos = sum(refresh_channel(yt_api_key, url) for url in urls)
|
79 |
-
yield f"✅ Indexed {total_videos} videos from {len(urls)} channels.", list_channels_radio()
|
80 |
return
|
81 |
|
82 |
|
83 |
def refresh_all_channels():
|
84 |
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
85 |
-
channels = get_indexed_channels(
|
86 |
if not channels:
|
87 |
return "⚠️ No channels available to refresh.", list_channels_radio()
|
88 |
total_videos = 0
|
@@ -100,7 +98,7 @@ def refresh_all_channels():
|
|
100 |
# Channel selection as radio
|
101 |
# -------------------------------
|
102 |
def list_channels_radio():
|
103 |
-
channels = get_indexed_channels(
|
104 |
choices = []
|
105 |
for key, val in channels.items():
|
106 |
if isinstance(val, dict):
|
@@ -142,14 +140,14 @@ def fetch_channel_html(channel_url: str):
|
|
142 |
def delete_channel(channel_url: str):
|
143 |
delete_channel_from_collection(channel_url)
|
144 |
# Return updated radio choices
|
145 |
-
return list_channels_radio()
|
146 |
|
147 |
|
148 |
# -------------------------------
|
149 |
# LLM query
|
150 |
# -------------------------------
|
151 |
def handle_query(query: str):
|
152 |
-
answer_text, video_html = answer_query(query,
|
153 |
return answer_text, video_html
|
154 |
|
155 |
|
@@ -198,7 +196,7 @@ with gr.Blocks() as demo:
|
|
198 |
)
|
199 |
|
200 |
delete_channel_btn = gr.Button(
|
201 |
-
"🗑️ Delete", size="sm", scale=0, variant="stop"
|
202 |
)
|
203 |
|
204 |
refresh_status = gr.Markdown(label="Refresh Status", container=False)
|
|
|
4 |
from gradio_modal import Modal
|
5 |
import chromadb
|
6 |
from modules.collector import fetch_channel_videos_from_url
|
7 |
+
from modules.db import (
|
8 |
+
delete_channel_from_collection,
|
9 |
+
get_collection,
|
10 |
+
get_indexed_channels,
|
11 |
+
)
|
12 |
from modules.indexer import index_videos
|
13 |
from modules.answerer import answer_query, LLMAnswer, VideoItem, build_video_html
|
14 |
from dotenv import load_dotenv
|
15 |
|
16 |
load_dotenv()
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# -------------------------------
|
20 |
# Utility functions
|
|
|
65 |
videos = fetch_channel_videos_from_url(api_key, channel_url)
|
66 |
for v in videos:
|
67 |
v["channel_url"] = channel_url
|
68 |
+
index_videos(videos, get_collection(), channel_url=channel_url)
|
69 |
return len(videos)
|
70 |
|
71 |
|
|
|
74 |
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
75 |
urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
|
76 |
total_videos = sum(refresh_channel(yt_api_key, url) for url in urls)
|
77 |
+
yield f"✅ Indexed {total_videos} videos from {len(urls)} channels.", gr.update(choices=list_channels_radio())
|
78 |
return
|
79 |
|
80 |
|
81 |
def refresh_all_channels():
|
82 |
yt_api_key = os.environ["YOUTUBE_API_KEY"]
|
83 |
+
channels = get_indexed_channels(get_collection())
|
84 |
if not channels:
|
85 |
return "⚠️ No channels available to refresh.", list_channels_radio()
|
86 |
total_videos = 0
|
|
|
98 |
# Channel selection as radio
|
99 |
# -------------------------------
|
100 |
def list_channels_radio():
|
101 |
+
channels = get_indexed_channels(get_collection())
|
102 |
choices = []
|
103 |
for key, val in channels.items():
|
104 |
if isinstance(val, dict):
|
|
|
140 |
def delete_channel(channel_url: str):
|
141 |
delete_channel_from_collection(channel_url)
|
142 |
# Return updated radio choices
|
143 |
+
return gr.update(choices=list_channels_radio())
|
144 |
|
145 |
|
146 |
# -------------------------------
|
147 |
# LLM query
|
148 |
# -------------------------------
|
149 |
def handle_query(query: str):
|
150 |
+
answer_text, video_html = answer_query(query, get_collection())
|
151 |
return answer_text, video_html
|
152 |
|
153 |
|
|
|
196 |
)
|
197 |
|
198 |
delete_channel_btn = gr.Button(
|
199 |
+
"🗑️ Delete", size="sm", scale=0, variant="stop"
|
200 |
)
|
201 |
|
202 |
refresh_status = gr.Markdown(label="Refresh Status", container=False)
|