|
|
import os |
|
|
import re |
|
|
import gradio as gr |
|
|
from gradio_modal import Modal |
|
|
import chromadb |
|
|
from modules.collector import fetch_channel_videos_from_url |
|
|
from modules.db import delete_channel_from_collection, get_indexed_channels |
|
|
from modules.indexer import index_videos |
|
|
from modules.answerer import answer_query, LLMAnswer, VideoItem, build_video_html |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
# Load variables from a .env file first: the handlers below read
# YOUTUBE_API_KEY from os.environ.
load_dotenv()

# Chroma client persisted under ./youtube_db, and the single collection this
# module passes to the indexer, the channel listing and the answerer.
# NOTE(review): embedding_function=None presumably means embeddings are
# supplied explicitly elsewhere - confirm against modules.indexer.
client = chromadb.PersistentClient(path="./youtube_db")
collection = client.get_or_create_collection(
    "yt_metadata",
    embedding_function=None,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def show_component():
    """Return a Gradio update that sets ``visible=True`` on the output component."""
    make_visible = gr.update(visible=True)
    return make_visible
|
|
|
|
|
|
|
|
def hide_component():
    """Return a Gradio update that sets ``visible=False`` on the output component."""
    make_hidden = gr.update(visible=False)
    return make_hidden
|
|
|
|
|
|
|
|
def open_component():
    """Return a Gradio update that sets ``open=True`` on the output component."""
    expand = gr.update(open=True)
    return expand
|
|
|
|
|
|
|
|
def close_component():
    """Return a Gradio update that sets ``open=False`` on the output component."""
    collapse = gr.update(open=False)
    return collapse
|
|
|
|
|
|
|
|
def enable_component():
    """Return a Gradio update that sets ``interactive=True`` on the output component."""
    unlock = gr.update(interactive=True)
    return unlock
|
|
|
|
|
|
|
|
def disable_component():
    """Return a Gradio update that sets ``interactive=False`` on the output component."""
    lock = gr.update(interactive=False)
    return lock
|
|
|
|
|
|
|
|
def clear_component():
    """Return a Gradio update that resets the output component's value to an empty string."""
    blank = gr.update(value="")
    return blank
|
|
|
|
|
|
|
|
def show_loading():
    """Return a Gradio update that puts the "fetching" placeholder text into the output component."""
    placeholder = gr.update(value="⏳Fetching ...")
    return placeholder
|
|
|
|
|
|
|
|
def enable_if_not_none(question):
    """Enable the output component only when ``question`` carries a usable value.

    This handler is wired to both a Textbox and a Radio ``change`` event.

    Args:
        question: Current value of the watched component.

    Returns:
        The update from ``disable_component()`` when the value is ``None`` or
        a blank/whitespace-only string, otherwise the update from
        ``enable_component()``.
    """
    # A cleared Textbox delivers "" rather than None, so checking only for
    # None would leave the button enabled on an empty question.
    is_blank = question is None or (
        isinstance(question, str) and not question.strip()
    )
    if is_blank:
        return disable_component()
    return enable_component()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def refresh_channel(api_key, channel_url: str):
    """Fetch a channel's videos, tag them with the channel URL, and index them.

    Args:
        api_key: Key forwarded to ``fetch_channel_videos_from_url``.
        channel_url: URL of the channel to fetch; also stored on every video
            under the ``"channel_url"`` key and forwarded to ``index_videos``.

    Returns:
        The number of videos returned by the fetch.
    """
    fetched = fetch_channel_videos_from_url(api_key, channel_url)

    # Each video record is mutated in place before it is handed to the indexer.
    for video in fetched:
        video["channel_url"] = channel_url

    index_videos(fetched, collection, channel_url=channel_url)

    video_count = len(fetched)
    return video_count
|
|
|
|
|
|
|
|
def index_channels(channel_urls: str):
    """Index every channel listed in ``channel_urls``, streaming status to the UI.

    This is a generator handler with two outputs: a status string and an
    update for the channel radio.

    Args:
        channel_urls: Channel URLs separated by commas and/or newlines.

    Yields:
        ``("saving ...", <no-op update>)`` first, then the final summary
        message together with an update that refreshes the radio's choices.

    Raises:
        KeyError: If ``YOUTUBE_API_KEY`` is not set in the environment.
    """
    yield "saving ...", gr.update()

    yt_api_key = os.environ["YOUTUBE_API_KEY"]

    # Tolerate a None value, and drop repeated URLs so a channel pasted twice
    # is not fetched and counted twice (dict.fromkeys keeps first-seen order).
    pieces = re.split(r"[\n,]+", channel_urls or "")
    urls = list(dict.fromkeys(p.strip() for p in pieces if p.strip()))

    total_videos = sum(refresh_channel(yt_api_key, url) for url in urls)

    # The radio shows channel titles only. A raw list sent to a Radio output
    # is taken as its *value*, so the choices must go through gr.update.
    channel_names = [name for name, _url in list_channels_radio()]
    yield (
        f"✅ Indexed {total_videos} videos from {len(urls)} channels.",
        gr.update(choices=channel_names),
    )
|
|
|
|
|
|
|
|
def refresh_all_channels():
    """Re-fetch and re-index every channel currently present in the collection.

    Returns:
        A ``(status_message, radio_update)`` pair. The radio update refreshes
        the list of channel titles offered to the user.

    Raises:
        KeyError: If ``YOUTUBE_API_KEY`` is not set in the environment.
    """
    yt_api_key = os.environ["YOUTUBE_API_KEY"]
    channels = get_indexed_channels(collection)

    if not channels:
        # A raw list sent to a Radio output is taken as its value, not its
        # choices, so the choices are always wrapped in gr.update.
        names = [name for name, _url in list_channels_radio()]
        return "⚠️ No channels available to refresh.", gr.update(choices=names)

    total_videos = 0
    refreshed = 0
    for key, val in channels.items():
        # Entries are either dicts holding a "channel_url", or plain values
        # whose mapping key is used as the URL.
        url = val.get("channel_url") if isinstance(val, dict) else key
        if not url:
            continue
        total_videos += refresh_channel(yt_api_key, url)
        refreshed += 1

    # Report only the channels actually refreshed; entries without a URL are
    # skipped above and must not inflate the count.
    names = [name for name, _url in list_channels_radio()]
    return (
        f"🔄 Refreshed {refreshed} channels, re-indexed {total_videos} videos.",
        gr.update(choices=names),
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_channels_radio():
    """Return ``(channel_title, channel_url)`` pairs for every indexed channel.

    Entries returned by ``get_indexed_channels`` are handled in two shapes:
    a dict (title from ``"channel_title"``, defaulting to ``"Unknown"``, and
    URL from ``"channel_url"``) or any other value (the value is the title
    and the mapping key is the URL). Entries with a falsy URL are left out.
    """
    pairs = []
    for channel_key, info in get_indexed_channels(collection).items():
        name, url = (
            (info.get("channel_title", "Unknown"), info.get("channel_url"))
            if isinstance(info, dict)
            else (info, channel_key)
        )
        if not url:
            continue
        pairs.append((name, url))
    return pairs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_channel_html(channel_url: str):
    """Render up to 50 videos of a channel as an HTML table.

    Args:
        channel_url: URL of the channel whose videos are fetched.

    Returns:
        An HTML ``<table>`` string with one row per video (index, title,
        watch link, description), or ``"<p>No videos found.</p>"`` when the
        fetch returns nothing.

    Raises:
        KeyError: If ``YOUTUBE_API_KEY`` is not set in the environment.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape, unescape
    from urllib.parse import quote

    def _safe_text(value):
        """Return ``value`` as HTML-safe text."""
        # Unescape first so text that already arrives entity-encoded is not
        # double-escaped. NOTE(review): whether the collector returns
        # entity-encoded titles is not visible here - confirm.
        return escape(unescape(str(value)))

    api_key = os.environ["YOUTUBE_API_KEY"]
    videos = fetch_channel_videos_from_url(api_key, channel_url, max_results=50)
    if not videos:
        return "<p>No videos found.</p>"

    rows = [
        "<table border='1' style='border-collapse: collapse; width:100%'>",
        # The original header row was missing the closing </th> after "#".
        "<tr><th>#</th><th>Title</th><th>Video URL</th><th>Description</th></tr>",
    ]
    for position, video in enumerate(videos, start=1):
        title = _safe_text(video["title"])
        # A missing or None description renders as an empty cell.
        description = _safe_text(video.get("description", "") or "")
        # Percent-encode the id so it cannot break out of the href attribute.
        video_id = quote(str(video["video_id"]), safe="")
        rows.append(
            "<tr>"
            f"<td>{position}</td>"
            f"<td>{title}</td>"
            f"<td><a href='https://youtube.com/watch?v={video_id}' target='_blank'>Watch Video</a></td>"
            f"<td>{description}</td>"
            "</tr>"
        )
    rows.append("</table>")

    return "".join(rows)
|
|
|
|
|
# NOTE(review): a bare `get_collection` expression statement was removed from
# this spot. The name is neither imported nor defined in this module, so
# evaluating it raised NameError as soon as the module was imported.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def delete_channel(channel_url: str):
    """Delete a channel from the collection and refresh the channel radio.

    Args:
        channel_url: The channel to delete. The UI wires this to the channel
            radio, whose choices are channel *titles*, so a title is accepted
            and translated to its URL. Any value that matches no title is
            passed through unchanged as a URL.

    Returns:
        A Gradio update carrying the remaining channel titles as choices,
        with the selection cleared.
    """
    if channel_url:
        # Map a selected title back to its URL; fall back to the raw value so
        # callers that already pass a URL keep working.
        target_url = next(
            (url for name, url in list_channels_radio() if name == channel_url),
            channel_url,
        )
        # NOTE(review): unlike get_indexed_channels, this helper is called
        # without the collection - confirm its signature in modules.db.
        delete_channel_from_collection(target_url)

    # A raw list sent to a Radio output is taken as its value, so the new
    # choices go through gr.update. The selection is cleared because the
    # selected channel may no longer exist.
    remaining = [name for name, _url in list_channels_radio()]
    return gr.update(choices=remaining, value=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_query(query: str):
    """Answer ``query`` against the module-level collection.

    Args:
        query: The user's question.

    Returns:
        The two values produced by ``answer_query``: the answer text and the
        HTML for the related videos, in that order.
    """
    result = answer_query(query, collection)
    # Unpacking keeps the requirement that exactly two values come back.
    answer_text, video_html = result
    return answer_text, video_html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): the original indentation of this block was lost. The nesting
# below is reconstructed from syntax and blank-line grouping; statement order
# is unchanged. Please confirm in particular that `delete_channel_btn` belongs
# inside the button row and `refresh_status` directly under the sidebar.
with gr.Blocks() as demo:
    gr.Markdown("## 📺 YouTube Metadata Q&A Agent")

    # Modal that shows the selected channel's video table.
    with Modal(visible=False) as videos_list_modal:
        gr.Markdown("### Videos List")
        modal_html = gr.HTML()

    # Modal for adding channels to the index.
    with Modal(visible=False) as add_channel_modal:
        channel_input = gr.Textbox(
            label="Channel URLs",
            placeholder="Paste one or more YouTube channel URLs (comma or newline separated)",
        )
        save_add_channels_btn = gr.Button("Add Channels")
        index_status = gr.Markdown(label="Index Status", container=False)

    with gr.Row():

        with gr.Sidebar() as my_sidebar:
            gr.Markdown("### 📺 Channels")

            # The radio offers channel titles only; handlers map a title back
            # to its URL through list_channels_radio().
            channel_radio = gr.Radio(
                choices=[title for title, _url in list_channels_radio()],
                label="Select a Channel",
            )

            with gr.Row():
                show_videos_btn = gr.Button(
                    "🎬Videos",
                    size="sm",
                    scale=0,
                    variant="secondary",
                    interactive=False,
                )
                refresh_all_btn = gr.Button(
                    "🔄Refresh",
                    size="sm",
                    scale=0,
                    variant="huggingface",
                )
                add_channels_btn = gr.Button(
                    "➕ Add",
                    size="sm",
                    scale=0,
                    variant="primary",
                )

                delete_channel_btn = gr.Button(
                    "🗑️ Delete",
                    size="sm",
                    scale=0,
                    variant="stop",
                    visible=False,
                )

            refresh_status = gr.Markdown(label="Refresh Status", container=False)

            refresh_all_btn.click(
                fn=refresh_all_channels,
                inputs=None,
                outputs=[refresh_status, channel_radio],
            )

            # "Add": collapse the sidebar, then open the add-channel modal.
            add_channels_btn.click(
                close_component,
                outputs=[my_sidebar],
            ).then(
                show_component,
                outputs=[add_channel_modal],
            )

            # "Add Channels": lock the button, index, close the modal,
            # reopen the sidebar, unlock the button.
            save_add_channels_btn.click(
                disable_component,
                outputs=[save_add_channels_btn],
            ).then(
                index_channels,
                inputs=[channel_input],
                outputs=[index_status, channel_radio],
            ).then(
                hide_component,
                outputs=[add_channel_modal],
            ).then(
                open_component,
                outputs=[my_sidebar],
            ).then(
                enable_component,
                outputs=[save_add_channels_btn],
            )

        with gr.Column(scale=3):
            question = gr.Textbox(
                label="Ask a Question",
                placeholder="e.g., What topics did they cover on AI ethics?",
            )
            gr.Examples(
                [
                    "Show me some videos that mention Ranganatha.",
                    "Slokas that mention gajendra moksham",
                ],
                inputs=question,
            )

            answer = gr.Markdown()
            video_embed = gr.HTML()

            with gr.Row():
                gr.Column()
                ask_btn = gr.Button(
                    "💡 Get Answer",
                    size="sm",
                    scale=0,
                    variant="primary",
                    interactive=False,
                )

                ask_status = gr.Markdown()
                gr.Column()

            def _wire_answer_flow(trigger):
                """Attach the loading -> lock -> answer -> unlock -> clear chain to ``trigger``."""
                return (
                    trigger(show_loading, outputs=[ask_status])
                    .then(disable_component, outputs=[ask_btn])
                    .then(handle_query, inputs=[question], outputs=[answer, video_embed])
                    .then(enable_component, outputs=[ask_btn])
                    .then(clear_component, outputs=[ask_status])
                )

            # The button click and the textbox submit run the same chain.
            _wire_answer_flow(ask_btn.click)

            question.change(enable_if_not_none, inputs=[question], outputs=[ask_btn])
            _wire_answer_flow(question.submit)

            def show_selected_channel_videos(selected_channel_name):
                """Return the video table HTML for the channel whose title is selected."""
                matching_urls = (
                    url
                    for title, url in list_channels_radio()
                    if title == selected_channel_name
                )
                # list_channels_radio() never yields a falsy URL, so None can
                # safely stand for "no channel with that title".
                selected_url = next(matching_urls, None)
                if selected_url is None:
                    return "<p>No videos found.</p>"
                return fetch_channel_html(selected_url)

            channel_radio.change(
                enable_if_not_none,
                inputs=[channel_radio],
                outputs=[show_videos_btn],
            )

            # "Videos": lock the button, collapse the sidebar, render the
            # table, open the modal, unlock the button.
            show_videos_btn.click(
                disable_component,
                outputs=[show_videos_btn],
            ).then(
                close_component,
                outputs=[my_sidebar],
            ).then(
                show_selected_channel_videos,
                inputs=[channel_radio],
                outputs=[modal_html],
            ).then(
                show_component,
                outputs=[videos_list_modal],
            ).then(
                enable_component,
                outputs=[show_videos_btn],
            )

            delete_channel_btn.click(
                disable_component,
                outputs=[delete_channel_btn],
            ).then(
                delete_channel,
                inputs=[channel_radio],
                outputs=[channel_radio],
            ).then(
                enable_component,
                outputs=[delete_channel_btn],
            )


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|