vikramvasudevan committed on
Commit
02a5da0
·
verified ·
1 Parent(s): dc56dff

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -4,19 +4,17 @@ import gradio as gr
4
  from gradio_modal import Modal
5
  import chromadb
6
  from modules.collector import fetch_channel_videos_from_url
7
- from modules.db import delete_channel_from_collection, get_indexed_channels
 
 
 
 
8
  from modules.indexer import index_videos
9
  from modules.answerer import answer_query, LLMAnswer, VideoItem, build_video_html
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
 
14
- # -------------------------------
15
- # Setup Chroma
16
- # -------------------------------
17
- client = chromadb.PersistentClient(path="./youtube_db")
18
- collection = client.get_or_create_collection("yt_metadata", embedding_function=None)
19
-
20
 
21
  # -------------------------------
22
  # Utility functions
@@ -67,7 +65,7 @@ def refresh_channel(api_key, channel_url: str):
67
  videos = fetch_channel_videos_from_url(api_key, channel_url)
68
  for v in videos:
69
  v["channel_url"] = channel_url
70
- index_videos(videos, collection, channel_url=channel_url)
71
  return len(videos)
72
 
73
 
@@ -76,13 +74,13 @@ def index_channels(channel_urls: str):
76
  yt_api_key = os.environ["YOUTUBE_API_KEY"]
77
  urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
78
  total_videos = sum(refresh_channel(yt_api_key, url) for url in urls)
79
- yield f"✅ Indexed {total_videos} videos from {len(urls)} channels.", list_channels_radio()
80
  return
81
 
82
 
83
  def refresh_all_channels():
84
  yt_api_key = os.environ["YOUTUBE_API_KEY"]
85
- channels = get_indexed_channels(collection)
86
  if not channels:
87
  return "⚠️ No channels available to refresh.", list_channels_radio()
88
  total_videos = 0
@@ -100,7 +98,7 @@ def refresh_all_channels():
100
  # Channel selection as radio
101
  # -------------------------------
102
  def list_channels_radio():
103
- channels = get_indexed_channels(collection)
104
  choices = []
105
  for key, val in channels.items():
106
  if isinstance(val, dict):
@@ -142,14 +140,14 @@ def fetch_channel_html(channel_url: str):
142
  def delete_channel(channel_url: str):
143
  delete_channel_from_collection(channel_url)
144
  # Return updated radio choices
145
- return list_channels_radio()
146
 
147
 
148
  # -------------------------------
149
  # LLM query
150
  # -------------------------------
151
  def handle_query(query: str):
152
- answer_text, video_html = answer_query(query, collection)
153
  return answer_text, video_html
154
 
155
 
@@ -198,7 +196,7 @@ with gr.Blocks() as demo:
198
  )
199
 
200
  delete_channel_btn = gr.Button(
201
- "🗑️ Delete", size="sm", scale=0, variant="stop", visible=False
202
  )
203
 
204
  refresh_status = gr.Markdown(label="Refresh Status", container=False)
 
4
  from gradio_modal import Modal
5
  import chromadb
6
  from modules.collector import fetch_channel_videos_from_url
7
+ from modules.db import (
8
+ delete_channel_from_collection,
9
+ get_collection,
10
+ get_indexed_channels,
11
+ )
12
  from modules.indexer import index_videos
13
  from modules.answerer import answer_query, LLMAnswer, VideoItem, build_video_html
14
  from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
 
 
 
 
 
 
18
 
19
  # -------------------------------
20
  # Utility functions
 
65
  videos = fetch_channel_videos_from_url(api_key, channel_url)
66
  for v in videos:
67
  v["channel_url"] = channel_url
68
+ index_videos(videos, get_collection(), channel_url=channel_url)
69
  return len(videos)
70
 
71
 
 
74
  yt_api_key = os.environ["YOUTUBE_API_KEY"]
75
  urls = [u.strip() for u in re.split(r"[\n,]+", channel_urls) if u.strip()]
76
  total_videos = sum(refresh_channel(yt_api_key, url) for url in urls)
77
+ yield f"✅ Indexed {total_videos} videos from {len(urls)} channels.", gr.update(choices=list_channels_radio())
78
  return
79
 
80
 
81
  def refresh_all_channels():
82
  yt_api_key = os.environ["YOUTUBE_API_KEY"]
83
+ channels = get_indexed_channels(get_collection())
84
  if not channels:
85
  return "⚠️ No channels available to refresh.", list_channels_radio()
86
  total_videos = 0
 
98
  # Channel selection as radio
99
  # -------------------------------
100
  def list_channels_radio():
101
+ channels = get_indexed_channels(get_collection())
102
  choices = []
103
  for key, val in channels.items():
104
  if isinstance(val, dict):
 
140
  def delete_channel(channel_url: str):
141
  delete_channel_from_collection(channel_url)
142
  # Return updated radio choices
143
+ return gr.update(choices=list_channels_radio())
144
 
145
 
146
  # -------------------------------
147
  # LLM query
148
  # -------------------------------
149
  def handle_query(query: str):
150
+ answer_text, video_html = answer_query(query, get_collection())
151
  return answer_text, video_html
152
 
153
 
 
196
  )
197
 
198
  delete_channel_btn = gr.Button(
199
+ "🗑️ Delete", size="sm", scale=0, variant="stop"
200
  )
201
 
202
  refresh_status = gr.Markdown(label="Refresh Status", container=False)