Spaces:
Sleeping
Sleeping
publication version
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import json
|
|
8 |
from groq import Groq
|
9 |
import time
|
10 |
import pandas as pd
|
|
|
11 |
|
12 |
|
13 |
def normalize_url(url: str) -> str:
|
@@ -69,89 +70,139 @@ def extract_sitemap_urls_from_robots(robots_content: str) -> List[str]:
|
|
69 |
|
70 |
def generate_hyperbolic_summary(url: str, content: str, api_key: str) -> str:
|
71 |
try:
|
|
|
|
|
|
|
72 |
response = requests.post(
|
73 |
'https://api.hyperbolic.xyz/v1/chat/completions',
|
74 |
headers={
|
75 |
-
'Content-Type': 'application/json',
|
76 |
'Authorization': f'Bearer {api_key}',
|
77 |
},
|
78 |
json={
|
79 |
-
'model':
|
80 |
-
'meta-llama/Meta-Llama-3.1-8B-Instruct',
|
81 |
'messages': [{
|
82 |
-
'role':
|
83 |
-
'
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
86 |
}],
|
87 |
-
'max_tokens':
|
88 |
-
|
89 |
-
'
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
False
|
95 |
-
})
|
96 |
response.raise_for_status()
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
except Exception as e:
|
|
|
99 |
return f"Error generating Hyperbolic summary: {str(e)}"
|
100 |
|
101 |
|
102 |
def generate_groq_summary(url: str, content: str, api_key: str) -> str:
|
103 |
try:
|
|
|
|
|
|
|
104 |
client = Groq(api_key=api_key)
|
105 |
completion = client.chat.completions.create(
|
106 |
messages=[{
|
107 |
-
|
108 |
-
"
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
111 |
}],
|
112 |
model="llama-3.2-1b-preview",
|
113 |
temperature=0.7,
|
114 |
max_tokens=200,
|
|
|
|
|
115 |
)
|
116 |
-
|
|
|
|
|
|
|
117 |
except Exception as e:
|
|
|
118 |
return f"Error generating Groq summary: {str(e)}"
|
119 |
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
def get_page_content(url: str, markdowner_key: Optional[str] = None) -> str:
|
122 |
try:
|
123 |
-
headers = {
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
if markdowner_key:
|
125 |
headers["Authorization"] = f"Bearer {markdowner_key}"
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
response.raise_for_status()
|
131 |
-
return response.text
|
132 |
-
except Exception as e:
|
133 |
-
return f"Error fetching content: {str(e)}"
|
134 |
|
|
|
|
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
homepage = next(
|
142 |
-
(s for s in summaries if urlparse(s['url']).path in ['', '/']),
|
143 |
-
summaries[0])
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
content += "## Main Pages\n\n"
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
153 |
|
154 |
-
|
|
|
|
|
155 |
|
156 |
|
157 |
def process_website(
|
@@ -161,11 +212,10 @@ def process_website(
|
|
161 |
markdowner_key: str = "",
|
162 |
use_hyperbolic: bool = True,
|
163 |
progress=gr.Progress()
|
164 |
-
) -> Tuple[str, str, List[str]]:
|
165 |
try:
|
166 |
-
if not (use_hyperbolic and hyperbolic_key) and not (not use_hyperbolic
|
167 |
-
|
168 |
-
return "Error: Please provide an API key for the selected AI provider", None, []
|
169 |
|
170 |
base_url = normalize_url(url)
|
171 |
progress(0, desc="Initializing...")
|
@@ -194,7 +244,7 @@ def process_website(
|
|
194 |
continue
|
195 |
|
196 |
if not sitemap_urls:
|
197 |
-
return "Error: No sitemaps found", None, []
|
198 |
|
199 |
progress(0.4, desc="Processing sitemaps...")
|
200 |
|
@@ -209,7 +259,7 @@ def process_website(
|
|
209 |
continue
|
210 |
|
211 |
if not all_urls:
|
212 |
-
return "Error: No URLs found in sitemaps", None, []
|
213 |
|
214 |
progress(0.6, desc="Generating summaries...")
|
215 |
|
@@ -220,31 +270,40 @@ def process_website(
|
|
220 |
# Get content via Markdowner
|
221 |
content = get_page_content(page_url, markdowner_key)
|
222 |
|
|
|
|
|
|
|
223 |
# Generate summary with selected provider
|
224 |
if use_hyperbolic:
|
225 |
-
summary = generate_hyperbolic_summary(
|
226 |
-
page_url, content, hyperbolic_key)
|
227 |
else:
|
228 |
-
summary = generate_groq_summary(page_url, content,
|
229 |
-
groq_key)
|
230 |
|
231 |
-
summaries.append({
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
# Rate limiting
|
234 |
-
time.sleep(1)
|
235 |
|
236 |
progress((0.6 + (0.4 * (i + 1) / len(all_urls))),
|
237 |
-
|
238 |
except Exception as e:
|
239 |
-
print(f"Error processing {page_url}: {e}")
|
|
|
240 |
|
241 |
-
# Generate
|
242 |
llms_txt = generate_llms_txt(summaries)
|
|
|
243 |
|
244 |
-
return llms_txt, json.dumps(summaries, indent=2), all_urls
|
245 |
|
246 |
except Exception as e:
|
247 |
-
|
|
|
248 |
|
249 |
|
250 |
# Gradio Interface
|
@@ -255,33 +314,51 @@ with gr.Blocks(title="llms.txt Generator", theme=gr.themes.Soft()) as demo:
|
|
255 |
""")
|
256 |
|
257 |
with gr.Row():
|
258 |
-
url_input = gr.Textbox(
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
# AI Provider Selection
|
265 |
with gr.Row():
|
266 |
with gr.Column():
|
267 |
-
use_hyperbolic = gr.Checkbox(
|
268 |
-
|
269 |
-
|
|
|
|
|
270 |
hyperbolic_key = gr.Textbox(
|
271 |
label="Hyperbolic API Key",
|
272 |
type="password",
|
273 |
visible=True,
|
274 |
-
placeholder="Enter your Hyperbolic API key"
|
|
|
|
|
|
|
275 |
|
276 |
with gr.Column():
|
277 |
-
use_groq = gr.Checkbox(
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
|
|
|
285 |
def update_provider_visibility(use_hyp: bool, use_grq: bool):
|
286 |
# Ensure only one provider is selected
|
287 |
if use_hyp and use_grq:
|
@@ -298,12 +375,14 @@ with gr.Blocks(title="llms.txt Generator", theme=gr.themes.Soft()) as demo:
|
|
298 |
use_hyperbolic.change(
|
299 |
fn=update_provider_visibility,
|
300 |
inputs=[use_hyperbolic, use_groq],
|
301 |
-
outputs=[hyperbolic_key, groq_key, use_groq, use_hyperbolic]
|
|
|
302 |
|
303 |
use_groq.change(
|
304 |
fn=update_provider_visibility,
|
305 |
inputs=[use_hyperbolic, use_groq],
|
306 |
-
outputs=[hyperbolic_key, groq_key, use_groq, use_hyperbolic]
|
|
|
307 |
|
308 |
generate_btn = gr.Button("Generate π", variant="primary")
|
309 |
|
@@ -312,51 +391,98 @@ with gr.Blocks(title="llms.txt Generator", theme=gr.themes.Soft()) as demo:
|
|
312 |
label="Generated llms.txt",
|
313 |
placeholder="Generated content will appear here...",
|
314 |
lines=10,
|
315 |
-
show_copy_button=True
|
316 |
)
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
-
# Add download
|
320 |
-
def
|
321 |
"""Convert text to downloadable format"""
|
322 |
if not text:
|
323 |
return None
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
download_trigger = gr.Button("Download llms.txt π₯")
|
331 |
-
|
332 |
-
|
333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
335 |
-
urls_found = gr.Dataframe(
|
336 |
-
|
337 |
-
|
|
|
|
|
338 |
|
339 |
def process_and_update(*args):
|
340 |
-
result, summaries, urls = process_website(*args)
|
|
|
|
|
|
|
|
|
341 |
|
342 |
-
#
|
343 |
-
|
344 |
-
{"URLs Found": urls if urls else ["No URLs found"]})
|
345 |
|
346 |
return {
|
347 |
llms_output: result,
|
|
|
348 |
json_output: summaries if summaries else "",
|
349 |
urls_found: urls_df,
|
350 |
-
download_btn: None
|
|
|
351 |
}
|
352 |
|
353 |
generate_btn.click(
|
354 |
process_and_update,
|
355 |
-
inputs=[
|
356 |
-
|
357 |
-
|
358 |
-
outputs=[llms_output, json_output, urls_found, download_btn])
|
359 |
|
360 |
if __name__ == "__main__":
|
361 |
demo.launch()
|
362 |
-
|
|
|
8 |
from groq import Groq
|
9 |
import time
|
10 |
import pandas as pd
|
11 |
+
import os
|
12 |
|
13 |
|
14 |
def normalize_url(url: str) -> str:
|
|
|
70 |
|
71 |
def generate_hyperbolic_summary(url: str, content: str, api_key: str) -> str:
    """Summarize a page in one sentence via the Hyperbolic chat-completions API.

    Args:
        url: Address of the page, included in the prompt.
        content: Page text to summarize.
        api_key: Bearer token sent in the ``Authorization`` header.

    Returns:
        The text between ``<summary>`` tags in the model reply, or the whole
        stripped reply when no tags are found. Any failure is printed and
        returned as an ``"Error generating Hyperbolic summary: ..."`` string
        rather than raised.
    """
    try:
        # Round-trip through UTF-8, silently dropping anything unencodable.
        content = content.encode('utf-8', errors='ignore').decode('utf-8')

        request_headers = {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': f'Bearer {api_key}',
        }
        prompt = f"""Generate a concise 1-sentence summary of this webpage content. Wrap your summary in <summary> tags.

URL: {url}
Content: {content}

Example response format:
<summary>This is a clear and concise one-sentence summary of the webpage.</summary>"""
        payload = {
            'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct',
            'messages': [{'role': 'user', 'content': prompt}],
            'max_tokens': 200,
            'temperature': 0.7,
            'top_p': 0.9,
            'stream': False,
        }

        response = requests.post(
            'https://api.hyperbolic.xyz/v1/chat/completions',
            headers=request_headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()

        reply = response.json()['choices'][0]['message']['content']
        # Prefer the tagged span; fall back to the raw reply if the model
        # ignored the requested format.
        tagged = re.search(r'<summary>(.*?)</summary>', reply, re.DOTALL)
        if tagged is None:
            return reply.strip()
        return tagged.group(1).strip()
    except Exception as e:
        print(f"Error in generate_hyperbolic_summary: {str(e)}")
        return f"Error generating Hyperbolic summary: {str(e)}"
|
110 |
|
111 |
|
112 |
def generate_groq_summary(url: str, content: str, api_key: str) -> str:
    """Summarize a page in one sentence using the Groq chat-completions client.

    Args:
        url: Address of the page, included in the prompt.
        content: Page text to summarize.
        api_key: Key used to construct the ``Groq`` client.

    Returns:
        The text between ``<summary>`` tags in the model reply, or the whole
        stripped reply when no tags are found. Any failure is printed and
        returned as an ``"Error generating Groq summary: ..."`` string rather
        than raised.
    """
    try:
        # Round-trip through UTF-8, silently dropping anything unencodable.
        content = content.encode('utf-8', errors='ignore').decode('utf-8')

        client = Groq(api_key=api_key)
        user_message = {
            'role': 'user',
            'content': f"""Generate a concise 1-sentence summary of this webpage content. Wrap your summary in <summary> tags.

URL: {url}
Content: {content}

Example response format:
<summary>This is a clear and concise one-sentence summary of the webpage.</summary>""",
        }
        completion = client.chat.completions.create(
            messages=[user_message],
            model="llama-3.2-1b-preview",
            temperature=0.7,
            max_tokens=200,
            top_p=0.9,
            stream=False,
        )

        reply = completion.choices[0].message.content
        # Prefer the tagged span; fall back to the raw reply if the model
        # ignored the requested format.
        tagged = re.search(r'<summary>(.*?)</summary>', reply, re.DOTALL)
        if tagged is None:
            return reply.strip()
        return tagged.group(1).strip()
    except Exception as e:
        print(f"Error in generate_groq_summary: {str(e)}")
        return f"Error generating Groq summary: {str(e)}"
|
142 |
|
143 |
|
144 |
+
def generate_llms_txt(summaries: List[Dict[str, str]]) -> str:
    """Render the llms.txt body: one heading-plus-summary section per page.

    Each entry must provide ``'url'`` and ``'summary'`` keys. Sections are
    joined with a newline; an empty or missing list yields an empty string.
    """

    def render_section(entry: Dict[str, str]) -> str:
        # One section: URL heading, summary text, horizontal-rule separator.
        return f"# {entry['url']}\n\n{entry['summary']}\n\n---\n"

    return "\n".join(map(render_section, summaries)) if summaries else ""
|
152 |
+
|
153 |
+
|
154 |
+
def generate_llms_full_txt(summaries: List[Dict]) -> str:
    """Render the llms-full.txt body: every page's full content under its URL.

    Args:
        summaries: Page records. Each must have a ``'url'`` key; the page text
            is read from ``'fullContent'`` and replaced by
            ``'No content available'`` when that key is absent.

    Returns:
        The concatenated sections, each ending with a ``---`` separator, or
        ``"No content generated"`` when *summaries* is empty.
    """
    if not summaries:
        return "No content generated"

    # Build all sections in a single join instead of repeated `+=` on a
    # growing string.
    return "".join(
        f"# {entry['url']}\n\n"
        f"{entry.get('fullContent', 'No content available')}\n\n"
        "---\n\n"
        for entry in summaries
    )
|
165 |
+
|
166 |
+
|
167 |
def get_page_content(url: str, markdowner_key: Optional[str] = None) -> str:
    """Fetch *url* as text through the md.dhr.wtf Markdowner service.

    Args:
        url: Page to convert; it is percent-quoted and passed as the ``url``
            query parameter.
        markdowner_key: Optional bearer token sent in the ``Authorization``
            header.

    Returns:
        The response body on HTTP 200. Otherwise a string beginning with
        ``"Error fetching content:"``; exceptions are printed and reported
        this way rather than raised.
    """
    try:
        headers = {
            "Accept": "text/plain",
            "Accept-Language": "en-US,en;q=0.9",
            "User-Agent": "Mozilla/5.0 (compatible; SitemapParser/1.0)",
            "Origin": "http://localhost:3000",
            "Referer": "http://localhost:3000/",
        }
        if markdowner_key:
            headers["Authorization"] = f"Bearer {markdowner_key}"

        # Use direct URL construction like the curl command
        encoded_url = requests.utils.quote(url)
        full_url = f"https://md.dhr.wtf/?url={encoded_url}"

        print(f"Requesting URL: {full_url}")  # Debug logging
        # Never write the bearer token to the logs: redact it in the copy that
        # gets printed, while the real headers are still sent.
        loggable_headers = {
            name: ("<redacted>" if name == "Authorization" else value)
            for name, value in headers.items()
        }
        print(f"Headers: {loggable_headers}")  # Debug logging

        response = requests.get(
            full_url,
            headers=headers,
            timeout=30,
        )

        response.encoding = 'utf-8'
        response.raise_for_status()

        if response.status_code == 200:
            return response.text

        # Only reached for statuses that raise_for_status() does not reject
        # but that are not a plain 200.
        print(f"Response status: {response.status_code}")  # Debug logging
        print(f"Response headers: {response.headers}")  # Debug logging
        print(f"Response text: {response.text[:500]}")  # Debug logging
        return f"Error fetching content: {response.status_code} {response.reason}"

    except Exception as e:
        print(f"Error fetching content for {url}: {str(e)}")
        return f"Error fetching content: {str(e)}"
|
206 |
|
207 |
|
208 |
def process_website(
|
|
|
212 |
markdowner_key: str = "",
|
213 |
use_hyperbolic: bool = True,
|
214 |
progress=gr.Progress()
|
215 |
+
) -> Tuple[str, str, List[str], str]:
|
216 |
try:
|
217 |
+
if not (use_hyperbolic and hyperbolic_key) and not (not use_hyperbolic and groq_key):
|
218 |
+
return "Error: Please provide an API key for the selected AI provider", None, [], ""
|
|
|
219 |
|
220 |
base_url = normalize_url(url)
|
221 |
progress(0, desc="Initializing...")
|
|
|
244 |
continue
|
245 |
|
246 |
if not sitemap_urls:
|
247 |
+
return "Error: No sitemaps found", None, [], ""
|
248 |
|
249 |
progress(0.4, desc="Processing sitemaps...")
|
250 |
|
|
|
259 |
continue
|
260 |
|
261 |
if not all_urls:
|
262 |
+
return "Error: No URLs found in sitemaps", None, [], ""
|
263 |
|
264 |
progress(0.6, desc="Generating summaries...")
|
265 |
|
|
|
270 |
# Get content via Markdowner
|
271 |
content = get_page_content(page_url, markdowner_key)
|
272 |
|
273 |
+
# Store full content for llms-full.txt
|
274 |
+
full_content = content
|
275 |
+
|
276 |
# Generate summary with selected provider
|
277 |
if use_hyperbolic:
|
278 |
+
summary = generate_hyperbolic_summary(page_url, content, hyperbolic_key)
|
|
|
279 |
else:
|
280 |
+
summary = generate_groq_summary(page_url, content, groq_key)
|
|
|
281 |
|
282 |
+
summaries.append({
|
283 |
+
"url": page_url,
|
284 |
+
"summary": summary,
|
285 |
+
"fullContent": full_content,
|
286 |
+
"provider": "hyperbolic" if use_hyperbolic else "groq"
|
287 |
+
})
|
288 |
|
289 |
# Rate limiting
|
290 |
+
time.sleep(1)
|
291 |
|
292 |
progress((0.6 + (0.4 * (i + 1) / len(all_urls))),
|
293 |
+
desc=f"Processing URL {i+1}/{len(all_urls)}")
|
294 |
except Exception as e:
|
295 |
+
print(f"Error processing {page_url}: {str(e)}")
|
296 |
+
continue
|
297 |
|
298 |
+
# Generate both formats
|
299 |
llms_txt = generate_llms_txt(summaries)
|
300 |
+
llms_full_txt = generate_llms_full_txt(summaries)
|
301 |
|
302 |
+
return llms_txt, json.dumps(summaries, ensure_ascii=False, indent=2), all_urls, llms_full_txt
|
303 |
|
304 |
except Exception as e:
|
305 |
+
print(f"Error in process_website: {str(e)}")
|
306 |
+
return f"Processing failed: {str(e)}", None, [], ""
|
307 |
|
308 |
|
309 |
# Gradio Interface
|
|
|
314 |
""")
|
315 |
|
316 |
with gr.Row():
|
317 |
+
url_input = gr.Textbox(
|
318 |
+
label="Website URL",
|
319 |
+
placeholder="Enter website URL"
|
320 |
+
)
|
321 |
+
markdowner_key = gr.Textbox(
|
322 |
+
label="Markdowner API Key (Optional)",
|
323 |
+
placeholder="For higher rate limits",
|
324 |
+
type="password",
|
325 |
+
container=True,
|
326 |
+
scale=2
|
327 |
+
)
|
328 |
|
329 |
# AI Provider Selection
|
330 |
with gr.Row():
|
331 |
with gr.Column():
|
332 |
+
use_hyperbolic = gr.Checkbox(
|
333 |
+
label="Use Hyperbolic",
|
334 |
+
value=True,
|
335 |
+
interactive=True
|
336 |
+
)
|
337 |
hyperbolic_key = gr.Textbox(
|
338 |
label="Hyperbolic API Key",
|
339 |
type="password",
|
340 |
visible=True,
|
341 |
+
placeholder="Enter your Hyperbolic API key",
|
342 |
+
container=False,
|
343 |
+
scale=2
|
344 |
+
)
|
345 |
|
346 |
with gr.Column():
|
347 |
+
use_groq = gr.Checkbox(
|
348 |
+
label="Use Groq",
|
349 |
+
value=False,
|
350 |
+
interactive=True
|
351 |
+
)
|
352 |
+
groq_key = gr.Textbox(
|
353 |
+
label="Groq API Key",
|
354 |
+
type="password",
|
355 |
+
visible=False,
|
356 |
+
placeholder="Enter your Groq API key",
|
357 |
+
container=False,
|
358 |
+
scale=2
|
359 |
+
)
|
360 |
|
361 |
+
# Connect checkbox events
|
362 |
def update_provider_visibility(use_hyp: bool, use_grq: bool):
|
363 |
# Ensure only one provider is selected
|
364 |
if use_hyp and use_grq:
|
|
|
375 |
use_hyperbolic.change(
|
376 |
fn=update_provider_visibility,
|
377 |
inputs=[use_hyperbolic, use_groq],
|
378 |
+
outputs=[hyperbolic_key, groq_key, use_groq, use_hyperbolic]
|
379 |
+
)
|
380 |
|
381 |
use_groq.change(
|
382 |
fn=update_provider_visibility,
|
383 |
inputs=[use_hyperbolic, use_groq],
|
384 |
+
outputs=[hyperbolic_key, groq_key, use_groq, use_hyperbolic]
|
385 |
+
)
|
386 |
|
387 |
generate_btn = gr.Button("Generate π", variant="primary")
|
388 |
|
|
|
391 |
label="Generated llms.txt",
|
392 |
placeholder="Generated content will appear here...",
|
393 |
lines=10,
|
394 |
+
show_copy_button=True
|
395 |
)
|
396 |
+
llms_full_output = gr.TextArea(
|
397 |
+
label="Generated llms-full.txt",
|
398 |
+
placeholder="Full content will appear here...",
|
399 |
+
lines=10,
|
400 |
+
show_copy_button=True
|
401 |
+
)
|
402 |
+
|
403 |
+
# Add JSON output for debugging
|
404 |
+
json_output = gr.JSON(
|
405 |
+
label="Debug Output (JSON)",
|
406 |
+
visible=True
|
407 |
+
)
|
408 |
|
409 |
+
# Add download buttons for both files
|
410 |
+
def download_txt(text: str, filename: str) -> Optional[str]:
    """Write *text* to *filename* as UTF-8 so it can be offered for download.

    Args:
        text: Content to save.
        filename: Path of the file to create or overwrite.

    Returns:
        *filename* after writing, or ``None`` when *text* is empty (nothing is
        written in that case).
    """
    if not text:
        return None
    # Create a file with the proper name
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
    return filename
|
418 |
+
|
419 |
+
download_btn = gr.File(
|
420 |
+
label="Download llms.txt",
|
421 |
+
visible=True,
|
422 |
+
file_types=[".txt"]
|
423 |
+
)
|
424 |
+
|
425 |
+
download_full_btn = gr.File(
|
426 |
+
label="Download llms-full.txt",
|
427 |
+
visible=True,
|
428 |
+
file_types=[".txt"]
|
429 |
+
)
|
430 |
|
431 |
download_trigger = gr.Button("Download llms.txt π₯")
|
432 |
+
download_full_trigger = gr.Button("Download llms-full.txt π₯")
|
433 |
+
|
434 |
+
download_trigger.click(
|
435 |
+
fn=lambda x: download_txt(x, "llms.txt"),
|
436 |
+
inputs=[llms_output],
|
437 |
+
outputs=[download_btn]
|
438 |
+
)
|
439 |
+
|
440 |
+
download_full_trigger.click(
|
441 |
+
fn=lambda x: download_txt(x, "llms-full.txt"),
|
442 |
+
inputs=[llms_full_output],
|
443 |
+
outputs=[download_full_btn]
|
444 |
+
)
|
445 |
+
|
446 |
+
# Clean up function to remove temporary files
|
447 |
+
def cleanup():
    """Remove the temporary ``llms.txt`` / ``llms-full.txt`` files, if present.

    Best-effort: a missing file is ignored, any other filesystem error is
    printed, and the remaining file is still attempted. Nothing is raised for
    filesystem failures.
    """
    for leftover in ("llms.txt", "llms-full.txt"):
        try:
            os.remove(leftover)
        except FileNotFoundError:
            # Nothing to clean up for this file.
            pass
        except OSError as e:
            # Report instead of silently swallowing, but keep going.
            print(f"Error removing {leftover}: {str(e)}")
|
455 |
|
456 |
+
urls_found = gr.Dataframe(
|
457 |
+
headers=["URLs Found"],
|
458 |
+
label="Discovered URLs",
|
459 |
+
visible=True
|
460 |
+
)
|
461 |
|
462 |
def process_and_update(*args):
    """Run ``process_website`` and map its results onto the output components.

    All positional arguments are forwarded unchanged. Returns a dict keyed by
    component: the two text outputs, the JSON output (empty string when there
    are no summaries), the URL table, and both download slots reset to
    ``None``.
    """
    llms_text, summaries_json, discovered, llms_full_text = process_website(*args)

    # Show a placeholder row when no URLs were discovered.
    url_column = discovered or ["No URLs found"]
    urls_table = pd.DataFrame({"URLs Found": url_column})

    # Clean up any existing temporary files
    cleanup()

    updates = {
        llms_output: llms_text,
        llms_full_output: llms_full_text,
        json_output: summaries_json or "",
        urls_found: urls_table,
    }
    # Clear both download slots for the new result.
    updates[download_btn] = None
    updates[download_full_btn] = None
    return updates
|
480 |
|
481 |
generate_btn.click(
|
482 |
process_and_update,
|
483 |
+
inputs=[url_input, hyperbolic_key, groq_key, markdowner_key, use_hyperbolic],
|
484 |
+
outputs=[llms_output, llms_full_output, json_output, urls_found, download_btn, download_full_btn]
|
485 |
+
)
|
|
|
486 |
|
487 |
if __name__ == "__main__":
|
488 |
demo.launch()
|
|