Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +624 -362
src/streamlit_app.py
CHANGED
@@ -1,137 +1,71 @@
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
import json
|
4 |
-
from typing import Dict, List, Optional
|
5 |
import re
|
6 |
from urllib.parse import quote
|
7 |
import time
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
# Configure page
|
10 |
st.set_page_config(
|
11 |
-
page_title="WikiBot - Multilingual Assistant",
|
12 |
-
page_icon="
|
13 |
layout="wide",
|
14 |
-
initial_sidebar_state="
|
15 |
)
|
16 |
|
17 |
-
# Language codes mapping
|
18 |
LANGUAGES = {
|
19 |
-
"English": "en",
|
20 |
-
"Telugu": "te",
|
21 |
-
"Hindi": "hi",
|
22 |
-
"Spanish": "es",
|
23 |
-
"French": "fr",
|
24 |
-
"German": "de",
|
25 |
-
"Italian": "it",
|
26 |
-
"Portuguese": "pt",
|
27 |
-
"Russian": "ru",
|
28 |
-
"Japanese": "ja",
|
29 |
-
"Chinese": "zh",
|
30 |
-
"Arabic": "ar",
|
31 |
-
"Korean": "ko"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
"""Check if Ollama is running"""
|
42 |
-
try:
|
43 |
-
response = requests.get(self.models_url, timeout=3)
|
44 |
-
return response.status_code == 200
|
45 |
-
except Exception:
|
46 |
-
return False
|
47 |
-
|
48 |
-
def get_available_models(self) -> List[str]:
|
49 |
-
"""Get list of available models"""
|
50 |
-
try:
|
51 |
-
response = requests.get(self.models_url, timeout=5)
|
52 |
-
if response.status_code == 200:
|
53 |
-
data = response.json()
|
54 |
-
models = data.get("models", [])
|
55 |
-
return [model["name"] for model in models]
|
56 |
-
return []
|
57 |
-
except Exception:
|
58 |
-
return []
|
59 |
-
|
60 |
-
def generate_summary(self, text: str, model: str = "llama3.2", language: str = "English",
|
61 |
-
summary_type: str = "concise") -> str:
|
62 |
-
"""Generate AI summary using local LLM"""
|
63 |
-
try:
|
64 |
-
# Truncate text if too long
|
65 |
-
if len(text) > 2000:
|
66 |
-
text = text[:2000] + "..."
|
67 |
-
|
68 |
-
# Craft prompt based on summary type
|
69 |
-
if summary_type == "concise":
|
70 |
-
prompt = f"Summarize this Wikipedia content in {language} in 2-3 clear sentences:\n\n{text}\n\nSummary:"
|
71 |
-
elif summary_type == "detailed":
|
72 |
-
prompt = f"Provide a detailed summary of this Wikipedia content in {language}. Include key points and important facts:\n\n{text}\n\nDetailed Summary:"
|
73 |
-
else: # explanatory
|
74 |
-
prompt = f"Explain this Wikipedia content in {language} in simple terms that anyone can understand:\n\n{text}\n\nExplanation:"
|
75 |
-
|
76 |
-
# Request to Ollama
|
77 |
-
payload = {
|
78 |
-
"model": model,
|
79 |
-
"prompt": prompt,
|
80 |
-
"stream": False,
|
81 |
-
"options": {
|
82 |
-
"temperature": 0.7,
|
83 |
-
"num_predict": 300 if summary_type == "detailed" else 150
|
84 |
-
}
|
85 |
-
}
|
86 |
-
|
87 |
-
response = requests.post(self.api_url, json=payload, timeout=30)
|
88 |
-
|
89 |
-
if response.status_code == 200:
|
90 |
-
data = response.json()
|
91 |
-
summary = data.get("response", "").strip()
|
92 |
-
return summary if summary else "No summary generated"
|
93 |
-
else:
|
94 |
-
return f"Error: Status {response.status_code}"
|
95 |
-
|
96 |
-
except requests.exceptions.Timeout:
|
97 |
-
return "Error: Request timeout - try a smaller text"
|
98 |
-
except Exception as e:
|
99 |
-
return f"Error: {str(e)}"
|
100 |
-
|
101 |
-
def translate_text(self, text: str, target_language: str, model: str = "llama3.2") -> str:
|
102 |
-
"""Translate text using local LLM"""
|
103 |
-
try:
|
104 |
-
prompt = f"Translate this text to {target_language}. Only provide the translation:\n\n{text}\n\nTranslation:"
|
105 |
-
|
106 |
-
payload = {
|
107 |
-
"model": model,
|
108 |
-
"prompt": prompt,
|
109 |
-
"stream": False,
|
110 |
-
"options": {
|
111 |
-
"temperature": 0.3,
|
112 |
-
"num_predict": 200
|
113 |
-
}
|
114 |
-
}
|
115 |
-
|
116 |
-
response = requests.post(self.api_url, json=payload, timeout=20)
|
117 |
-
|
118 |
-
if response.status_code == 200:
|
119 |
-
data = response.json()
|
120 |
-
translation = data.get("response", "").strip()
|
121 |
-
return translation if translation else text
|
122 |
-
else:
|
123 |
-
return text
|
124 |
-
|
125 |
-
except Exception:
|
126 |
-
return text
|
127 |
|
128 |
class WikipediaAPI:
|
129 |
def __init__(self):
|
130 |
self.base_url = "https://{}.wikipedia.org/api/rest_v1"
|
131 |
self.search_url = "https://{}.wikipedia.org/w/api.php"
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
|
134 |
-
"""Search for Wikipedia articles"""
|
|
|
|
|
|
|
|
|
135 |
try:
|
136 |
params = {
|
137 |
"action": "query",
|
@@ -139,7 +73,7 @@ class WikipediaAPI:
|
|
139 |
"list": "search",
|
140 |
"srsearch": query,
|
141 |
"srlimit": limit,
|
142 |
-
"srprop": "snippet|titlesnippet"
|
143 |
}
|
144 |
|
145 |
url = self.search_url.format(lang)
|
@@ -147,14 +81,19 @@ class WikipediaAPI:
|
|
147 |
response.raise_for_status()
|
148 |
|
149 |
data = response.json()
|
150 |
-
|
151 |
-
|
|
|
152 |
except Exception as e:
|
153 |
st.error(f"Search error: {str(e)}")
|
154 |
return []
|
155 |
|
156 |
def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
|
157 |
-
"""Get page summary using REST API"""
|
|
|
|
|
|
|
|
|
158 |
try:
|
159 |
encoded_title = quote(title.replace(" ", "_"))
|
160 |
url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
|
@@ -162,23 +101,25 @@ class WikipediaAPI:
|
|
162 |
response = requests.get(url, timeout=10)
|
163 |
response.raise_for_status()
|
164 |
|
165 |
-
|
166 |
-
|
|
|
167 |
except Exception as e:
|
|
|
168 |
return None
|
169 |
|
170 |
-
def get_page_content(self, title: str, lang: str = "en",
|
171 |
"""Get page content sections"""
|
172 |
try:
|
173 |
params = {
|
174 |
"action": "query",
|
175 |
"format": "json",
|
176 |
"prop": "extracts",
|
177 |
-
"exintro":
|
178 |
"explaintext": True,
|
179 |
"exsectionformat": "plain",
|
180 |
"titles": title,
|
181 |
-
"exchars":
|
182 |
}
|
183 |
|
184 |
url = self.search_url.format(lang)
|
@@ -193,313 +134,634 @@ class WikipediaAPI:
|
|
193 |
return page_data["extract"]
|
194 |
|
195 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
-
except Exception:
|
198 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
def clean_html(text: str) -> str:
|
201 |
"""Remove HTML tags from text"""
|
202 |
-
if not text:
|
203 |
-
return ""
|
204 |
clean = re.compile('<.*?>')
|
205 |
return re.sub(clean, '', text)
|
206 |
|
207 |
-
def
|
208 |
-
"""
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
def main():
|
223 |
-
|
|
|
|
|
|
|
224 |
st.markdown("""
|
225 |
<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
.main-header {
|
227 |
text-align: center;
|
228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
margin-bottom: 2rem;
|
|
|
230 |
}
|
|
|
231 |
.search-container {
|
232 |
-
background
|
233 |
-
padding:
|
234 |
-
border-radius:
|
235 |
-
margin-bottom:
|
|
|
|
|
236 |
}
|
|
|
237 |
.result-card {
|
238 |
-
background
|
239 |
padding: 1.5rem;
|
240 |
-
border-radius:
|
241 |
-
border: 1px solid #
|
242 |
-
margin-bottom:
|
243 |
-
box-shadow: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
}
|
|
|
245 |
.article-title {
|
246 |
-
color: #
|
247 |
-
font-weight:
|
248 |
-
font-size: 1.
|
249 |
-
margin-bottom:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
}
|
251 |
-
|
252 |
-
|
253 |
-
|
|
|
|
|
|
|
254 |
margin-bottom: 1rem;
|
255 |
-
|
256 |
}
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
border: 1px solid #c3e6cb;
|
261 |
}
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
}
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
}
|
274 |
</style>
|
275 |
""", unsafe_allow_html=True)
|
276 |
|
277 |
-
#
|
278 |
-
|
279 |
-
st.markdown("<p style='text-align: center; color: #666;'>Wikipedia + Local LLM Intelligence</p>", unsafe_allow_html=True)
|
280 |
|
281 |
-
#
|
282 |
-
|
283 |
-
|
284 |
|
285 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
wiki_api = WikipediaAPI()
|
287 |
-
llm = OllamaLLM()
|
288 |
-
|
289 |
-
# Check LLM connection
|
290 |
-
with st.spinner("Checking Ollama connection..."):
|
291 |
-
llm_connected = llm.check_connection()
|
292 |
-
available_models = llm.get_available_models() if llm_connected else []
|
293 |
-
|
294 |
-
# Status display
|
295 |
-
if llm_connected:
|
296 |
-
st.markdown(f"""
|
297 |
-
<div class='status-box status-connected'>
|
298 |
-
✅ Ollama Connected - {len(available_models)} models available
|
299 |
-
</div>
|
300 |
-
""", unsafe_allow_html=True)
|
301 |
-
else:
|
302 |
-
st.markdown("""
|
303 |
-
<div class='status-box status-disconnected'>
|
304 |
-
❌ Ollama Offline - Basic mode only
|
305 |
-
</div>
|
306 |
-
""", unsafe_allow_html=True)
|
307 |
|
308 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
st.markdown("<div class='search-container'>", unsafe_allow_html=True)
|
310 |
|
311 |
-
# Search inputs
|
312 |
col1, col2 = st.columns([3, 1])
|
313 |
|
314 |
with col1:
|
315 |
query = st.text_input(
|
316 |
-
"🔍 Search
|
317 |
-
placeholder="e.g., 'Artificial Intelligence', '
|
318 |
-
help="Enter your
|
|
|
319 |
)
|
320 |
|
321 |
with col2:
|
322 |
-
|
|
|
|
|
|
|
|
|
323 |
"🌍 Language",
|
324 |
-
options=
|
325 |
-
index=
|
326 |
)
|
|
|
|
|
|
|
327 |
|
328 |
-
#
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
summary_type = st.selectbox(
|
342 |
-
"Summary",
|
343 |
-
["short", "medium", "long"]
|
344 |
-
)
|
345 |
-
|
346 |
-
with col3:
|
347 |
-
if llm_connected and available_models:
|
348 |
-
selected_model = st.selectbox("Model", available_models)
|
349 |
-
else:
|
350 |
-
selected_model = None
|
351 |
-
st.info("No models")
|
352 |
-
|
353 |
-
# Translation option
|
354 |
-
if llm_connected:
|
355 |
-
enable_translation = st.checkbox("🌐 Enable Translation")
|
356 |
-
if enable_translation:
|
357 |
-
target_lang = st.selectbox(
|
358 |
-
"Translate to",
|
359 |
-
[lang for lang in LANGUAGES.keys() if lang != selected_lang]
|
360 |
-
)
|
361 |
|
362 |
st.markdown("</div>", unsafe_allow_html=True)
|
363 |
|
364 |
# Search button
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
st.session_state.search_results = search_results
|
373 |
-
|
374 |
-
# Display results
|
375 |
-
if st.session_state.search_results:
|
376 |
-
st.success(f"Found {len(st.session_state.search_results)} results")
|
377 |
-
|
378 |
-
for idx, result in enumerate(st.session_state.search_results):
|
379 |
-
with st.container():
|
380 |
-
st.markdown("<div class='result-card'>", unsafe_allow_html=True)
|
381 |
-
|
382 |
-
# Title
|
383 |
-
title = result.get("title", "")
|
384 |
-
st.markdown(f"<div class='article-title'>{idx+1}. {title}</div>", unsafe_allow_html=True)
|
385 |
-
|
386 |
-
# Get content
|
387 |
-
lang_code = LANGUAGES[selected_lang]
|
388 |
-
summary_data = wiki_api.get_page_summary(title, lang_code)
|
389 |
-
|
390 |
-
# Show thumbnail
|
391 |
-
if summary_data and "thumbnail" in summary_data:
|
392 |
-
col1, col2 = st.columns([1, 4])
|
393 |
-
with col1:
|
394 |
-
st.image(summary_data["thumbnail"]["source"], width=100)
|
395 |
-
content_col = col2
|
396 |
-
else:
|
397 |
-
content_col = st
|
398 |
|
399 |
-
|
400 |
-
#
|
401 |
-
|
402 |
-
# Get detailed content for AI
|
403 |
-
detailed_content = wiki_api.get_page_content(title, lang_code)
|
404 |
-
|
405 |
-
if detailed_content:
|
406 |
-
with st.spinner("Generating AI summary..."):
|
407 |
-
ai_summary = llm.generate_summary(
|
408 |
-
detailed_content,
|
409 |
-
selected_model,
|
410 |
-
selected_lang,
|
411 |
-
summary_type
|
412 |
-
)
|
413 |
-
|
414 |
-
if ai_summary and not ai_summary.startswith("Error"):
|
415 |
-
st.markdown("<div class='ai-summary'>", unsafe_allow_html=True)
|
416 |
-
st.markdown("**🤖 AI Summary:**")
|
417 |
-
st.write(ai_summary)
|
418 |
-
|
419 |
-
# Translation
|
420 |
-
if 'enable_translation' in locals() and enable_translation:
|
421 |
-
with st.spinner("Translating..."):
|
422 |
-
translated = llm.translate_text(ai_summary, target_lang, selected_model)
|
423 |
-
if translated != ai_summary:
|
424 |
-
st.markdown(f"**🌐 {target_lang}:**")
|
425 |
-
st.write(translated)
|
426 |
-
|
427 |
-
st.markdown("</div>", unsafe_allow_html=True)
|
428 |
-
else:
|
429 |
-
st.warning("AI summary failed")
|
430 |
-
if summary_data:
|
431 |
-
basic_summary = summary_data.get("extract", "")
|
432 |
-
st.write(simple_summarize(basic_summary, 3))
|
433 |
-
else:
|
434 |
-
st.warning("Could not fetch detailed content")
|
435 |
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
basic_summary = simple_summarize(basic_summary, 2)
|
442 |
-
elif summary_type == "medium":
|
443 |
-
basic_summary = simple_summarize(basic_summary, 4)
|
444 |
-
st.write(basic_summary)
|
445 |
-
else:
|
446 |
-
snippet = clean_html(result.get("snippet", ""))
|
447 |
-
st.write(snippet)
|
448 |
-
|
449 |
-
# Wikipedia link
|
450 |
-
if summary_data and "content_urls" in summary_data:
|
451 |
-
wiki_url = summary_data["content_urls"]["desktop"]["page"]
|
452 |
-
st.markdown(f"[📖 Read on Wikipedia]({wiki_url})")
|
453 |
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
-
|
458 |
-
|
|
|
459 |
|
460 |
-
# Footer
|
461 |
st.markdown("---")
|
462 |
-
|
463 |
|
|
|
464 |
with col1:
|
465 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
with col2:
|
467 |
-
st.
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
|
|
472 |
|
473 |
-
|
474 |
-
with st.expander("🛠️ Ollama Setup Guide"):
|
475 |
st.markdown("""
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
|
483 |
-
#
|
484 |
-
|
|
|
|
|
485 |
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
|
496 |
-
|
|
|
|
|
|
|
|
|
|
|
497 |
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
|
504 |
if __name__ == "__main__":
|
505 |
main()
|
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
import json
|
4 |
+
from typing import Dict, List, Optional, Tuple
|
5 |
import re
|
6 |
from urllib.parse import quote
|
7 |
import time
|
8 |
+
from datetime import datetime
|
9 |
+
import plotly.express as px
|
10 |
+
import pandas as pd
|
11 |
+
from collections import Counter
|
12 |
+
import hashlib
|
13 |
|
|
|
14 |
st.set_page_config(
|
15 |
+
page_title="WikiBot Pro - AI-Powered Multilingual Assistant",
|
16 |
+
page_icon="🤖",
|
17 |
layout="wide",
|
18 |
+
initial_sidebar_state="expanded"
|
19 |
)
|
20 |
|
|
|
21 |
LANGUAGES = {
|
22 |
+
"English": {"code": "en", "flag": "🇺🇸", "native": "English"},
|
23 |
+
"Telugu": {"code": "te", "flag": "🇮🇳", "native": "తెలుగు"},
|
24 |
+
"Hindi": {"code": "hi", "flag": "🇮🇳", "native": "हिन्दी"},
|
25 |
+
"Spanish": {"code": "es", "flag": "🇪🇸", "native": "Español"},
|
26 |
+
"French": {"code": "fr", "flag": "🇫🇷", "native": "Français"},
|
27 |
+
"German": {"code": "de", "flag": "🇩🇪", "native": "Deutsch"},
|
28 |
+
"Italian": {"code": "it", "flag": "🇮🇹", "native": "Italiano"},
|
29 |
+
"Portuguese": {"code": "pt", "flag": "🇵🇹", "native": "Português"},
|
30 |
+
"Russian": {"code": "ru", "flag": "🇷🇺", "native": "Русский"},
|
31 |
+
"Japanese": {"code": "ja", "flag": "🇯🇵", "native": "日本語"},
|
32 |
+
"Chinese": {"code": "zh", "flag": "🇨🇳", "native": "中文"},
|
33 |
+
"Arabic": {"code": "ar", "flag": "🇸🇦", "native": "العربية"},
|
34 |
+
"Korean": {"code": "ko", "flag": "🇰🇷", "native": "한국어"},
|
35 |
+
"Tamil": {"code": "ta", "flag": "🇮🇳", "native": "தமிழ்"},
|
36 |
+
"Bengali": {"code": "bn", "flag": "🇧🇩", "native": "বাংলা"},
|
37 |
+
"Marathi": {"code": "mr", "flag": "🇮🇳", "native": "मराठी"},
|
38 |
+
"Gujarati": {"code": "gu", "flag": "🇮🇳", "native": "ગુજરાતી"},
|
39 |
+
"Kannada": {"code": "kn", "flag": "🇮🇳", "native": "ಕನ್ನಡ"},
|
40 |
+
"Malayalam": {"code": "ml", "flag": "🇮🇳", "native": "മലയാളം"},
|
41 |
+
"Punjabi": {"code": "pa", "flag": "🇮🇳", "native": "ਪੰਜਾਬੀ"}
|
42 |
}
|
43 |
|
44 |
+
THEMES = {
|
45 |
+
"Default": {"primary": "#1f77b4", "background": "#ffffff", "text": "#000000"},
|
46 |
+
"Dark": {"primary": "#00d4aa", "background": "#0e1117", "text": "#ffffff"},
|
47 |
+
"Ocean": {"primary": "#0077be", "background": "#f0f8ff", "text": "#003366"},
|
48 |
+
"Forest": {"primary": "#228b22", "background": "#f5fff5", "text": "#006400"},
|
49 |
+
"Sunset": {"primary": "#ff6b35", "background": "#fff5f0", "text": "#8b0000"}
|
50 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
class WikipediaAPI:
|
53 |
def __init__(self):
    """Set up the endpoint templates and the response cache.

    ``base_url`` and ``search_url`` are templates; their ``{}`` placeholder
    is filled with a Wikipedia language code (for example ``"en"``).

    The cache is stored in ``st.session_state`` instead of on the instance.
    Streamlit re-executes the script on every interaction and ``main()``
    constructs a new ``WikipediaAPI`` each time, so an instance-local dict
    would start empty on every run and never produce a cache hit.
    """
    self.base_url = "https://{}.wikipedia.org/api/rest_v1"
    self.search_url = "https://{}.wikipedia.org/w/api.php"
    if "wiki_api_cache" not in st.session_state:
        st.session_state.wiki_api_cache = {}
    # Same dict object on every rerun within one browser session.
    self.cache = st.session_state.wiki_api_cache
|
57 |
+
|
58 |
+
def _get_cache_key(self, *args) -> str:
    """Return a 32-character hex digest that identifies ``args``.

    The digest is computed over ``repr(args)`` rather than the arguments
    joined with ``"_"``: the joined form is ambiguous, so ``("a_b", "c")``
    and ``("a", "b_c")`` (or ``5`` and ``"5"``) would share one cache slot.

    Args:
        *args: Values describing the request (kind, query/title, language,
            limit). They should have a deterministic ``repr`` -- strings
            and numbers do.

    Returns:
        The MD5 hex digest of the UTF-8 encoded ``repr``. MD5 is used only
        as a compact dictionary key, not for any security purpose.
    """
    key_string = repr(args)
    return hashlib.md5(key_string.encode("utf-8")).hexdigest()
|
62 |
|
63 |
def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
|
64 |
+
"""Search for Wikipedia articles with caching"""
|
65 |
+
cache_key = self._get_cache_key("search", query, lang, limit)
|
66 |
+
if cache_key in self.cache:
|
67 |
+
return self.cache[cache_key]
|
68 |
+
|
69 |
try:
|
70 |
params = {
|
71 |
"action": "query",
|
|
|
73 |
"list": "search",
|
74 |
"srsearch": query,
|
75 |
"srlimit": limit,
|
76 |
+
"srprop": "snippet|titlesnippet|size|wordcount|timestamp"
|
77 |
}
|
78 |
|
79 |
url = self.search_url.format(lang)
|
|
|
81 |
response.raise_for_status()
|
82 |
|
83 |
data = response.json()
|
84 |
+
results = data.get("query", {}).get("search", [])
|
85 |
+
self.cache[cache_key] = results
|
86 |
+
return results
|
87 |
except Exception as e:
|
88 |
st.error(f"Search error: {str(e)}")
|
89 |
return []
|
90 |
|
91 |
def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
|
92 |
+
"""Get page summary using REST API with caching"""
|
93 |
+
cache_key = self._get_cache_key("summary", title, lang)
|
94 |
+
if cache_key in self.cache:
|
95 |
+
return self.cache[cache_key]
|
96 |
+
|
97 |
try:
|
98 |
encoded_title = quote(title.replace(" ", "_"))
|
99 |
url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
|
|
|
101 |
response = requests.get(url, timeout=10)
|
102 |
response.raise_for_status()
|
103 |
|
104 |
+
result = response.json()
|
105 |
+
self.cache[cache_key] = result
|
106 |
+
return result
|
107 |
except Exception as e:
|
108 |
+
st.error(f"Summary error: {str(e)}")
|
109 |
return None
|
110 |
|
111 |
+
def get_page_content(self, title: str, lang: str = "en", sections: int = 3) -> Optional[str]:
|
112 |
"""Get page content sections"""
|
113 |
try:
|
114 |
params = {
|
115 |
"action": "query",
|
116 |
"format": "json",
|
117 |
"prop": "extracts",
|
118 |
+
"exintro": True,
|
119 |
"explaintext": True,
|
120 |
"exsectionformat": "plain",
|
121 |
"titles": title,
|
122 |
+
"exchars": 3000
|
123 |
}
|
124 |
|
125 |
url = self.search_url.format(lang)
|
|
|
134 |
return page_data["extract"]
|
135 |
|
136 |
return None
|
137 |
+
except Exception as e:
|
138 |
+
st.error(f"Content error: {str(e)}")
|
139 |
+
return None
|
140 |
+
|
141 |
+
def get_random_article(self, lang: str = "en") -> Optional[Dict]:
    """Fetch the summary of one randomly selected article.

    Sends a ``list=random`` query (namespace 0, one result) to the
    ``w/api.php`` endpoint of the given language edition and passes the
    returned title to ``get_page_summary``.

    Args:
        lang: Wikipedia language code used to build the endpoint URL.

    Returns:
        Whatever ``get_page_summary`` returns for the chosen title, or
        ``None`` when the reply lists no page or any exception occurs
        (the exception text is shown with ``st.error``).
    """
    try:
        reply = requests.get(
            self.search_url.format(lang),
            params={
                "action": "query",
                "format": "json",
                "list": "random",
                "rnnamespace": 0,
                "rnlimit": 1,
            },
            timeout=10,
        )
        reply.raise_for_status()

        picks = reply.json().get("query", {}).get("random", [])
        if not picks:
            return None
        return self.get_page_summary(picks[0]["title"], lang)
    except Exception as e:
        st.error(f"Random article error: {str(e)}")
        return None
|
167 |
+
|
168 |
+
def get_page_categories(self, title: str, lang: str = "en") -> List[str]:
    """Return up to ten category names for a Wikipedia page.

    Queries ``prop=categories`` on the ``w/api.php`` endpoint of the given
    language edition and returns the categories of the first page entry
    that carries a ``categories`` list.

    Args:
        title: Page title to look up.
        lang: Wikipedia language code used to build the endpoint URL.

    Returns:
        Category names with the namespace prefix removed, or an empty list
        when the page has no categories or the request fails.
    """
    try:
        response = requests.get(
            self.search_url.format(lang),
            params={
                "action": "query",
                "format": "json",
                "prop": "categories",
                "titles": title,
                "cllimit": 10,
            },
            timeout=10,
        )
        response.raise_for_status()

        pages = response.json().get("query", {}).get("pages", {})
        for page_data in pages.values():
            if "categories" in page_data:
                # The namespace prefix is localized ("Category:",
                # "Kategorie:", ...), so drop everything up to the first
                # colon instead of matching the English word only.
                return [
                    cat["title"].split(":", 1)[-1]
                    for cat in page_data["categories"]
                ]

        return []
    except Exception:
        # Categories are optional decoration; failures are deliberately
        # silent here and yield an empty list.
        return []
|
193 |
+
|
194 |
+
def init_session_state():
    """Seed ``st.session_state`` with a default for every key not yet set.

    Keys that already exist are left untouched. The defaults are built
    inside the function so each call gets its own list and dict objects.
    """
    defaults = {
        "search_history": [],
        "favorites": [],
        "theme": "Default",
        "user_preferences": {
            "default_language": "English",
            "results_per_page": 5,
            "summary_length": "Medium",
            "show_images": True,
            "auto_translate": False,
        },
    }
    for name, initial in defaults.items():
        if name not in st.session_state:
            st.session_state[name] = initial
|
210 |
+
|
211 |
+
def apply_theme(theme_name: str):
    """Inject a ``<style>`` block that applies the chosen colour theme.

    The ``.result-card`` and ``.metric-card`` declarations are marked
    ``!important``. ``main()`` calls this function first and only then
    injects its static stylesheet, which defines the same selectors with
    fixed colours (for example ``.result-card {background: white}``).
    Without ``!important`` those later rules win, so the Dark theme would
    render white text on a white card.

    Args:
        theme_name: A key of ``THEMES``.

    Raises:
        KeyError: If ``theme_name`` is not a key of ``THEMES``.
    """
    theme = THEMES[theme_name]
    st.markdown(f"""
    <style>
    .main {{
        background-color: {theme["background"]};
        color: {theme["text"]};
    }}
    .stSelectbox label, .stTextInput label, .stSlider label {{
        color: {theme["text"]} !important;
    }}
    .result-card {{
        background-color: {theme["background"]} !important;
        border-color: {theme["primary"]} !important;
        color: {theme["text"]} !important;
    }}
    .metric-card {{
        background: linear-gradient(135deg, {theme["primary"]}20, {theme["primary"]}10) !important;
        border-left: 4px solid {theme["primary"]} !important;
    }}
    </style>
    """, unsafe_allow_html=True)
|
234 |
|
235 |
def clean_html(text: str) -> str:
    """Remove HTML tags from ``text``.

    Everything from a ``<`` up to the nearest following ``>`` on the same
    line is removed; HTML entities such as ``&amp;`` are left as they are.

    Args:
        text: Markup to strip. ``None`` and the empty string are accepted.

    Returns:
        The text without tags, or ``""`` when ``text`` is empty or ``None``.
    """
    # Guard first: re.sub raises TypeError when handed None.
    if not text:
        return ""
    return re.sub(r"<.*?>", "", text)
|
239 |
|
240 |
+
def summarize_text(text: str, length: str = "Medium") -> str:
    """Return the leading sentences of ``text``.

    Sentences are split where ``.``, ``!`` or ``?`` is followed by
    whitespace, so decimals such as ``3.14`` stay intact and each sentence
    keeps its own punctuation. Abbreviations followed by a space
    (``"U.S. Army"``) are still treated as sentence ends.

    Args:
        text: Text to shorten. ``None`` and blank strings are accepted.
        length: ``"Short"`` keeps 2 sentences, ``"Medium"`` keeps 4, and
            any other value keeps 6.

    Returns:
        The selected sentences joined by single spaces, or ``""`` when
        there is nothing to summarize.
    """
    if not text or not text.strip():
        return ""

    sentence_limits = {"Short": 2, "Medium": 4}
    limit = sentence_limits.get(length, 6)  # anything else counts as "Long"

    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]
    return " ".join(sentences[:limit])
|
251 |
+
|
252 |
+
def add_to_search_history(query: str, language: str, results_count: int):
    """Put a search at the front of the session history.

    The entry stores the query, the language, the current local time
    formatted as ``YYYY-MM-DD HH:MM:SS`` and the number of results. The
    history is then cut to its 50 newest entries.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    newest = dict(
        query=query,
        language=language,
        timestamp=stamp,
        results_count=results_count,
    )
    history = st.session_state.search_history
    history.insert(0, newest)
    # Newest entries are at the front, so slicing drops the oldest ones.
    st.session_state.search_history = history[:50]
|
263 |
+
|
264 |
+
def create_search_analytics():
    """Build the two charts for the search analytics view.

    Returns:
        ``None`` when the session has no search history. Otherwise a tuple
        ``(language_pie, activity_line)``: a pie chart of searches per
        language and a line chart of searches per calendar day.
    """
    history = st.session_state.search_history
    if not history:
        return None

    frame = pd.DataFrame(history)

    # Share of searches per language.
    per_language = frame['language'].value_counts()
    language_pie = px.pie(
        values=per_language.values,
        names=per_language.index,
        title="Search Languages Distribution",
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Number of searches per day, taken from the stored timestamp strings.
    frame['date'] = pd.to_datetime(frame['timestamp']).dt.date
    per_day = frame.groupby('date').size().reset_index(name='searches')
    activity_line = px.line(
        per_day,
        x='date',
        y='searches',
        title="Daily Search Activity",
        markers=True,
    )

    return language_pie, activity_line
|
292 |
+
|
293 |
+
def sidebar_content():
    """Render the sidebar: theme, preferences, quick actions, history, stats.

    Reads and updates ``st.session_state``:
      * ``theme`` -- updated when another theme is picked, followed by a rerun.
      * ``user_preferences`` -- ``default_language`` and ``show_images``.
      * ``random_article_trigger`` / ``show_analytics`` -- set to ``True``
        by the matching quick-action buttons.
      * ``search_history`` -- emptied by "Clear History".
      * ``repeat_search`` -- set to a history entry by its "Repeat" button.
    """
    st.sidebar.title("🤖 WikiBot Pro")
    st.sidebar.markdown("---")

    # Theme selector
    st.sidebar.subheader("🎨 Appearance")
    theme_names = list(THEMES)
    theme = st.sidebar.selectbox(
        "Theme",
        options=theme_names,
        index=theme_names.index(st.session_state.theme),
        key="theme_selector"
    )
    if theme != st.session_state.theme:
        st.session_state.theme = theme
        # Rerun so the new theme is applied on this interaction.
        st.rerun()

    # User preferences
    st.sidebar.subheader("⚙️ Preferences")

    language_names = list(LANGUAGES)
    default_lang = st.sidebar.selectbox(
        "Default Language",
        options=language_names,
        index=language_names.index(st.session_state.user_preferences["default_language"])
    )

    show_images = st.sidebar.checkbox(
        "Show Images",
        value=st.session_state.user_preferences["show_images"]
    )

    # Update preferences
    st.session_state.user_preferences.update({
        "default_language": default_lang,
        "show_images": show_images
    })

    # Quick actions
    st.sidebar.subheader("🚀 Quick Actions")

    if st.sidebar.button("🎲 Random Article", use_container_width=True):
        st.session_state.random_article_trigger = True

    if st.sidebar.button("📊 Search Analytics", use_container_width=True):
        st.session_state.show_analytics = True

    if st.sidebar.button("🗑️ Clear History", use_container_width=True):
        st.session_state.search_history = []
        st.sidebar.success("History cleared!")

    # Search history (five most recent entries)
    if st.session_state.search_history:
        st.sidebar.subheader("🕐 Recent Searches")
        for i, search in enumerate(st.session_state.search_history[:5]):
            query_text = search['query']
            # Add the ellipsis only when the query really was cut off.
            if len(query_text) > 20:
                label = f"{query_text[:20]}..."
            else:
                label = query_text or "..."
            with st.sidebar.expander(label):
                st.write(f"**Language:** {search['language']}")
                st.write(f"**Time:** {search['timestamp']}")
                st.write(f"**Results:** {search['results_count']}")
                if st.button("🔄 Repeat", key=f"repeat_{i}"):
                    st.session_state.repeat_search = search

    # Statistics
    st.sidebar.subheader("📈 Statistics")
    total_searches = len(st.session_state.search_history)
    favorite_lang = "None"
    if st.session_state.search_history:
        lang_counter = Counter(s['language'] for s in st.session_state.search_history)
        # History is non-empty here, so most_common(1) has one entry.
        favorite_lang = lang_counter.most_common(1)[0][0]

    st.sidebar.metric("Total Searches", total_searches)
    st.sidebar.metric("Favorite Language", favorite_lang)
    st.sidebar.metric("Favorites Saved", len(st.session_state.favorites))
|
365 |
|
366 |
def main():
|
367 |
+
init_session_state()
|
368 |
+
apply_theme(st.session_state.theme)
|
369 |
+
|
370 |
+
# Enhanced CSS
|
371 |
st.markdown("""
|
372 |
<style>
|
373 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
|
374 |
+
|
375 |
+
html, body, [class*="css"] {
|
376 |
+
font-family: 'Inter', sans-serif;
|
377 |
+
}
|
378 |
+
|
379 |
.main-header {
|
380 |
text-align: center;
|
381 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
382 |
+
-webkit-background-clip: text;
|
383 |
+
-webkit-text-fill-color: transparent;
|
384 |
+
font-weight: 700;
|
385 |
+
font-size: 3rem;
|
386 |
+
margin-bottom: 0.5rem;
|
387 |
+
}
|
388 |
+
|
389 |
+
.subtitle {
|
390 |
+
text-align: center;
|
391 |
+
color: #6c757d;
|
392 |
+
font-size: 1.2rem;
|
393 |
margin-bottom: 2rem;
|
394 |
+
font-weight: 300;
|
395 |
}
|
396 |
+
|
397 |
.search-container {
|
398 |
+
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
399 |
+
padding: 2rem;
|
400 |
+
border-radius: 20px;
|
401 |
+
margin-bottom: 2rem;
|
402 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
|
403 |
+
backdrop-filter: blur(10px);
|
404 |
}
|
405 |
+
|
406 |
.result-card {
|
407 |
+
background: white;
|
408 |
padding: 1.5rem;
|
409 |
+
border-radius: 15px;
|
410 |
+
border: 1px solid #e9ecef;
|
411 |
+
margin-bottom: 1.5rem;
|
412 |
+
box-shadow: 0 5px 15px rgba(0,0,0,0.08);
|
413 |
+
transition: all 0.3s ease;
|
414 |
+
position: relative;
|
415 |
+
overflow: hidden;
|
416 |
+
}
|
417 |
+
|
418 |
+
.result-card:hover {
|
419 |
+
transform: translateY(-2px);
|
420 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
421 |
+
}
|
422 |
+
|
423 |
+
.result-card::before {
|
424 |
+
content: '';
|
425 |
+
position: absolute;
|
426 |
+
top: 0;
|
427 |
+
left: 0;
|
428 |
+
right: 0;
|
429 |
+
height: 4px;
|
430 |
+
background: linear-gradient(90deg, #667eea, #764ba2);
|
431 |
}
|
432 |
+
|
433 |
.article-title {
|
434 |
+
color: #2c3e50;
|
435 |
+
font-weight: 600;
|
436 |
+
font-size: 1.3rem;
|
437 |
+
margin-bottom: 1rem;
|
438 |
+
line-height: 1.4;
|
439 |
+
}
|
440 |
+
|
441 |
+
.article-meta {
|
442 |
+
display: flex;
|
443 |
+
gap: 1rem;
|
444 |
+
margin-bottom: 1rem;
|
445 |
+
font-size: 0.9rem;
|
446 |
+
color: #6c757d;
|
447 |
}
|
448 |
+
|
449 |
+
.metric-card {
|
450 |
+
background: linear-gradient(135deg, #667eea20, #764ba210);
|
451 |
+
padding: 1rem;
|
452 |
+
border-radius: 10px;
|
453 |
+
border-left: 4px solid #667eea;
|
454 |
margin-bottom: 1rem;
|
455 |
+
transition: all 0.3s ease;
|
456 |
}
|
457 |
+
|
458 |
+
.metric-card:hover {
|
459 |
+
transform: scale(1.02);
|
|
|
460 |
}
|
461 |
+
|
462 |
+
.floating-button {
|
463 |
+
position: fixed;
|
464 |
+
bottom: 2rem;
|
465 |
+
right: 2rem;
|
466 |
+
background: linear-gradient(135deg, #667eea, #764ba2);
|
467 |
+
color: white;
|
468 |
+
border: none;
|
469 |
+
border-radius: 50%;
|
470 |
+
width: 60px;
|
471 |
+
height: 60px;
|
472 |
+
font-size: 1.5rem;
|
473 |
+
cursor: pointer;
|
474 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
|
475 |
+
transition: all 0.3s ease;
|
476 |
+
z-index: 1000;
|
477 |
}
|
478 |
+
|
479 |
+
.floating-button:hover {
|
480 |
+
transform: scale(1.1);
|
481 |
+
box-shadow: 0 6px 20px rgba(0,0,0,0.3);
|
482 |
+
}
|
483 |
+
|
484 |
+
.feature-badge {
|
485 |
+
display: inline-block;
|
486 |
+
background: linear-gradient(135deg, #ff9a56, #ff6b6b);
|
487 |
+
color: white;
|
488 |
+
padding: 0.25rem 0.75rem;
|
489 |
+
border-radius: 50px;
|
490 |
+
font-size: 0.8rem;
|
491 |
+
font-weight: 500;
|
492 |
+
margin: 0.25rem;
|
493 |
+
}
|
494 |
+
|
495 |
+
.category-tag {
|
496 |
+
display: inline-block;
|
497 |
+
background: #e3f2fd;
|
498 |
+
color: #1976d2;
|
499 |
+
padding: 0.25rem 0.5rem;
|
500 |
+
border-radius: 5px;
|
501 |
+
font-size: 0.8rem;
|
502 |
+
margin: 0.25rem;
|
503 |
+
}
|
504 |
+
|
505 |
+
@media (max-width: 768px) {
|
506 |
+
.main-header {
|
507 |
+
font-size: 2rem;
|
508 |
+
}
|
509 |
+
.search-container {
|
510 |
+
padding: 1rem;
|
511 |
+
}
|
512 |
+
.floating-button {
|
513 |
+
bottom: 1rem;
|
514 |
+
right: 1rem;
|
515 |
+
width: 50px;
|
516 |
+
height: 50px;
|
517 |
+
font-size: 1.2rem;
|
518 |
+
}
|
519 |
+
}
|
520 |
+
|
521 |
+
.animate-fade-in {
|
522 |
+
animation: fadeIn 0.5s ease-in;
|
523 |
+
}
|
524 |
+
|
525 |
+
@keyframes fadeIn {
|
526 |
+
from { opacity: 0; transform: translateY(20px); }
|
527 |
+
to { opacity: 1; transform: translateY(0); }
|
528 |
}
|
529 |
</style>
|
530 |
""", unsafe_allow_html=True)
|
531 |
|
532 |
+
# Sidebar
|
533 |
+
sidebar_content()
|
|
|
534 |
|
535 |
+
# Main header
|
536 |
+
st.markdown("<h1 class='main-header'>🤖 WikiBot Pro</h1>", unsafe_allow_html=True)
|
537 |
+
st.markdown("<p class='subtitle'>AI-Powered Multilingual Wikipedia Assistant with Advanced Features</p>", unsafe_allow_html=True)
|
538 |
|
539 |
+
# Feature highlights
|
540 |
+
col1, col2, col3, col4 = st.columns(4)
|
541 |
+
with col1:
|
542 |
+
st.markdown("<div class='metric-card'><h3>🌍</h3><p><strong>20+ Languages</strong><br>Multilingual Support</p></div>", unsafe_allow_html=True)
|
543 |
+
with col2:
|
544 |
+
st.markdown("<div class='metric-card'><h3>🤖</h3><p><strong>AI-Powered</strong><br>Smart Summaries</p></div>", unsafe_allow_html=True)
|
545 |
+
with col3:
|
546 |
+
st.markdown("<div class='metric-card'><h3>⚡</h3><p><strong>Fast Search</strong><br>Cached Results</p></div>", unsafe_allow_html=True)
|
547 |
+
with col4:
|
548 |
+
st.markdown("<div class='metric-card'><h3>📊</h3><p><strong>Analytics</strong><br>Search Insights</p></div>", unsafe_allow_html=True)
|
549 |
+
|
550 |
+
# Initialize API
|
551 |
wiki_api = WikipediaAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
|
553 |
+
# Handle random article trigger
|
554 |
+
if hasattr(st.session_state, 'random_article_trigger'):
|
555 |
+
with st.spinner("🎲 Finding a random article..."):
|
556 |
+
random_article = wiki_api.get_random_article(
|
557 |
+
LANGUAGES[st.session_state.user_preferences["default_language"]]["code"]
|
558 |
+
)
|
559 |
+
if random_article:
|
560 |
+
st.success("🎲 Random Article Discovery!")
|
561 |
+
display_article_card(random_article, wiki_api, 0,
|
562 |
+
LANGUAGES[st.session_state.user_preferences["default_language"]]["code"])
|
563 |
+
delattr(st.session_state, 'random_article_trigger')
|
564 |
+
|
565 |
+
# Handle analytics display
|
566 |
+
if hasattr(st.session_state, 'show_analytics'):
|
567 |
+
st.subheader("📊 Search Analytics")
|
568 |
+
analytics = create_search_analytics()
|
569 |
+
if analytics:
|
570 |
+
col1, col2 = st.columns(2)
|
571 |
+
with col1:
|
572 |
+
st.plotly_chart(analytics[0], use_container_width=True)
|
573 |
+
with col2:
|
574 |
+
st.plotly_chart(analytics[1], use_container_width=True)
|
575 |
+
else:
|
576 |
+
st.info("No search history available for analytics.")
|
577 |
+
delattr(st.session_state, 'show_analytics')
|
578 |
+
|
579 |
+
# Search interface
|
580 |
st.markdown("<div class='search-container'>", unsafe_allow_html=True)
|
581 |
|
|
|
582 |
col1, col2 = st.columns([3, 1])
|
583 |
|
584 |
with col1:
|
585 |
query = st.text_input(
|
586 |
+
"🔍 Search Wikipedia",
|
587 |
+
placeholder="e.g., 'Artificial Intelligence', 'కృష్ణ నది', 'गांधी जी'",
|
588 |
+
help="Enter your search query in any language",
|
589 |
+
value=getattr(st.session_state, 'repeat_search', {}).get('query', '')
|
590 |
)
|
591 |
|
592 |
with col2:
|
593 |
+
# Get language options with flags and native names
|
594 |
+
lang_options = [f"{LANGUAGES[lang]['flag']} {lang} ({LANGUAGES[lang]['native']})"
|
595 |
+
for lang in LANGUAGES.keys()]
|
596 |
+
|
597 |
+
selected_lang_display = st.selectbox(
|
598 |
"🌍 Language",
|
599 |
+
options=lang_options,
|
600 |
+
index=list(LANGUAGES.keys()).index(st.session_state.user_preferences["default_language"])
|
601 |
)
|
602 |
+
|
603 |
+
# Extract actual language name
|
604 |
+
selected_lang = selected_lang_display.split(' ', 1)[1].split(' (')[0]
|
605 |
|
606 |
+
# Advanced options
|
607 |
+
with st.expander("⚙️ Advanced Search Options"):
|
608 |
+
col1, col2, col3 = st.columns(3)
|
609 |
+
with col1:
|
610 |
+
num_results = st.slider("📄 Number of results", 1, 15,
|
611 |
+
st.session_state.user_preferences["results_per_page"])
|
612 |
+
with col2:
|
613 |
+
summary_length = st.selectbox("📝 Summary length",
|
614 |
+
["Short", "Medium", "Long"],
|
615 |
+
index=["Short", "Medium", "Long"].index(st.session_state.user_preferences["summary_length"]))
|
616 |
+
with col3:
|
617 |
+
search_mode = st.selectbox("🔍 Search mode",
|
618 |
+
["Standard", "Recent", "Popular"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
|
620 |
st.markdown("</div>", unsafe_allow_html=True)
|
621 |
|
622 |
# Search button
|
623 |
+
if st.button("🔎 Search Wikipedia", type="primary", use_container_width=True):
|
624 |
+
if query:
|
625 |
+
lang_code = LANGUAGES[selected_lang]["code"]
|
626 |
+
|
627 |
+
with st.spinner(f"🔍 Searching Wikipedia in {selected_lang}..."):
|
628 |
+
# Search for articles
|
629 |
+
search_results = wiki_api.search_articles(query, lang_code, num_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
630 |
|
631 |
+
if search_results:
|
632 |
+
# Add to search history
|
633 |
+
add_to_search_history(query, selected_lang, len(search_results))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
|
635 |
+
st.success(f"✅ Found {len(search_results)} results in {selected_lang}")
|
636 |
+
|
637 |
+
# Display results with enhanced cards
|
638 |
+
for idx, result in enumerate(search_results):
|
639 |
+
display_article_card(result, wiki_api, idx, lang_code, summary_length)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
640 |
|
641 |
+
else:
|
642 |
+
st.warning(f"❌ No results found for '{query}' in {selected_lang}")
|
643 |
+
# Suggest alternative searches
|
644 |
+
st.info("💡 **Suggestions:**")
|
645 |
+
st.write("• Try different keywords or phrases")
|
646 |
+
st.write("• Switch to a different language")
|
647 |
+
st.write("• Check spelling and try simpler terms")
|
648 |
+
st.write("• Use the Random Article feature to explore")
|
649 |
+
else:
|
650 |
+
st.warning("⚠️ Please enter a search query")
|
651 |
|
652 |
+
# Clear repeat search
|
653 |
+
if hasattr(st.session_state, 'repeat_search'):
|
654 |
+
delattr(st.session_state, 'repeat_search')
|
655 |
|
656 |
+
# Footer with enhanced information
|
657 |
st.markdown("---")
|
658 |
+
st.markdown("### 🌟 WikiBot Pro Features")
|
659 |
|
660 |
+
col1, col2, col3 = st.columns(3)
|
661 |
with col1:
|
662 |
+
st.markdown("""
|
663 |
+
**🌍 Multilingual Support**
|
664 |
+
- 20+ languages including Indian languages
|
665 |
+
- Native script support
|
666 |
+
- Cultural context awareness
|
667 |
+
""")
|
668 |
+
|
669 |
with col2:
|
670 |
+
st.markdown("""
|
671 |
+
**🤖 AI-Powered Features**
|
672 |
+
- Smart text summarization
|
673 |
+
- Intelligent caching
|
674 |
+
- Personalized recommendations
|
675 |
+
""")
|
676 |
|
677 |
+
with col3:
|
|
|
678 |
st.markdown("""
|
679 |
+
**📊 Advanced Analytics**
|
680 |
+
- Search history tracking
|
681 |
+
- Language usage patterns
|
682 |
+
- Performance insights
|
683 |
+
""")
|
684 |
+
|
685 |
+
def display_article_card(result: Dict, wiki_api: WikipediaAPI, idx: int, lang_code: str, summary_length: str = "Medium"):
    """Render one search result as an article card.

    The card shows the numbered title, word count / size / last-edit date,
    "Favorite" and "Share" buttons, the page summary (with thumbnail when the
    user preference ``show_images`` is set and one is available), up to five
    categories, and a link to the full Wikipedia article. When no summary can
    be fetched, the cleaned search snippet is shown instead.

    Args:
        result: Search hit; the keys ``title``, ``wordcount``, ``size``,
            ``timestamp`` and ``snippet`` are read, all optional.
        wiki_api: Client used for ``get_page_summary`` and
            ``get_page_categories``.
        idx: Zero-based position of the result; displayed as ``idx + 1`` and
            used to build the widget keys ``fav_{idx}`` / ``share_{idx}``.
        lang_code: Wikipedia language subdomain (e.g. ``"en"``).
        summary_length: Passed through to ``display_article_content``.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    st.markdown("<div class='result-card animate-fade-in'>", unsafe_allow_html=True)

    title = result.get("title", "")
    # Everything interpolated into unsafe HTML below comes from the remote
    # API, so it is escaped before rendering.
    safe_title = escape(str(title))

    # Article header with metadata
    header_col, actions_col = st.columns([3, 1])

    with header_col:
        st.markdown(f"<div class='article-title'>{idx+1}. {safe_title}</div>", unsafe_allow_html=True)

        wordcount = result.get("wordcount", 0)
        size = result.get("size", 0)
        timestamp = result.get("timestamp", "")
        # Only the leading date part (first 10 characters) is displayed.
        last_edit = str(timestamp)[:10] if timestamp else "Unknown"

        # Built as a single unindented string: indented HTML lines inside a
        # Markdown block risk being rendered as a literal code block.
        meta_html = (
            "<div class='article-meta'>"
            f"<span>📝 {escape(str(wordcount))} words</span>"
            f"<span>📊 {escape(str(size))} bytes</span>"
            f"<span>🕐 {escape(last_edit)}</span>"
            "</div>"
        )
        st.markdown(meta_html, unsafe_allow_html=True)

    with actions_col:
        # Action buttons
        if st.button("⭐ Favorite", key=f"fav_{idx}"):
            if title not in st.session_state.favorites:
                st.session_state.favorites.append(title)
                st.success("Added to favorites!")

        if st.button("🔗 Share", key=f"share_{idx}"):
            st.info(f"Share this article: {title}")

    # Without a title there is nothing to look up; fall back to the snippet.
    summary_data = wiki_api.get_page_summary(title, lang_code) if title else None

    if summary_data:
        # A thumbnail entry without a "source" URL is treated as "no image"
        # instead of raising KeyError.
        thumbnail = summary_data.get("thumbnail") or {}
        image_url = thumbnail.get("source")

        if st.session_state.user_preferences["show_images"] and image_url:
            image_col, text_col = st.columns([1, 3])
            with image_col:
                st.image(image_url, width=150, caption="Wikipedia Image")
            with text_col:
                display_article_content(summary_data, summary_length)
        else:
            display_article_content(summary_data, summary_length)

        # Categories: show only the first five, emitted in one markdown call
        # so the inline-block tags can sit side by side.
        categories = wiki_api.get_page_categories(title, lang_code)
        if categories:
            st.markdown("**📚 Categories:**")
            tags_html = "".join(
                f"<span class='category-tag'>{escape(str(cat))}</span>"
                for cat in categories[:5]
            )
            st.markdown(tags_html, unsafe_allow_html=True)

        # Percent-encode the title so characters such as ")" or non-ASCII
        # text cannot break the Markdown link or the URL itself.
        page_path = quote(str(title).replace(" ", "_"), safe="/:")
        wiki_url = f"https://{lang_code}.wikipedia.org/wiki/{page_path}"
        st.markdown(f"🔗 [Read full article on Wikipedia]({wiki_url})")

    else:
        # Fallback to search snippet
        snippet = result.get("snippet", "No summary available")
        st.write(clean_html(snippet))

    st.markdown("</div>", unsafe_allow_html=True)
|
753 |
+
|
754 |
+
def display_article_content(summary_data: Dict, summary_length: str):
    """Write an article's summarized extract and, if present, its coordinates.

    Args:
        summary_data: Page summary mapping; reads the optional ``extract``
            and ``coordinates`` entries.
        summary_length: Length setting forwarded to ``summarize_text``.
    """
    body_text = summary_data.get("extract", "")
    if body_text:
        st.write(summarize_text(body_text, summary_length))

    # Nothing more to show when the page carries no coordinates.
    if "coordinates" not in summary_data:
        return

    location = summary_data["coordinates"]
    latitude = location.get('lat', 0)
    longitude = location.get('lon', 0)
    st.info(f"📍 Location: {latitude:.4f}, {longitude:.4f}")
|
765 |
|
766 |
# Entry point: call main() only when this module is executed as a script.
if __name__ == "__main__":
    main()
|