qwerty45-uiop committed on
Commit
301db05
·
verified ·
1 Parent(s): 91fc2e4

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +624 -362
src/streamlit_app.py CHANGED
@@ -1,137 +1,71 @@
1
  import streamlit as st
2
  import requests
3
  import json
4
- from typing import Dict, List, Optional
5
  import re
6
  from urllib.parse import quote
7
  import time
 
 
 
 
 
8
 
9
- # Configure page
10
  st.set_page_config(
11
- page_title="WikiBot - Multilingual Assistant",
12
- page_icon="📚",
13
  layout="wide",
14
- initial_sidebar_state="collapsed"
15
  )
16
 
17
- # Language codes mapping
18
  LANGUAGES = {
19
- "English": "en",
20
- "Telugu": "te",
21
- "Hindi": "hi",
22
- "Spanish": "es",
23
- "French": "fr",
24
- "German": "de",
25
- "Italian": "it",
26
- "Portuguese": "pt",
27
- "Russian": "ru",
28
- "Japanese": "ja",
29
- "Chinese": "zh",
30
- "Arabic": "ar",
31
- "Korean": "ko"
 
 
 
 
 
 
 
32
  }
33
 
34
class OllamaLLM:
    """Minimal HTTP client for a locally running Ollama server.

    Talks to the /api/generate and /api/tags endpoints. Every public method
    swallows network errors and degrades gracefully (False / [] / the input
    text / an "Error: ..." string) so the UI keeps working when Ollama is
    offline.
    """

    def __init__(self, base_url: str = "http://localhost:11434"):
        self.base_url = base_url
        self.api_url = f"{base_url}/api/generate"
        self.models_url = f"{base_url}/api/tags"

    def check_connection(self) -> bool:
        """Return True when the Ollama server answers on /api/tags."""
        try:
            resp = requests.get(self.models_url, timeout=3)
        except Exception:
            return False
        return resp.status_code == 200

    def get_available_models(self) -> List[str]:
        """Return the names of locally installed models ([] on any failure)."""
        try:
            resp = requests.get(self.models_url, timeout=5)
            if resp.status_code != 200:
                return []
            return [entry["name"] for entry in resp.json().get("models", [])]
        except Exception:
            return []

    def generate_summary(self, text: str, model: str = "llama3.2", language: str = "English",
                         summary_type: str = "concise") -> str:
        """Summarize *text* with the local LLM.

        Returns the generated summary, or an "Error: ..." string on failure
        (callers check for the "Error" prefix).
        """
        try:
            # Keep the prompt within a safe context-window budget.
            if len(text) > 2000:
                text = text[:2000] + "..."

            # Prompt template per summary style; "explanatory" is the fallback.
            prompts = {
                "concise": f"Summarize this Wikipedia content in {language} in 2-3 clear sentences:\n\n{text}\n\nSummary:",
                "detailed": f"Provide a detailed summary of this Wikipedia content in {language}. Include key points and important facts:\n\n{text}\n\nDetailed Summary:",
            }
            prompt = prompts.get(
                summary_type,
                f"Explain this Wikipedia content in {language} in simple terms that anyone can understand:\n\n{text}\n\nExplanation:",
            )

            payload = {
                "model": model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.7,
                    # Detailed summaries get a larger generation budget.
                    "num_predict": 300 if summary_type == "detailed" else 150,
                },
            }

            resp = requests.post(self.api_url, json=payload, timeout=30)
            if resp.status_code != 200:
                return f"Error: Status {resp.status_code}"
            summary = resp.json().get("response", "").strip()
            return summary if summary else "No summary generated"

        except requests.exceptions.Timeout:
            return "Error: Request timeout - try a smaller text"
        except Exception as e:
            return f"Error: {str(e)}"

    def translate_text(self, text: str, target_language: str, model: str = "llama3.2") -> str:
        """Translate *text*; falls back to the untranslated input on any failure."""
        try:
            payload = {
                "model": model,
                "prompt": f"Translate this text to {target_language}. Only provide the translation:\n\n{text}\n\nTranslation:",
                "stream": False,
                "options": {
                    "temperature": 0.3,
                    "num_predict": 200,
                },
            }
            resp = requests.post(self.api_url, json=payload, timeout=20)
            if resp.status_code != 200:
                return text
            translation = resp.json().get("response", "").strip()
            return translation if translation else text
        except Exception:
            return text
127
 
128
  class WikipediaAPI:
129
  def __init__(self):
130
  self.base_url = "https://{}.wikipedia.org/api/rest_v1"
131
  self.search_url = "https://{}.wikipedia.org/w/api.php"
 
 
 
 
 
 
132
 
133
  def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
134
- """Search for Wikipedia articles"""
 
 
 
 
135
  try:
136
  params = {
137
  "action": "query",
@@ -139,7 +73,7 @@ class WikipediaAPI:
139
  "list": "search",
140
  "srsearch": query,
141
  "srlimit": limit,
142
- "srprop": "snippet|titlesnippet"
143
  }
144
 
145
  url = self.search_url.format(lang)
@@ -147,14 +81,19 @@ class WikipediaAPI:
147
  response.raise_for_status()
148
 
149
  data = response.json()
150
- return data.get("query", {}).get("search", [])
151
-
 
152
  except Exception as e:
153
  st.error(f"Search error: {str(e)}")
154
  return []
155
 
156
  def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
157
- """Get page summary using REST API"""
 
 
 
 
158
  try:
159
  encoded_title = quote(title.replace(" ", "_"))
160
  url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
@@ -162,23 +101,25 @@ class WikipediaAPI:
162
  response = requests.get(url, timeout=10)
163
  response.raise_for_status()
164
 
165
- return response.json()
166
-
 
167
  except Exception as e:
 
168
  return None
169
 
170
- def get_page_content(self, title: str, lang: str = "en", char_limit: int = 2000) -> Optional[str]:
171
  """Get page content sections"""
172
  try:
173
  params = {
174
  "action": "query",
175
  "format": "json",
176
  "prop": "extracts",
177
- "exintro": False,
178
  "explaintext": True,
179
  "exsectionformat": "plain",
180
  "titles": title,
181
- "exchars": char_limit
182
  }
183
 
184
  url = self.search_url.format(lang)
@@ -193,313 +134,634 @@ class WikipediaAPI:
193
  return page_data["extract"]
194
 
195
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
- except Exception:
198
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  def clean_html(text: str) -> str:
201
  """Remove HTML tags from text"""
202
- if not text:
203
- return ""
204
  clean = re.compile('<.*?>')
205
  return re.sub(clean, '', text)
206
 
207
def simple_summarize(text: str, max_sentences: int = 3) -> str:
    """Naive fallback summarizer used when no LLM is available.

    Keeps the first *max_sentences* sentences (split on ". ") and ensures
    the result ends with a period. Empty input yields a placeholder string.
    """
    if not text:
        return "No content available"

    parts = text.split('. ')
    # Short enough already — return unchanged.
    if len(parts) <= max_sentences:
        return text

    summary = '. '.join(parts[:max_sentences])
    return summary if summary.endswith('.') else summary + '.'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  def main():
223
- # Custom CSS
 
 
 
224
  st.markdown("""
225
  <style>
 
 
 
 
 
 
226
  .main-header {
227
  text-align: center;
228
- color: #1f77b4;
 
 
 
 
 
 
 
 
 
 
 
229
  margin-bottom: 2rem;
 
230
  }
 
231
  .search-container {
232
- background-color: #f8f9fa;
233
- padding: 1.5rem;
234
- border-radius: 10px;
235
- margin-bottom: 1rem;
 
 
236
  }
 
237
  .result-card {
238
- background-color: white;
239
  padding: 1.5rem;
240
- border-radius: 8px;
241
- border: 1px solid #dee2e6;
242
- margin-bottom: 1rem;
243
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  }
 
245
  .article-title {
246
- color: #007bff;
247
- font-weight: bold;
248
- font-size: 1.2rem;
249
- margin-bottom: 0.5rem;
 
 
 
 
 
 
 
 
 
250
  }
251
- .status-box {
252
- padding: 0.8rem;
253
- border-radius: 8px;
 
 
 
254
  margin-bottom: 1rem;
255
- font-weight: bold;
256
  }
257
- .status-connected {
258
- background-color: #d4edda;
259
- color: #155724;
260
- border: 1px solid #c3e6cb;
261
  }
262
- .status-disconnected {
263
- background-color: #f8d7da;
264
- color: #721c24;
265
- border: 1px solid #f5c6cb;
 
 
 
 
 
 
 
 
 
 
 
 
266
  }
267
- .ai-summary {
268
- background-color: #f0f8ff;
269
- padding: 1rem;
270
- border-radius: 8px;
271
- border-left: 4px solid #007bff;
272
- margin: 1rem 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  }
274
  </style>
275
  """, unsafe_allow_html=True)
276
 
277
- # Header
278
- st.markdown("<h1 class='main-header'>🤖 WikiBot - AI-Powered Assistant</h1>", unsafe_allow_html=True)
279
- st.markdown("<p style='text-align: center; color: #666;'>Wikipedia + Local LLM Intelligence</p>", unsafe_allow_html=True)
280
 
281
- # Initialize session state
282
- if 'search_results' not in st.session_state:
283
- st.session_state.search_results = []
284
 
285
- # Initialize APIs
 
 
 
 
 
 
 
 
 
 
 
286
  wiki_api = WikipediaAPI()
287
- llm = OllamaLLM()
288
-
289
- # Check LLM connection
290
- with st.spinner("Checking Ollama connection..."):
291
- llm_connected = llm.check_connection()
292
- available_models = llm.get_available_models() if llm_connected else []
293
-
294
- # Status display
295
- if llm_connected:
296
- st.markdown(f"""
297
- <div class='status-box status-connected'>
298
- ✅ Ollama Connected - {len(available_models)} models available
299
- </div>
300
- """, unsafe_allow_html=True)
301
- else:
302
- st.markdown("""
303
- <div class='status-box status-disconnected'>
304
- ❌ Ollama Offline - Basic mode only
305
- </div>
306
- """, unsafe_allow_html=True)
307
 
308
- # Main search interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  st.markdown("<div class='search-container'>", unsafe_allow_html=True)
310
 
311
- # Search inputs
312
  col1, col2 = st.columns([3, 1])
313
 
314
  with col1:
315
  query = st.text_input(
316
- "🔍 Search Query",
317
- placeholder="e.g., 'Artificial Intelligence', 'Kargil War'",
318
- help="Enter your Wikipedia search query"
 
319
  )
320
 
321
  with col2:
322
- selected_lang = st.selectbox(
 
 
 
 
323
  "🌍 Language",
324
- options=list(LANGUAGES.keys()),
325
- index=0
326
  )
 
 
 
327
 
328
- # Options
329
- col1, col2, col3 = st.columns(3)
330
-
331
- with col1:
332
- num_results = st.slider("Results", 1, 8, 3)
333
-
334
- with col2:
335
- if llm_connected:
336
- summary_type = st.selectbox(
337
- "AI Summary",
338
- ["concise", "detailed", "explanatory"]
339
- )
340
- else:
341
- summary_type = st.selectbox(
342
- "Summary",
343
- ["short", "medium", "long"]
344
- )
345
-
346
- with col3:
347
- if llm_connected and available_models:
348
- selected_model = st.selectbox("Model", available_models)
349
- else:
350
- selected_model = None
351
- st.info("No models")
352
-
353
- # Translation option
354
- if llm_connected:
355
- enable_translation = st.checkbox("🌐 Enable Translation")
356
- if enable_translation:
357
- target_lang = st.selectbox(
358
- "Translate to",
359
- [lang for lang in LANGUAGES.keys() if lang != selected_lang]
360
- )
361
 
362
  st.markdown("</div>", unsafe_allow_html=True)
363
 
364
  # Search button
365
- search_clicked = st.button("🔎 Search", type="primary", use_container_width=True)
366
-
367
- if search_clicked and query:
368
- lang_code = LANGUAGES[selected_lang]
369
-
370
- with st.spinner("Searching Wikipedia..."):
371
- search_results = wiki_api.search_articles(query, lang_code, num_results)
372
- st.session_state.search_results = search_results
373
-
374
- # Display results
375
- if st.session_state.search_results:
376
- st.success(f"Found {len(st.session_state.search_results)} results")
377
-
378
- for idx, result in enumerate(st.session_state.search_results):
379
- with st.container():
380
- st.markdown("<div class='result-card'>", unsafe_allow_html=True)
381
-
382
- # Title
383
- title = result.get("title", "")
384
- st.markdown(f"<div class='article-title'>{idx+1}. {title}</div>", unsafe_allow_html=True)
385
-
386
- # Get content
387
- lang_code = LANGUAGES[selected_lang]
388
- summary_data = wiki_api.get_page_summary(title, lang_code)
389
-
390
- # Show thumbnail
391
- if summary_data and "thumbnail" in summary_data:
392
- col1, col2 = st.columns([1, 4])
393
- with col1:
394
- st.image(summary_data["thumbnail"]["source"], width=100)
395
- content_col = col2
396
- else:
397
- content_col = st
398
 
399
- with content_col:
400
- # AI Summary
401
- if llm_connected and selected_model:
402
- # Get detailed content for AI
403
- detailed_content = wiki_api.get_page_content(title, lang_code)
404
-
405
- if detailed_content:
406
- with st.spinner("Generating AI summary..."):
407
- ai_summary = llm.generate_summary(
408
- detailed_content,
409
- selected_model,
410
- selected_lang,
411
- summary_type
412
- )
413
-
414
- if ai_summary and not ai_summary.startswith("Error"):
415
- st.markdown("<div class='ai-summary'>", unsafe_allow_html=True)
416
- st.markdown("**🤖 AI Summary:**")
417
- st.write(ai_summary)
418
-
419
- # Translation
420
- if 'enable_translation' in locals() and enable_translation:
421
- with st.spinner("Translating..."):
422
- translated = llm.translate_text(ai_summary, target_lang, selected_model)
423
- if translated != ai_summary:
424
- st.markdown(f"**🌐 {target_lang}:**")
425
- st.write(translated)
426
-
427
- st.markdown("</div>", unsafe_allow_html=True)
428
- else:
429
- st.warning("AI summary failed")
430
- if summary_data:
431
- basic_summary = summary_data.get("extract", "")
432
- st.write(simple_summarize(basic_summary, 3))
433
- else:
434
- st.warning("Could not fetch detailed content")
435
 
436
- else:
437
- # Basic summary
438
- if summary_data:
439
- basic_summary = summary_data.get("extract", "")
440
- if summary_type == "short":
441
- basic_summary = simple_summarize(basic_summary, 2)
442
- elif summary_type == "medium":
443
- basic_summary = simple_summarize(basic_summary, 4)
444
- st.write(basic_summary)
445
- else:
446
- snippet = clean_html(result.get("snippet", ""))
447
- st.write(snippet)
448
-
449
- # Wikipedia link
450
- if summary_data and "content_urls" in summary_data:
451
- wiki_url = summary_data["content_urls"]["desktop"]["page"]
452
- st.markdown(f"[📖 Read on Wikipedia]({wiki_url})")
453
 
454
- st.markdown("</div>", unsafe_allow_html=True)
455
- st.markdown("---")
 
 
 
 
 
 
 
 
456
 
457
- elif search_clicked and not query:
458
- st.warning("Please enter a search query")
 
459
 
460
- # Footer stats
461
  st.markdown("---")
462
- col1, col2, col3, col4 = st.columns(4)
463
 
 
464
  with col1:
465
- st.metric("🌍 Languages", len(LANGUAGES))
 
 
 
 
 
 
466
  with col2:
467
- st.metric("🤖 LLM", "ON" if llm_connected else "OFF")
468
- with col3:
469
- st.metric("📚 Models", len(available_models))
470
- with col4:
471
- st.metric("📊 Results", len(st.session_state.search_results))
 
472
 
473
- # Setup guide
474
- with st.expander("🛠️ Ollama Setup Guide"):
475
  st.markdown("""
476
- ### Quick Setup:
477
-
478
- **1. Install Ollama:**
479
- ```bash
480
- # macOS/Linux
481
- curl -fsSL https://ollama.ai/install.sh | sh
 
 
 
 
 
 
 
 
 
 
 
482
 
483
- # Windows: Download from https://ollama.ai
484
- ```
 
 
485
 
486
- **2. Pull a model:**
487
- ```bash
488
- ollama pull llama3.2
489
- ```
 
 
 
 
 
 
 
 
 
 
 
490
 
491
- **3. Start server:**
492
- ```bash
493
- ollama serve
494
- ```
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
- **4. Refresh this page!**
 
 
 
 
 
497
 
498
- ### Recommended Models:
499
- - `llama3.2` - Best overall performance
500
- - `mistral` - Fast and efficient
501
- - `qwen2` - Good for multilingual content
502
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
  if __name__ == "__main__":
505
  main()
 
1
  import streamlit as st
2
  import requests
3
  import json
4
+ from typing import Dict, List, Optional, Tuple
5
  import re
6
  from urllib.parse import quote
7
  import time
8
+ from datetime import datetime
9
+ import plotly.express as px
10
+ import pandas as pd
11
+ from collections import Counter
12
+ import hashlib
13
 
 
14
  st.set_page_config(
15
+ page_title="WikiBot Pro - AI-Powered Multilingual Assistant",
16
+ page_icon="🤖",
17
  layout="wide",
18
+ initial_sidebar_state="expanded"
19
  )
20
 
 
21
# Supported Wikipedia languages: UI name -> ISO code, flag emoji, endonym.
# Insertion order matters: "English" first is the default selectbox entry.
_LANGUAGE_ROWS = [
    ("English", "en", "🇺🇸", "English"),
    ("Telugu", "te", "🇮🇳", "తెలుగు"),
    ("Hindi", "hi", "🇮🇳", "हिन्दी"),
    ("Spanish", "es", "🇪🇸", "Español"),
    ("French", "fr", "🇫🇷", "Français"),
    ("German", "de", "🇩🇪", "Deutsch"),
    ("Italian", "it", "🇮🇹", "Italiano"),
    ("Portuguese", "pt", "🇵🇹", "Português"),
    ("Russian", "ru", "🇷🇺", "Русский"),
    ("Japanese", "ja", "🇯🇵", "日本語"),
    ("Chinese", "zh", "🇨🇳", "中文"),
    ("Arabic", "ar", "🇸🇦", "العربية"),
    ("Korean", "ko", "🇰🇷", "한국어"),
    ("Tamil", "ta", "🇮🇳", "தமிழ்"),
    ("Bengali", "bn", "🇧🇩", "বাংলা"),
    ("Marathi", "mr", "🇮🇳", "मराठी"),
    ("Gujarati", "gu", "🇮🇳", "ગુજરાતી"),
    ("Kannada", "kn", "🇮🇳", "ಕನ್ನಡ"),
    ("Malayalam", "ml", "🇮🇳", "മലയാളം"),
    ("Punjabi", "pa", "🇮🇳", "ਪੰਜਾਬੀ"),
]
LANGUAGES = {
    name: {"code": code, "flag": flag, "native": native}
    for name, code, flag, native in _LANGUAGE_ROWS
}

# UI color themes: primary accent, page background, body text.
THEMES = {
    "Default": {"primary": "#1f77b4", "background": "#ffffff", "text": "#000000"},
    "Dark": {"primary": "#00d4aa", "background": "#0e1117", "text": "#ffffff"},
    "Ocean": {"primary": "#0077be", "background": "#f0f8ff", "text": "#003366"},
    "Forest": {"primary": "#228b22", "background": "#f5fff5", "text": "#006400"},
    "Sunset": {"primary": "#ff6b35", "background": "#fff5f0", "text": "#8b0000"},
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
class WikipediaAPI:
    """Wikipedia client combining the REST v1 API (page summaries) and the
    Action API (search, extracts, random pages, categories), with a simple
    in-memory cache keyed by an MD5 of the call arguments.

    NOTE(review): reconstructed from a rendered diff; a few unchanged
    context lines (the "format": "json" param and the request/parse
    plumbing in get_page_content) were elided by the diff and restored
    from the surrounding pattern — confirm against the repository copy.
    """

    def __init__(self):
        # Both URL templates take the language code via .format(lang).
        self.base_url = "https://{}.wikipedia.org/api/rest_v1"
        self.search_url = "https://{}.wikipedia.org/w/api.php"
        self.cache = {}  # cache_key -> previously fetched payload

    def _get_cache_key(self, *args) -> str:
        """Build a stable cache key from the stringified arguments."""
        joined = "_".join(str(arg) for arg in args)
        return hashlib.md5(joined.encode()).hexdigest()

    def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
        """Full-text search; returns raw Action-API hit dicts (cached)."""
        cache_key = self._get_cache_key("search", query, lang, limit)
        if cache_key in self.cache:
            return self.cache[cache_key]

        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "search",
                "srsearch": query,
                "srlimit": limit,
                "srprop": "snippet|titlesnippet|size|wordcount|timestamp",
            }
            response = requests.get(self.search_url.format(lang), params=params, timeout=10)
            response.raise_for_status()

            hits = response.json().get("query", {}).get("search", [])
            self.cache[cache_key] = hits
            return hits
        except Exception as e:
            st.error(f"Search error: {str(e)}")
            return []

    def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
        """Fetch the REST v1 summary payload for *title* (cached); None on error."""
        cache_key = self._get_cache_key("summary", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]

        try:
            encoded_title = quote(title.replace(" ", "_"))
            url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()

            payload = response.json()
            self.cache[cache_key] = payload
            return payload
        except Exception as e:
            st.error(f"Summary error: {str(e)}")
            return None

    def get_page_content(self, title: str, lang: str = "en", sections: int = 3) -> Optional[str]:
        """Fetch the plain-text lead extract (up to ~3000 chars); None on error."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "extracts",
                "exintro": True,            # lead section only
                "explaintext": True,        # plain text, no HTML
                "exsectionformat": "plain",
                "titles": title,
                "exchars": 3000,
            }
            # NOTE(review): the request/parse lines below were elided diff
            # context, restored from the get_page_categories pattern.
            response = requests.get(self.search_url.format(lang), params=params, timeout=10)
            response.raise_for_status()

            pages = response.json().get("query", {}).get("pages", {})
            for _page_id, page_data in pages.items():
                if "extract" in page_data:
                    return page_data["extract"]
            return None
        except Exception as e:
            st.error(f"Content error: {str(e)}")
            return None

    def get_random_article(self, lang: str = "en") -> Optional[Dict]:
        """Pick a random main-namespace page and return its summary payload."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "random",
                "rnnamespace": 0,  # articles only, no talk/meta pages
                "rnlimit": 1,
            }
            response = requests.get(self.search_url.format(lang), params=params, timeout=10)
            response.raise_for_status()

            random_pages = response.json().get("query", {}).get("random", [])
            if random_pages:
                return self.get_page_summary(random_pages[0]["title"], lang)
            return None
        except Exception as e:
            st.error(f"Random article error: {str(e)}")
            return None

    def get_page_categories(self, title: str, lang: str = "en") -> List[str]:
        """Return up to 10 category names (without the "Category:" prefix)."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "categories",
                "titles": title,
                "cllimit": 10,
            }
            response = requests.get(self.search_url.format(lang), params=params, timeout=10)
            response.raise_for_status()

            pages = response.json().get("query", {}).get("pages", {})
            for _page_id, page_data in pages.items():
                if "categories" in page_data:
                    return [cat["title"].replace("Category:", "") for cat in page_data["categories"]]
            return []
        except Exception:
            return []
193
+
194
def init_session_state():
    """Seed st.session_state with every key the rest of the app reads.

    Idempotent: existing values are never overwritten, so reruns keep the
    user's history, favorites, theme and preferences.
    """
    defaults = {
        "search_history": [],
        "favorites": [],
        "theme": "Default",
        "user_preferences": {
            "default_language": "English",
            "results_per_page": 5,
            "summary_length": "Medium",
            "show_images": True,
            "auto_translate": False,
        },
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
210
+
211
def apply_theme(theme_name: str):
    """Inject CSS overrides for the selected theme into the page.

    Looks the name up in the module-level THEMES palette; raises KeyError
    for unknown names (callers only pass keys from THEMES).
    """
    palette = THEMES[theme_name]
    primary = palette["primary"]
    background = palette["background"]
    text = palette["text"]

    st.markdown(f"""
    <style>
    .main {{
        background-color: {background};
        color: {text};
    }}
    .stSelectbox label, .stTextInput label, .stSlider label {{
        color: {text} !important;
    }}
    .result-card {{
        background-color: {background};
        border-color: {primary};
        color: {text};
    }}
    .metric-card {{
        background: linear-gradient(135deg, {primary}20, {primary}10);
        border-left: 4px solid {primary};
    }}
    </style>
    """, unsafe_allow_html=True)
234
 
235
def clean_html(text: str) -> str:
    """Strip HTML tags from *text* with a non-greedy tag regex.

    Fix: restores the empty/None guard the previous revision had — without
    it, passing None (e.g. a missing API snippet field) raised TypeError
    inside re.sub. Returns "" for falsy input.
    """
    if not text:
        return ""
    return re.sub(r'<.*?>', '', text)
239
 
240
def summarize_text(text: str, length: str = "Medium") -> str:
    """Summarize *text* by keeping its first few sentences.

    Sentences are split on runs of ., ! and ? (so the original terminator
    is normalized to a period in the output). "Short" keeps 2 sentences,
    "Medium" 4, anything else ("Long") 6.

    Fix: empty or whitespace-only input previously produced the bogus
    one-character string "." — it now returns "".
    """
    sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
    if not sentences:
        return ""

    keep = {"Short": 2, "Medium": 4}.get(length, 6)
    return '. '.join(sentences[:keep]) + '.'
251
+
252
def add_to_search_history(query: str, language: str, results_count: int):
    """Prepend one search record to the session history.

    Records query, UI language, a human-readable timestamp and the hit
    count; the history is capped at the 50 most recent entries so the
    sidebar never grows unbounded.
    """
    record = {
        "query": query,
        "language": language,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results_count": results_count,
    }
    history = st.session_state.search_history
    history.insert(0, record)  # newest first
    st.session_state.search_history = history[:50]
263
+
264
def create_search_analytics():
    """Build Plotly figures from the session search history.

    Returns a (language-pie, daily-timeline) tuple, or None when there is
    no history to chart yet.
    """
    history = st.session_state.search_history
    if not history:
        return None

    df = pd.DataFrame(history)

    # Pie chart: how often each UI language was searched in.
    lang_counts = df['language'].value_counts()
    fig_lang = px.pie(
        values=lang_counts.values,
        names=lang_counts.index,
        title="Search Languages Distribution",
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Line chart: searches per calendar day.
    df['date'] = pd.to_datetime(df['timestamp']).dt.date
    daily_searches = df.groupby('date').size().reset_index(name='searches')
    fig_timeline = px.line(
        daily_searches,
        x='date',
        y='searches',
        title="Daily Search Activity",
        markers=True,
    )

    return fig_lang, fig_timeline
292
+
293
def sidebar_content():
    """Render the full sidebar: theme picker, preferences, quick actions,
    recent-search history and usage statistics.

    Communicates with main() through session-state flags
    (random_article_trigger, show_analytics, repeat_search).
    """
    st.sidebar.title("🤖 WikiBot Pro")
    st.sidebar.markdown("---")

    # --- Appearance -------------------------------------------------
    st.sidebar.subheader("🎨 Appearance")
    theme_names = list(THEMES.keys())
    theme = st.sidebar.selectbox(
        "Theme",
        options=theme_names,
        index=theme_names.index(st.session_state.theme),
        key="theme_selector",
    )
    if theme != st.session_state.theme:
        # Persist the choice and rerun so apply_theme() picks it up.
        st.session_state.theme = theme
        st.rerun()

    # --- Preferences ------------------------------------------------
    st.sidebar.subheader("⚙️ Preferences")
    language_names = list(LANGUAGES.keys())
    default_lang = st.sidebar.selectbox(
        "Default Language",
        options=language_names,
        index=language_names.index(st.session_state.user_preferences["default_language"]),
    )
    show_images = st.sidebar.checkbox(
        "Show Images",
        value=st.session_state.user_preferences["show_images"],
    )
    st.session_state.user_preferences.update({
        "default_language": default_lang,
        "show_images": show_images,
    })

    # --- Quick actions ----------------------------------------------
    st.sidebar.subheader("🚀 Quick Actions")
    if st.sidebar.button("🎲 Random Article", use_container_width=True):
        st.session_state.random_article_trigger = True
    if st.sidebar.button("📊 Search Analytics", use_container_width=True):
        st.session_state.show_analytics = True
    if st.sidebar.button("🗑️ Clear History", use_container_width=True):
        st.session_state.search_history = []
        st.sidebar.success("History cleared!")

    # --- Recent searches (5 newest) ---------------------------------
    if st.session_state.search_history:
        st.sidebar.subheader("🕐 Recent Searches")
        for i, search in enumerate(st.session_state.search_history[:5]):
            with st.sidebar.expander(f"{search['query'][:20]}..."):
                st.write(f"**Language:** {search['language']}")
                st.write(f"**Time:** {search['timestamp']}")
                st.write(f"**Results:** {search['results_count']}")
                if st.button("🔄 Repeat", key=f"repeat_{i}"):
                    st.session_state.repeat_search = search

    # --- Statistics -------------------------------------------------
    st.sidebar.subheader("📈 Statistics")
    total_searches = len(st.session_state.search_history)
    favorite_lang = "None"
    if st.session_state.search_history:
        lang_counter = Counter([s['language'] for s in st.session_state.search_history])
        favorite_lang = lang_counter.most_common(1)[0][0] if lang_counter else "None"

    st.sidebar.metric("Total Searches", total_searches)
    st.sidebar.metric("Favorite Language", favorite_lang)
    st.sidebar.metric("Favorites Saved", len(st.session_state.favorites))
365
 
366
  def main():
367
+ init_session_state()
368
+ apply_theme(st.session_state.theme)
369
+
370
+ # Enhanced CSS
371
  st.markdown("""
372
  <style>
373
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
374
+
375
+ html, body, [class*="css"] {
376
+ font-family: 'Inter', sans-serif;
377
+ }
378
+
379
  .main-header {
380
  text-align: center;
381
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
382
+ -webkit-background-clip: text;
383
+ -webkit-text-fill-color: transparent;
384
+ font-weight: 700;
385
+ font-size: 3rem;
386
+ margin-bottom: 0.5rem;
387
+ }
388
+
389
+ .subtitle {
390
+ text-align: center;
391
+ color: #6c757d;
392
+ font-size: 1.2rem;
393
  margin-bottom: 2rem;
394
+ font-weight: 300;
395
  }
396
+
397
  .search-container {
398
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
399
+ padding: 2rem;
400
+ border-radius: 20px;
401
+ margin-bottom: 2rem;
402
+ box-shadow: 0 10px 30px rgba(0,0,0,0.1);
403
+ backdrop-filter: blur(10px);
404
  }
405
+
406
  .result-card {
407
+ background: white;
408
  padding: 1.5rem;
409
+ border-radius: 15px;
410
+ border: 1px solid #e9ecef;
411
+ margin-bottom: 1.5rem;
412
+ box-shadow: 0 5px 15px rgba(0,0,0,0.08);
413
+ transition: all 0.3s ease;
414
+ position: relative;
415
+ overflow: hidden;
416
+ }
417
+
418
+ .result-card:hover {
419
+ transform: translateY(-2px);
420
+ box-shadow: 0 8px 25px rgba(0,0,0,0.15);
421
+ }
422
+
423
+ .result-card::before {
424
+ content: '';
425
+ position: absolute;
426
+ top: 0;
427
+ left: 0;
428
+ right: 0;
429
+ height: 4px;
430
+ background: linear-gradient(90deg, #667eea, #764ba2);
431
  }
432
+
433
  .article-title {
434
+ color: #2c3e50;
435
+ font-weight: 600;
436
+ font-size: 1.3rem;
437
+ margin-bottom: 1rem;
438
+ line-height: 1.4;
439
+ }
440
+
441
+ .article-meta {
442
+ display: flex;
443
+ gap: 1rem;
444
+ margin-bottom: 1rem;
445
+ font-size: 0.9rem;
446
+ color: #6c757d;
447
  }
448
+
449
+ .metric-card {
450
+ background: linear-gradient(135deg, #667eea20, #764ba210);
451
+ padding: 1rem;
452
+ border-radius: 10px;
453
+ border-left: 4px solid #667eea;
454
  margin-bottom: 1rem;
455
+ transition: all 0.3s ease;
456
  }
457
+
458
+ .metric-card:hover {
459
+ transform: scale(1.02);
 
460
  }
461
+
462
+ .floating-button {
463
+ position: fixed;
464
+ bottom: 2rem;
465
+ right: 2rem;
466
+ background: linear-gradient(135deg, #667eea, #764ba2);
467
+ color: white;
468
+ border: none;
469
+ border-radius: 50%;
470
+ width: 60px;
471
+ height: 60px;
472
+ font-size: 1.5rem;
473
+ cursor: pointer;
474
+ box-shadow: 0 4px 15px rgba(0,0,0,0.2);
475
+ transition: all 0.3s ease;
476
+ z-index: 1000;
477
  }
478
+
479
+ .floating-button:hover {
480
+ transform: scale(1.1);
481
+ box-shadow: 0 6px 20px rgba(0,0,0,0.3);
482
+ }
483
+
484
+ .feature-badge {
485
+ display: inline-block;
486
+ background: linear-gradient(135deg, #ff9a56, #ff6b6b);
487
+ color: white;
488
+ padding: 0.25rem 0.75rem;
489
+ border-radius: 50px;
490
+ font-size: 0.8rem;
491
+ font-weight: 500;
492
+ margin: 0.25rem;
493
+ }
494
+
495
+ .category-tag {
496
+ display: inline-block;
497
+ background: #e3f2fd;
498
+ color: #1976d2;
499
+ padding: 0.25rem 0.5rem;
500
+ border-radius: 5px;
501
+ font-size: 0.8rem;
502
+ margin: 0.25rem;
503
+ }
504
+
505
+ @media (max-width: 768px) {
506
+ .main-header {
507
+ font-size: 2rem;
508
+ }
509
+ .search-container {
510
+ padding: 1rem;
511
+ }
512
+ .floating-button {
513
+ bottom: 1rem;
514
+ right: 1rem;
515
+ width: 50px;
516
+ height: 50px;
517
+ font-size: 1.2rem;
518
+ }
519
+ }
520
+
521
+ .animate-fade-in {
522
+ animation: fadeIn 0.5s ease-in;
523
+ }
524
+
525
+ @keyframes fadeIn {
526
+ from { opacity: 0; transform: translateY(20px); }
527
+ to { opacity: 1; transform: translateY(0); }
528
  }
529
  </style>
530
  """, unsafe_allow_html=True)
531
 
532
+ # Sidebar
533
+ sidebar_content()
 
534
 
535
+ # Main header
536
+ st.markdown("<h1 class='main-header'>🤖 WikiBot Pro</h1>", unsafe_allow_html=True)
537
+ st.markdown("<p class='subtitle'>AI-Powered Multilingual Wikipedia Assistant with Advanced Features</p>", unsafe_allow_html=True)
538
 
539
+ # Feature highlights
540
+ col1, col2, col3, col4 = st.columns(4)
541
+ with col1:
542
+ st.markdown("<div class='metric-card'><h3>🌍</h3><p><strong>20+ Languages</strong><br>Multilingual Support</p></div>", unsafe_allow_html=True)
543
+ with col2:
544
+ st.markdown("<div class='metric-card'><h3>🤖</h3><p><strong>AI-Powered</strong><br>Smart Summaries</p></div>", unsafe_allow_html=True)
545
+ with col3:
546
+ st.markdown("<div class='metric-card'><h3>⚡</h3><p><strong>Fast Search</strong><br>Cached Results</p></div>", unsafe_allow_html=True)
547
+ with col4:
548
+ st.markdown("<div class='metric-card'><h3>📊</h3><p><strong>Analytics</strong><br>Search Insights</p></div>", unsafe_allow_html=True)
549
+
550
+ # Initialize API
551
  wiki_api = WikipediaAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
+ # Handle random article trigger
554
+ if hasattr(st.session_state, 'random_article_trigger'):
555
+ with st.spinner("🎲 Finding a random article..."):
556
+ random_article = wiki_api.get_random_article(
557
+ LANGUAGES[st.session_state.user_preferences["default_language"]]["code"]
558
+ )
559
+ if random_article:
560
+ st.success("🎲 Random Article Discovery!")
561
+ display_article_card(random_article, wiki_api, 0,
562
+ LANGUAGES[st.session_state.user_preferences["default_language"]]["code"])
563
+ delattr(st.session_state, 'random_article_trigger')
564
+
565
+ # Handle analytics display
566
+ if hasattr(st.session_state, 'show_analytics'):
567
+ st.subheader("📊 Search Analytics")
568
+ analytics = create_search_analytics()
569
+ if analytics:
570
+ col1, col2 = st.columns(2)
571
+ with col1:
572
+ st.plotly_chart(analytics[0], use_container_width=True)
573
+ with col2:
574
+ st.plotly_chart(analytics[1], use_container_width=True)
575
+ else:
576
+ st.info("No search history available for analytics.")
577
+ delattr(st.session_state, 'show_analytics')
578
+
579
+ # Search interface
580
  st.markdown("<div class='search-container'>", unsafe_allow_html=True)
581
 
 
582
  col1, col2 = st.columns([3, 1])
583
 
584
  with col1:
585
  query = st.text_input(
586
+ "🔍 Search Wikipedia",
587
+ placeholder="e.g., 'Artificial Intelligence', 'కృష్ణ నది', 'गांधी जी'",
588
+ help="Enter your search query in any language",
589
+ value=getattr(st.session_state, 'repeat_search', {}).get('query', '')
590
  )
591
 
592
  with col2:
593
+ # Get language options with flags and native names
594
+ lang_options = [f"{LANGUAGES[lang]['flag']} {lang} ({LANGUAGES[lang]['native']})"
595
+ for lang in LANGUAGES.keys()]
596
+
597
+ selected_lang_display = st.selectbox(
598
  "🌍 Language",
599
+ options=lang_options,
600
+ index=list(LANGUAGES.keys()).index(st.session_state.user_preferences["default_language"])
601
  )
602
+
603
+ # Extract actual language name
604
+ selected_lang = selected_lang_display.split(' ', 1)[1].split(' (')[0]
605
 
606
+ # Advanced options
607
+ with st.expander("⚙️ Advanced Search Options"):
608
+ col1, col2, col3 = st.columns(3)
609
+ with col1:
610
+ num_results = st.slider("📄 Number of results", 1, 15,
611
+ st.session_state.user_preferences["results_per_page"])
612
+ with col2:
613
+ summary_length = st.selectbox("📝 Summary length",
614
+ ["Short", "Medium", "Long"],
615
+ index=["Short", "Medium", "Long"].index(st.session_state.user_preferences["summary_length"]))
616
+ with col3:
617
+ search_mode = st.selectbox("🔍 Search mode",
618
+ ["Standard", "Recent", "Popular"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
  st.markdown("</div>", unsafe_allow_html=True)
621
 
622
  # Search button
623
+ if st.button("🔎 Search Wikipedia", type="primary", use_container_width=True):
624
+ if query:
625
+ lang_code = LANGUAGES[selected_lang]["code"]
626
+
627
+ with st.spinner(f"🔍 Searching Wikipedia in {selected_lang}..."):
628
+ # Search for articles
629
+ search_results = wiki_api.search_articles(query, lang_code, num_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
+ if search_results:
632
+ # Add to search history
633
+ add_to_search_history(query, selected_lang, len(search_results))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
 
635
+ st.success(f"✅ Found {len(search_results)} results in {selected_lang}")
636
+
637
+ # Display results with enhanced cards
638
+ for idx, result in enumerate(search_results):
639
+ display_article_card(result, wiki_api, idx, lang_code, summary_length)
 
 
 
 
 
 
 
 
 
 
 
 
640
 
641
+ else:
642
+ st.warning(f"❌ No results found for '{query}' in {selected_lang}")
643
+ # Suggest alternative searches
644
+ st.info("💡 **Suggestions:**")
645
+ st.write("• Try different keywords or phrases")
646
+ st.write("• Switch to a different language")
647
+ st.write("• Check spelling and try simpler terms")
648
+ st.write("• Use the Random Article feature to explore")
649
+ else:
650
+ st.warning("⚠️ Please enter a search query")
651
 
652
+ # Clear repeat search
653
+ if hasattr(st.session_state, 'repeat_search'):
654
+ delattr(st.session_state, 'repeat_search')
655
 
656
+ # Footer with enhanced information
657
  st.markdown("---")
658
+ st.markdown("### 🌟 WikiBot Pro Features")
659
 
660
+ col1, col2, col3 = st.columns(3)
661
  with col1:
662
+ st.markdown("""
663
+ **🌍 Multilingual Support**
664
+ - 20+ languages including Indian languages
665
+ - Native script support
666
+ - Cultural context awareness
667
+ """)
668
+
669
  with col2:
670
+ st.markdown("""
671
+ **🤖 AI-Powered Features**
672
+ - Smart text summarization
673
+ - Intelligent caching
674
+ - Personalized recommendations
675
+ """)
676
 
677
+ with col3:
 
678
  st.markdown("""
679
+ **📊 Advanced Analytics**
680
+ - Search history tracking
681
+ - Language usage patterns
682
+ - Performance insights
683
+ """)
684
+
685
def display_article_card(result: Dict, wiki_api: WikipediaAPI, idx: int, lang_code: str, summary_length: str = "Medium"):
    """Render one search result as an enhanced article card.

    Shows the title with metadata (word count, byte size, last-edit date),
    favorite/share action buttons, the article summary with an optional
    thumbnail, up to five category tags, and a link to the full article.

    Args:
        result: One search hit from the MediaWiki search API (expected keys:
            "title", "wordcount", "size", "timestamp", "snippet").
        wiki_api: Client used to fetch the page summary and categories.
        idx: Zero-based position in the result list; used for display
            numbering and to build unique Streamlit widget keys.
        lang_code: Wikipedia language subdomain, e.g. "en" or "te".
        summary_length: "Short" | "Medium" | "Long", forwarded to the
            summarizer via display_article_content().
    """
    st.markdown("<div class='result-card animate-fade-in'>", unsafe_allow_html=True)

    title = result.get("title", "")

    # Header row: title + metadata on the left, action buttons on the right.
    col1, col2 = st.columns([3, 1])

    with col1:
        st.markdown(f"<div class='article-title'>{idx+1}. {title}</div>", unsafe_allow_html=True)

        wordcount = result.get("wordcount", 0)
        size = result.get("size", 0)
        timestamp = result.get("timestamp", "")

        # timestamp is ISO-8601; the first 10 characters are the YYYY-MM-DD date.
        meta_html = f"""
        <div class='article-meta'>
            <span>📝 {wordcount} words</span>
            <span>📊 {size} bytes</span>
            <span>🕐 {timestamp[:10] if timestamp else 'Unknown'}</span>
        </div>
        """
        st.markdown(meta_html, unsafe_allow_html=True)

    with col2:
        # Defensive: ensure the favorites list exists even if sidebar
        # initialisation has not run yet in this session.
        st.session_state.setdefault("favorites", [])
        if st.button("⭐ Favorite", key=f"fav_{idx}"):
            if title not in st.session_state.favorites:
                st.session_state.favorites.append(title)
                st.success("Added to favorites!")
            else:
                # Previously the duplicate case gave no feedback at all.
                st.info("Already in favorites")

        if st.button("🔗 Share", key=f"share_{idx}"):
            st.info(f"Share this article: {title}")

    # Fetch the page summary for richer content than the raw search snippet.
    summary_data = wiki_api.get_page_summary(title, lang_code)

    if summary_data:
        # Show the thumbnail beside the text when the user opted into images.
        if st.session_state.user_preferences["show_images"] and "thumbnail" in summary_data:
            img_col, text_col = st.columns([1, 3])
            with img_col:
                st.image(summary_data["thumbnail"]["source"], width=150, caption="Wikipedia Image")
            with text_col:
                display_article_content(summary_data, summary_length)
        else:
            display_article_content(summary_data, summary_length)

        # Category tags (first five only, to keep the card compact).
        categories = wiki_api.get_page_categories(title, lang_code)
        if categories:
            st.markdown("**📚 Categories:**")
            for cat in categories[:5]:
                st.markdown(f"<span class='category-tag'>{cat}</span>", unsafe_allow_html=True)

        # BUG FIX: percent-encode the title so links containing quotes,
        # parentheses, '&', '?' or non-Latin scripts remain valid Markdown
        # link targets (previously only spaces were handled).
        wiki_url = f"https://{lang_code}.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"
        st.markdown(f"🔗 [Read full article on Wikipedia]({wiki_url})")

    else:
        # Fall back to the HTML-tagged search snippet when no summary exists.
        snippet = result.get("snippet", "No summary available")
        cleaned_snippet = clean_html(snippet)
        st.write(cleaned_snippet)

    st.markdown("</div>", unsafe_allow_html=True)
753
+
754
def display_article_content(summary_data: Dict, summary_length: str) -> None:
    """Write the article extract (run through the summarizer) and, when the
    summary payload carries coordinates, an info banner with the location.

    Args:
        summary_data: Wikipedia page-summary payload; "extract" and
            "coordinates" keys are consumed when present.
        summary_length: "Short" | "Medium" | "Long", passed to summarize_text.
    """
    extract = summary_data.get("extract", "")
    if extract:
        st.write(summarize_text(extract, summary_length))

    # Geographic articles include a coordinates object in the summary.
    if "coordinates" in summary_data:
        coords = summary_data["coordinates"]
        lat = coords.get("lat", 0)
        lon = coords.get("lon", 0)
        st.info(f"📍 Location: {lat:.4f}, {lon:.4f}")
765
 
766
# Script entry point: launch the Streamlit app only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()