import streamlit as st
import requests
import json
from typing import Dict, List, Optional, Tuple
import re
from urllib.parse import quote
import time
from datetime import datetime
import pandas as pd
import plotly.express as px
from collections import Counter
import hashlib

st.set_page_config(
    page_title="WikiBot Pro - AI-Powered Multilingual Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

LANGUAGES = {
    "English": {"code": "en", "flag": "🇺🇸", "native": "English"},
    "Telugu": {"code": "te", "flag": "🇮🇳", "native": "తెలుగు"},
    "Hindi": {"code": "hi", "flag": "🇮🇳", "native": "हिन्दी"},
    "Spanish": {"code": "es", "flag": "🇪🇸", "native": "Español"},
    "French": {"code": "fr", "flag": "🇫🇷", "native": "Français"},
    "German": {"code": "de", "flag": "🇩🇪", "native": "Deutsch"},
    "Italian": {"code": "it", "flag": "🇮🇹", "native": "Italiano"},
    "Portuguese": {"code": "pt", "flag": "🇵🇹", "native": "Português"},
    "Russian": {"code": "ru", "flag": "🇷🇺", "native": "Русский"},
    "Japanese": {"code": "ja", "flag": "🇯🇵", "native": "日本語"},
    "Chinese": {"code": "zh", "flag": "🇨🇳", "native": "中文"},
    "Arabic": {"code": "ar", "flag": "🇸🇦", "native": "العربية"},
    "Korean": {"code": "ko", "flag": "🇰🇷", "native": "한국어"},
    "Tamil": {"code": "ta", "flag": "🇮🇳", "native": "தமிழ்"},
    "Bengali": {"code": "bn", "flag": "🇧🇩", "native": "বাংলা"},
    "Marathi": {"code": "mr", "flag": "🇮🇳", "native": "मराठी"},
    "Gujarati": {"code": "gu", "flag": "🇮🇳", "native": "ગુજરાતી"},
    "Kannada": {"code": "kn", "flag": "🇮🇳", "native": "ಕನ್ನಡ"},
    "Malayalam": {"code": "ml", "flag": "🇮🇳", "native": "മലയാളം"},
    "Punjabi": {"code": "pa", "flag": "🇮🇳", "native": "ਪੰਜਾਬੀ"}
}

THEMES = {
    "Default": {"primary": "#1f77b4", "background": "#ffffff", "text": "#000000"},
    "Dark": {"primary": "#00d4aa", "background": "#0e1117", "text": "#ffffff"},
    "Ocean": {"primary": "#0077be", "background": "#f0f8ff", "text": "#003366"},
    "Forest": {"primary": "#228b22", "background": "#f5fff5", "text": "#006400"},
    "Sunset": {"primary": "#ff6b35", "background": "#fff5f0", "text": "#8b0000"}
}


class WikipediaAPI:
    def __init__(self):
        self.base_url = "https://{}.wikipedia.org/api/rest_v1"
        self.search_url = "https://{}.wikipedia.org/w/api.php"
        self.cache = {}

    def _get_cache_key(self, *args) -> str:
        """Generate a cache key from the arguments."""
        key_string = "_".join(str(arg) for arg in args)
        return hashlib.md5(key_string.encode()).hexdigest()

    def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
        """Search for Wikipedia articles, with caching."""
        cache_key = self._get_cache_key("search", query, lang, limit)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "search",
                "srsearch": query,
                "srlimit": limit,
                "srprop": "snippet|titlesnippet|size|wordcount|timestamp"
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            results = data.get("query", {}).get("search", [])
            self.cache[cache_key] = results
            return results
        except Exception as e:
            st.error(f"Search error: {str(e)}")
            return []

    def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
        """Get a page summary via the REST API, with caching."""
        cache_key = self._get_cache_key("summary", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            encoded_title = quote(title.replace(" ", "_"))
            url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            result = response.json()
            self.cache[cache_key] = result
            return result
        except Exception as e:
            st.error(f"Summary error: {str(e)}")
            return None

    def get_page_content(self, title: str, lang: str = "en", sections: int = 3) -> Optional[str]:
        """Get the introductory extract of a page as plain text."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "extracts",
                "exintro": True,
                "explaintext": True,
                "exsectionformat": "plain",
                "titles": title,
                "exchars": 3000
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            for page_id, page_data in pages.items():
                if "extract" in page_data:
                    return page_data["extract"]
            return None
        except Exception as e:
            st.error(f"Content error: {str(e)}")
            return None

    def get_random_article(self, lang: str = "en") -> Optional[Dict]:
        """Get a random Wikipedia article."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "random",
                "rnnamespace": 0,
                "rnlimit": 1
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            random_pages = data.get("query", {}).get("random", [])
            if random_pages:
                title = random_pages[0]["title"]
                return self.get_page_summary(title, lang)
            return None
        except Exception as e:
            st.error(f"Random article error: {str(e)}")
            return None

    def get_page_categories(self, title: str, lang: str = "en") -> List[str]:
        """Get categories for a Wikipedia page."""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "categories",
                "titles": title,
                "cllimit": 10
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            for page_id, page_data in pages.items():
                if "categories" in page_data:
                    return [cat["title"].replace("Category:", "") for cat in page_data["categories"]]
            return []
        except Exception:
            # Categories are optional; fall back to an empty list on any failure.
            return []


def init_session_state():
    """Initialize session state variables."""
    if 'search_history' not in st.session_state:
        st.session_state.search_history = []
    if 'favorites' not in st.session_state:
        st.session_state.favorites = []
    if 'theme' not in st.session_state:
        st.session_state.theme = "Default"
    if 'user_preferences' not in st.session_state:
        st.session_state.user_preferences = {
            "default_language": "English",
            "results_per_page": 5,
            "summary_length": "Medium",
            "show_images": True,
            "auto_translate": False
        }


def apply_theme(theme_name: str):
    """Apply the selected theme by injecting a minimal stylesheet built from its colors."""
    theme = THEMES[theme_name]
    st.markdown(f"""
        <style>
        .stApp {{
            background-color: {theme['background']};
            color: {theme['text']};
        }}
        h1, h2, h3 {{
            color: {theme['primary']};
        }}
        </style>
    """, unsafe_allow_html=True)


def clean_html(text: str) -> str:
    """Remove HTML tags from text."""
    clean = re.compile('<.*?>')
    return re.sub(clean, '', text)


def summarize_text(text: str, length: str = "Medium") -> str:
    """Simple extractive summarization: keep the first few sentences."""
    sentences = re.split(r'[.!?]+', text)
    sentences = [s.strip() for s in sentences if s.strip()]
    if length == "Short":
        return '. '.join(sentences[:2]) + '.'
    elif length == "Medium":
        return '. '.join(sentences[:4]) + '.'
    else:  # Long
        return '. '.join(sentences[:6]) + '.'
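# Illustrative sketch (not wired into the app): one way the WikipediaAPI client and the
# summarize_text helper could be chained. The query string, language code, and the
# _example_usage name are placeholders introduced here for demonstration only.
def _example_usage():
    api = WikipediaAPI()
    hits = api.search_articles("Alan Turing", lang="en", limit=3)
    if hits:
        # The REST summary endpoint returns a dict whose "extract" field holds plain text.
        summary = api.get_page_summary(hits[0]["title"], lang="en")
        if summary and summary.get("extract"):
            print(summarize_text(summary["extract"], length="Short"))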
def add_to_search_history(query: str, language: str, results_count: int):
    """Add a search to the history."""
    search_entry = {
        "query": query,
        "language": language,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results_count": results_count
    }
    st.session_state.search_history.insert(0, search_entry)
    # Keep only the last 50 searches
    st.session_state.search_history = st.session_state.search_history[:50]


def create_search_analytics():
    """Create search analytics visualizations."""
    if not st.session_state.search_history:
        return None

    df = pd.DataFrame(st.session_state.search_history)

    # Language distribution
    lang_counts = df['language'].value_counts()
    fig_lang = px.pie(
        values=lang_counts.values,
        names=lang_counts.index,
        title="Search Languages Distribution",
        color_discrete_sequence=px.colors.qualitative.Set3
    )

    # Search timeline
    df['date'] = pd.to_datetime(df['timestamp']).dt.date
    daily_searches = df.groupby('date').size().reset_index(name='searches')
    fig_timeline = px.line(
        daily_searches,
        x='date',
        y='searches',
        title="Daily Search Activity",
        markers=True
    )

    return fig_lang, fig_timeline


def sidebar_content():
    """Create enhanced sidebar content."""
    st.sidebar.title("🤖 WikiBot Pro")
    st.sidebar.markdown("---")

    # Theme selector
    st.sidebar.subheader("🎨 Appearance")
    theme = st.sidebar.selectbox(
        "Theme",
        options=list(THEMES.keys()),
        index=list(THEMES.keys()).index(st.session_state.theme),
        key="theme_selector"
    )
    if theme != st.session_state.theme:
        st.session_state.theme = theme
        st.rerun()

    # User preferences
    st.sidebar.subheader("⚙️ Preferences")
    default_lang = st.sidebar.selectbox(
        "Default Language",
        options=list(LANGUAGES.keys()),
        index=list(LANGUAGES.keys()).index(st.session_state.user_preferences["default_language"])
    )
    show_images = st.sidebar.checkbox(
        "Show Images",
        value=st.session_state.user_preferences["show_images"]
    )

    # Update preferences
    st.session_state.user_preferences.update({
        "default_language": default_lang,
        "show_images": show_images
    })

    # Quick actions
    st.sidebar.subheader("🚀 Quick Actions")
    if st.sidebar.button("🎲 Random Article", use_container_width=True):
        st.session_state.random_article_trigger = True
    if st.sidebar.button("📊 Search Analytics", use_container_width=True):
        st.session_state.show_analytics = True
    if st.sidebar.button("🗑️ Clear History", use_container_width=True):
        st.session_state.search_history = []
        st.sidebar.success("History cleared!")

    # Search history
    if st.session_state.search_history:
        st.sidebar.subheader("🕐 Recent Searches")
        for i, search in enumerate(st.session_state.search_history[:5]):
            with st.sidebar.expander(f"{search['query'][:20]}..."):
                st.write(f"**Language:** {search['language']}")
                st.write(f"**Time:** {search['timestamp']}")
                st.write(f"**Results:** {search['results_count']}")
                if st.button("🔄 Repeat", key=f"repeat_{i}"):
                    st.session_state.repeat_search = search

    # Statistics
    st.sidebar.subheader("📈 Statistics")
    total_searches = len(st.session_state.search_history)
    favorite_lang = "None"
    if st.session_state.search_history:
        lang_counter = Counter([s['language'] for s in st.session_state.search_history])
        favorite_lang = lang_counter.most_common(1)[0][0] if lang_counter else "None"
    st.sidebar.metric("Total Searches", total_searches)
    st.sidebar.metric("Favorite Language", favorite_lang)
    st.sidebar.metric("Favorites Saved", len(st.session_state.favorites))


def main():
    init_session_state()
    apply_theme(st.session_state.theme)

    # Sidebar
    sidebar_content()
    # Main header
    st.markdown(
        """
        <div style="text-align: center;">
            <h1>🤖 WikiBot Pro</h1>
            <p>AI-Powered Multilingual Wikipedia Assistant with Advanced Features</p>
        </div>
        """,
        unsafe_allow_html=True
    )

    # Feature highlights
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.markdown("**20+ Languages**\n\nMultilingual Support")
    with col2:
        st.markdown("**AI-Powered**\n\nSmart Summaries")
    with col3:
        st.markdown("**Fast Search**\n\nCached Results")
    with col4:
        st.markdown("**Analytics**\n\nSearch Insights")