import streamlit as st
import requests
import json
from typing import Dict, List, Optional, Tuple
import re
from urllib.parse import quote
import time
from datetime import datetime
import pandas as pd
import plotly.express as px  # used by the analytics charts below
from collections import Counter
import hashlib

st.set_page_config(
    page_title="WikiBot Pro - AI-Powered Multilingual Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

LANGUAGES = {
    "English": {"code": "en", "flag": "🇺🇸", "native": "English"},
    "Telugu": {"code": "te", "flag": "🇮🇳", "native": "తెలుగు"},
    "Hindi": {"code": "hi", "flag": "🇮🇳", "native": "हिन्दी"},
    "Spanish": {"code": "es", "flag": "🇪🇸", "native": "Español"},
    "French": {"code": "fr", "flag": "🇫🇷", "native": "Français"},
    "German": {"code": "de", "flag": "🇩🇪", "native": "Deutsch"},
    "Italian": {"code": "it", "flag": "🇮🇹", "native": "Italiano"},
    "Portuguese": {"code": "pt", "flag": "🇵🇹", "native": "Português"},
    "Russian": {"code": "ru", "flag": "🇷🇺", "native": "Русский"},
    "Japanese": {"code": "ja", "flag": "🇯🇵", "native": "日本語"},
    "Chinese": {"code": "zh", "flag": "🇨🇳", "native": "中文"},
    "Arabic": {"code": "ar", "flag": "🇸🇦", "native": "العربية"},
    "Korean": {"code": "ko", "flag": "🇰🇷", "native": "한국어"},
    "Tamil": {"code": "ta", "flag": "🇮🇳", "native": "தமிழ்"},
    "Bengali": {"code": "bn", "flag": "🇧🇩", "native": "বাংলা"},
    "Marathi": {"code": "mr", "flag": "🇮🇳", "native": "मराठी"},
    "Gujarati": {"code": "gu", "flag": "🇮🇳", "native": "ગુજરાતી"},
    "Kannada": {"code": "kn", "flag": "🇮🇳", "native": "ಕನ್ನಡ"},
    "Malayalam": {"code": "ml", "flag": "🇮🇳", "native": "മലയാളം"},
    "Punjabi": {"code": "pa", "flag": "🇮🇳", "native": "ਪੰਜਾਬੀ"}
}

THEMES = {
    "Default": {"primary": "#1f77b4", "background": "#ffffff", "text": "#000000"},
    "Dark": {"primary": "#00d4aa", "background": "#0e1117", "text": "#ffffff"},
    "Ocean": {"primary": "#0077be", "background": "#f0f8ff", "text": "#003366"},
    "Forest": {"primary": "#228b22", "background": "#f5fff5", "text": "#006400"},
    "Sunset": {"primary": "#ff6b35", "background": "#fff5f0", "text": "#8b0000"}
}


class WikipediaAPI:
    def __init__(self):
        self.base_url = "https://{}.wikipedia.org/api/rest_v1"
        self.search_url = "https://{}.wikipedia.org/w/api.php"
        self.cache = {}

    def _get_cache_key(self, *args) -> str:
        """Generate a cache key from the arguments"""
        key_string = "_".join(str(arg) for arg in args)
        return hashlib.md5(key_string.encode()).hexdigest()

    def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
        """Search for Wikipedia articles, with caching"""
        cache_key = self._get_cache_key("search", query, lang, limit)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "search",
                "srsearch": query,
                "srlimit": limit,
                "srprop": "snippet|titlesnippet|size|wordcount|timestamp"
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            results = data.get("query", {}).get("search", [])
            self.cache[cache_key] = results
            return results
        except Exception as e:
            st.error(f"Search error: {str(e)}")
            return []

    def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
        """Get a page summary using the REST API, with caching"""
        cache_key = self._get_cache_key("summary", title, lang)
        if cache_key in self.cache:
            return self.cache[cache_key]
        try:
            encoded_title = quote(title.replace(" ", "_"))
            url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            result = response.json()
            self.cache[cache_key] = result
            return result
        except Exception as e:
            st.error(f"Summary error: {str(e)}")
            return None
    def get_page_content(self, title: str, lang: str = "en", sections: int = 3) -> Optional[str]:
        """Get the introductory extract of a page"""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "extracts",
                "exintro": True,
                "explaintext": True,
                "exsectionformat": "plain",
                "titles": title,
                "exchars": 3000
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            for page_id, page_data in pages.items():
                if "extract" in page_data:
                    return page_data["extract"]
            return None
        except Exception as e:
            st.error(f"Content error: {str(e)}")
            return None

    def get_random_article(self, lang: str = "en") -> Optional[Dict]:
        """Get a random Wikipedia article"""
        try:
            params = {
                "action": "query",
                "format": "json",
                "list": "random",
                "rnnamespace": 0,
                "rnlimit": 1
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            random_pages = data.get("query", {}).get("random", [])
            if random_pages:
                title = random_pages[0]["title"]
                return self.get_page_summary(title, lang)
            return None
        except Exception as e:
            st.error(f"Random article error: {str(e)}")
            return None

    def get_page_categories(self, title: str, lang: str = "en") -> List[str]:
        """Get categories for a Wikipedia page"""
        try:
            params = {
                "action": "query",
                "format": "json",
                "prop": "categories",
                "titles": title,
                "cllimit": 10
            }
            url = self.search_url.format(lang)
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            pages = data.get("query", {}).get("pages", {})
            for page_id, page_data in pages.items():
                if "categories" in page_data:
                    return [cat["title"].replace("Category:", "")
                            for cat in page_data["categories"]]
            return []
        except Exception:
            return []


def init_session_state():
    """Initialize session state variables"""
    if 'search_history' not in st.session_state:
        st.session_state.search_history = []
    if 'favorites' not in st.session_state:
        st.session_state.favorites = []
    if 'theme' not in st.session_state:
        st.session_state.theme = "Default"
    if 'user_preferences' not in st.session_state:
        st.session_state.user_preferences = {
            "default_language": "English",
            "results_per_page": 5,
            "summary_length": "Medium",
            "show_images": True,
            "auto_translate": False
        }


def apply_theme(theme_name: str):
    """Apply the selected theme (minimal stylesheet built from the THEMES palette)"""
    theme = THEMES[theme_name]
    # Inject the core colors of the selected theme: background, text, and accent.
    st.markdown(f"""
        <style>
        .stApp {{
            background-color: {theme['background']};
            color: {theme['text']};
        }}
        a {{
            color: {theme['primary']};
        }}
        </style>
    """, unsafe_allow_html=True)


def clean_html(text: str) -> str:
    """Remove HTML tags from text"""
    clean = re.compile('<.*?>')
    return re.sub(clean, '', text)


def summarize_text(text: str, length: str = "Medium") -> str:
    """Simple extractive summarization: keep the first few sentences"""
    sentences = re.split(r'[.!?]+', text)
    sentences = [s.strip() for s in sentences if s.strip()]
    if length == "Short":
        return '. '.join(sentences[:2]) + '.'
    elif length == "Medium":
        return '. '.join(sentences[:4]) + '.'
    else:  # Long
        return '. '.join(sentences[:6]) + '.'
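# A minimal sketch (not invoked by the app) of how the helpers above fit together:
# WikipediaAPI.search_articles() queries the MediaWiki search API, get_page_summary()
# queries the REST summary endpoint (both responses are cached in memory), and
# summarize_text() trims the extract to a few sentences. The query, language, and
# limit below are illustrative placeholders only.
def _demo_wikipedia_api(query: str = "Artificial Intelligence", lang: str = "en") -> None:
    api = WikipediaAPI()
    for hit in api.search_articles(query, lang, limit=3):
        summary = api.get_page_summary(hit["title"], lang)
        if summary:
            print(hit["title"], "->", summarize_text(summary.get("extract", ""), "Short"))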
def add_to_search_history(query: str, language: str, results_count: int):
    """Add a search to the history"""
    search_entry = {
        "query": query,
        "language": language,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results_count": results_count
    }
    st.session_state.search_history.insert(0, search_entry)
    # Keep only the last 50 searches
    st.session_state.search_history = st.session_state.search_history[:50]


def create_search_analytics():
    """Create search analytics visualizations"""
    if not st.session_state.search_history:
        return None
    df = pd.DataFrame(st.session_state.search_history)

    # Language distribution
    lang_counts = df['language'].value_counts()
    fig_lang = px.pie(
        values=lang_counts.values,
        names=lang_counts.index,
        title="Search Languages Distribution",
        color_discrete_sequence=px.colors.qualitative.Set3
    )

    # Search timeline
    df['date'] = pd.to_datetime(df['timestamp']).dt.date
    daily_searches = df.groupby('date').size().reset_index(name='searches')
    fig_timeline = px.line(
        daily_searches,
        x='date',
        y='searches',
        title="Daily Search Activity",
        markers=True
    )
    return fig_lang, fig_timeline


def sidebar_content():
    """Build the sidebar: appearance, preferences, quick actions, history, stats"""
    st.sidebar.title("🤖 WikiBot Pro")
    st.sidebar.markdown("---")

    # Theme selector
    st.sidebar.subheader("🎨 Appearance")
    theme = st.sidebar.selectbox(
        "Theme",
        options=list(THEMES.keys()),
        index=list(THEMES.keys()).index(st.session_state.theme),
        key="theme_selector"
    )
    if theme != st.session_state.theme:
        st.session_state.theme = theme
        st.rerun()

    # User preferences
    st.sidebar.subheader("⚙️ Preferences")
    default_lang = st.sidebar.selectbox(
        "Default Language",
        options=list(LANGUAGES.keys()),
        index=list(LANGUAGES.keys()).index(st.session_state.user_preferences["default_language"])
    )
    show_images = st.sidebar.checkbox(
        "Show Images",
        value=st.session_state.user_preferences["show_images"]
    )
    # Update preferences
    st.session_state.user_preferences.update({
        "default_language": default_lang,
        "show_images": show_images
    })

    # Quick actions
    st.sidebar.subheader("🚀 Quick Actions")
    if st.sidebar.button("🎲 Random Article", use_container_width=True):
        st.session_state.random_article_trigger = True
    if st.sidebar.button("📊 Search Analytics", use_container_width=True):
        st.session_state.show_analytics = True
    if st.sidebar.button("🗑️ Clear History", use_container_width=True):
        st.session_state.search_history = []
        st.sidebar.success("History cleared!")

    # Search history
    if st.session_state.search_history:
        st.sidebar.subheader("🕐 Recent Searches")
        for i, search in enumerate(st.session_state.search_history[:5]):
            with st.sidebar.expander(f"{search['query'][:20]}..."):
                st.write(f"**Language:** {search['language']}")
                st.write(f"**Time:** {search['timestamp']}")
                st.write(f"**Results:** {search['results_count']}")
                if st.button("🔄 Repeat", key=f"repeat_{i}"):
                    st.session_state.repeat_search = search

    # Statistics
    st.sidebar.subheader("📈 Statistics")
    total_searches = len(st.session_state.search_history)
    favorite_lang = "None"
    if st.session_state.search_history:
        lang_counter = Counter([s['language'] for s in st.session_state.search_history])
        favorite_lang = lang_counter.most_common(1)[0][0] if lang_counter else "None"
    st.sidebar.metric("Total Searches", total_searches)
    st.sidebar.metric("Favorite Language", favorite_lang)
    st.sidebar.metric("Favorites Saved", len(st.session_state.favorites))


def main():
    init_session_state()
    apply_theme(st.session_state.theme)

    # Sidebar
    sidebar_content()

    # Main header
    st.markdown(
        "<h1 style='text-align: center;'>🤖 WikiBot Pro</h1>",
        unsafe_allow_html=True
    )
    st.markdown(
        "<p style='text-align: center;'>AI-Powered Multilingual Wikipedia Assistant "
        "with Advanced Features</p>",
        unsafe_allow_html=True
    )

    # Feature highlights
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.markdown("#### 🌍\n**20+ Languages**\n\nMultilingual Support")
    with col2:
        st.markdown("#### 🤖\n**AI-Powered**\n\nSmart Summaries")
    with col3:
        st.markdown("**Fast Search**\n\nCached Results")
    with col4:
        st.markdown("#### 📊\n**Analytics**\n\nSearch Insights")

    # Initialize API
    wiki_api = WikipediaAPI()

    # Handle random article trigger
    if hasattr(st.session_state, 'random_article_trigger'):
        with st.spinner("🎲 Finding a random article..."):
            random_article = wiki_api.get_random_article(
                LANGUAGES[st.session_state.user_preferences["default_language"]]["code"]
            )
            if random_article:
                st.success("🎲 Random Article Discovery!")
                display_article_card(
                    random_article, wiki_api, 0,
                    LANGUAGES[st.session_state.user_preferences["default_language"]]["code"]
                )
        delattr(st.session_state, 'random_article_trigger')

    # Handle analytics display
    if hasattr(st.session_state, 'show_analytics'):
        st.subheader("📊 Search Analytics")
        analytics = create_search_analytics()
        if analytics:
            col1, col2 = st.columns(2)
            with col1:
                st.plotly_chart(analytics[0], use_container_width=True)
            with col2:
                st.plotly_chart(analytics[1], use_container_width=True)
        else:
            st.info("No search history available for analytics.")
        delattr(st.session_state, 'show_analytics')

    # Search interface
    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input(
            "🔍 Search Wikipedia",
            placeholder="e.g., 'Artificial Intelligence', 'కృష్ణ నది', 'गांधी जी'",
            help="Enter your search query in any language",
            value=getattr(st.session_state, 'repeat_search', {}).get('query', '')
        )
    with col2:
        # Language options with flags and native names
        lang_options = [f"{LANGUAGES[lang]['flag']} {lang} ({LANGUAGES[lang]['native']})"
                        for lang in LANGUAGES.keys()]
        selected_lang_display = st.selectbox(
            "🌍 Language",
            options=lang_options,
            index=list(LANGUAGES.keys()).index(st.session_state.user_preferences["default_language"])
        )
        # Extract the language name from the "<flag> <name> (<native>)" display string
        selected_lang = selected_lang_display.split(' ', 1)[1].split(' (')[0]

    # Advanced options
    with st.expander("⚙️ Advanced Search Options"):
        col1, col2, col3 = st.columns(3)
        with col1:
            num_results = st.slider("📄 Number of results", 1, 15,
                                    st.session_state.user_preferences["results_per_page"])
        with col2:
            summary_length = st.selectbox(
                "📝 Summary length", ["Short", "Medium", "Long"],
                index=["Short", "Medium", "Long"].index(
                    st.session_state.user_preferences["summary_length"])
            )
        with col3:
            search_mode = st.selectbox("🔍 Search mode", ["Standard", "Recent", "Popular"])

    # Search button
    if st.button("🔎 Search Wikipedia", type="primary", use_container_width=True):
        if query:
            lang_code = LANGUAGES[selected_lang]["code"]
            with st.spinner(f"🔍 Searching Wikipedia in {selected_lang}..."):
                # Search for articles
                search_results = wiki_api.search_articles(query, lang_code, num_results)
                if search_results:
                    # Add to search history
                    add_to_search_history(query, selected_lang, len(search_results))
                    st.success(f"✅ Found {len(search_results)} results in {selected_lang}")
                    # Display results as enhanced cards
                    for idx, result in enumerate(search_results):
                        display_article_card(result, wiki_api, idx, lang_code, summary_length)
                else:
                    st.warning(f"❌ No results found for '{query}' in {selected_lang}")
                    # Suggest alternative searches
                    st.info("💡 **Suggestions:**")
                    st.write("• Try different keywords or phrases")
                    st.write("• Switch to a different language")
                    st.write("• Check spelling and try simpler terms")
                    st.write("• Use the Random Article feature to explore")
        else:
            st.warning("⚠️ Please enter a search query")

    # Clear any repeated search once it has been used
    if hasattr(st.session_state, 'repeat_search'):
        delattr(st.session_state, 'repeat_search')

    # Footer with feature overview
    st.markdown("---")
    st.markdown("### 🌟 WikiBot Pro Features")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
**🌍 Multilingual Support**
- 20+ languages including Indian languages
- Native script support
- Cultural context awareness
""")
    with col2:
        st.markdown("""
**🤖 AI-Powered Features**
- Smart text summarization
- Intelligent caching
- Personalized recommendations
""")
    with col3:
        st.markdown("""
**📊 Advanced Analytics**
- Search history tracking
- Language usage patterns
- Performance insights
""")


def display_article_card(result: Dict, wiki_api: WikipediaAPI, idx: int, lang_code: str,
                         summary_length: str = "Medium"):
    """Display an article card with metadata, summary, image, categories, and actions"""
    title = result.get("title", "")

    # Article header with metadata
    col1, col2 = st.columns([3, 1])
    with col1:
        st.markdown(f"#### {idx + 1}. {title}")
        # Metadata
        wordcount = result.get("wordcount", 0)
        size = result.get("size", 0)
        timestamp = result.get("timestamp", "")
        st.caption(
            f"📝 {wordcount} words · 📊 {size} bytes · "
            f"🕐 {timestamp[:10] if timestamp else 'Unknown'}"
        )
    with col2:
        # Action buttons
        if st.button("⭐ Favorite", key=f"fav_{idx}"):
            if title not in st.session_state.favorites:
                st.session_state.favorites.append(title)
                st.success("Added to favorites!")
        if st.button("🔗 Share", key=f"share_{idx}"):
            st.info(f"Share this article: {title}")

    # Get the detailed summary
    summary_data = wiki_api.get_page_summary(title, lang_code)
    if summary_data:
        # Display alongside an image if one is available
        if st.session_state.user_preferences["show_images"] and "thumbnail" in summary_data:
            col1, col2 = st.columns([1, 3])
            with col1:
                st.image(summary_data["thumbnail"]["source"], width=150, caption="Wikipedia Image")
            with col2:
                display_article_content(summary_data, summary_length)
        else:
            display_article_content(summary_data, summary_length)

        # Categories (show only the first 5)
        categories = wiki_api.get_page_categories(title, lang_code)
        if categories:
            st.markdown("**📚 Categories:** " + ", ".join(categories[:5]))

        # Wikipedia link
        wiki_url = f"https://{lang_code}.wikipedia.org/wiki/{title.replace(' ', '_')}"
        st.markdown(f"🔗 [Read full article on Wikipedia]({wiki_url})")
    else:
        # Fall back to the search snippet
        snippet = result.get("snippet", "No summary available")
        st.write(clean_html(snippet))


def display_article_content(summary_data: Dict, summary_length: str):
    """Display article content at the selected summary length"""
    extract = summary_data.get("extract", "")
    if extract:
        st.write(summarize_text(extract, summary_length))
    # Display coordinates when the article has them
    if "coordinates" in summary_data:
        coords = summary_data["coordinates"]
        st.info(f"📍 Location: {coords.get('lat', 0):.4f}, {coords.get('lon', 0):.4f}")


if __name__ == "__main__":
    main()