qwerty45-uiop committed
Commit 498d485 · verified · 1 Parent(s): 31ee490

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +540 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,542 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
-
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
+ import requests
+ import json
+ from typing import Dict, List, Optional
+ import re
+ from urllib.parse import quote
+ import asyncio
+ import aiohttp
+
+ # Configure page
+ st.set_page_config(
+     page_title="WikiBot - Multilingual Assistant",
+     page_icon="📚",
+     layout="wide",
+     initial_sidebar_state="collapsed"
+ )
+
+ # Language codes mapping
+ LANGUAGES = {
+     "English": "en",
+     "Telugu": "te",
+     "Hindi": "hi",
+     "Spanish": "es",
+     "French": "fr",
+     "German": "de",
+     "Italian": "it",
+     "Portuguese": "pt",
+     "Russian": "ru",
+     "Japanese": "ja",
+     "Chinese": "zh",
+     "Arabic": "ar",
+     "Korean": "ko"
+ }
+
+ class OllamaLLM:
+     def __init__(self, base_url: str = "http://localhost:11434"):
+         self.base_url = base_url
+         self.api_url = f"{base_url}/api/generate"
+         self.models_url = f"{base_url}/api/tags"
+
+     def check_connection(self) -> bool:
+         """Check if Ollama is running"""
+         try:
+             response = requests.get(self.models_url, timeout=5)
+             return response.status_code == 200
+         except Exception:
+             return False
+
+     def get_available_models(self) -> List[str]:
+         """Get list of available models"""
+         try:
+             response = requests.get(self.models_url, timeout=10)
+             if response.status_code == 200:
+                 data = response.json()
+                 return [model["name"] for model in data.get("models", [])]
+             return []
+         except Exception:
+             return []
+
+     def generate_summary(self, text: str, model: str = "llama3.2", language: str = "English",
+                          summary_type: str = "concise") -> str:
+         """Generate AI summary using local LLM"""
+         try:
+             # Craft prompt based on language and summary type
+             if summary_type == "concise":
+                 prompt = f"""Summarize the following Wikipedia content in {language} in 2-3 sentences.
+                 Make it clear and informative:
+
+                 {text}
+
+                 Summary:"""
+             elif summary_type == "detailed":
+                 prompt = f"""Provide a comprehensive summary of the following Wikipedia content in {language}.
+                 Include key points, important facts, and context:
+
+                 {text}
+
+                 Detailed Summary:"""
+             else:  # explanatory
+                 prompt = f"""Explain the following Wikipedia content in {language} in a simple,
+                 easy-to-understand way as if explaining to someone unfamiliar with the topic:
+
+                 {text}
+
+                 Explanation:"""
+
+             # Request to Ollama
+             payload = {
+                 "model": model,
+                 "prompt": prompt,
+                 "stream": False,
+                 "options": {
+                     "temperature": 0.7,
+                     "num_predict": 500 if summary_type == "detailed" else 200
+                 }
+             }
+
+             response = requests.post(self.api_url, json=payload, timeout=30)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 return data.get("response", "").strip()
+             else:
+                 return f"Error: {response.status_code}"
+
+         except Exception as e:
+             return f"LLM Error: {str(e)}"
+
+     def translate_text(self, text: str, target_language: str, model: str = "llama3.2") -> str:
+         """Translate text using local LLM"""
+         try:
+             prompt = f"""Translate the following text to {target_language}.
+             Provide only the translation, no additional text:
+
+             {text}
+
+             Translation:"""
+
+             payload = {
+                 "model": model,
+                 "prompt": prompt,
+                 "stream": False,
+                 "options": {
+                     "temperature": 0.3,
+                     "num_predict": 300
+                 }
+             }
+
+             response = requests.post(self.api_url, json=payload, timeout=20)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 return data.get("response", "").strip()
+             else:
+                 return text  # Return original if translation fails
+
+         except Exception:
+             return text
+
+ class WikipediaAPI:
+     def __init__(self):
+         self.base_url = "https://{}.wikipedia.org/api/rest_v1"
+         self.search_url = "https://{}.wikipedia.org/w/api.php"
+
+     def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
+         """Search for Wikipedia articles"""
+         try:
+             params = {
+                 "action": "query",
+                 "format": "json",
+                 "list": "search",
+                 "srsearch": query,
+                 "srlimit": limit,
+                 "srprop": "snippet|titlesnippet"
+             }
+
+             url = self.search_url.format(lang)
+             response = requests.get(url, params=params, timeout=10)
+             response.raise_for_status()
+
+             data = response.json()
+             return data.get("query", {}).get("search", [])
+         except Exception as e:
+             st.error(f"Search error: {str(e)}")
+             return []
+
+     def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
+         """Get page summary using REST API"""
+         try:
+             encoded_title = quote(title.replace(" ", "_"))
+             url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
+
+             response = requests.get(url, timeout=10)
+             response.raise_for_status()
+
+             return response.json()
+         except Exception as e:
+             st.error(f"Summary error: {str(e)}")
+             return None
+
+     def get_page_content(self, title: str, lang: str = "en", char_limit: int = 3000) -> Optional[str]:
+         """Get page content sections"""
+         try:
+             params = {
+                 "action": "query",
+                 "format": "json",
+                 "prop": "extracts",
+                 "exintro": False,
+                 "explaintext": True,
+                 "exsectionformat": "plain",
+                 "titles": title,
+                 "exchars": char_limit
+             }
+
+             url = self.search_url.format(lang)
+             response = requests.get(url, params=params, timeout=10)
+             response.raise_for_status()
+
+             data = response.json()
+             pages = data.get("query", {}).get("pages", {})
+
+             for page_id, page_data in pages.items():
+                 if "extract" in page_data:
+                     return page_data["extract"]
+
+             return None
+         except Exception as e:
+             st.error(f"Content error: {str(e)}")
+             return None
+
+ def clean_html(text: str) -> str:
+     """Remove HTML tags from text"""
+     clean = re.compile('<.*?>')
+     return re.sub(clean, '', text)
+
+ def simple_summarize(text: str, max_sentences: int = 3) -> str:
+     """Fallback simple text summarization"""
+     sentences = text.split('. ')
+     summary_sentences = sentences[:max_sentences]
+     return '. '.join(summary_sentences) + ('.' if not summary_sentences[-1].endswith('.') else '')
+
+ def main():
+     # Custom CSS for mobile-first design
+     st.markdown("""
+     <style>
+     .main-header {
+         text-align: center;
+         color: #1f77b4;
+         margin-bottom: 2rem;
+     }
+     .search-container {
+         background-color: #f8f9fa;
+         padding: 1rem;
+         border-radius: 10px;
+         margin-bottom: 1rem;
+     }
+     .result-card {
+         background-color: white;
+         padding: 1rem;
+         border-radius: 8px;
+         border: 1px solid #dee2e6;
+         margin-bottom: 1rem;
+         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+     }
+     .article-title {
+         color: #007bff;
+         font-weight: bold;
+         margin-bottom: 0.5rem;
+     }
+     .llm-status {
+         padding: 0.5rem;
+         border-radius: 5px;
+         margin-bottom: 1rem;
+         font-size: 0.9rem;
+     }
+     .status-connected {
+         background-color: #d4edda;
+         color: #155724;
+         border: 1px solid #c3e6cb;
+     }
+     .status-disconnected {
+         background-color: #f8d7da;
+         color: #721c24;
+         border: 1px solid #f5c6cb;
+     }
+     .ai-summary {
+         background-color: #f0f8ff;
+         padding: 1rem;
+         border-radius: 8px;
+         border-left: 4px solid #007bff;
+         margin: 1rem 0;
+     }
+     @media (max-width: 768px) {
+         .stSelectbox, .stTextInput {
+             font-size: 16px;
+         }
+     }
+     </style>
+     """, unsafe_allow_html=True)
+
+     # Header
+     st.markdown("<h1 class='main-header'>🤖 WikiBot - AI-Powered Multilingual Assistant</h1>", unsafe_allow_html=True)
+     st.markdown("<p style='text-align: center; color: #666;'>Search Wikipedia with Local LLM Intelligence</p>", unsafe_allow_html=True)
+
+     # Initialize APIs
+     wiki_api = WikipediaAPI()
+     llm = OllamaLLM()
+
+     # Check LLM connection
+     llm_connected = llm.check_connection()
+     available_models = llm.get_available_models() if llm_connected else []
+
+     # LLM Status
+     if llm_connected:
+         st.markdown(f"""
+         <div class='llm-status status-connected'>
+             ✅ <strong>Local LLM Connected</strong> - Ollama running with {len(available_models)} models
+         </div>
+         """, unsafe_allow_html=True)
+     else:
+         st.markdown("""
+         <div class='llm-status status-disconnected'>
+             ❌ <strong>Local LLM Disconnected</strong> - Install and run Ollama for AI features
+         </div>
+         """, unsafe_allow_html=True)
+         st.info("To enable AI features: Install Ollama from https://ollama.ai and run `ollama serve`")
+
+     # Search interface
+     st.markdown("<div class='search-container'>", unsafe_allow_html=True)
+
+     col1, col2 = st.columns([3, 1])
+
+     with col1:
+         query = st.text_input(
+             "🔍 Search Wikipedia",
+             placeholder="e.g., 'Explain Kargil War in Telugu'",
+             help="Enter your search query in any language"
+         )
+
+     with col2:
+         selected_lang = st.selectbox(
+             "🌍 Language",
+             options=list(LANGUAGES.keys()),
+             index=0
+         )
+
+     # Advanced options
+     with st.expander("⚙️ Advanced Options"):
+         col1, col2, col3 = st.columns(3)
+
+         with col1:
+             num_results = st.slider("Number of results", 1, 10, 3)
+
+         with col2:
+             if llm_connected:
+                 summary_mode = st.selectbox(
+                     "AI Summary Type",
+                     ["concise", "detailed", "explanatory"],
+                     index=0
+                 )
+             else:
+                 summary_mode = st.selectbox(
+                     "Summary Type",
+                     ["short", "medium", "long"],
+                     index=1
+                 )
+
+         with col3:
+             if llm_connected and available_models:
+                 selected_model = st.selectbox(
+                     "LLM Model",
+                     options=available_models,
+                     index=0
+                 )
+             else:
+                 st.info("No models available")
+                 selected_model = None
+
+     # Translation options
+     if llm_connected:
+         col1, col2 = st.columns(2)
+         with col1:
+             enable_translation = st.checkbox("🌐 Enable Translation", value=False)
+         with col2:
+             if enable_translation:
+                 target_lang = st.selectbox(
+                     "Translate to",
+                     options=list(LANGUAGES.keys()),
+                     index=1
+                 )
+
+     st.markdown("</div>", unsafe_allow_html=True)
+
+     # Search button
+     if st.button("🔎 Search with AI", type="primary", use_container_width=True):
+         if query:
+             lang_code = LANGUAGES[selected_lang]
+
+             with st.spinner("Searching Wikipedia and processing with AI..."):
+                 # Search for articles
+                 search_results = wiki_api.search_articles(query, lang_code, num_results)
+
+                 if search_results:
+                     st.success(f"Found {len(search_results)} results - Processing with {'AI' if llm_connected else 'basic'} summarization")
+
+                     for idx, result in enumerate(search_results):
+                         with st.container():
+                             st.markdown("<div class='result-card'>", unsafe_allow_html=True)
+
+                             # Article title
+                             title = result.get("title", "")
+                             st.markdown(f"<div class='article-title'>{idx+1}. {title}</div>", unsafe_allow_html=True)
+
+                             # Get detailed content for AI processing
+                             content = wiki_api.get_page_content(title, lang_code)
+                             summary_data = wiki_api.get_page_summary(title, lang_code)
+
+                             if content and llm_connected and selected_model:
+                                 # AI-powered summary
+                                 with st.spinner("Generating AI summary..."):
+                                     ai_summary = llm.generate_summary(
+                                         content,
+                                         selected_model,
+                                         selected_lang,
+                                         summary_mode
+                                     )
+
+                                 if ai_summary and not ai_summary.startswith("Error") and not ai_summary.startswith("LLM Error"):
+                                     st.markdown("<div class='ai-summary'>", unsafe_allow_html=True)
+                                     st.markdown("**🤖 AI Summary:**")
+                                     st.write(ai_summary)
+
+                                     # Translation if enabled
+                                     if 'enable_translation' in locals() and enable_translation and target_lang != selected_lang:
+                                         with st.spinner(f"Translating to {target_lang}..."):
+                                             translated = llm.translate_text(ai_summary, target_lang, selected_model)
+                                             if translated != ai_summary:
+                                                 st.markdown(f"**🌐 Translation to {target_lang}:**")
+                                                 st.write(translated)
+
+                                     st.markdown("</div>", unsafe_allow_html=True)
+                                 else:
+                                     # Fallback to simple summary
+                                     st.warning("AI summary failed, using fallback")
+                                     fallback_summary = simple_summarize(content, 3)
+                                     st.write(fallback_summary)
+
+                             elif summary_data:
+                                 # Standard Wikipedia summary
+                                 summary_text = summary_data.get("extract", "")
+                                 if not llm_connected:
+                                     if summary_mode == "short":
+                                         summary_text = simple_summarize(summary_text, 2)
+                                     elif summary_mode == "medium":
+                                         summary_text = simple_summarize(summary_text, 4)
+
+                                 st.write(summary_text)
+
+                             else:
+                                 # Fallback to search snippet
+                                 snippet = clean_html(result.get("snippet", ""))
+                                 st.write(snippet)
+
+                             # Display thumbnail if available
+                             if summary_data and "thumbnail" in summary_data:
+                                 st.image(summary_data["thumbnail"]["source"], width=150)
+
+                             # Wikipedia link
+                             if summary_data and "content_urls" in summary_data:
+                                 wiki_url = summary_data["content_urls"]["desktop"]["page"]
+                                 st.markdown(f"[📖 Read full article on Wikipedia]({wiki_url})")
+
+                             # Detailed content button
+                             if st.button("📝 Show detailed content", key=f"detail_{idx}"):
+                                 if content:
+                                     st.text_area(
+                                         "Full Content",
+                                         content,
+                                         height=300,
+                                         key=f"content_{idx}"
+                                     )
+                                 else:
+                                     st.warning("Detailed content not available")
+
+                             st.markdown("</div>", unsafe_allow_html=True)
+                             st.markdown("---")
+
+                 else:
+                     st.warning(f"No results found for '{query}' in {selected_lang}")
+                     st.info("Try different keywords or switch to another language")
+
+         else:
+             st.warning("Please enter a search query")
+
+     # Status dashboard
+     st.markdown("---")
+     col1, col2, col3, col4 = st.columns(4)
+
+     with col1:
+         st.metric("🌍 Languages", len(LANGUAGES))
+
+     with col2:
+         st.metric("🤖 LLM Status", "Connected" if llm_connected else "Offline")
+
+     with col3:
+         st.metric("📚 Models", len(available_models))
+
+     with col4:
+         st.metric("🔍 Search Mode", "AI-Powered" if llm_connected else "Standard")
+
+     # Setup instructions
+     with st.expander("🛠️ Setup Instructions"):
+         st.markdown("""
+         ### Install Ollama for AI Features:
+
+         1. **Install Ollama:**
+         ```bash
+         # macOS/Linux
+         curl -fsSL https://ollama.ai/install.sh | sh
+
+         # Windows - Download from https://ollama.ai
+         ```
+
+         2. **Pull a model:**
+         ```bash
+         ollama pull llama3.2
+         # or
+         ollama pull mistral
+         ollama pull codellama
+         ```
+
+         3. **Start the Ollama server:**
+         ```bash
+         ollama serve
+         ```
+
+         4. **Restart this app** - LLM features will be enabled automatically.
+
+         ### Recommended Models:
+         - **llama3.2** - Great for general summarization
+         - **mistral** - Fast and efficient
+         - **codellama** - Good for technical content
+         """)
+
+     # Usage examples
+     with st.expander("💡 Usage Examples"):
+         st.markdown("""
+         **Try these example queries:**
+         - "Explain Kargil War in Telugu" → AI generates a Telugu explanation
+         - "Machine Learning" → Detailed AI summary with translation
+         - "Climate Change" → AI explanatory summary
+         - "Quantum Computing" → Technical AI analysis
+
+         **AI Features:**
+         - 🤖 Intelligent summarization (concise/detailed/explanatory)
+         - 🌐 Multi-language translation
+         - 📝 Context-aware explanations
+         - 🔍 Enhanced content understanding
+         """)
+
+ if __name__ == "__main__":
+     main()
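
For local verification before launching the app, the following standalone sketch (not part of the commit) exercises the same endpoints the new module relies on: Ollama's `/api/tags` and `/api/generate`, and Wikipedia's REST summary endpoint. The helper names `check_ollama` and `check_wikipedia`, the model name `llama3.2`, the sample article title, and the default base URL are illustrative assumptions; adjust them to whatever `ollama list` reports on your machine.

```python
# Sanity check for the two services streamlit_app.py talks to.
# Assumptions: Ollama listens on http://localhost:11434 and a model
# named "llama3.2" has already been pulled.
import requests

OLLAMA = "http://localhost:11434"

def check_ollama(model: str = "llama3.2") -> None:
    # List installed models via the /api/tags endpoint
    tags = requests.get(f"{OLLAMA}/api/tags", timeout=5).json()
    print("Installed models:", [m["name"] for m in tags.get("models", [])])

    # Run a minimal non-streaming generation via /api/generate
    reply = requests.post(
        f"{OLLAMA}/api/generate",
        json={"model": model, "prompt": "Say hello in one sentence.", "stream": False},
        timeout=60,
    ).json()
    print("Sample generation:", reply.get("response", "").strip())

def check_wikipedia(title: str = "Kargil War", lang: str = "en") -> None:
    # Fetch a page summary from the Wikipedia REST API
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title.replace(' ', '_')}"
    summary = requests.get(url, timeout=10).json()
    print("Wikipedia extract:", summary.get("extract", "")[:200], "...")

if __name__ == "__main__":
    check_wikipedia()
    check_ollama()
```

If both checks print sensible output, running `streamlit run src/streamlit_app.py` should show the LLM status banner as connected.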