qwerty45-uiop committed
Commit 498d485 · verified · 1 Parent(s): 31ee490

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +540 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,542 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
-
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
+ import requests
+ import json
+ from typing import Dict, List, Optional
+ import re
+ from urllib.parse import quote
+ import asyncio
+ import aiohttp
+
+ # Configure page
+ st.set_page_config(
+     page_title="WikiBot - Multilingual Assistant",
+     page_icon="📚",
+     layout="wide",
+     initial_sidebar_state="collapsed"
+ )
+
+ # Language codes mapping
+ LANGUAGES = {
+     "English": "en",
+     "Telugu": "te",
+     "Hindi": "hi",
+     "Spanish": "es",
+     "French": "fr",
+     "German": "de",
+     "Italian": "it",
+     "Portuguese": "pt",
+     "Russian": "ru",
+     "Japanese": "ja",
+     "Chinese": "zh",
+     "Arabic": "ar",
+     "Korean": "ko"
+ }
+
+ class OllamaLLM:
+     def __init__(self, base_url: str = "http://localhost:11434"):
+         self.base_url = base_url
+         self.api_url = f"{base_url}/api/generate"
+         self.models_url = f"{base_url}/api/tags"
+
+     def check_connection(self) -> bool:
+         """Check if Ollama is running"""
+         try:
+             response = requests.get(self.models_url, timeout=5)
+             return response.status_code == 200
+         except Exception:
+             return False
+
+     def get_available_models(self) -> List[str]:
+         """Get list of available models"""
+         try:
+             response = requests.get(self.models_url, timeout=10)
+             if response.status_code == 200:
+                 data = response.json()
+                 return [model["name"] for model in data.get("models", [])]
+             return []
+         except Exception:
+             return []
+
+     def generate_summary(self, text: str, model: str = "llama3.2", language: str = "English",
+                          summary_type: str = "concise") -> str:
+         """Generate AI summary using local LLM"""
+         try:
+             # Craft prompt based on language and summary type
+             if summary_type == "concise":
+                 prompt = f"""Summarize the following Wikipedia content in {language} in 2-3 sentences.
+                 Make it clear and informative:
+
+                 {text}
+
+                 Summary:"""
+             elif summary_type == "detailed":
+                 prompt = f"""Provide a comprehensive summary of the following Wikipedia content in {language}.
+                 Include key points, important facts, and context:
+
+                 {text}
+
+                 Detailed Summary:"""
+             else:  # explanatory
+                 prompt = f"""Explain the following Wikipedia content in {language} in a simple,
+                 easy-to-understand way as if explaining to someone unfamiliar with the topic:
+
+                 {text}
+
+                 Explanation:"""
+
+             # Request to Ollama
+             payload = {
+                 "model": model,
+                 "prompt": prompt,
+                 "stream": False,
+                 "options": {
+                     "temperature": 0.7,
+                     "num_predict": 500 if summary_type == "detailed" else 200
+                 }
+             }
+
+             response = requests.post(self.api_url, json=payload, timeout=30)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 return data.get("response", "").strip()
+             else:
+                 return f"Error: {response.status_code}"
+
+         except Exception as e:
+             return f"LLM Error: {str(e)}"
+
+     def translate_text(self, text: str, target_language: str, model: str = "llama3.2") -> str:
+         """Translate text using local LLM"""
+         try:
+             prompt = f"""Translate the following text to {target_language}.
+             Provide only the translation, no additional text:
+
+             {text}
+
+             Translation:"""
+
+             payload = {
+                 "model": model,
+                 "prompt": prompt,
+                 "stream": False,
+                 "options": {
+                     "temperature": 0.3,
+                     "num_predict": 300
+                 }
+             }
+
+             response = requests.post(self.api_url, json=payload, timeout=20)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 return data.get("response", "").strip()
+             else:
+                 return text  # Return original if translation fails
+
+         except Exception:
+             return text
+
+ class WikipediaAPI:
+     def __init__(self):
+         self.base_url = "https://{}.wikipedia.org/api/rest_v1"
+         self.search_url = "https://{}.wikipedia.org/w/api.php"
+
+     def search_articles(self, query: str, lang: str = "en", limit: int = 5) -> List[Dict]:
+         """Search for Wikipedia articles"""
+         try:
+             params = {
+                 "action": "query",
+                 "format": "json",
+                 "list": "search",
+                 "srsearch": query,
+                 "srlimit": limit,
+                 "srprop": "snippet|titlesnippet"
+             }
+
+             url = self.search_url.format(lang)
+             response = requests.get(url, params=params, timeout=10)
+             response.raise_for_status()
+
+             data = response.json()
+             return data.get("query", {}).get("search", [])
+         except Exception as e:
+             st.error(f"Search error: {str(e)}")
+             return []
+
+     def get_page_summary(self, title: str, lang: str = "en") -> Optional[Dict]:
+         """Get page summary using REST API"""
+         try:
+             encoded_title = quote(title.replace(" ", "_"))
+             url = f"{self.base_url.format(lang)}/page/summary/{encoded_title}"
+
+             response = requests.get(url, timeout=10)
+             response.raise_for_status()
+
+             return response.json()
+         except Exception as e:
+             st.error(f"Summary error: {str(e)}")
+             return None
+
+     def get_page_content(self, title: str, lang: str = "en", char_limit: int = 3000) -> Optional[str]:
+         """Get page content sections"""
+         try:
+             params = {
+                 "action": "query",
+                 "format": "json",
+                 "prop": "extracts",
+                 "exintro": False,
+                 "explaintext": True,
+                 "exsectionformat": "plain",
+                 "titles": title,
+                 "exchars": char_limit
+             }
+
+             url = self.search_url.format(lang)
+             response = requests.get(url, params=params, timeout=10)
+             response.raise_for_status()
+
+             data = response.json()
+             pages = data.get("query", {}).get("pages", {})
+
+             for page_id, page_data in pages.items():
+                 if "extract" in page_data:
+                     return page_data["extract"]
+
+             return None
+         except Exception as e:
+             st.error(f"Content error: {str(e)}")
+             return None
+
+ def clean_html(text: str) -> str:
+     """Remove HTML tags from text"""
+     clean = re.compile('<.*?>')
+     return re.sub(clean, '', text)
+
+ def simple_summarize(text: str, max_sentences: int = 3) -> str:
+     """Fallback simple text summarization"""
+     sentences = text.split('. ')
+     summary_sentences = sentences[:max_sentences]
+     return '. '.join(summary_sentences) + ('.' if not summary_sentences[-1].endswith('.') else '')
+
+ def main():
+     # Custom CSS for mobile-first design
+     st.markdown("""
+     <style>
+     .main-header {
+         text-align: center;
+         color: #1f77b4;
+         margin-bottom: 2rem;
+     }
+     .search-container {
+         background-color: #f8f9fa;
+         padding: 1rem;
+         border-radius: 10px;
+         margin-bottom: 1rem;
+     }
+     .result-card {
+         background-color: white;
+         padding: 1rem;
+         border-radius: 8px;
+         border: 1px solid #dee2e6;
+         margin-bottom: 1rem;
+         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+     }
+     .article-title {
+         color: #007bff;
+         font-weight: bold;
+         margin-bottom: 0.5rem;
+     }
+     .llm-status {
+         padding: 0.5rem;
+         border-radius: 5px;
+         margin-bottom: 1rem;
+         font-size: 0.9rem;
+     }
+     .status-connected {
+         background-color: #d4edda;
+         color: #155724;
+         border: 1px solid #c3e6cb;
+     }
+     .status-disconnected {
+         background-color: #f8d7da;
+         color: #721c24;
+         border: 1px solid #f5c6cb;
+     }
+     .ai-summary {
+         background-color: #f0f8ff;
+         padding: 1rem;
+         border-radius: 8px;
+         border-left: 4px solid #007bff;
+         margin: 1rem 0;
+     }
+     @media (max-width: 768px) {
+         .stSelectbox, .stTextInput {
+             font-size: 16px;
+         }
+     }
+     </style>
+     """, unsafe_allow_html=True)
+
+     # Header
+     st.markdown("<h1 class='main-header'>🤖 WikiBot - AI-Powered Multilingual Assistant</h1>", unsafe_allow_html=True)
+     st.markdown("<p style='text-align: center; color: #666;'>Search Wikipedia with Local LLM Intelligence</p>", unsafe_allow_html=True)
+
+     # Initialize APIs
+     wiki_api = WikipediaAPI()
+     llm = OllamaLLM()
+
+     # Check LLM connection
+     llm_connected = llm.check_connection()
+     available_models = llm.get_available_models() if llm_connected else []
+
+     # LLM Status
+     if llm_connected:
+         st.markdown(f"""
+         <div class='llm-status status-connected'>
+             ✅ <strong>Local LLM Connected</strong> - Ollama running with {len(available_models)} models
+         </div>
+         """, unsafe_allow_html=True)
+     else:
+         st.markdown("""
+         <div class='llm-status status-disconnected'>
+             ❌ <strong>Local LLM Disconnected</strong> - Install and run Ollama for AI features
+         </div>
+         """, unsafe_allow_html=True)
+         st.info("To enable AI features: Install Ollama from https://ollama.ai and run `ollama serve`")
+
+     # Search interface
+     st.markdown("<div class='search-container'>", unsafe_allow_html=True)
+
+     col1, col2 = st.columns([3, 1])
+
+     with col1:
+         query = st.text_input(
+             "🔍 Search Wikipedia",
+             placeholder="e.g., 'Explain Kargil War in Telugu'",
+             help="Enter your search query in any language"
+         )
+
+     with col2:
+         selected_lang = st.selectbox(
+             "🌍 Language",
+             options=list(LANGUAGES.keys()),
+             index=0
+         )
+
+     # Advanced options
+     with st.expander("⚙️ Advanced Options"):
+         col1, col2, col3 = st.columns(3)
+
+         with col1:
+             num_results = st.slider("Number of results", 1, 10, 3)
+
+         with col2:
+             if llm_connected:
+                 summary_mode = st.selectbox(
+                     "AI Summary Type",
+                     ["concise", "detailed", "explanatory"],
+                     index=0
+                 )
+             else:
+                 summary_mode = st.selectbox(
+                     "Summary Type",
+                     ["short", "medium", "long"],
+                     index=1
+                 )
+
+         with col3:
+             if llm_connected and available_models:
+                 selected_model = st.selectbox(
+                     "LLM Model",
+                     options=available_models,
+                     index=0
+                 )
+             else:
+                 st.info("No models available")
+                 selected_model = None
+
+     # Translation options
+     if llm_connected:
+         col1, col2 = st.columns(2)
+         with col1:
+             enable_translation = st.checkbox("🌐 Enable Translation", value=False)
+         with col2:
+             if enable_translation:
+                 target_lang = st.selectbox(
+                     "Translate to",
+                     options=list(LANGUAGES.keys()),
+                     index=1
+                 )
+
+     st.markdown("</div>", unsafe_allow_html=True)
+
+     # Search button
+     if st.button("🔎 Search with AI", type="primary", use_container_width=True):
+         if query:
+             lang_code = LANGUAGES[selected_lang]
+
+             with st.spinner("Searching Wikipedia and processing with AI..."):
+                 # Search for articles
+                 search_results = wiki_api.search_articles(query, lang_code, num_results)
+
+                 if search_results:
+                     st.success(f"Found {len(search_results)} results - Processing with {'AI' if llm_connected else 'basic'} summarization")
+
+                     for idx, result in enumerate(search_results):
+                         with st.container():
+                             st.markdown("<div class='result-card'>", unsafe_allow_html=True)
+
+                             # Article title
+                             title = result.get("title", "")
+                             st.markdown(f"<div class='article-title'>{idx+1}. {title}</div>", unsafe_allow_html=True)
+
+                             # Get detailed content for AI processing
+                             content = wiki_api.get_page_content(title, lang_code)
+                             summary_data = wiki_api.get_page_summary(title, lang_code)
+
+                             if content and llm_connected and selected_model:
+                                 # AI-powered summary
+                                 with st.spinner("Generating AI summary..."):
+                                     ai_summary = llm.generate_summary(
+                                         content,
+                                         selected_model,
+                                         selected_lang,
+                                         summary_mode
+                                     )
+
+                                 if ai_summary and not ai_summary.startswith("Error") and not ai_summary.startswith("LLM Error"):
+                                     st.markdown("<div class='ai-summary'>", unsafe_allow_html=True)
+                                     st.markdown("**🤖 AI Summary:**")
+                                     st.write(ai_summary)
+
+                                     # Translation if enabled
+                                     if 'enable_translation' in locals() and enable_translation and target_lang != selected_lang:
+                                         with st.spinner(f"Translating to {target_lang}..."):
+                                             translated = llm.translate_text(ai_summary, target_lang, selected_model)
+                                             if translated != ai_summary:
+                                                 st.markdown(f"**🌐 Translation to {target_lang}:**")
+                                                 st.write(translated)
+
+                                     st.markdown("</div>", unsafe_allow_html=True)
+                                 else:
+                                     # Fallback to simple summary
+                                     st.warning("AI summary failed, using fallback")
+                                     fallback_summary = simple_summarize(content, 3)
+                                     st.write(fallback_summary)
+
+                             elif summary_data:
+                                 # Standard Wikipedia summary
+                                 summary_text = summary_data.get("extract", "")
+                                 if not llm_connected:
+                                     if summary_mode == "short":
+                                         summary_text = simple_summarize(summary_text, 2)
+                                     elif summary_mode == "medium":
+                                         summary_text = simple_summarize(summary_text, 4)
+
+                                 st.write(summary_text)
+
+                             else:
+                                 # Fallback to search snippet
+                                 snippet = clean_html(result.get("snippet", ""))
+                                 st.write(snippet)
+
+                             # Display thumbnail if available
+                             if summary_data and "thumbnail" in summary_data:
+                                 st.image(summary_data["thumbnail"]["source"], width=150)
+
+                             # Wikipedia link
+                             if summary_data and "content_urls" in summary_data:
+                                 wiki_url = summary_data["content_urls"]["desktop"]["page"]
+                                 st.markdown(f"[📖 Read full article on Wikipedia]({wiki_url})")
+
+                             # Detailed content button
+                             if st.button("📝 Show detailed content", key=f"detail_{idx}"):
+                                 if content:
+                                     st.text_area(
+                                         "Full Content",
+                                         content,
+                                         height=300,
+                                         key=f"content_{idx}"
+                                     )
+                                 else:
+                                     st.warning("Detailed content not available")
+
+                             st.markdown("</div>", unsafe_allow_html=True)
+                             st.markdown("---")
+
+                 else:
+                     st.warning(f"No results found for '{query}' in {selected_lang}")
+                     st.info("Try different keywords or switch to another language")
+
+         else:
+             st.warning("Please enter a search query")
+
+     # Status dashboard
+     st.markdown("---")
+     col1, col2, col3, col4 = st.columns(4)
+
+     with col1:
+         st.metric("🌍 Languages", len(LANGUAGES))
+
+     with col2:
+         st.metric("🤖 LLM Status", "Connected" if llm_connected else "Offline")
+
+     with col3:
+         st.metric("📚 Models", len(available_models))
+
+     with col4:
+         st.metric("🔍 Search Mode", "AI-Powered" if llm_connected else "Standard")
+
+     # Setup instructions
+     with st.expander("🛠️ Setup Instructions"):
+         st.markdown("""
+         ### Install Ollama for AI Features:
+
+         1. **Install Ollama:**
+         ```bash
+         # macOS/Linux
+         curl -fsSL https://ollama.ai/install.sh | sh
+
+         # Windows - Download from https://ollama.ai
+         ```
+
+         2. **Pull a model:**
+         ```bash
+         ollama pull llama3.2
+         # or
+         ollama pull mistral
+         ollama pull codellama
+         ```
+
+         3. **Start the Ollama server:**
+         ```bash
+         ollama serve
+         ```
+
+         4. **Restart this app** - LLM features will be enabled automatically.
+
+         ### Recommended Models:
+         - **llama3.2** - Great for general summarization
+         - **mistral** - Fast and efficient
+         - **codellama** - Good for technical content
+         """)
+
+     # Usage examples
+     with st.expander("💡 Usage Examples"):
+         st.markdown("""
+         **Try these example queries:**
+         - "Explain Kargil War in Telugu" → AI generates a Telugu explanation
+         - "Machine Learning" → Detailed AI summary with translation
+         - "Climate Change" → AI explanatory summary
+         - "Quantum Computing" → Technical AI analysis
+
+         **AI Features:**
+         - 🤖 Intelligent summarization (concise/detailed/explanatory)
+         - 🌐 Multi-language translation
+         - 📝 Context-aware explanations
+         - 🔍 Enhanced content understanding
+         """)
+
+ if __name__ == "__main__":
+     main()
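
For local verification before launching the app, the following standalone sketch (not part of the commit) exercises the same endpoints the new module relies on: Ollama's `/api/tags` and `/api/generate`, and Wikipedia's REST summary endpoint. The helper names `check_ollama` and `check_wikipedia`, the model name `llama3.2`, the sample article title, and the default base URL are illustrative assumptions; adjust them to whatever `ollama list` reports on your machine.

```python
# Sanity check for the two services streamlit_app.py talks to.
# Assumptions: Ollama listens on http://localhost:11434 and a model
# named "llama3.2" has already been pulled.
import requests

OLLAMA = "http://localhost:11434"

def check_ollama(model: str = "llama3.2") -> None:
    # List installed models via the /api/tags endpoint
    tags = requests.get(f"{OLLAMA}/api/tags", timeout=5).json()
    print("Installed models:", [m["name"] for m in tags.get("models", [])])

    # Run a minimal non-streaming generation via /api/generate
    reply = requests.post(
        f"{OLLAMA}/api/generate",
        json={"model": model, "prompt": "Say hello in one sentence.", "stream": False},
        timeout=60,
    ).json()
    print("Sample generation:", reply.get("response", "").strip())

def check_wikipedia(title: str = "Kargil War", lang: str = "en") -> None:
    # Fetch a page summary from the Wikipedia REST API
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title.replace(' ', '_')}"
    summary = requests.get(url, timeout=10).json()
    print("Wikipedia extract:", summary.get("extract", "")[:200], "...")

if __name__ == "__main__":
    check_wikipedia()
    check_ollama()
```

If both checks print sensible output, running `streamlit run src/streamlit_app.py` should show the LLM status banner as connected.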