Spaces:

loayshabet
/

news-sumarry

Sleeping

App Files Files Community

loayshabet committed on Nov 17, 2024

Commit

41e7bb5

verified ·

1 Parent(s): c96106e

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -9

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
 import feedparser
 from datetime import datetime, timedelta
 import json
@@ -20,6 +20,30 @@ logging.basicConfig(
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 # Language codes and their corresponding MarianMT model names
 LANGUAGE_CODES = {
     "English": {"code": "en", "model": None},  # No translation needed for English
@@ -35,8 +59,6 @@ LANGUAGE_CODES = {
     "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
 }
-# [Previous NEWS_SOURCES definition remains the same...]
 # Initialize global variables
 summarizer = None
 translators = {}
@@ -49,11 +71,8 @@ class NewsCache:
     def store_summary(self, content_hash, summary, language=None):
         cache_key = f"{content_hash}_{language}" if language else content_hash
         if len(self.summaries) >= self.max_cache_size:
-            # Remove oldest entry if cache is full
             self.summaries.pop(next(iter(self.summaries)))
         self.summaries[cache_key] = summary
     def get_summary(self, content_hash, language=None):
@@ -62,6 +81,44 @@ class NewsCache:
 news_cache = NewsCache()
 def initialize_models():
     """Initialize the summarization and translation models"""
     global summarizer, translators
@@ -78,7 +135,7 @@ def initialize_models():
         for lang, info in LANGUAGE_CODES.items():
             if info["model"]:  # Skip English as it doesn't need translation
                 try:
-                    model = AutoModelForSeq2SeqGeneration.from_pretrained(info["model"])
                     tokenizer = AutoTokenizer.from_pretrained(info["model"])
                     translators[lang] = (model, tokenizer)
                     logging.info(f"Initialized translator for {lang}")
@@ -258,7 +315,6 @@ def get_personalized_summary(name, progress=gr.Progress()):
     progress(1.0, desc="Done!")
     return "\n".join(summaries)
 # Gradio interface
 with gr.Blocks(title="Enhanced News Summarizer") as demo:
     gr.Markdown("# 📰 Enhanced AI News Summarizer")
@@ -318,7 +374,7 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
         )
 if __name__ == "__main__":
-    if initialize_summarizer():
         demo.launch()
     else:
         print("Failed to initialize summarizer. Please check the logs.")

 import gradio as gr
+from transformers import pipeline, MarianMTModel, AutoTokenizer
 import feedparser
 from datetime import datetime, timedelta
 import json
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
+# News sources and their RSS feeds.
+# Maps a category name to a dict of {source name: RSS feed URL};
+# fetch_news_from_rss() walks the sources of every requested category.
+NEWS_SOURCES = {
+    "Technology": {
+        "TechCrunch": "https://techcrunch.com/feed/",
+        "Wired": "https://www.wired.com/feed/rss",
+        "The Verge": "https://www.theverge.com/rss/index.xml"
+    },
+    "Business": {
+        "Financial Times": "https://www.ft.com/rss/home",
+        "Business Insider": "https://www.businessinsider.com/rss",
+        "Forbes": "https://www.forbes.com/real-time/feed2/"
+    },
+    "Science": {
+        "Science Daily": "https://www.sciencedaily.com/rss/all.xml",
+        "Nature": "http://feeds.nature.com/nature/rss/current",
+        "Scientific American": "http://rss.sciam.com/ScientificAmerican-Global"
+    },
+    "World News": {
+        "Reuters": "http://feeds.reuters.com/reuters/topNews",
+        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
+        "CNN": "http://rss.cnn.com/rss/edition_world.rss"
+    }
+}
 # Language codes and their corresponding MarianMT model names
 LANGUAGE_CODES = {
     "English": {"code": "en", "model": None},  # No translation needed for English
     "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
 }
 # Initialize global variables
 summarizer = None
 translators = {}
     def store_summary(self, content_hash, summary, language=None):
         cache_key = f"{content_hash}_{language}" if language else content_hash
         if len(self.summaries) >= self.max_cache_size:
             self.summaries.pop(next(iter(self.summaries)))
         self.summaries[cache_key] = summary
     def get_summary(self, content_hash, language=None):
 news_cache = NewsCache()
+def get_content_hash(content):
+    """Return the hexadecimal MD5 digest of ``content``'s encoded bytes."""
+    encoded = content.encode()
+    digest = hashlib.md5(encoded)
+    return digest.hexdigest()
+def parse_date(date_str):
+    """Parse a feed date string into a timezone-aware UTC datetime.
+
+    Parsing is delegated to ``parsedate_to_datetime``.
+
+    Args:
+        date_str: Date text (for example an RSS ``published`` value), or
+            ``None`` when the entry carries no date.
+
+    Returns:
+        An aware ``datetime`` expressed in UTC, or ``None`` when ``date_str``
+        is missing, is not a string, or cannot be parsed.
+    """
+    if not date_str or not isinstance(date_str, str):
+        return None
+    try:
+        parsed = parsedate_to_datetime(date_str)
+        if parsed.tzinfo is None:
+            # No usable offset in the input: treat the wall-clock time as UTC.
+            return parsed.replace(tzinfo=pytz.UTC)
+        # Convert instead of overwriting the offset so the instant is kept;
+        # replace() would shift e.g. a +0500 timestamp by five hours.
+        return parsed.astimezone(pytz.UTC)
+    except (TypeError, ValueError, OverflowError):
+        # Unparseable or out-of-range dates are reported as "no date" so the
+        # caller can skip the entry instead of failing the whole feed.
+        return None
+def fetch_news_from_rss(categories):
+    """Collect recent articles from the RSS feeds of the given categories.
+
+    Categories that are not keys of NEWS_SOURCES are ignored. An entry is kept
+    only when its 'published' value parses and is newer than eight hours ago.
+    A feed that raises is logged and skipped; the remaining feeds are still read.
+
+    Returns a list of dicts with the keys 'title', 'description', 'link',
+    'published', 'category' and 'source'.
+    """
+    window_start = datetime.now(pytz.UTC) - timedelta(hours=8)
+    collected = []
+    for category in categories:
+        if category not in NEWS_SOURCES:
+            continue
+        for outlet, feed_url in NEWS_SOURCES[category].items():
+            try:
+                parsed_feed = feedparser.parse(feed_url)
+                for item in parsed_feed.entries:
+                    published_at = parse_date(item.get('published'))
+                    if not (published_at and published_at > window_start):
+                        continue
+                    # Strip HTML markup so only the description text is kept.
+                    raw_description = item.get('description', '')
+                    plain_description = BeautifulSoup(raw_description, 'html.parser').get_text()
+                    collected.append({
+                        'title': item.get('title', ''),
+                        'description': plain_description,
+                        'link': item.get('link', ''),
+                        'published': item.get('published', ''),
+                        'category': category,
+                        'source': outlet
+                    })
+            except Exception as e:
+                logging.error(f"Error fetching from {feed_url}: {e}")
+    return collected
 def initialize_models():
     """Initialize the summarization and translation models"""
     global summarizer, translators
         for lang, info in LANGUAGE_CODES.items():
             if info["model"]:  # Skip English as it doesn't need translation
                 try:
+                    model = MarianMTModel.from_pretrained(info["model"])
                     tokenizer = AutoTokenizer.from_pretrained(info["model"])
                     translators[lang] = (model, tokenizer)
                     logging.info(f"Initialized translator for {lang}")
     progress(1.0, desc="Done!")
     return "\n".join(summaries)
 # Gradio interface
 with gr.Blocks(title="Enhanced News Summarizer") as demo:
     gr.Markdown("# 📰 Enhanced AI News Summarizer")
         )
 if __name__ == "__main__":
+    if initialize_models():
         demo.launch()
     else:
         print("Failed to initialize summarizer. Please check the logs.")