loayshabet committed on
Commit
41e7bb5
·
verified ·
1 Parent(s): c96106e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -9
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
3
  import feedparser
4
  from datetime import datetime, timedelta
5
  import json
@@ -20,6 +20,30 @@ logging.basicConfig(
20
  format='%(asctime)s - %(levelname)s - %(message)s'
21
  )
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # Language codes and their corresponding MarianMT model names
24
  LANGUAGE_CODES = {
25
  "English": {"code": "en", "model": None}, # No translation needed for English
@@ -35,8 +59,6 @@ LANGUAGE_CODES = {
35
  "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
36
  }
37
 
38
- # [Previous NEWS_SOURCES definition remains the same...]
39
-
40
  # Initialize global variables
41
  summarizer = None
42
  translators = {}
@@ -49,11 +71,8 @@ class NewsCache:
49
 
50
  def store_summary(self, content_hash, summary, language=None):
51
  cache_key = f"{content_hash}_{language}" if language else content_hash
52
-
53
  if len(self.summaries) >= self.max_cache_size:
54
- # Remove oldest entry if cache is full
55
  self.summaries.pop(next(iter(self.summaries)))
56
-
57
  self.summaries[cache_key] = summary
58
 
59
  def get_summary(self, content_hash, language=None):
@@ -62,6 +81,44 @@ class NewsCache:
62
 
63
  news_cache = NewsCache()
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def initialize_models():
66
  """Initialize the summarization and translation models"""
67
  global summarizer, translators
@@ -78,7 +135,7 @@ def initialize_models():
78
  for lang, info in LANGUAGE_CODES.items():
79
  if info["model"]: # Skip English as it doesn't need translation
80
  try:
81
- model = AutoModelForSeq2SeqGeneration.from_pretrained(info["model"])
82
  tokenizer = AutoTokenizer.from_pretrained(info["model"])
83
  translators[lang] = (model, tokenizer)
84
  logging.info(f"Initialized translator for {lang}")
@@ -258,7 +315,6 @@ def get_personalized_summary(name, progress=gr.Progress()):
258
  progress(1.0, desc="Done!")
259
  return "\n".join(summaries)
260
 
261
-
262
  # Gradio interface
263
  with gr.Blocks(title="Enhanced News Summarizer") as demo:
264
  gr.Markdown("# 📰 Enhanced AI News Summarizer")
@@ -318,7 +374,7 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
318
  )
319
 
320
  if __name__ == "__main__":
321
- if initialize_summarizer():
322
  demo.launch()
323
  else:
324
  print("Failed to initialize summarizer. Please check the logs.")
 
1
  import gradio as gr
2
+ from transformers import pipeline, MarianMTModel, AutoTokenizer
3
  import feedparser
4
  from datetime import datetime, timedelta
5
  import json
 
20
  format='%(asctime)s - %(levelname)s - %(message)s'
21
  )
22
 
23
+ # News sources and their RSS feeds
24
+ NEWS_SOURCES = {
25
+ "Technology": {
26
+ "TechCrunch": "https://techcrunch.com/feed/",
27
+ "Wired": "https://www.wired.com/feed/rss",
28
+ "The Verge": "https://www.theverge.com/rss/index.xml"
29
+ },
30
+ "Business": {
31
+ "Financial Times": "https://www.ft.com/rss/home",
32
+ "Business Insider": "https://www.businessinsider.com/rss",
33
+ "Forbes": "https://www.forbes.com/real-time/feed2/"
34
+ },
35
+ "Science": {
36
+ "Science Daily": "https://www.sciencedaily.com/rss/all.xml",
37
+ "Nature": "http://feeds.nature.com/nature/rss/current",
38
+ "Scientific American": "http://rss.sciam.com/ScientificAmerican-Global"
39
+ },
40
+ "World News": {
41
+ "Reuters": "http://feeds.reuters.com/reuters/topNews",
42
+ "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
43
+ "CNN": "http://rss.cnn.com/rss/edition_world.rss"
44
+ }
45
+ }
46
+
47
  # Language codes and their corresponding MarianMT model names
48
  LANGUAGE_CODES = {
49
  "English": {"code": "en", "model": None}, # No translation needed for English
 
59
  "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
60
  }
61
 
 
 
62
  # Initialize global variables
63
  summarizer = None
64
  translators = {}
 
71
 
72
  def store_summary(self, content_hash, summary, language=None):
73
  cache_key = f"{content_hash}_{language}" if language else content_hash
 
74
  if len(self.summaries) >= self.max_cache_size:
 
75
  self.summaries.pop(next(iter(self.summaries)))
 
76
  self.summaries[cache_key] = summary
77
 
78
  def get_summary(self, content_hash, language=None):
 
81
 
82
  news_cache = NewsCache()
83
 
84
def get_content_hash(content):
    """Return the hexadecimal MD5 digest of ``content``.

    The digest is used as a cache key, not for security purposes.

    Args:
        content: Text (``str``) or raw ``bytes`` to fingerprint.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    if isinstance(content, (bytes, bytearray)):
        data = bytes(content)
    else:
        # "surrogatepass" keeps the digest of well-formed text identical to a
        # plain UTF-8 encode, but avoids UnicodeEncodeError on lone surrogates
        # that can show up in badly decoded feed content.
        data = content.encode("utf-8", errors="surrogatepass")
    return hashlib.md5(data).hexdigest()
87
+
88
def parse_date(date_str):
    """Parse a feed date string into a timezone-aware UTC datetime.

    RFC 2822 dates (the RSS format, e.g. ``Mon, 01 Jan 2024 12:00:00 +0000``)
    are tried first; ISO 8601 / RFC 3339 timestamps (the Atom format, e.g.
    ``2024-01-01T12:00:00Z``) are tried as a fallback.

    Args:
        date_str: The raw date string, or ``None`` when the entry has no date.

    Returns:
        A ``datetime`` normalized to UTC, or ``None`` if the value is missing
        or cannot be parsed. Timestamps carrying an offset are *converted* to
        UTC; timestamps without any offset are assumed to already be UTC.
    """
    # Local import: only `datetime` and `timedelta` are imported at file level
    # in the visible part of the module.
    from datetime import timezone

    if not isinstance(date_str, str):
        return None
    text = date_str.strip()
    if not text:
        return None

    try:
        parsed = parsedate_to_datetime(text)
    except (TypeError, ValueError, IndexError):
        # Not RFC 2822 (TypeError on older Pythons, ValueError on 3.10+).
        parsed = None

    if parsed is None:
        # datetime.fromisoformat() only accepts a trailing "Z" from Python
        # 3.11 onwards, so spell the UTC designator out explicitly.
        iso_text = (text[:-1] + "+00:00") if text[-1] in "Zz" else text
        try:
            parsed = datetime.fromisoformat(iso_text)
        except ValueError:
            return None

    try:
        if parsed.tzinfo is None:
            # No offset information at all: treat the wall time as UTC.
            return parsed.replace(tzinfo=timezone.utc)
        # Convert (rather than relabel) so the instant in time is preserved.
        return parsed.astimezone(timezone.utc)
    except (OverflowError, ValueError):
        # Dates at the extreme ends of the supported range cannot be shifted.
        return None
94
+
95
def fetch_news_from_rss(categories):
    """Fetch recent articles from the RSS feeds of the requested categories.

    Every feed listed in ``NEWS_SOURCES`` for each requested category is
    downloaded and its entries are kept when they were published within the
    last 8 hours. A failure while processing one feed is logged and does not
    stop the remaining feeds from being processed.

    Args:
        categories: Iterable of category names (keys of ``NEWS_SOURCES``).
            Unknown names are ignored; ``None`` or an empty iterable yields
            an empty result.

    Returns:
        A list of dicts with the keys ``title``, ``description`` (HTML
        stripped), ``link``, ``published`` (raw date string), ``category``
        and ``source``.
    """
    if not categories:
        return []

    articles = []
    cutoff_time = datetime.now(pytz.UTC) - timedelta(hours=8)

    for category in categories:
        sources = NEWS_SOURCES.get(category)
        if not sources:
            continue

        for source, feed_url in sources.items():
            try:
                feed = feedparser.parse(feed_url)
                for entry in feed.entries:
                    # Some feeds (notably Atom) only expose an `updated`
                    # timestamp, so fall back to it when `published` is absent.
                    raw_date = entry.get('published') or entry.get('updated')
                    published = parse_date(raw_date)
                    if published is None or published <= cutoff_time:
                        continue

                    # Descriptions may contain markup; keep the text only.
                    description_html = entry.get('description') or ''
                    articles.append({
                        'title': entry.get('title', ''),
                        'description': BeautifulSoup(description_html, 'html.parser').get_text(),
                        'link': entry.get('link', ''),
                        'published': raw_date or '',
                        'category': category,
                        'source': source
                    })
            except Exception as e:
                # Best effort: one broken feed must not abort the whole fetch.
                logging.error(f"Error fetching from {feed_url}: {e}")

    return articles
121
+
122
  def initialize_models():
123
  """Initialize the summarization and translation models"""
124
  global summarizer, translators
 
135
  for lang, info in LANGUAGE_CODES.items():
136
  if info["model"]: # Skip English as it doesn't need translation
137
  try:
138
+ model = MarianMTModel.from_pretrained(info["model"])
139
  tokenizer = AutoTokenizer.from_pretrained(info["model"])
140
  translators[lang] = (model, tokenizer)
141
  logging.info(f"Initialized translator for {lang}")
 
315
  progress(1.0, desc="Done!")
316
  return "\n".join(summaries)
317
 
 
318
  # Gradio interface
319
  with gr.Blocks(title="Enhanced News Summarizer") as demo:
320
  gr.Markdown("# 📰 Enhanced AI News Summarizer")
 
374
  )
375
 
376
  if __name__ == "__main__":
377
+ if initialize_models():
378
  demo.launch()
379
  else:
380
  print("Failed to initialize summarizer. Please check the logs.")