Space status: Build error

Commit: caaf3b3
Parent: 7218a55
Message: searchable

Files changed:
- app.py (+26, -34)
- requirements.txt (+0, -1)
app.py CHANGED

@@ -1,12 +1,9 @@
 import gradio as gr
 import random
-from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
 import os
 
-dataset = load_dataset("cnn_dailymail", "3.0.0")
-
 NEWS_API_KEY = os.environ['NEWS_API_KEY']
 HF_TOKEN = os.environ['HF_TOKEN']
 
@@ -17,12 +14,9 @@ def summarize(model_name, article):
     payload = {"inputs": article}
     response = requests.post(API_URL, headers=headers, json=payload)
 
-    # Check if the response is successful
     if response.status_code == 200:
-        # Assuming the response structure has a 'generated_text' field
         return format(response.json())
     else:
-        # Handle different types of errors
         if response.status_code == 401:
             return "Error: Unauthorized. Check your API token."
         elif response.status_code == 503:
@@ -34,23 +28,23 @@ def summarize(model_name, article):
 def format(response):
     return response[0]['generated_text']
 
-def
-
-
-
-
-
-
-
+def get_news_article(search_query):
+    if search_query.strip():
+        url = 'https://newsapi.org/v2/everything'
+        params = {
+            'apiKey': NEWS_API_KEY,
+            'q': search_query,
+            'pageSize': 100,
+            'language': 'en'
+        }
+    else:
+        url = 'https://newsapi.org/v2/top-headlines'
+        params = {
+            'apiKey': NEWS_API_KEY,
+            'country': 'us',
+            'pageSize': 100
+        }
 
-def get_news_article():
-    url = 'https://newsapi.org/v2/top-headlines'
-    news_url = ''
-    params = {
-        'apiKey': NEWS_API_KEY,
-        'country': 'us', # You can change this as needed
-        'pageSize': 100
-    }
     response = requests.get(url, params=params)
     articles = response.json().get('articles', [])
     if articles:
@@ -58,7 +52,7 @@ def get_news_article():
         news_url = random_article.get('url')
     else:
         return None
-
+
     if news_url:
         full_article, title = scrape_article(news_url)
         return full_article, title
@@ -70,36 +64,34 @@ def scrape_article(url):
         response = requests.get(url)
         soup = BeautifulSoup(response.content, 'html.parser')
 
-        # Extracting the title - this is a general approach
         title = soup.title.string if soup.title else "No Title Available"
 
-        article_content = soup.find_all('p')
+        article_content = soup.find_all('p')
 
         text = ' '.join([p.get_text() for p in article_content])
         words = text.split()
-        truncated_text = ' '.join(words[:512])
-
+        truncated_text = ' '.join(words[:512])
+
         return truncated_text, title
     except Exception as e:
         return "Error scraping article: " + str(e), ""
 
 with gr.Blocks() as demo:
     gr.Markdown("# News Summary App")
-    gr.Markdown("Enter a news text
+    gr.Markdown("Enter a news text, search for news articles, or load a random article.")
 
     with gr.Row():
         with gr.Column():
-
-            load_news_article_button = gr.Button("
-            article_title = gr.Label()
-            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text
+            search_query_input = gr.Textbox(label="Search for News", placeholder="Enter a topic to search...")
+            load_news_article_button = gr.Button("Search News Article")
+            article_title = gr.Label()
+            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text, load a random article, or search for news...")
         with gr.Column():
             model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
             summarize_button = gr.Button("Summarize")
             output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")
 
-
-    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
+    load_news_article_button.click(fn=get_news_article, inputs=[search_query_input], outputs=[input_text, article_title])
     summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)
 
 demo.launch()
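The substance of the commit is the new search path in get_news_article: a non-empty search_query routes the request to NewsAPI's /v2/everything endpoint (keyword search), while a blank query keeps the original /v2/top-headlines behavior. A minimal standalone sketch of that selection logic, assuming NEWS_API_KEY is set in the environment as in the app (the function name here is illustrative):

    import os
    import random
    import requests

    NEWS_API_KEY = os.environ['NEWS_API_KEY']  # same env var the app reads

    def pick_random_article_url(search_query=""):
        """Return the URL of one randomly chosen matching article, or None."""
        if search_query.strip():
            # Non-empty query: /v2/everything searches all indexed articles.
            url = 'https://newsapi.org/v2/everything'
            params = {'apiKey': NEWS_API_KEY, 'q': search_query,
                      'pageSize': 100, 'language': 'en'}
        else:
            # Blank query: fall back to US top headlines, as before this commit.
            url = 'https://newsapi.org/v2/top-headlines'
            params = {'apiKey': NEWS_API_KEY, 'country': 'us', 'pageSize': 100}
        articles = requests.get(url, params=params).json().get('articles', [])
        return random.choice(articles).get('url') if articles else None

Requesting up to 100 results and picking one at random keeps repeated clicks from returning the same story every time.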
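scrape_article is unchanged in substance: it joins the text of every <p> tag on the page and keeps only the first 512 words, a crude cap that keeps the article within what the summarization models will accept. The same approach as a self-contained sketch; the max_words parameter and the request timeout are additions here, not in the app:

    import requests
    from bs4 import BeautifulSoup

    def scrape_article(url, max_words=512):
        """Fetch a page, join its <p> text, and truncate to max_words words."""
        try:
            response = requests.get(url, timeout=10)  # timeout added for safety
            soup = BeautifulSoup(response.content, 'html.parser')
            # <title> is a generic fallback and may not match the headline.
            title = soup.title.string if soup.title else "No Title Available"
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
            truncated_text = ' '.join(text.split()[:max_words])
            return truncated_text, title
        except Exception as e:
            return "Error scraping article: " + str(e), ""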
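For context, the summarize path (untouched by this commit) POSTs the article to the Hugging Face Inference API and unwraps generated_text, as the app's format helper shows. The diff never shows how API_URL is built; assuming the standard https://api-inference.huggingface.co/models/<model_name> pattern, the call would look roughly like this (the 503 message below is a guess, since that branch is cut off in the diff):

    import os
    import requests

    HF_TOKEN = os.environ['HF_TOKEN']  # same env var the app reads

    def summarize(model_name, article):
        """POST the article to a hosted model and return the generated summary."""
        api_url = f"https://api-inference.huggingface.co/models/{model_name}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        response = requests.post(api_url, headers=headers, json={"inputs": article})
        if response.status_code == 200:
            # Matches the app's format() helper: [{'generated_text': '...'}]
            return response.json()[0]['generated_text']
        if response.status_code == 401:
            return "Error: Unauthorized. Check your API token."
        if response.status_code == 503:
            # 503 from the Inference API usually means the model is still loading.
            return "Error: Model is loading, try again shortly."
        return f"Error: request failed with status {response.status_code}"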
requirements.txt CHANGED

@@ -1,3 +1,2 @@
 gradio
-datasets
 beautifulsoup4