Space status: Build error

Commit: caaf3b3
Parent: 7218a55
Message: searchable

Files changed:
- app.py (+26, -34)
- requirements.txt (+0, -1)
app.py CHANGED

@@ -1,12 +1,9 @@
 import gradio as gr
 import random
-from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
 import os
 
-dataset = load_dataset("cnn_dailymail", "3.0.0")
-
 NEWS_API_KEY = os.environ['NEWS_API_KEY']
 HF_TOKEN = os.environ['HF_TOKEN']
 
@@ -17,12 +14,9 @@ def summarize(model_name, article):
     payload = {"inputs": article}
     response = requests.post(API_URL, headers=headers, json=payload)
 
-    # Check if the response is successful
     if response.status_code == 200:
-        # Assuming the response structure has a 'generated_text' field
         return format(response.json())
     else:
-        # Handle different types of errors
         if response.status_code == 401:
             return "Error: Unauthorized. Check your API token."
         elif response.status_code == 503:
@@ -34,23 +28,23 @@ def summarize(model_name, article):
 def format(response):
     return response[0]['generated_text']
 
-def
-
-
-
-
-
-
-
+def get_news_article(search_query):
+    if search_query.strip():
+        url = 'https://newsapi.org/v2/everything'
+        params = {
+            'apiKey': NEWS_API_KEY,
+            'q': search_query,
+            'pageSize': 100,
+            'language': 'en'
+        }
+    else:
+        url = 'https://newsapi.org/v2/top-headlines'
+        params = {
+            'apiKey': NEWS_API_KEY,
+            'country': 'us',
+            'pageSize': 100
+        }
 
-def get_news_article():
-    url = 'https://newsapi.org/v2/top-headlines'
-    news_url = ''
-    params = {
-        'apiKey': NEWS_API_KEY,
-        'country': 'us', # You can change this as needed
-        'pageSize': 100
-    }
     response = requests.get(url, params=params)
     articles = response.json().get('articles', [])
     if articles:
@@ -58,7 +52,7 @@ def get_news_article():
         news_url = random_article.get('url')
     else:
         return None
-
+
     if news_url:
         full_article, title = scrape_article(news_url)
         return full_article, title
@@ -70,36 +64,34 @@ def scrape_article(url):
         response = requests.get(url)
         soup = BeautifulSoup(response.content, 'html.parser')
 
-        # Extracting the title - this is a general approach
         title = soup.title.string if soup.title else "No Title Available"
 
-        article_content = soup.find_all('p')
+        article_content = soup.find_all('p')
 
         text = ' '.join([p.get_text() for p in article_content])
         words = text.split()
-        truncated_text = ' '.join(words[:512])
-
+        truncated_text = ' '.join(words[:512])
+
         return truncated_text, title
     except Exception as e:
         return "Error scraping article: " + str(e), ""
 
 with gr.Blocks() as demo:
     gr.Markdown("# News Summary App")
-    gr.Markdown("Enter a news text
+    gr.Markdown("Enter a news text, search for news articles, or load a random article.")
 
     with gr.Row():
         with gr.Column():
-
-            load_news_article_button = gr.Button("
-            article_title = gr.Label()
-            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text
+            search_query_input = gr.Textbox(label="Search for News", placeholder="Enter a topic to search...")
+            load_news_article_button = gr.Button("Search News Article")
+            article_title = gr.Label()
+            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text, load a random article, or search for news...")
         with gr.Column():
             model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
             summarize_button = gr.Button("Summarize")
             output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")
 
-
-    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
+    load_news_article_button.click(fn=get_news_article, inputs=[search_query_input], outputs=[input_text, article_title])
     summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)
 
 demo.launch()
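The substance of the commit is the new search path in get_news_article: a non-empty search_query routes the request to NewsAPI's /v2/everything endpoint (keyword search), while a blank query keeps the original /v2/top-headlines behavior. A minimal standalone sketch of that selection logic, assuming NEWS_API_KEY is set in the environment as in the app (the function name here is illustrative):

    import os
    import random
    import requests

    NEWS_API_KEY = os.environ['NEWS_API_KEY']  # same env var the app reads

    def pick_random_article_url(search_query=""):
        """Return the URL of one randomly chosen matching article, or None."""
        if search_query.strip():
            # Non-empty query: /v2/everything searches all indexed articles.
            url = 'https://newsapi.org/v2/everything'
            params = {'apiKey': NEWS_API_KEY, 'q': search_query,
                      'pageSize': 100, 'language': 'en'}
        else:
            # Blank query: fall back to US top headlines, as before this commit.
            url = 'https://newsapi.org/v2/top-headlines'
            params = {'apiKey': NEWS_API_KEY, 'country': 'us', 'pageSize': 100}
        articles = requests.get(url, params=params).json().get('articles', [])
        return random.choice(articles).get('url') if articles else None

Requesting up to 100 results and picking one at random keeps repeated clicks from returning the same story every time.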
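scrape_article is unchanged in substance: it joins the text of every <p> tag on the page and keeps only the first 512 words, a crude cap that keeps the article within what the summarization models will accept. The same approach as a self-contained sketch; the max_words parameter and the request timeout are additions here, not in the app:

    import requests
    from bs4 import BeautifulSoup

    def scrape_article(url, max_words=512):
        """Fetch a page, join its <p> text, and truncate to max_words words."""
        try:
            response = requests.get(url, timeout=10)  # timeout added for safety
            soup = BeautifulSoup(response.content, 'html.parser')
            # <title> is a generic fallback and may not match the headline.
            title = soup.title.string if soup.title else "No Title Available"
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
            truncated_text = ' '.join(text.split()[:max_words])
            return truncated_text, title
        except Exception as e:
            return "Error scraping article: " + str(e), ""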
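For context, the summarize path (untouched by this commit) POSTs the article to the Hugging Face Inference API and unwraps generated_text, as the app's format helper shows. The diff never shows how API_URL is built; assuming the standard https://api-inference.huggingface.co/models/<model_name> pattern, the call would look roughly like this (the 503 message below is a guess, since that branch is cut off in the diff):

    import os
    import requests

    HF_TOKEN = os.environ['HF_TOKEN']  # same env var the app reads

    def summarize(model_name, article):
        """POST the article to a hosted model and return the generated summary."""
        api_url = f"https://api-inference.huggingface.co/models/{model_name}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        response = requests.post(api_url, headers=headers, json={"inputs": article})
        if response.status_code == 200:
            # Matches the app's format() helper: [{'generated_text': '...'}]
            return response.json()[0]['generated_text']
        if response.status_code == 401:
            return "Error: Unauthorized. Check your API token."
        if response.status_code == 503:
            # 503 from the Inference API usually means the model is still loading.
            return "Error: Model is loading, try again shortly."
        return f"Error: request failed with status {response.status_code}"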
requirements.txt CHANGED

@@ -1,3 +1,2 @@
 gradio
-datasets
 beautifulsoup4