# Streamlit summarization demo (Hugging Face Space)
import json

import streamlit as st
import validators
from haystack.nodes import TransformersSummarizer
from haystack.nodes.connector import Crawler
# Directory where the crawler writes each fetched page as a JSON file.
output_dir = "crawled_files"

crawler = Crawler(output_dir=output_dir)
# Abstractive summarizer; model weights are downloaded on first use.
summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")

# Filled by crawl_url_and_write_content(); consumed by summarizer.predict() below.
documents = []
def crawl_url_and_write_content(url):
    """Crawl *url* (depth 0) and load each crawled JSON file into ``documents``.

    Side effect: replaces the contents of the module-level ``documents``
    list with the documents produced by this crawl.
    """
    # Bug fix: the original crawled a hard-coded deepset.ai URL and
    # ignored the *url* argument entirely.
    docs = crawler.crawl(
        urls=[url], crawler_depth=0, overwrite_existing_files=True
    )
    # Drop results from any previous crawl so we don't summarize stale pages.
    documents.clear()
    for doc in docs:
        # Bug fix: ``documents[0] = ...`` raised IndexError on the empty
        # list and would keep only the last document; append instead.
        # ``with`` also closes the file handle the original leaked.
        with doc.open() as fh:
            documents.append(json.load(fh))
# --- Streamlit UI ---
st.title('Summarizer Demo with Haystack Summarizer')

url_text = st.text_input(
    "Please Enter a url here",
    value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html",
)

if validators.url(url_text):
    # Re-crawls on every rerun of the script when the URL is valid.
    crawl_url_and_write_content(url_text)
else:
    # Give the user feedback instead of silently skipping the crawl.
    st.error("Please enter a valid URL.")

summarize = st.button('Summarize')
if summarize:
    # Guard: predict() on an empty document list would fail or be meaningless.
    if documents:
        summary = summarizer.predict(documents=documents)
        st.write(summary)
    else:
        st.warning("Nothing crawled yet — enter a valid URL first.")