import json

import streamlit as st
import validators
from haystack import Document
from haystack.nodes import TransformersSummarizer
from haystack.nodes.connector import Crawler

# Directory where the Crawler stores each scraped page as a JSON file
output_dir = "crawled_files"

crawler = Crawler(output_dir=output_dir)
summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")

documents = []


def crawl_url_and_write_content(url):
    # Crawl only the page that was passed in (crawler_depth=0 means no links are followed)
    docs = crawler.crawl(urls=[url], crawler_depth=0, overwrite_existing_files=True)
    for doc in docs:
        # crawl() returns paths to the JSON files it wrote; load each file and
        # convert it into a Haystack Document so the summarizer can consume it
        with doc.open() as json_file:
            json_object = json.load(json_file)
        documents.append(Document.from_dict(json_object))


# Streamlit app
st.title("Summarizer Demo with Haystack Summarizer")
url_text = st.text_input(
    "Please enter a URL here",
    value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html",
)

# Only crawl when the input is a syntactically valid URL
if validators.url(url_text):
    crawl_url_and_write_content(url_text)

summarize = st.button("Summarize")

if summarize and documents:
    summary = summarizer.predict(documents=documents)
    st.write(summary)
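
Assuming the script above is saved as `app.py` (the filename is only illustrative), the demo can be launched locally with `streamlit run app.py`, which opens the app in the browser so you can paste in a URL and click Summarize.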