# GoT-QA-Haystack / app.py
# Tuana's picture
# first attempt
# 4de8fd3
# raw
# history blame
# 993 Bytes
import streamlit as st
from haystack.nodes.connector import Crawler
from haystack.nodes import TransformersSummarizer
import validators
import json
# Directory handed to the crawler as its output location.
output_dir = "crawled_files"

# Shared crawler instance used by crawl_url_and_write_content().
crawler = Crawler(
    output_dir=output_dir,
)

# Summarizer node built from the "google/pegasus-xsum" model identifier.
summarizer = TransformersSummarizer(
    model_name_or_path="google/pegasus-xsum",
)

# Crawled content that is later passed to summarizer.predict().
documents = []
def crawl_url_and_write_content(url):
    """Crawl ``url`` and load the crawled JSON content into ``documents``.

    Args:
        url: Address of the page to crawl.

    Side effects:
        Replaces the contents of the module-level ``documents`` list, in
        place, with the parsed JSON of every file the crawler returned.
        If the crawler returns nothing, ``documents`` ends up empty.
    """
    # Crawl the URL the caller passed in; it was previously ignored in
    # favour of a hard-coded blog post address.
    crawled_files = crawler.crawl(
        urls=[url],
        crawler_depth=0,
        overwrite_existing_files=True,
    )

    loaded = []
    for crawled_file in crawled_files:
        # Use a context manager so each file handle is closed after parsing.
        with crawled_file.open(encoding="utf-8") as handle:
            loaded.append(json.load(handle))

    # Slice assignment updates the shared list in place. The former
    # ``documents[0] = ...`` raised IndexError because the list starts empty.
    # NOTE(review): the entries are plain dicts parsed from JSON; confirm
    # whether summarizer.predict() needs them converted to Document objects.
    documents[:] = loaded
# Streamlit App
# Flow: read a URL from the user, crawl it when it is well-formed, and
# summarize the crawled content when the "Summarize" button is pressed.
st.title('Summarizer Demo with Haystack Summarizer')
url_text = st.text_input(
    "Please Enter a url here",
    value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html",
)

if validators.url(url_text):
    crawl_url_and_write_content(url_text)
else:
    # Tell the user why nothing was crawled instead of failing silently.
    st.warning("Please enter a valid URL.")

summarize = st.button('Summarize')
if summarize:
    if documents:
        summary = summarizer.predict(documents=documents)
        st.write(summary)
    else:
        # Avoid handing the summarizer an empty document list.
        st.warning("There is no crawled content to summarize yet.")