File size: 993 Bytes
4de8fd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import streamlit as st
from haystack.nodes.connector import Crawler
from haystack.nodes import TransformersSummarizer
import validators
import json

# Content loaded from the crawl during the current script run; filled by
# crawl_url_and_write_content() and handed to the summarizer further down.
documents = []

# Directory name passed to the crawler as its output location.
output_dir = "crawled_files"
crawler = Crawler(output_dir=output_dir)

# Summarization node built from the "google/pegasus-xsum" model.
summarizer = TransformersSummarizer(
    model_name_or_path="google/pegasus-xsum",
)

def crawl_url_and_write_content(url):
    """Crawl ``url`` and load the result into the module-level ``documents`` list.

    The page is crawled with ``crawler_depth=0`` and existing crawl files are
    overwritten. Each JSON file returned by the crawler is parsed and turned
    into a Haystack ``Document``; the previous contents of ``documents`` are
    replaced, so the list only ever reflects the most recent crawl.

    Args:
        url: Address of the page to crawl. Validation is left to the caller.

    Returns:
        None. The result is delivered by mutating ``documents`` in place.
    """
    # Imported locally so this function does not depend on a new file-level
    # import; haystack itself is already a dependency of this module.
    from haystack import Document

    # Crawl the URL the caller asked for (previously a hard-coded blog URL
    # was crawled and the ``url`` argument was ignored).
    crawled_files = crawler.crawl(
        urls=[url],
        crawler_depth=0,
        overwrite_existing_files=True,
    )

    loaded = []
    for crawled_file in crawled_files:
        # Assumes the crawler returns path-like objects pointing at JSON
        # files, as the original ``doc.open()`` call did -- TODO confirm for
        # the installed haystack version.
        with crawled_file.open(encoding="utf-8") as handle:
            payload = json.load(handle)
        # The summarizer node operates on Document objects, not raw dicts.
        loaded.append(Document.from_dict(payload))

    # Slice assignment replaces the contents in place. The original
    # ``documents[0] = ...`` raised IndexError on the initially empty list.
    documents[:] = loaded
# Streamlit App
#
# Flow: the user enters a URL and presses "Summarize"; only then is the page
# crawled and summarized. Streamlit re-executes this script on every widget
# interaction, so crawling is tied to the button press instead of running on
# each rerun (including the initial page load).

st.title('Summarizer Demo with Haystack Summarizer')

url_text = st.text_input("Please Enter a url here",value="https://www.rba.gov.au/media-releases/2022/mr-22-12.html")

summarize = st.button('Summarize')

if summarize and not validators.url(url_text):
    # Previously an invalid URL was ignored without any feedback.
    st.warning("Please enter a valid URL.")
elif summarize:
    crawl_url_and_write_content(url_text)
    if documents:
        summary = summarizer.predict(documents=documents)
        st.write(summary)
    else:
        # Nothing was crawled, so do not call the summarizer with no input.
        st.warning("No content could be crawled from that URL.")