Spaces:
Sleeping
Sleeping
grapplerulrich
committed on
Commit
·
9c1234d
1
Parent(s):
037af6c
Add summary cache
Browse files
- .gitignore +1 -0
- main.py +17 -4
.gitignore
CHANGED
@@ -4,3 +4,4 @@ __pycache__
|
|
4 |
/search-results
|
5 |
/web-pages
|
6 |
/page-content
|
|
|
|
4 |
/search-results
|
5 |
/web-pages
|
6 |
/page-content
|
7 |
+
/summaries
|
main.py
CHANGED
@@ -8,6 +8,7 @@ from dotenv import load_dotenv
|
|
8 |
from googleapiclient.discovery import build
|
9 |
from slugify import slugify
|
10 |
from transformers import pipeline
|
|
|
11 |
|
12 |
from beautiful_soup.app import get_url_content
|
13 |
|
@@ -60,15 +61,27 @@ def main():
|
|
60 |
|
61 |
for result in results:
|
62 |
st.write(result['link'])
|
|
|
63 |
try:
|
64 |
content = get_url_content( result['link'] )
|
65 |
-
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
66 |
-
summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
|
67 |
-
for sentence in summary:
|
68 |
-
st.write(sentence['summary_text'])
|
69 |
except Exception as exception:
|
70 |
st.exception(exception)
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
if __name__ == '__main__':
|
74 |
main()
|
|
|
8 |
from googleapiclient.discovery import build
|
9 |
from slugify import slugify
|
10 |
from transformers import pipeline
|
11 |
+
import uuid
|
12 |
|
13 |
from beautiful_soup.app import get_url_content
|
14 |
|
|
|
61 |
|
62 |
for result in results:
|
63 |
st.write(result['link'])
|
64 |
+
|
65 |
try:
|
66 |
content = get_url_content( result['link'] )
|
|
|
|
|
|
|
|
|
67 |
except Exception as exception:
|
68 |
st.exception(exception)
|
69 |
|
70 |
+
file_path = 'summaries/' + uuid.uuid5( uuid.NAMESPACE_URL, result['link'] ).hex + '.json'
|
71 |
+
if exists( file_path ):
|
72 |
+
with open( file_path, 'r' ) as file:
|
73 |
+
summary = json.load( file )
|
74 |
+
else:
|
75 |
+
try:
|
76 |
+
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
77 |
+
summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
|
78 |
+
except Exception as exception:
|
79 |
+
raise exception
|
80 |
+
with open( file_path, 'w' ) as file:
|
81 |
+
json.dump( summary, file )
|
82 |
+
|
83 |
+
for sentence in summary:
|
84 |
+
st.write(sentence['summary_text'])
|
85 |
|
86 |
if __name__ == '__main__':
|
87 |
main()
|