# Spaces: Sleeping
from os import getenv | |
from os.path import exists | |
from functools import cache | |
import json | |
import streamlit as st | |
from dotenv import load_dotenv | |
from googleapiclient.discovery import build | |
from slugify import slugify | |
from transformers import pipeline | |
import uuid | |
from beautiful_soup.app import get_url_content | |
def google_search_api_request(query):
    """Run a Google Custom Search request and return the API response.

    Environment variables are loaded from a ``.env`` file (if present)
    before being read:

    * ``GOOGLE_SEARCH_API_KEY`` -- passed to the client as ``developerKey``.
    * ``GOOGLE_SEARCH_ENGINE_ID`` -- the search engine (``cx``) to query.
      When unset or empty, the engine ID that used to be hard-coded here
      is used, so existing deployments keep working unchanged.

    Args:
        query: The search terms, sent to the API as ``q``.

    Returns:
        Whatever ``service.cse().list(...).execute()`` returns; at most
        5 results are requested.
    """
    load_dotenv()
    api_key = getenv('GOOGLE_SEARCH_API_KEY')
    # Prefer a configured engine ID; fall back to the original default.
    engine_id = getenv('GOOGLE_SEARCH_ENGINE_ID') or '05048cc2df6134a06'
    service = build(
        "customsearch",
        "v1",
        developerKey=api_key,
        cache_discovery=False,
    )
    return service.cse().list(
        q=query,
        cx=engine_id,
        num=5,
    ).execute()
def search_results(query):
    """Return the search result items for *query*, cached on disk as JSON.

    Results are stored in ``search-results/<slugified query>.json``. When
    that file exists it is loaded and no API request is made; otherwise
    the API is queried and the items (possibly an empty list) are written
    to the cache file.

    Args:
        query: The search terms.

    Returns:
        The non-empty list of result items.

    Raises:
        Exception: With message ``'No results found.'`` when there are no
            result items, whether they came from the cache or the API.
    """
    # Function-scope import: only `getenv` is imported from `os` at file level.
    from os import makedirs

    cache_dir = 'search-results'
    file_path = cache_dir + '/' + slugify(query) + '.json'

    if exists(file_path):
        with open(file_path, 'r') as results_file:
            results = json.load(results_file)
    else:
        results = []
        search_result = google_search_api_request(query)
        if int(search_result['searchInformation']['totalResults']) > 0:
            # Do not assume 'items' is present just because the reported
            # total is positive.
            results = search_result.get('items', [])
        # The cache directory may not exist yet (e.g. on a fresh checkout).
        makedirs(cache_dir, exist_ok=True)
        with open(file_path, 'w') as results_file:
            json.dump(results, results_file)

    if not results:
        raise Exception('No results found.')
    return results
def main():
    """Render the Streamlit page: run a search and summarize each result.

    For every search result the link is shown, the page content is
    fetched, and a summary is displayed. Summaries are cached as JSON in
    ``summaries/<uuid5 of the link>.json`` and reused when present.

    Errors from the search or from fetching a page are shown with
    ``st.exception``. A failed search stops rendering. A failed fetch
    skips that result unless a cached summary exists. Errors raised while
    summarizing propagate to the caller.
    """
    # Function-scope import: only `getenv` is imported from `os` at file level.
    from os import makedirs

    st.title('Google Search')
    query = st.text_input('Search query')
    if not query:
        return

    try:
        results = search_results(query)
    except Exception as exception:
        st.exception(exception)
        # Without results there is nothing to iterate over.
        return

    summaries_dir = 'summaries'
    summarizer = None  # built on first use, then reused for later results

    for result in results:
        link = result['link']
        st.write(link)

        try:
            content = get_url_content(link)
        except Exception as exception:
            st.exception(exception)
            # Mark as unavailable so a previous iteration's content is
            # never summarized (and cached) under this link.
            content = None

        file_path = summaries_dir + '/' + uuid.uuid5(uuid.NAMESPACE_URL, link).hex + '.json'
        if exists(file_path):
            with open(file_path, 'r') as file:
                summary = json.load(file)
        elif content is None:
            # Nothing cached and nothing to summarize for this link.
            continue
        else:
            if summarizer is None:
                summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
            summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)
            # The cache directory may not exist yet.
            makedirs(summaries_dir, exist_ok=True)
            with open(file_path, 'w') as file:
                json.dump(summary, file)

        for sentence in summary:
            st.write(sentence['summary_text'])
# Script entry point: only start the app when this file is run directly.
if __name__ == '__main__':
    main()