"""Disk-backed cache of page summaries keyed by page id."""

from typing import Any, List, Tuple

from diskcache import Cache

from utils.data_proto import Page
from utils.summary_utils import summarize

# NOTE(review): the directory is an absolute path under the filesystem root
# ('/.cache/...'), not under the user's home directory -- confirm that this
# is intended.
cache = Cache(directory='/.cache/tmp/summary', size_limit=int(1e9))  # 1GB


def get_summarize_from_cache(
    pages: List[Page],
) -> Tuple[List[Any], List[Page]]:
    """Split *pages* into cached summaries and pages that still need one.

    Each page is looked up in the module-level ``cache`` under
    ``page['id']``.

    Args:
        pages: Pages to look up; each must support ``page['id']``.

    Returns:
        A ``(pages_summaries, uncached_pages)`` tuple.
        ``pages_summaries`` holds the cached value for every page that
        had a non-``None`` cache entry, in input order.
        ``uncached_pages`` holds the remaining pages, in input order.
        Because misses are skipped, ``pages_summaries`` is not
        index-aligned with ``pages``.
    """
    pages_summaries = []
    uncached_pages = []
    for page in pages:
        page_id = page['id']
        summary = cache.get(page_id)
        # A stored value of None is indistinguishable from a cache miss
        # here, so such a page is reported as uncached.
        if summary is not None:
            pages_summaries.append(summary)
        else:
            uncached_pages.append(page)
    return pages_summaries, uncached_pages


def summarize_un_cache_page(pages: List[Page]) -> None:
    """Summarize every page in *pages* and store each result in the cache.

    For each page, ``summarize(page['id'], page['text'])`` is called and
    its result is written to the module-level ``cache`` under the page id.
    A progress line is printed per page. Nothing is returned.

    Args:
        pages: Pages to summarize; each must support ``page['id']`` and
            ``page['text']``.
    """
    for page in pages:
        page_id = page['id']
        summary = summarize(page_id, page['text'])
        cache.set(page_id, summary)
        print(f'processed page {page_id}')