# Spaces: Sleeping
import logging
import os
import tempfile
from typing import List

import streamlit as st
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import pipeline
# Logging: configure the root logger at INFO level and keep a module-scoped
# logger for this app's own messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face model identifiers.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # read by load_embeddings()
DEFAULT_MODEL = "facebook/bart-large-cnn"  # pre-filled value of the sidebar model box
@st.cache_resource
def _cached_embeddings():
    """Build the embedding model once and reuse it across Streamlit reruns.

    Kept separate from ``load_embeddings`` so that only successful loads end
    up in the cache: a failed load raises out of this call instead of
    storing ``None``, which lets a later rerun try again.
    """
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


def load_embeddings():
    """Load and cache the embedding model.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for ``EMBEDDING_MODEL``, or
        ``None`` if loading failed. A failure is logged and also shown to
        the user with ``st.error``.
    """
    try:
        return _cached_embeddings()
    except Exception as e:  # UI boundary: report the failure rather than crash the app
        logger.error(f"Failed to load embeddings: {e}")
        st.error("Failed to load the embedding model. Please try again later.")
        return None
@st.cache_resource
def _cached_llm(model_name):
    """Build the text2text pipeline for ``model_name`` once per distinct name.

    The cache is keyed on ``model_name``, so switching models in the UI loads
    the new one while widget-triggered reruns reuse the already-loaded one.
    A failed load raises out of this call, so nothing is cached for it.
    """
    pipe = pipeline("text2text-generation", model=model_name, max_length=1024)
    return HuggingFacePipeline(pipeline=pipe)


def load_llm(model_name):
    """Load and cache the language model.

    Args:
        model_name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the loaded pipeline, or ``None``
        if loading failed. A failure is logged and also shown to the user
        with ``st.error``.
    """
    try:
        return _cached_llm(model_name)
    except Exception as e:  # UI boundary: report the failure rather than crash the app
        logger.error(f"Failed to load LLM: {e}")
        st.error(f"Failed to load the model {model_name}. Please try again.")
        return None
def process_pdf(file) -> List[Document]:
    """Load the uploaded PDF and split it into overlapping text chunks.

    The upload is written to a temporary ``.pdf`` file because
    ``PyPDFLoader`` is given a file path. The temporary file is always
    removed before returning, whether or not processing succeeded.

    Args:
        file: Uploaded file object; only its ``getvalue()`` method is used,
            and its result is written to disk as the PDF bytes.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter``
        (``chunk_size=1000``, ``chunk_overlap=200``), or an empty list if
        anything failed. A failure is logged and shown with ``st.error``.
    """
    temp_file_path = None
    try:
        # delete=False: the file must outlive this ``with`` so the loader can
        # reopen it by path; it is removed explicitly in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path before writing so a failed write is cleaned up too.
            temp_file_path = temp_file.name
            temp_file.write(file.getvalue())
        pages = PyPDFLoader(file_path=temp_file_path).load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        return text_splitter.split_documents(pages)
    except Exception as e:  # UI boundary: report the failure rather than crash the app
        logger.error(f"Error processing PDF: {e}")
        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
        return []
    finally:
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                # Best effort: a leftover temp file must not mask the real result.
                logger.warning(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")
def create_vector_store(documents: List[Document], embeddings):
    """Build a FAISS vector store from the document chunks.

    Args:
        documents: Chunks to index.
        embeddings: Embedding model handed to ``FAISS.from_documents``.

    Returns:
        The FAISS store, or ``None`` when construction raised. The error is
        logged and shown to the user with ``st.error``.
    """
    try:
        store = FAISS.from_documents(documents, embeddings)
    except Exception as exc:
        logger.error(f"Error creating vector store: {exc}")
        st.error("Failed to create the vector store. Please try again.")
        store = None
    return store
# Prompt for the "stuff" summarize chain. ``{text}`` is the only template
# variable; the bracketed [TOPIC] / [FOCUS_REGION] / [amount] markers are plain
# text that is sent to the model as-is.
# NOTE(review): the emoji were restored from mis-encoded text. The money bag and
# red circle decode unambiguously; the pushpin bullet is a best guess because
# its distinguishing bytes were lost. Confirm the intended glyph.
_SUMMARY_PROMPT_TEMPLATE = """
You are an AI specialized in summarizing comprehensive reports with a focus on funding, finances, and global comparisons. Given the detailed report content below, generate a concise and structured summary using bullet points and emojis. The summary should highlight key funding figures, financial data, budget allocations, comparisons between regions, and notable insights about [FOCUS_REGION]'s role in the global context of [TOPIC].
Report Content:
{text}
Your summary should follow this structure:
Summary:
💰 [TOPIC] Overview for [FOCUS_REGION]:
🔴 [FOCUS_REGION]'s Position in Global [TOPIC]:
📌 Total investment/funding: [amount]
📌 Breakdown of funding sources (e.g., government, private sector)
📌 [FOCUS_REGION]'s ranking in global investment
📌 Key statistics and figures
🔴 Financial Impact and Projections:
📌 Expected ROI or economic benefits
📌 Financial milestones or targets
📌 Impact on relevant areas
🔴 Global Comparison:
📌 [List of relevant countries/regions with their financial figures]
📌 Comparative analysis of [FOCUS_REGION] vs other major players
🔴 Budget Analysis:
📌 Major budget items
📌 Key budget allocations
📌 Year-over-year budget changes
📌 Comparison to industry benchmarks
🔴 Funding Strategies:
📌 Key funding mechanisms (e.g., grants, loans, public-private partnerships)
📌 Innovative financing approaches
🔴 Progress and Significance:
📌 Key achievements or milestones
📌 [1-2 concluding points about [FOCUS_REGION]'s role or significance in [TOPIC]]
Please ensure the summary is concise, informative, and easy to read at a glance. Use precise figures where available and highlight any significant financial trends or insights. The summary should provide a comprehensive overview of both the financial aspects and the broader context of [TOPIC] in [FOCUS_REGION].
"""


def summarize_report(documents: List[Document], llm) -> str:
    """Summarize the report using the loaded model.

    All chunks are placed into one prompt (``chain_type="stuff"``) built from
    ``_SUMMARY_PROMPT_TEMPLATE`` and sent to ``llm`` in a single call.

    Args:
        documents: Report chunks to summarize.
        llm: Language model handed to ``load_summarize_chain``.

    Returns:
        The chain's ``output_text``, or an empty string when there is nothing
        to summarize or the chain raised. A chain failure is logged and
        shown to the user with ``st.error``.
    """
    if not documents:
        # Nothing to summarize: skip the model call instead of prompting it
        # with an empty report.
        return ""
    try:
        prompt = PromptTemplate.from_template(_SUMMARY_PROMPT_TEMPLATE)
        chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        # Name the input key explicitly rather than relying on the chain
        # mapping a bare list onto its single input key.
        result = chain.invoke({"input_documents": documents})
        return result["output_text"]
    except Exception as e:  # UI boundary: report the failure rather than crash the app
        logger.error(f"Error summarizing report: {e}")
        st.error("Failed to summarize the report. Please try again.")
        return ""
def main():
    """Render the Report Summarizer page.

    Flow: read the model name and PDF from the sidebar, load the LLM and the
    embedding model, chunk the PDF, build a vector store, and once the
    "Summarize" button is pressed, generate and display the summary. Each
    stage stops the run early when its result is missing.
    """
    st.title("Report Summarizer")

    # Sidebar inputs.
    sidebar = st.sidebar
    model_option = sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
    uploaded_file = sidebar.file_uploader("Upload your Report", type="pdf")

    # Both models are needed before anything else can happen; the loaders
    # have already reported any failure to the user.
    llm = load_llm(model_option)
    embeddings = load_embeddings()
    if not (llm and embeddings):
        return

    if not uploaded_file:
        return

    with st.spinner("Processing PDF..."):
        documents = process_pdf(uploaded_file)
    if not documents:
        return

    with st.spinner("Creating vector store..."):
        db = create_vector_store(documents, embeddings)
    # The button is only rendered once the vector store exists.
    if not db or not st.button("Summarize"):
        return

    with st.spinner(f"Generating structured summary using {model_option}..."):
        summary = summarize_report(documents, llm)
    if not summary:
        st.warning("Failed to generate summary. Please try again.")
        return

    st.subheader("Structured Summary:")
    st.markdown(summary)


if __name__ == "__main__":
    main()