Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Files Community

Kathirsci committed on Aug 19, 2024

Commit

b47040f

verified ·

1 Parent(s): dc586cc

Create app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import streamlit as st
+import tempfile
+import logging
+from typing import List
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.llms import HuggingFacePipeline
+from langchain.chains.summarize import load_summarize_chain
+from langchain.schema import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.prompts import PromptTemplate
+from transformers import pipeline
+# Configure root logging at INFO level and create this module's logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Hugging Face model identifiers: the embedding model and the default text2text LLM
+EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
+DEFAULT_MODEL = "google/flan-t5-base"
+@st.cache_resource
+def load_embeddings():
+    """Load and cache the embedding model."""
+    try:
+        return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+    except Exception as e:
+        logger.error(f"Failed to load embeddings: {e}")
+        st.error("Failed to load the embedding model. Please try again later.")
+        return None
+@st.cache_resource
+def load_llm(model_name):
+    """Load and cache the language model."""
+    try:
+        pipe = pipeline("text2text-generation", model=model_name, max_length=512)
+        return HuggingFacePipeline(pipeline=pipe)
+    except Exception as e:
+        logger.error(f"Failed to load LLM: {e}")
+        st.error(f"Failed to load the model {model_name}. Please try again.")
+        return None
+def process_pdf(file) -> List[Document]:
+    """Process the uploaded PDF file."""
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            temp_file.write(file.getvalue())
+            temp_file_path = temp_file.name
+        loader = PyPDFLoader(file_path=temp_file_path)
+        pages = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        documents = text_splitter.split_documents(pages)
+        return documents
+    except Exception as e:
+        logger.error(f"Error processing PDF: {e}")
+        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
+        return []
+def create_vector_store(documents: List[Document], embeddings):
+    """Create the vector store."""
+    try:
+        return FAISS.from_documents(documents, embeddings)
+    except Exception as e:
+        logger.error(f"Error creating vector store: {e}")
+        st.error("Failed to create the vector store. Please try again.")
+        return None
+def summarize_report(documents: List[Document], llm) -> str:
+    """Summarize the report using the loaded model."""
+    try:
+        prompt_template = """
+        You are an AI specialized in summarizing comprehensive reports with a focus on funding, finances, and global comparisons. Given the detailed report content below, generate a concise and structured summary using bullet points and emojis. The summary should highlight key funding figures, financial data, budget allocations, comparisons between regions, and notable insights about [FOCUS_REGION]'s role in the global context of [TOPIC].
+        Report Content:
+        {text}
+        Your summary should follow this structure:
+        Summary:
+        💰 [TOPIC] Overview for [FOCUS_REGION]:
+        🔴 [FOCUS_REGION]'s Position in Global [TOPIC]:
+        📍 Total investment/funding: [amount]
+        📍 Breakdown of funding sources (e.g., government, private sector)
+        📍 [FOCUS_REGION]'s ranking in global investment
+        📍 Key statistics and figures
+        🔴 Financial Impact and Projections:
+        📍 Expected ROI or economic benefits
+        📍 Financial milestones or targets
+        📍 Impact on relevant areas
+        🔴 Global Comparison:
+        📍 [List of relevant countries/regions with their financial figures]
+        📍 Comparative analysis of [FOCUS_REGION] vs other major players
+        🔴 Budget Analysis:
+        📍 Major budget items
+        📍 Key budget allocations
+        📍 Year-over-year budget changes
+        📍 Comparison to industry benchmarks
+        🔴 Funding Strategies:
+        📍 Key funding mechanisms (e.g., grants, loans, public-private partnerships)
+        📍 Innovative financing approaches
+        🔴 Progress and Significance:
+        📍 Key achievements or milestones
+        📍 [1-2 concluding points about [FOCUS_REGION]'s role or significance in [TOPIC]]
+        Please ensure the summary is concise, informative, and easy to read at a glance. Use precise figures where available and highlight any significant financial trends or insights. The summary should provide a comprehensive overview of both the financial aspects and the broader context of [TOPIC] in [FOCUS_REGION].
+        """
+        prompt = PromptTemplate.from_template(prompt_template)
+        chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
+        summary = chain.invoke(documents)
+        return summary['output_text']
+    except Exception as e:
+        logger.error(f"Error summarizing report: {e}")
+        st.error("Failed to summarize the report. Please try again.")
+        return ""
+def main():
+    st.title("Report Summarizer")
+    model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
+    uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
+    llm = load_llm(model_option)
+    embeddings = load_embeddings()
+    if not llm or not embeddings:
+        return
+    if uploaded_file:
+        with st.spinner("Processing PDF..."):
+            documents = process_pdf(uploaded_file)
+        if documents:
+            with st.spinner("Creating vector store..."):
+                db = create_vector_store(documents, embeddings)
+            if db and st.button("Summarize"):
+                with st.spinner(f"Generating structured summary using {model_option}..."):
+                    summary = summarize_report(documents, llm)
+                    if summary:
+                        st.subheader("Structured Summary:")
+                        st.markdown(summary)
+                    else:
+                        st.warning("Failed to generate summary. Please try again.")
+if __name__ == "__main__":
+    main()