# app.py -- Hugging Face file view (uploader: Kathirsci; commit cdde9f4 "Update app.py", verified; 6.34 kB)
import logging
import os
import tempfile
from typing import List

import streamlit as st
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import pipeline
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model identifiers.
# EMBEDDING_MODEL is the name handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# DEFAULT_MODEL pre-fills the "Enter model name" sidebar field.
DEFAULT_MODEL = "facebook/bart-large-cnn"
@st.cache_resource
def _build_embeddings():
    """Construct the embedding model for ``EMBEDDING_MODEL``.

    Exceptions are deliberately NOT handled here: ``st.cache_resource``
    stores whatever this function returns, so swallowing the error and
    returning ``None`` would cache the failure and prevent any retry.
    """
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


def load_embeddings():
    """Load the embedding model, reusing the cached instance when available.

    Returns:
        The ``HuggingFaceEmbeddings`` instance, or ``None`` if loading
        failed. On failure the error is logged and shown in the UI, and
        nothing is cached, so the next call attempts the load again.
    """
    try:
        return _build_embeddings()
    except Exception as e:
        logger.exception("Failed to load embeddings: %s", e)
        st.error("Failed to load the embedding model. Please try again later.")
        return None
@st.cache_resource
def _build_llm(model_name):
    """Build the LangChain LLM wrapper around a transformers pipeline.

    Exceptions are deliberately NOT handled here: ``st.cache_resource``
    stores whatever this function returns (keyed by ``model_name``), so
    returning ``None`` on error would cache the failure for that name.
    """
    pipe = pipeline("text2text-generation", model=model_name, max_length=1024)
    return HuggingFacePipeline(pipeline=pipe)


def load_llm(model_name):
    """Load the language model, reusing the cached instance when available.

    Args:
        model_name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        A ``HuggingFacePipeline`` instance, or ``None`` if loading failed.
        On failure the error is logged and shown in the UI, and nothing is
        cached, so retrying with the same name attempts the load again.
    """
    try:
        return _build_llm(model_name)
    except Exception as e:
        logger.exception("Failed to load LLM: %s", e)
        st.error(f"Failed to load the model {model_name}. Please try again.")
        return None
def process_pdf(file) -> List[Document]:
    """Load an uploaded PDF and split it into overlapping text chunks.

    The upload is written to a temporary ``.pdf`` file because
    ``PyPDFLoader`` is given a file path. The temporary file is always
    removed afterwards, whether or not loading succeeded.

    Args:
        file: Uploaded file object exposing ``getvalue()`` (the PDF bytes).

    Returns:
        The chunked documents (chunk_size=1000, chunk_overlap=200), or an
        empty list if the PDF could not be processed or produced no chunks.
    """
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.getvalue())
        # The file is closed (and therefore flushed) before the loader opens it.
        pages = PyPDFLoader(file_path=temp_file_path).load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        documents = text_splitter.split_documents(pages)
        if not documents:
            # Without this the caller would silently render nothing.
            logger.warning("PDF produced no text chunks")
            st.warning("No extractable text was found in the PDF.")
        return documents
    except Exception as e:
        logger.exception("Error processing PDF: %s", e)
        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
        return []
    finally:
        # delete=False means nobody else cleans this up; without this a new
        # temp file would be left behind on every call.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(
                    "Could not remove temporary file %s: %s",
                    temp_file_path,
                    cleanup_error,
                )
def create_vector_store(documents: List[Document], embeddings):
    """Build a FAISS index from the document chunks.

    Args:
        documents: Chunks to index.
        embeddings: Embedding model passed to ``FAISS.from_documents``.

    Returns:
        The FAISS store, or ``None`` when construction raised; in that
        case the error is logged and reported in the UI.
    """
    try:
        store = FAISS.from_documents(documents, embeddings)
    except Exception as e:
        logger.error(f"Error creating vector store: {e}")
        st.error("Failed to create the vector store. Please try again.")
        store = None
    return store
# Prompt for the summarization chain. ``{text}`` is the only template
# variable; the square-bracket tokens ([TOPIC], [FOCUS_REGION], [amount])
# are literal text handed to the model, not substituted by PromptTemplate.
_SUMMARY_PROMPT_TEMPLATE = """
You are an AI specialized in summarizing comprehensive reports with a focus on funding, finances, and global comparisons. Given the detailed report content below, generate a concise and structured summary using bullet points and emojis. The summary should highlight key funding figures, financial data, budget allocations, comparisons between regions, and notable insights about [FOCUS_REGION]'s role in the global context of [TOPIC].
Report Content:
{text}
Your summary should follow this structure:
Summary:
💰 [TOPIC] Overview for [FOCUS_REGION]:
🔴 [FOCUS_REGION]'s Position in Global [TOPIC]:
📍 Total investment/funding: [amount]
📍 Breakdown of funding sources (e.g., government, private sector)
📍 [FOCUS_REGION]'s ranking in global investment
📍 Key statistics and figures
🔴 Financial Impact and Projections:
📍 Expected ROI or economic benefits
📍 Financial milestones or targets
📍 Impact on relevant areas
🔴 Global Comparison:
📍 [List of relevant countries/regions with their financial figures]
📍 Comparative analysis of [FOCUS_REGION] vs other major players
🔴 Budget Analysis:
📍 Major budget items
📍 Key budget allocations
📍 Year-over-year budget changes
📍 Comparison to industry benchmarks
🔴 Funding Strategies:
📍 Key funding mechanisms (e.g., grants, loans, public-private partnerships)
📍 Innovative financing approaches
🔴 Progress and Significance:
📍 Key achievements or milestones
📍 [1-2 concluding points about [FOCUS_REGION]'s role or significance in [TOPIC]]
Please ensure the summary is concise, informative, and easy to read at a glance. Use precise figures where available and highlight any significant financial trends or insights. The summary should provide a comprehensive overview of both the financial aspects and the broader context of [TOPIC] in [FOCUS_REGION].
"""


def summarize_report(documents: List[Document], llm) -> str:
    """Summarize the report chunks with the given LLM.

    Builds a LangChain summarize chain (``chain_type="stuff"``) around
    the structured finance-focused prompt and runs it over ``documents``.

    Args:
        documents: Document chunks to summarize.
        llm: LangChain-compatible language model used by the chain.

    Returns:
        The chain's ``output_text``, or an empty string when there is
        nothing to summarize or the chain raised (the error is logged and
        reported in the UI).
    """
    if not documents:
        logger.warning("No documents supplied; skipping summarization.")
        return ""
    try:
        prompt = PromptTemplate.from_template(_SUMMARY_PROMPT_TEMPLATE)
        # NOTE(review): with chain_type="stuff" a long report may exceed the
        # model's input limit -- confirm against the models in use.
        chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        # Name the input key explicitly instead of relying on the chain
        # mapping a bare list onto its single input key.
        result = chain.invoke({"input_documents": documents})
        return result["output_text"]
    except Exception as e:
        logger.exception("Error summarizing report: %s", e)
        st.error("Failed to summarize the report. Please try again.")
        return ""
def main():
    """Render the Report Summarizer page.

    Flow: read the model name and PDF upload from the sidebar, load the
    LLM and embeddings, chunk the PDF, build the vector store, and, once
    the "Summarize" button is pressed, display the generated summary.
    Each stage returns early when its prerequisite is missing.
    """
    st.title("Report Summarizer")

    # Sidebar inputs.
    model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
    uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")

    # Both models are required; the loaders report their own errors.
    llm = load_llm(model_option)
    embeddings = load_embeddings()
    if not (llm and embeddings):
        return

    if not uploaded_file:
        return

    with st.spinner("Processing PDF..."):
        documents = process_pdf(uploaded_file)
    if not documents:
        return

    # NOTE(review): the vector store only gates the button below; the
    # summary itself is generated from `documents`, not from `db`.
    with st.spinner("Creating vector store..."):
        db = create_vector_store(documents, embeddings)
    if not db:
        return

    if not st.button("Summarize"):
        return

    with st.spinner(f"Generating structured summary using {model_option}..."):
        summary = summarize_report(documents, llm)

    if not summary:
        st.warning("Failed to generate summary. Please try again.")
        return

    st.subheader("Structured Summary:")
    st.markdown(summary)


if __name__ == "__main__":
    main()