Kathirsci committed on
Commit
b47040f
·
verified ·
1 Parent(s): dc586cc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tempfile
3
+ import logging
4
+ from typing import List
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_community.llms import HuggingFacePipeline
9
+ from langchain.chains.summarize import load_summarize_chain
10
+ from langchain.schema import Document
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain.prompts import PromptTemplate
13
+ from transformers import pipeline
14
+
15
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model identifiers.
# EMBEDDING_MODEL is passed to HuggingFaceEmbeddings; DEFAULT_MODEL pre-fills
# the sidebar "Enter model name" field.
EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
DEFAULT_MODEL = "google/flan-t5-base"
22
+
23
@st.cache_resource
def _load_embeddings_cached():
    """Build the embedding model for ``EMBEDDING_MODEL``.

    Exceptions are deliberately not handled here: when the function raises,
    nothing is stored in the resource cache, so a later call retries the load
    instead of replaying a cached failure.
    """
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


def load_embeddings():
    """Load and cache the embedding model.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for ``EMBEDDING_MODEL``, or
        ``None`` when loading fails. On failure the error is logged and
        reported in the Streamlit UI.
    """
    try:
        return _load_embeddings_cached()
    except Exception as e:
        # Handled outside the cached function so the ``None`` result is not
        # cached and "try again later" can actually succeed.
        logger.error(f"Failed to load embeddings: {e}")
        st.error("Failed to load the embedding model. Please try again later.")
        return None
32
+
33
@st.cache_resource
def _load_llm_cached(model_name):
    """Build the text2text-generation pipeline for ``model_name``.

    Exceptions propagate to the caller; a raising call stores nothing in the
    resource cache, so a failed load is retried on the next call.
    """
    pipe = pipeline("text2text-generation", model=model_name, max_length=512)
    return HuggingFacePipeline(pipeline=pipe)


def load_llm(model_name):
    """Load and cache the language model.

    Args:
        model_name: Model identifier handed to ``transformers.pipeline``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the loaded pipeline, or ``None``
        when loading fails. On failure the error is logged and reported in
        the Streamlit UI.
    """
    try:
        return _load_llm_cached(model_name)
    except Exception as e:
        # Handled outside the cached function so a transient failure for this
        # model name is not cached as a permanent ``None``.
        logger.error(f"Failed to load LLM: {e}")
        st.error(f"Failed to load the model {model_name}. Please try again.")
        return None
43
+
44
def process_pdf(file) -> List[Document]:
    """Process the uploaded PDF file.

    The upload is written to a temporary ``.pdf`` file, loaded page by page
    with ``PyPDFLoader`` and split into overlapping chunks
    (``chunk_size=1000``, ``chunk_overlap=200``). The temporary file is
    removed before returning, whether or not processing succeeded.

    Args:
        file: Uploaded file object; only ``file.getvalue()`` is used.

    Returns:
        The list of chunked documents, or an empty list when processing
        fails (the error is logged and reported in the Streamlit UI).
    """
    import os  # local import: only needed for temp-file cleanup

    temp_file_path = None
    try:
        # delete=False so the file can be reopened by path after this handle
        # is closed; it is removed explicitly in the ``finally`` clause.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.getvalue())

        loader = PyPDFLoader(file_path=temp_file_path)
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        return text_splitter.split_documents(pages)
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
        return []
    finally:
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                # Cleanup is best-effort; never mask the real result.
                logger.warning(f"Could not remove temporary file {temp_file_path}: {cleanup_error}")
60
+
61
def create_vector_store(documents: List[Document], embeddings):
    """Build a FAISS index over ``documents`` using ``embeddings``.

    Returns the FAISS store, or ``None`` if construction raised; in that case
    the error is logged and shown in the Streamlit UI.
    """
    try:
        store = FAISS.from_documents(documents, embeddings)
    except Exception as exc:
        logger.error(f"Error creating vector store: {exc}")
        st.error("Failed to create the vector store. Please try again.")
        return None
    else:
        return store
69
+
70
# Prompt for the summarization chain. ``{text}`` is the only template variable.
# The bracketed ``[FOCUS_REGION]`` / ``[TOPIC]`` markers are not substituted by
# this code; they are sent to the model verbatim.
# NOTE(review): the emoji were mis-encoded in the source this was recovered
# from. The header (💰) and section (🔴) markers decode unambiguously; the
# sub-bullet marker lost its last byte and is reconstructed as 📍 - confirm.
_SUMMARY_PROMPT_TEMPLATE = """
You are an AI specialized in summarizing comprehensive reports with a focus on funding, finances, and global comparisons. Given the detailed report content below, generate a concise and structured summary using bullet points and emojis. The summary should highlight key funding figures, financial data, budget allocations, comparisons between regions, and notable insights about [FOCUS_REGION]'s role in the global context of [TOPIC].

Report Content:
{text}

Your summary should follow this structure:

Summary:
💰 [TOPIC] Overview for [FOCUS_REGION]:

🔴 [FOCUS_REGION]'s Position in Global [TOPIC]:
📍 Total investment/funding: [amount]
📍 Breakdown of funding sources (e.g., government, private sector)
📍 [FOCUS_REGION]'s ranking in global investment
📍 Key statistics and figures

🔴 Financial Impact and Projections:
📍 Expected ROI or economic benefits
📍 Financial milestones or targets
📍 Impact on relevant areas

🔴 Global Comparison:
📍 [List of relevant countries/regions with their financial figures]
📍 Comparative analysis of [FOCUS_REGION] vs other major players

🔴 Budget Analysis:
📍 Major budget items
📍 Key budget allocations
📍 Year-over-year budget changes
📍 Comparison to industry benchmarks

🔴 Funding Strategies:
📍 Key funding mechanisms (e.g., grants, loans, public-private partnerships)
📍 Innovative financing approaches

🔴 Progress and Significance:
📍 Key achievements or milestones
📍 [1-2 concluding points about [FOCUS_REGION]'s role or significance in [TOPIC]]

Please ensure the summary is concise, informative, and easy to read at a glance. Use precise figures where available and highlight any significant financial trends or insights. The summary should provide a comprehensive overview of both the financial aspects and the broader context of [TOPIC] in [FOCUS_REGION].
"""


def summarize_report(documents: List[Document], llm) -> str:
    """Summarize the report using the loaded model.

    Args:
        documents: Chunked report documents to summarize.
        llm: Language model passed to ``load_summarize_chain``.

    Returns:
        The chain's ``output_text``, or ``""`` when there is nothing to
        summarize or the chain fails (failures are logged and reported in
        the Streamlit UI).
    """
    if not documents:
        # Nothing to summarize; avoid prompting the model with empty content.
        logger.warning("No document chunks to summarize.")
        return ""

    try:
        prompt = PromptTemplate.from_template(_SUMMARY_PROMPT_TEMPLATE)
        # NOTE(review): chain_type="stuff" places every chunk into a single
        # prompt; long reports may exceed the model's input window - confirm
        # whether "map_reduce" is needed for the configured model.
        chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
        # Pass the documents under the chain's explicit input key rather than
        # relying on single-input coercion of a bare list.
        summary = chain.invoke({"input_documents": documents})
        return summary['output_text']
    except Exception as e:
        logger.error(f"Error summarizing report: {e}")
        st.error("Failed to summarize the report. Please try again.")
        return ""
125
+
126
def main():
    """Render the Streamlit UI and run the upload -> index -> summarize flow.

    Each stage returns early when its prerequisite is missing: models not
    loaded, no file uploaded, no chunks extracted, no vector store, or the
    "Summarize" button not pressed.
    """
    st.title("Report Summarizer")

    # Sidebar inputs.
    model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
    uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")

    # Both models are required before anything else can happen.
    llm = load_llm(model_option)
    embeddings = load_embeddings()
    if not (llm and embeddings):
        return

    if not uploaded_file:
        return

    with st.spinner("Processing PDF..."):
        documents = process_pdf(uploaded_file)
    if not documents:
        return

    with st.spinner("Creating vector store..."):
        db = create_vector_store(documents, embeddings)

    # The button is only rendered when the vector store exists
    # (short-circuit evaluation).
    if not db or not st.button("Summarize"):
        return

    with st.spinner(f"Generating structured summary using {model_option}..."):
        summary = summarize_report(documents, llm)

    if not summary:
        st.warning("Failed to generate summary. Please try again.")
        return

    st.subheader("Structured Summary:")
    st.markdown(summary)


if __name__ == "__main__":
    main()