Kathirsci committed on
Commit
d061dc5
·
verified ·
1 Parent(s): fae072e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -24
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import tempfile
3
  import logging
 
4
  from typing import List
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.embeddings import HuggingFaceEmbeddings
@@ -20,7 +21,10 @@ logger = logging.getLogger(__name__)
20
  EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
21
  DEFAULT_MODEL = "llava-v1.6-mistral-7b-hf"
22
 
23
- @st.cache_resource
 
 
 
24
  def load_embeddings():
25
  """Load and cache the embedding model."""
26
  try:
@@ -30,7 +34,7 @@ def load_embeddings():
30
  st.error("Failed to load the embedding model. Please try again later.")
31
  return None
32
 
33
- @st.cache_resource
34
  def load_llm(model_name):
35
  """Load and cache the language model."""
36
  try:
@@ -50,6 +54,12 @@ def process_pdf(file) -> List[Document]:
50
 
51
  loader = PyPDFLoader(file_path=temp_file_path)
52
  pages = loader.load()
 
 
 
 
 
 
53
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
54
  documents = text_splitter.split_documents(pages)
55
  return documents
@@ -72,27 +82,27 @@ def summarize_report(documents: List[Document], llm) -> str:
72
  try:
73
  prompt_template = """
74
  <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
75
- *Instructions:*
76
- - Analyze the provided context and input carefully.
77
- - Identify and highlight the key points, main arguments, and important details.
78
- - Format the summary using Markdown for clarity:
79
- - Use # for main headers and ## for subheaders.
80
- - Use **text** for important terms or concepts.
81
- - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
82
- - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
83
- *Example Summary Format:*
84
- # Overview
85
- *Document Title:* Technical Analysis Report
86
- *Summary:*
87
- The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
88
- # Key Findings
89
- - *Finding 1:* Description of finding 1.
90
- - *Finding 2:* Description of finding 2.
91
- # Conclusion
92
- The analysis highlights the significant advancements and future directions for AI technology.
93
- *Your Response:* [/INST]</s> {input}
94
- Context: {context}
95
- """
96
 
97
  prompt = PromptTemplate.from_template(prompt_template)
98
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
@@ -107,7 +117,7 @@ def summarize_report(documents: List[Document], llm) -> str:
107
  def main():
108
  st.title("Report Summarizer")
109
 
110
- model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
111
 
112
  uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
113
 
 
1
  import streamlit as st
2
  import tempfile
3
  import logging
4
+ import time
5
  from typing import List
6
  from langchain_community.document_loaders import PyPDFLoader
7
  from langchain.embeddings import HuggingFaceEmbeddings
 
21
  EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
22
  DEFAULT_MODEL = "llava-v1.6-mistral-7b-hf"
23
 
24
+ # Cache expiration time for models, in seconds (adjust as needed)
25
+ MODEL_CACHE_EXPIRATION = 3600
26
+
27
+ @st.cache_resource(ttl=MODEL_CACHE_EXPIRATION)
28
  def load_embeddings():
29
  """Load and cache the embedding model."""
30
  try:
 
34
  st.error("Failed to load the embedding model. Please try again later.")
35
  return None
36
 
37
+ @st.cache_resource(ttl=MODEL_CACHE_EXPIRATION)
38
  def load_llm(model_name):
39
  """Load and cache the language model."""
40
  try:
 
54
 
55
  loader = PyPDFLoader(file_path=temp_file_path)
56
  pages = loader.load()
57
+
58
+ # Stop early if the loader returned no pages
59
+ if not pages:
60
+ st.warning("No text extracted from the PDF. Please ensure it's a valid PDF file.")
61
+ return []
62
+
63
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
64
  documents = text_splitter.split_documents(pages)
65
  return documents
 
82
  try:
83
  prompt_template = """
84
  <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
85
+ *Instructions:*
86
+ - Analyze the provided context and input carefully.
87
+ - Identify and highlight the key points, main arguments, and important details.
88
+ - Format the summary using Markdown for clarity:
89
+ - Use # for main headers and ## for subheaders.
90
+ - Use **text** for important terms or concepts.
91
+ - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
92
+ - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
93
+ *Example Summary Format:*
94
+ # Overview
95
+ *Document Title:* Technical Analysis Report
96
+ *Summary:*
97
+ The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
98
+ # Key Findings
99
+ - *Finding 1:* Description of finding 1.
100
+ - *Finding 2:* Description of finding 2.
101
+ # Conclusion
102
+ The analysis highlights the significant advancements and future directions for AI technology.
103
+ *Your Response:* [/INST]</s> {input}
104
+ Context: {context}
105
+ """
106
 
107
  prompt = PromptTemplate.from_template(prompt_template)
108
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
 
117
  def main():
118
  st.title("Report Summarizer")
119
 
120
+ model_option = st.sidebar.selectbox("Choose a model", options=["llava-v1.6-mistral-7b-hf", "Your_Own_Model"])
121
 
122
  uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
123