Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Files Community

Kathirsci committed on Sep 25, 2024

Commit

d061dc5

verified ·

1 Parent(s): fae072e

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -24

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import tempfile
 import logging
 from typing import List
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -20,7 +21,10 @@ logger = logging.getLogger(__name__)
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 DEFAULT_MODEL = "llava-v1.6-mistral-7b-hf"
-@st.cache_resource
 def load_embeddings():
     """Load and cache the embedding model."""
     try:
@@ -30,7 +34,7 @@ def load_embeddings():
         st.error("Failed to load the embedding model. Please try again later.")
         return None
-@st.cache_resource
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
@@ -50,6 +54,12 @@ def process_pdf(file) -> List[Document]:
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
         documents = text_splitter.split_documents(pages)
         return documents
@@ -72,27 +82,27 @@ def summarize_report(documents: List[Document], llm) -> str:
     try:
         prompt_template = """
         <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
-    *Instructions:*
-    - Analyze the provided context and input carefully.
-    - Identify and highlight the key points, main arguments, and important details.
-    - Format the summary using Markdown for clarity:
-        - Use # for main headers and ## for subheaders.
-        - Use **text** for important terms or concepts.
-        - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
-    - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
-    *Example Summary Format:*
-    # Overview
-    *Document Title:* Technical Analysis Report
-    *Summary:*
-    The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
-    # Key Findings
-    - *Finding 1:* Description of finding 1.
-    - *Finding 2:* Description of finding 2.
-    # Conclusion
-    The analysis highlights the significant advancements and future directions for AI technology.
-    *Your Response:* [/INST]</s> {input}
-    Context: {context}
-    """
         prompt = PromptTemplate.from_template(prompt_template)
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
@@ -107,7 +117,7 @@ def summarize_report(documents: List[Document], llm) -> str:
 def main():
     st.title("Report Summarizer")
-    model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")

 import streamlit as st
 import tempfile
 import logging
+import time
 from typing import List
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceEmbeddings
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 DEFAULT_MODEL = "llava-v1.6-mistral-7b-hf"
+# Cache expiration time for models, in seconds (adjust as needed)
+MODEL_CACHE_EXPIRATION = 3600
+@st.cache_resource(ttl=MODEL_CACHE_EXPIRATION)
 def load_embeddings():
     """Load and cache the embedding model."""
     try:
         st.error("Failed to load the embedding model. Please try again later.")
         return None
+@st.cache_resource(ttl=MODEL_CACHE_EXPIRATION)
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
+        # Check for empty documents
+        if not pages:
+            st.warning("No text extracted from the PDF. Please ensure it's a valid PDF file.")
+            return []
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
         documents = text_splitter.split_documents(pages)
         return documents
     try:
         prompt_template = """
         <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
+        *Instructions:*
+        - Analyze the provided context and input carefully.
+        - Identify and highlight the key points, main arguments, and important details.
+        - Format the summary using Markdown for clarity:
+          - Use # for main headers and ## for subheaders.
+          - Use **text** for important terms or concepts.
+          - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
+        - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
+        *Example Summary Format:*
+        # Overview
+        *Document Title:* Technical Analysis Report
+        *Summary:*
+        The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
+        # Key Findings
+        - *Finding 1:* Description of finding 1.
+        - *Finding 2:* Description of finding 2.
+        # Conclusion
+        The analysis highlights the significant advancements and future directions for AI technology.
+        *Your Response:* [/INST]</s> {input}
+        Context: {context}
+        """
         prompt = PromptTemplate.from_template(prompt_template)
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
 def main():
     st.title("Report Summarizer")
+    model_option = st.sidebar.selectbox("Choose a model", options=["llava-v1.6-mistral-7b-hf", "Your_Own_Model"])
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")