Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Community

Kathirsci committed on Sep 25, 2024

Commit

ec8e5d1

verified ·

1 Parent(s): a8c600f

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -31

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import streamlit as st
 import tempfile
 import logging
-from typing import List
 import torch
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -20,25 +20,24 @@ logger = logging.getLogger(__name__)
 # Constants
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 DEFAULT_MODEL = "distilgpt2"
-DEFAULT_MAX_LENGTH = 1024  # Increased default max length
 # Check for GPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
 st.sidebar.write(f"Using device: {device}")
-@st.cache_resource
-def load_embeddings():
-    """Load and cache the embedding model."""
     try:
-        return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
     except Exception as e:
         logger.error(f"Failed to load embeddings: {e}")
-        st.error("Failed to load the embedding model. Please try again later.")
         return None
-@st.cache_resource
-def load_llm(model_name, max_length):
-    """Load and cache the language model."""
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(model_name)
@@ -46,10 +45,9 @@ def load_llm(model_name, max_length):
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
-        st.error(f"Failed to load the model {model_name}. Please try another model or check your internet connection.")
         return None
-def process_pdf(file) -> List[Document]:
     """Process the uploaded PDF file."""
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
@@ -63,55 +61,50 @@ def process_pdf(file) -> List[Document]:
         return documents
     except Exception as e:
         logger.error(f"Error processing PDF: {e}")
-        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
-        return []
-def create_vector_store(documents: List[Document], embeddings):
     """Create the vector store."""
     try:
         return FAISS.from_documents(documents, embeddings)
     except Exception as e:
         logger.error(f"Error creating vector store: {e}")
-        st.error("Failed to create the vector store. Please try again.")
         return None
-def summarize_report(documents: List[Document], llm) -> str:
     """Summarize the report using the loaded model."""
     try:
-        prompt_template = """
-        Summarize the following text in a clear and concise manner. Focus on the main points and key details:
-        {text}
         Summary:
         """
         prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
-        summary = chain.run(documents)
         return summary
     except Exception as e:
         logger.error(f"Error summarizing report: {e}")
-        st.error("Failed to summarize the report. Please try again.")
-        return ""
 def main():
     st.title("Report Summarizer")
     model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
-    max_length = st.sidebar.slider("Max summary length", min_value=256, max_value=2048, value=DEFAULT_MAX_LENGTH, step=128)
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
-    llm = load_llm(model_option, max_length)
     if not llm:
         st.error(f"Failed to load the model {model_option}. Please try another model.")
         return
-    embeddings = load_embeddings()
     if not embeddings:
-        st.error("Failed to load embeddings. Please try again later.")
         return
     if uploaded_file:
@@ -123,8 +116,15 @@ def main():
                 db = create_vector_store(documents, embeddings)
             if db and st.button("Summarize"):
                 with st.spinner(f"Generating summary using {model_option}..."):
-                    summary = summarize_report(documents, llm)
                     if summary:
                         st.subheader("Summary:")
@@ -133,4 +133,4 @@ def main():
                         st.warning("Failed to generate summary. Please try again.")
 if __name__ == "__main__":
-    main()

 import streamlit as st
 import tempfile
 import logging
+from typing import List, Optional
 import torch
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
 # Constants
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 DEFAULT_MODEL = "distilgpt2"
+MAX_LENGTH_FRACTION = 0.2  # Set max_length to 20% of input length
 # Check for GPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
 st.sidebar.write(f"Using device: {device}")
+@st.cache_data
+def load_embeddings(model_name: str) -> Optional[HuggingFaceEmbeddings]:
+    """Load the embedding model."""
     try:
+        return HuggingFaceEmbeddings(model_name=model_name)
     except Exception as e:
         logger.error(f"Failed to load embeddings: {e}")
         return None
+@st.cache_data
+def load_llm(model_name: str, max_length: int) -> Optional[HuggingFacePipeline]:
+    """Load the language model."""
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(model_name)
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
         return None
+def process_pdf(file) -> Optional[List[Document]]:
     """Process the uploaded PDF file."""
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
         return documents
     except Exception as e:
         logger.error(f"Error processing PDF: {e}")
+        return None
+def create_vector_store(documents: List[Document], embeddings: HuggingFaceEmbeddings) -> Optional[FAISS]:
     """Create the vector store."""
     try:
         return FAISS.from_documents(documents, embeddings)
     except Exception as e:
         logger.error(f"Error creating vector store: {e}")
         return None
+def summarize_report(documents: List[Document], llm: HuggingFacePipeline, max_length: int, summary_style: str) -> Optional[str]:
     """Summarize the report using the loaded model."""
     try:
+        prompt_template = f"""
+        Summarize the following text in a {summary_style} manner. Focus on the main points and key details:
+        {{text}}
         Summary:
         """
         prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
+        summary = chain.run(documents, max_length=max_length)
         return summary
     except Exception as e:
         logger.error(f"Error summarizing report: {e}")
+        return None
 def main():
     st.title("Report Summarizer")
     model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
+    summary_style = st.sidebar.selectbox("Summary style", options=["clear and concise", "formal", "informal", "bullet points"])
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
+    llm = load_llm(model_option, 1024)  # Load the model with a default max_length
     if not llm:
         st.error(f"Failed to load the model {model_option}. Please try another model.")
         return
+    embeddings = load_embeddings(EMBEDDING_MODEL)
     if not embeddings:
+        st.error(f"Failed to load embeddings. Please try again later.")
         return
     if uploaded_file:
                 db = create_vector_store(documents, embeddings)
             if db and st.button("Summarize"):
+                # Calculate max_length based on input text
+                input_length = sum([len(doc.page_content.split()) for doc in documents])
+                max_length = int(input_length * MAX_LENGTH_FRACTION)
+                # Reload the model with the calculated max_length
+                llm = load_llm(model_option, max_length)
                 with st.spinner(f"Generating summary using {model_option}..."):
+                    summary = summarize_report(documents, llm, max_length, summary_style)
                     if summary:
                         st.subheader("Summary:")
                         st.warning("Failed to generate summary. Please try again.")
 if __name__ == "__main__":
+    main()