Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Community

Kathirsci committed on Sep 25, 2024

Commit

ffb4b75

verified ·

1 Parent(s): 431a644

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -39

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from langchain.chains.summarize import load_summarize_chain
 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.prompts import PromptTemplate
-from transformers import pipeline
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -19,11 +19,11 @@ logger = logging.getLogger(__name__)
 # Constants
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
-DEFAULT_MODEL = "microsoft/phi-2"
 # Check for GPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
 @st.cache_resource
 def load_embeddings():
@@ -39,7 +39,9 @@ def load_embeddings():
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
-        pipe = pipeline("text-generation", model=model_name, device=device, max_length=1024)
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
@@ -55,13 +57,7 @@ def process_pdf(file) -> List[Document]:
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
-        # Check for empty documents
-        if not pages:
-            st.warning("No text extracted from the PDF. Please ensure it's a valid PDF file.")
-            return []
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
         documents = text_splitter.split_documents(pages)
         return documents
     except Exception as e:
@@ -82,30 +78,14 @@ def summarize_report(documents: List[Document], llm) -> str:
     """Summarize the report using the loaded model."""
     try:
         prompt_template = """
-        <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
-        *Instructions:*
-        - Analyze the provided context and input carefully.
-        - Identify and highlight the key points, main arguments, and important details.
-        - Format the summary using Markdown for clarity:
-          - Use # for main headers and ## for subheaders.
-          - Use **text** for important terms or concepts.
-          - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
-        - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
-        *Example Summary Format:*
-        # Overview
-        *Document Title:* Technical Analysis Report
-        *Summary:*
-        The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
-        # Key Findings
-        - *Finding 1:* Description of finding 1.
-        - *Finding 2:* Description of finding 2.
-        # Conclusion
-        The analysis highlights the significant advancements and future directions for AI technology.
-        *Your Response:* [/INST]</s> {input}
-        Context: {context}
         """
-        prompt = PromptTemplate.from_template(prompt_template)
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
         summary = chain.run(documents)
         return summary
@@ -118,14 +98,18 @@ def summarize_report(documents: List[Document], llm) -> str:
 def main():
     st.title("Report Summarizer")
-    model_option = st.sidebar.selectbox("Llm Model", options=["ChocoWu/nextgpt_7b_tiva_v0", "google-t5/t5-11b"])
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
     llm = load_llm(model_option)
-    embeddings = load_embeddings()
-    if not llm or not embeddings:
         return
     if uploaded_file:
@@ -137,12 +121,12 @@ def main():
                 db = create_vector_store(documents, embeddings)
             if db and st.button("Summarize"):
-                with st.spinner(f"Generating structured summary using {model_option}..."):
                     summary = summarize_report(documents, llm)
                     if summary:
-                        st.subheader("Structured Summary:")
-                        st.markdown(summary)
                     else:
                         st.warning("Failed to generate summary. Please try again.")

 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.prompts import PromptTemplate
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 # Constants
 EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
+DEFAULT_MODEL = "distilgpt2"  # A smaller model that's more likely to work in Spaces
 # Check for GPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
+st.sidebar.write(f"Using device: {device}")
 @st.cache_resource
 def load_embeddings():
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device, max_length=512)
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
         documents = text_splitter.split_documents(pages)
         return documents
     except Exception as e:
     """Summarize the report using the loaded model."""
     try:
         prompt_template = """
+        Summarize the following text in a clear and concise manner:
+        {text}
+        Summary:
         """
+        prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
         summary = chain.run(documents)
         return summary
 def main():
     st.title("Report Summarizer")
+    model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
     llm = load_llm(model_option)
+    if not llm:
+        st.error(f"Failed to load the model {model_option}. Please try another model.")
+        return
+    embeddings = load_embeddings()
+    if not embeddings:
+        st.error("Failed to load embeddings. Please try again later.")
         return
     if uploaded_file:
                 db = create_vector_store(documents, embeddings)
             if db and st.button("Summarize"):
+                with st.spinner(f"Generating summary using {model_option}..."):
                     summary = summarize_report(documents, llm)
                     if summary:
+                        st.subheader("Summary:")
+                        st.write(summary)
                     else:
                         st.warning("Failed to generate summary. Please try again.")