Kathirsci committed on
Commit
3f068be
·
verified ·
1 Parent(s): 895f085

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -16
app.py CHANGED
@@ -2,11 +2,10 @@ import streamlit as st
2
  import tempfile
3
  import logging
4
  from typing import List
5
- from langchain_community.document_loaders import PyPDFLoader
6
- #from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
- from langchain_community.vectorstores import FAISS
9
- from langchain_community.llms import HuggingFacePipeline
10
  from langchain.chains.summarize import load_summarize_chain
11
  from langchain.schema import Document
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -35,7 +34,7 @@ def load_embeddings():
35
  def load_llm(model_name):
36
  """Load and cache the language model."""
37
  try:
38
- pipe = pipeline("text2text-generation", model=model_name, max_length=1024)
39
  return HuggingFacePipeline(pipeline=pipe)
40
  except Exception as e:
41
  logger.error(f"Failed to load LLM: {e}")
@@ -48,7 +47,7 @@ def process_pdf(file) -> List[Document]:
48
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
49
  temp_file.write(file.getvalue())
50
  temp_file_path = temp_file.name
51
-
52
  loader = PyPDFLoader(file_path=temp_file_path)
53
  pages = loader.load()
54
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
@@ -73,7 +72,6 @@ def summarize_report(documents: List[Document], llm) -> str:
73
  try:
74
  prompt_template = """
75
  <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
76
-
77
  *Instructions:*
78
  - Analyze the provided context and input carefully.
79
  - Identify and highlight the key points, main arguments, and important details.
@@ -82,30 +80,24 @@ def summarize_report(documents: List[Document], llm) -> str:
82
  - Use **text** for important terms or concepts.
83
  - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
84
  - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
85
-
86
  *Example Summary Format:*
87
-
88
  # Overview
89
  *Document Title:* Technical Analysis Report
90
-
91
  *Summary:*
92
  The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
93
-
94
  # Key Findings
95
  - *Finding 1:* Description of finding 1.
96
  - *Finding 2:* Description of finding 2.
97
-
98
  # Conclusion
99
  The analysis highlights the significant advancements and future directions for AI technology.
100
-
101
  *Your Response:* [/INST]</s> {input}
102
  Context: {context}
103
  """
104
 
105
  prompt = PromptTemplate.from_template(prompt_template)
106
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
107
- summary = chain.invoke(documents)
108
- return summary['output_text']
109
 
110
  except Exception as e:
111
  logger.error(f"Error summarizing report: {e}")
@@ -114,7 +106,7 @@ def summarize_report(documents: List[Document], llm) -> str:
114
 
115
  def main():
116
  st.title("Report Summarizer")
117
-
118
  model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
119
 
120
  uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")
 
2
  import tempfile
3
  import logging
4
  from typing import List
5
+ from langchain.document_loaders import PyPDFLoader
 
6
  from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.llms import HuggingFacePipeline
9
  from langchain.chains.summarize import load_summarize_chain
10
  from langchain.schema import Document
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
34
  def load_llm(model_name):
35
  """Load and cache the language model."""
36
  try:
37
+ pipe = pipeline("text-generation", model=model_name, max_length=1024)
38
  return HuggingFacePipeline(pipeline=pipe)
39
  except Exception as e:
40
  logger.error(f"Failed to load LLM: {e}")
 
47
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
48
  temp_file.write(file.getvalue())
49
  temp_file_path = temp_file.name
50
+
51
  loader = PyPDFLoader(file_path=temp_file_path)
52
  pages = loader.load()
53
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
 
72
  try:
73
  prompt_template = """
74
  <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
 
75
  *Instructions:*
76
  - Analyze the provided context and input carefully.
77
  - Identify and highlight the key points, main arguments, and important details.
 
80
  - Use **text** for important terms or concepts.
81
  - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
82
  - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
 
83
  *Example Summary Format:*
 
84
  # Overview
85
  *Document Title:* Technical Analysis Report
 
86
  *Summary:*
87
  The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
 
88
  # Key Findings
89
  - *Finding 1:* Description of finding 1.
90
  - *Finding 2:* Description of finding 2.
 
91
  # Conclusion
92
  The analysis highlights the significant advancements and future directions for AI technology.
 
93
  *Your Response:* [/INST]</s> {input}
94
  Context: {context}
95
  """
96
 
97
  prompt = PromptTemplate.from_template(prompt_template)
98
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
99
+ summary = chain.run(documents)
100
+ return summary
101
 
102
  except Exception as e:
103
  logger.error(f"Error summarizing report: {e}")
 
106
 
107
  def main():
108
  st.title("Report Summarizer")
109
+
110
  model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
111
 
112
  uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")