Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Files Community

Report_summarizer / app.py

Kathirsci

Update app.py

ffb4b75 verified 6 months ago

raw

history blame

4.87 kB

	import streamlit as st
	import tempfile
	import logging
	from typing import List
	import torch
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_community.llms import HuggingFacePipeline
	from langchain.chains.summarize import load_summarize_chain
	from langchain.schema import Document
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.prompts import PromptTemplate
	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

	# Logging: configure the root logger at INFO and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Model identifiers used by the loaders below.
	EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
	DEFAULT_MODEL = "distilgpt2"  # Small default model, more likely to work in Spaces

	# Select the compute device (CUDA when torch reports it is available,
	# otherwise CPU) and show the choice in the sidebar.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	st.sidebar.write(f"Using device: {device}")

	@st.cache_resource
	def _load_embeddings_cached():
	    """Build the embedding model; errors propagate so a failure is never cached."""
	    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


	def load_embeddings():
	    """Load the embedding model named by EMBEDDING_MODEL, cached across reruns.

	    The actual construction lives in a ``st.cache_resource``-decorated helper
	    that raises on failure. Only successful results are cached: if this
	    function itself were cached, the ``None`` returned after a transient
	    failure would be stored and returned on every later rerun without retrying.

	    Returns:
	        The ``HuggingFaceEmbeddings`` instance, or ``None`` if loading failed
	        (the failure is logged and reported to the user via ``st.error``).
	    """
	    try:
	        return _load_embeddings_cached()
	    except Exception as e:
	        # Top-level UI boundary: log with traceback, show a friendly message.
	        logger.exception("Failed to load embeddings: %s", e)
	        st.error("Failed to load the embedding model. Please try again later.")
	        return None

	@st.cache_resource
	def _load_llm_cached(model_name, max_new_tokens):
	    """Build the text-generation LLM; errors propagate so a failure is never cached."""
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForCausalLM.from_pretrained(model_name)
	    pipe = pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	        device=device,
	        # Budget applies to generated tokens only. `max_length` would also
	        # count the prompt, leaving no room to generate for long prompts.
	        max_new_tokens=max_new_tokens,
	    )
	    return HuggingFacePipeline(pipeline=pipe)


	def load_llm(model_name, max_new_tokens=256):
	    """Load a causal language model wrapped for LangChain, cached across reruns.

	    The construction lives in a ``st.cache_resource``-decorated helper that
	    raises on failure, so only successful loads are cached. Caching this
	    function directly would store the ``None`` returned after a failed load
	    and keep returning it for that model name without retrying.

	    Args:
	        model_name: Model identifier passed to ``from_pretrained``.
	        max_new_tokens: Maximum number of tokens to generate per call,
	            independent of the prompt length.

	    Returns:
	        A ``HuggingFacePipeline`` instance, or ``None`` if loading failed
	        (the failure is logged and reported to the user via ``st.error``).
	    """
	    try:
	        return _load_llm_cached(model_name, max_new_tokens)
	    except Exception as e:
	        # Top-level UI boundary: log with traceback, show a friendly message.
	        logger.exception("Failed to load LLM: %s", e)
	        st.error(f"Failed to load the model {model_name}. Please try another model or check your internet connection.")
	        return None

	def process_pdf(file) -> List[Document]:
	    """Load an uploaded PDF and split it into text chunks.

	    The upload is written to a temporary ``.pdf`` file because the loader is
	    given a file path. The temporary file is always removed afterwards, so
	    repeated calls do not accumulate files on disk.

	    Args:
	        file: Uploaded file object; its bytes are read with ``getvalue()``.

	    Returns:
	        The chunked documents (chunk size 2000, overlap 100), or an empty
	        list if processing failed (the failure is logged and reported to the
	        user via ``st.error``).
	    """
	    import os  # Local import: needed only to remove the temporary file.

	    temp_file_path = None
	    try:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
	            # Record the path first so cleanup still happens if the write fails.
	            temp_file_path = temp_file.name
	            temp_file.write(file.getvalue())

	        loader = PyPDFLoader(file_path=temp_file_path)
	        pages = loader.load()
	        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
	        return text_splitter.split_documents(pages)
	    except Exception as e:
	        logger.error(f"Error processing PDF: {e}")
	        st.error("Failed to process the PDF. Please make sure it's a valid PDF file.")
	        return []
	    finally:
	        # delete=False means nothing else removes the file; do it here.
	        if temp_file_path is not None:
	            try:
	                os.remove(temp_file_path)
	            except OSError as cleanup_error:
	                logger.warning(
	                    "Could not remove temporary file %s: %s",
	                    temp_file_path,
	                    cleanup_error,
	                )

	def create_vector_store(documents: List[Document], embeddings):
	    """Build a FAISS vector store from the documents using the given embeddings.

	    Returns the store, or None when construction raises; in that case the
	    error is logged and shown to the user.
	    """
	    try:
	        store = FAISS.from_documents(documents, embeddings)
	    except Exception as e:
	        logger.error(f"Error creating vector store: {e}")
	        st.error("Failed to create the vector store. Please try again.")
	        return None
	    return store

	def summarize_report(documents: List[Document], llm) -> str:
	    """Summarize the documents with a single "stuff" summarization chain.

	    All documents are placed into one prompt and sent to the model.

	    Args:
	        documents: Document chunks to summarize.
	        llm: LangChain-compatible language model used by the chain.

	    Returns:
	        The generated summary, or an empty string when there is nothing to
	        summarize or summarization failed (failures are logged and reported
	        to the user via ``st.error``).
	    """
	    # Nothing to summarize: avoid invoking the model on an empty prompt.
	    if not documents:
	        logger.warning("No documents provided for summarization.")
	        return ""

	    try:
	        prompt_template = """
	Summarize the following text in a clear and concise manner:

	{text}

	Summary:
	"""

	        prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
	        chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
	        # `Chain.run` is deprecated; `invoke` takes the chain's input key and
	        # returns a dict holding the summary under "output_text".
	        result = chain.invoke({"input_documents": documents})
	        return result["output_text"]
	    except Exception as e:
	        logger.error(f"Error summarizing report: {e}")
	        st.error("Failed to summarize the report. Please try again.")
	        return ""

	def main():
	    """Render the Report Summarizer page and run the summarization flow.

	    Steps: read the model name and PDF upload from the sidebar, load the
	    language model and embeddings, split the PDF, build the vector store,
	    and produce a summary once the "Summarize" button is pressed. Each step
	    stops the flow early when its prerequisite is missing or failed.
	    """
	    st.title("Report Summarizer")

	    # Sidebar inputs.
	    model_name = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
	    report_file = st.sidebar.file_uploader("Upload your Report", type="pdf")

	    language_model = load_llm(model_name)
	    if not language_model:
	        st.error(f"Failed to load the model {model_name}. Please try another model.")
	        return

	    embedding_model = load_embeddings()
	    if not embedding_model:
	        st.error("Failed to load embeddings. Please try again later.")
	        return

	    # Nothing more to do until a report has been uploaded.
	    if not report_file:
	        return

	    with st.spinner("Processing PDF..."):
	        chunks = process_pdf(report_file)
	    if not chunks:
	        return

	    with st.spinner("Creating vector store..."):
	        vector_store = create_vector_store(chunks, embedding_model)
	    if not vector_store:
	        return

	    # The button is rendered only once the vector store exists.
	    if not st.button("Summarize"):
	        return

	    with st.spinner(f"Generating summary using {model_name}..."):
	        report_summary = summarize_report(chunks, language_model)

	    if not report_summary:
	        st.warning("Failed to generate summary. Please try again.")
	        return

	    st.subheader("Summary:")
	    st.write(report_summary)

	# Invoke main() only when this module is executed as the main script.
	if __name__ == "__main__":
	    main()