Spaces:
No application file
No application file
import streamlit as st | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.docstore.document import Document | |
from langchain.chains.summarize import load_summarize_chain | |
from langchain_community.llms import CTransformers | |
from langchain.callbacks.manager import CallbackManager | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
from pypdf import PdfReader | |
# Page title: the same string is used for the browser tab and the on-page heading.
# NOTE(review): the leading characters look like a mis-decoded emoji - confirm
# the file's encoding before changing the literal.
APP_TITLE = 'π¦π Text Summarization App'
st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)
# Function to read all PDF files and return text
def get_pdf_text(pdf_docs):
    """Return the extracted text of every page of every PDF, concatenated.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        One string containing the page texts in document order and page
        order, joined without a separator. An empty iterable yields ``""``.
    """
    # Collect the page texts and join once rather than growing a string
    # with ``+=`` inside the nested loop.
    page_texts = []
    for pdf in pdf_docs:
        page_texts.extend(page.extract_text() for page in PdfReader(pdf).pages)
    return "".join(page_texts)
# Function to split the text into smaller chunks and convert it into document format
def chunks_and_document(txt):
    """Split *txt* into chunks and wrap each chunk in a ``Document``.

    Args:
        txt: The text to split.

    Returns:
        A list of ``Document`` objects, one per chunk produced by a
        ``CharacterTextSplitter`` built with its default settings.
    """
    chunks = CharacterTextSplitter().split_text(txt)
    return [Document(page_content=chunk) for chunk in chunks]
# Loading the Llama 2's LLM
@st.cache_resource(show_spinner=False)
def load_llm():
    """Build the Llama 2 chat model through ``CTransformers``.

    The result is cached with ``st.cache_resource`` so that repeated calls
    reuse the already-loaded model instead of constructing a new one each
    time a summary is requested.

    Returns:
        The configured ``CTransformers`` LLM instance.
    """
    # NOTE(review): 'context_length' (700) is only slightly larger than
    # 'max_new_tokens' (600) - presumably this leaves little room for the
    # prompt; confirm these values against the size of the text chunks.
    return CTransformers(
        model="llama-2-7b-chat.ggmlv3.q2_K.bin",
        model_type="llama",
        config={
            'max_new_tokens': 600,
            'temperature': 0.5,
            'context_length': 700,
        },
    )
# Function to apply the LLM model with our document
def chains_and_response(docs):
    """Run a map-reduce summarization chain over *docs*.

    Args:
        docs: The documents to summarize.

    Returns:
        Whatever ``invoke`` on the summarization chain returns for *docs*.
    """
    summarizer = load_summarize_chain(load_llm(), chain_type='map_reduce')
    result = summarizer.invoke(docs)
    return result
def main():
    """Render the upload sidebar and, on submit, show the PDF summary.

    The sidebar collects one or more PDF files and a submit button. When the
    button is pressed, the text of the uploaded files is extracted, split
    into documents, summarized, and the summary is displayed.
    """
    # Initialize messages if not already present
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Sidebar for uploading PDF files
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        submitted = st.button("Submit & Process")

    if not submitted:
        return

    # Nothing to summarize: tell the user instead of running the model on
    # empty input.
    if not pdf_docs:
        st.warning("Please upload at least one PDF file before processing.")
        return

    with st.spinner("Processing..."):
        txt_input = get_pdf_text(pdf_docs)
        docs = chunks_and_document(txt_input)
        if not docs:
            st.warning("No text could be extracted from the uploaded PDF files.")
            return
        response = chains_and_response(docs)

    st.title('πβ Summarization Result')
    # The chain result is a mapping; iterating it would display its key
    # names, so pick out the summary text explicitly. A plain string result
    # is shown as-is.
    if isinstance(response, dict):
        summary = response.get("output_text", "")
    else:
        summary = response
    st.info(summary)
# Start the app only when this file is executed as the main script, so that
# importing it does not run the UI flow.
if __name__ == "__main__":
    main()