Spaces:

iohanngrig
/

textSummary

Sleeping

App Files Files Community

iohanngrig committed on Jan 27, 2024

Commit

f39f2e1

verified ·

1 Parent(s): 01ccd2f

Upload 5 files

Browse files

Files changed (5) hide show

.gitignore +14 -0
app.py +51 -0
pages/1_using_LLM.py +41 -0
pages/2_using_LLM_QA.py +50 -0
requirements.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+__pycache__
+.mypy_cache
+data/
+credential/
+artifacts/
+model/
+.streamlit/
+.streamlit/secrets.toml
+*.toml
+# ignore cache
+*.pyc

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import streamlit as st
+import torch
+from transformers import pipeline
+from utils.process_data import generate_chunks, pdf_to_text
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+st.set_page_config(page_title="Summarizer", page_icon="⚖️")
+st.title("Summarize Text")
+st.subheader("🚗 🔗 Transformers Summarization Pipeline")
+max = st.slider('Select max', 50, 500, step=10, value=150)
+min = st.slider('Select min', 10, 450, step=10, value=50)
+do_sample = st.checkbox("Do sample", value=False)
+sentence = st.text_area('Please paste your article:', height=50)
+button = st.button("Summarize")
+@st.cache_data
+def load_summarizer():
+    model = pipeline("summarization", model=st.secrets["SUM_MODEL"], device=device)
+    return model
+with st.spinner("Generating Summary.."):
+    if button and sentence:
+        chunks = generate_chunks(sentence)
+        summarizer = load_summarizer()
+        res = summarizer(chunks,
+                         max_length=max,
+                         min_length=min,
+                         do_sample=do_sample)
+        text = ' '.join([summ['summary_text'] for summ in res])
+        st.write(text)
+st.divider()
+st.subheader('🚙🔗 Summarize PDF')
+pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
+button2 = st.button("Summarize PDF")
+if pdf_path is not None and button2:
+    text = pdf_to_text(pdf_path)
+    with st.spinner("Generating PDF Summary.."):
+        chunks = generate_chunks(text)
+        summarizer = load_summarizer()
+        res = summarizer(chunks,
+                        max_length=max,
+                        min_length=min,
+                        do_sample=do_sample)
+        text_sum = ' '.join([summ['summary_text'] for summ in res])
+        st.write(text_sum)

pages/1_using_LLM.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import streamlit as st
+from langchain import OpenAI
+from langchain.docstore.document import Document
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.chains.summarize import load_summarize_chain
+from utils.process_data import  pdf_to_text
+# Model identifier read from the Streamlit secret "MODEL4".
+# NOTE(review): no other visible line of this page references MODEL —
+# confirm whether generate_response is meant to pass it to the LLM.
+MODEL = st.secrets["MODEL4"]
+def generate_response(txt):
+    llm = OpenAI(temperature=0.1, openai_api_key=st.secrets["OPENAI_API_KEY"])
+    text_splitter = CharacterTextSplitter()
+    texts = text_splitter.split_text(txt)
+    docs = [Document(page_content=t) for t in texts]
+    chain = load_summarize_chain(llm, chain_type='map_reduce')
+    return chain.run(docs)
+st.set_page_config(page_title="Summarizer with LLM", page_icon="⚖️")
+st.title("Summarize Text")
+st.subheader('🚕🔗 LLM/LoadSummarizeChain')
+sentence = st.text_area('Please paste your article:', height=100)
+button = st.button("Summarize")
+with st.spinner("Generating Summary.."):
+    if button and sentence:
+        response = generate_response(sentence)
+        st.write(response)
+st.divider()
+st.subheader('🚙🔗 Summarize PDF')
+pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
+button2 = st.button("Summarize PDF")
+if pdf_path is not None and button2:
+    text = pdf_to_text(pdf_path)
+    with st.spinner("Generating PDF Summary.."):
+        response2 = generate_response(text)
+        st.subheader('Summary Results:')
+        st.write(response2)

pages/2_using_LLM_QA.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import streamlit as st
+from langchain_community.chat_models import ChatOpenAI
+from langchain_community.callbacks import get_openai_callback
+from langchain.chains.question_answering import load_qa_chain
+from utils.process_data import process_text, pdf_to_text
+MODEL = st.secrets["MODEL4"]
+st.set_page_config(page_title="Summarizer with LLM QA", page_icon="⚖️")
+st.title("Summarize Text")
+st.subheader("🚗 🔗 LLM/Question Answering")
+maxw = st.slider('MAX words', 50, 1000, step=10, value=200)
+minw = st.slider('MIN words', 10, 500, step=10, value=50)
+sentence = st.text_area('Please paste your article:', height=50)
+button = st.button("Summarize")
+query = f"Summarize the content of the uploaded PDF file in more that {minw} words and less than {maxw} words. Focus on capturing the main ideas and key points discussed in the document. Use your own words and ensure clarity and coherence in the summary."
+with st.spinner("Generating Summary.."):
+    if button and sentence:
+        knowledgeBase = process_text(sentence)
+        docs = knowledgeBase.similarity_search(query)
+        llm = ChatOpenAI(model=MODEL, temperature=0.1, openai_api_key=st.secrets["OPENAI_API_KEY"])
+        chain = load_qa_chain(llm, chain_type='stuff')
+        with get_openai_callback() as cost:
+            response = chain.run(input_documents=docs, question=query)
+            print(cost)
+        st.subheader('Summary Results:')
+        st.write(response)
+st.divider()
+st.subheader('🚙🔗 Summarize PDF')
+pdf_path = st.file_uploader('Upload your PDF Document', type='pdf')
+button2 = st.button("Summarize PDF")
+if pdf_path is not None and button2:
+    text = pdf_to_text(pdf_path)
+    knowledgeBase = process_text(text)
+    with st.spinner("Generating PDF Summary.."):
+        docs = knowledgeBase.similarity_search(query)
+        llm = ChatOpenAI(model=MODEL, temperature=0.1, openai_api_key=st.secrets["OPENAI_API_KEY"])
+        chain = load_qa_chain(llm, chain_type='stuff')
+        with get_openai_callback() as cost:
+            response2 = chain.run(input_documents=docs, question=query)
+            print(cost)
+        st.subheader('Summary Results:')
+        st.write(response2)

requirements.txt ADDED Viewed

Binary file (23.6 kB). View file