update code
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from PyPDF2 import PdfReader
 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
-
+import time
 
 # notes
 # https://huggingface.co/docs/transformers/pad_truncation
@@ -118,7 +118,7 @@ def main():
 legacy=False,
 model_max_length=1000,
 trust_remote_code=True,
-cache_dir="model_cache"
+#cache_dir="model_cache"
 )
 base_model = "model_cache/models--ccdv--lsg-bart-base-16384-pubmed/snapshots/4072bc1a7a94e2b4fd860a5fdf1b71d0487dcf15"
 #base_model = AutoModelForSeq2SeqLM.from_pretrained(
@@ -134,7 +134,7 @@ def main():
 truncation=True,
 legacy=False,
 model_max_length=1000,
-cache_dir="model_cache"
+#cache_dir="model_cache"
 )
 base_model = "model_cache/models--MBZUAI--LaMini-Flan-T5-77M/snapshots/c5b12d50a2616b9670a57189be20055d1357b474"
 #base_model = AutoModelForSeq2SeqLM.from_pretrained(
@@ -169,12 +169,17 @@ def main():
 )
 pdf_viewer = displayPDF(filepath)
 with col2:
+start = time.time()
 with st.spinner("Summarizing..."):
 summary = llm_pipeline(tokenizer, base_model, input_text)
 postproc_text_length = postproc_count(summary)
+end = time.time()
+duration = end - start
 st.info(
 "PDF Summary | Number of words: "
 f"{postproc_text_length:,}"
+ + " | Summarization time: "
+ f"{duration:.0f}" + " seconds"
 )
 st.success(summary)
 
(Note: the diff viewer strips leading indentation; indent levels inside main() are not recoverable from this page.)