vanessbut commited on
Commit
10f9383
·
1 Parent(s): 69b4bb2

Исправлен вывод.

Browse files
Files changed (1) hide show
  1. app.py +48 -33
app.py CHANGED
@@ -1,11 +1,21 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
2
 
3
  st.markdown("""### TL;DR: give me the keywords!
4
  Here you can get the keywords and topic of the article based on its title or abstract.
5
 
6
  The only supported language is English.""")
7
 
8
- st.markdown("<p style=\"text-align:center\"><img width=700px src='https://c.tenor.com/IKt-6tAk9CUAAAAd/thats-a-lot-of-words-lots-of-words.gif'></p>", unsafe_allow_html=True)
9
 
10
  #from transformers import pipeline
11
 
@@ -15,50 +25,55 @@ st.markdown("<p style=\"text-align:center\"><img width=700px src='https://c.teno
15
  title = st.text_area("Title:", value="How to cook a neural network", height=16, help="Title of the article")
16
  abstract = st.text_area("Abstract:",
17
  value="""
18
- My dad fits hellish models in general.
19
- Well, this is about an average recipe, because there are a lot of variations.
20
- The model is taken, it is not finetuned, finetuning is not about my dad.
21
- He takes this model, dumps it into the tensorboard and starts frying it.
22
- Adds a huge amount of noize, convolutions, batch and spectral normalization DROPOUT! for regularization, maxpooling on top.
23
- All this is fitted to smoke.
24
- Then the computer is removed from the fire and cools on the balcony.
25
- Then dad brings it in and generously sprinkles it with crossvalidation and starts predicting.
26
- At the same time, he gets data from the web, scraping it with a fork.
27
- Predicts and sentences in a half-whisper oh god.
28
- At the same time, he has sweat on his forehead.
29
- Kindly offers me sometimes, but I refuse.
30
- Do I need to talk about what the wildest overfitting then?
31
- The overfitting is such that the val loss peels off the walls.
32
- """,
33
  height=512, help="Abstract of the article")
34
 
35
- from transformers import AutoModel, AutoTokenizer
36
- #from tqdm import tqdm as tqdm
37
 
38
- import transformers
39
- transformers.utils.logging.disable_progress_bar()
 
40
 
41
- model_name = "distilroberta-base"
42
- main_model = AutoModel.from_pretrained(model_name)
43
- main_tokenizer = AutoTokenizer.from_pretrained(model_name)
 
44
 
45
 
46
- from utils.utils import *
47
- import spacy
48
- #import en_core_web_sm
49
 
50
- import os
51
- os.system("python3 -m spacy download en")
 
 
52
 
53
- # Вообще, стоит найти pipeline, заточенный под научный текст.
54
- # Но этим займёмся потом, если будет время.
55
- main_nlp = spacy.load('en_core_web_sm')
 
 
 
 
56
 
57
- text = title + ". " + abstract
58
 
59
  if not text is None and len(text) > 0:
60
  #keywords = get_candidates(text, main_nlp)
61
  keywords = get_keywords(text, main_nlp, main_model, main_tokenizer)
62
  st.markdown(f"{keywords}")
63
  else:
64
- st.markdown("Please, try to enter something.")
 
1
  import streamlit as st
2
+ import spacy
3
+ import transformers
4
+ import os
5
+
6
+
7
+ from transformers import AutoModel, AutoTokenizer
8
+ from utils.utils import *
9
+
10
+ transformers.utils.logging.disable_progress_bar()
11
+ os.system("python3 -m spacy download en")
12
 
13
  st.markdown("""### TL;DR: give me the keywords!
14
  Here you can get the keywords and topic of the article based on its title or abstract.
15
 
16
  The only supported language is English.""")
17
 
18
+ st.markdown("<p style=\"text-align:center\"><img width=90% src='https://c.tenor.com/IKt-6tAk9CUAAAAd/thats-a-lot-of-words-lots-of-words.gif'></p>", unsafe_allow_html=True)
19
 
20
  #from transformers import pipeline
21
 
 
25
  title = st.text_area("Title:", value="How to cook a neural network", height=16, help="Title of the article")
26
  abstract = st.text_area("Abstract:",
27
  value="""
28
+ My dad fits hellish models in general.
29
+ Well, this is about an average recipe, because there are a lot of variations.
30
+ The model is taken, it is not finetuned, finetuning is not about my dad.
31
+ He takes this model, dumps it into the tensorboard and starts frying it.
32
+ Adds a huge amount of noize, convolutions, batch and spectral normalization DROPOUT! for regularization, maxpooling on top.
33
+ All this is fitted to smoke.
34
+ Then the computer is removed from the fire and cools on the balcony.
35
+ Then dad brings it in and generously sprinkles it with crossvalidation and starts predicting.
36
+ At the same time, he gets data from the web, scraping it with a fork.
37
+ Predicts and sentences in a half-whisper oh god.
38
+ At the same time, he has sweat on his forehead.
39
+ Kindly offers me sometimes, but I refuse.
40
+ Do I need to talk about what the wildest overfitting then?
41
+ The overfitting is such that the val loss peels off the walls.
42
+ """,
43
  height=512, help="Abstract of the article")
44
 
45
+ # Spacy
 
46
 
47
# allow_output_mutation=True: a spaCy Language object is mutable and
# unhashable, so st.cache must not try to hash the cached output on
# every rerun (bare @st.cache raises/warns on such return values).
@st.cache(allow_output_mutation=True)
def get_nlp(nlp_name):
    """Load and cache the spaCy pipeline for the given model name.

    Parameters
    ----------
    nlp_name : str
        Name of an installed spaCy model, e.g. 'en_core_web_sm'.

    Returns
    -------
    spacy.language.Language
        The loaded pipeline, cached across Streamlit reruns.
    """
    return spacy.load(nlp_name)
50
 
51
+ # Вообще, стоит найти pipeline, заточенный под научный текст.
52
+ # Но этим займёмся потом, если будет время.
53
+ nlp_name = 'en_core_web_sm'
54
+ main_nlp = get_nlp(nlp_name)
55
 
56
 
57
+ # Получение модели.
 
 
58
 
59
# allow_output_mutation=True: the returned transformer model (a torch
# nn.Module) is mutable and unhashable, so st.cache must skip hashing
# the cached output on rerun (bare @st.cache raises/warns otherwise).
@st.cache(allow_output_mutation=True)
def get_model_and_tokenizer(model_name):
    """Download (or fetch from cache) a HF model and its tokenizer.

    Parameters
    ----------
    model_name : str
        Hugging Face model id, e.g. "distilroberta-base".

    Returns
    -------
    tuple
        (model, tokenizer) as returned by AutoModel / AutoTokenizer
        ``from_pretrained``, cached across Streamlit reruns.
    """
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return model, tokenizer
65
+
66
+ model_name = "distilroberta-base"
67
+ main_model, main_tokenizer = get_model_and_tokenizer(model_name)
68
+
69
+
70
+ # Обработка текста.
71
 
72
+ text = preprocess([title + ". " + abstract])[0]
73
 
74
  if not text is None and len(text) > 0:
75
  #keywords = get_candidates(text, main_nlp)
76
  keywords = get_keywords(text, main_nlp, main_model, main_tokenizer)
77
  st.markdown(f"{keywords}")
78
  else:
79
+ st.markdown("Please, try to enter something.")