vanessbut commited on
Commit
10f9383
·
1 Parent(s): 69b4bb2

Исправлен вывод.

Browse files
Files changed (1) hide show
  1. app.py +48 -33
app.py CHANGED
@@ -1,11 +1,21 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
2
 
3
  st.markdown("""### TL;DR: give me the keywords!
4
  Here you can get the keywords and topic of the article based on its title or abstract.
5
 
6
  The only supported language is English.""")
7
 
8
- st.markdown("<p style=\"text-align:center\"><img width=700px src='https://c.tenor.com/IKt-6tAk9CUAAAAd/thats-a-lot-of-words-lots-of-words.gif'></p>", unsafe_allow_html=True)
9
 
10
  #from transformers import pipeline
11
 
@@ -15,50 +25,55 @@ st.markdown("<p style=\"text-align:center\"><img width=700px src='https://c.teno
15
  title = st.text_area("Title:", value="How to cook a neural network", height=16, help="Title of the article")
16
  abstract = st.text_area("Abstract:",
17
  value="""
18
- My dad fits hellish models in general.
19
- Well, this is about an average recipe, because there are a lot of variations.
20
- The model is taken, it is not finetuned, finetuning is not about my dad.
21
- He takes this model, dumps it into the tensorboard and starts frying it.
22
- Adds a huge amount of noize, convolutions, batch and spectral normalization DROPOUT! for regularization, maxpooling on top.
23
- All this is fitted to smoke.
24
- Then the computer is removed from the fire and cools on the balcony.
25
- Then dad brings it in and generously sprinkles it with crossvalidation and starts predicting.
26
- At the same time, he gets data from the web, scraping it with a fork.
27
- Predicts and sentences in a half-whisper oh god.
28
- At the same time, he has sweat on his forehead.
29
- Kindly offers me sometimes, but I refuse.
30
- Do I need to talk about what the wildest overfitting then?
31
- The overfitting is such that the val loss peels off the walls.
32
- """,
33
  height=512, help="Abstract of the article")
34
 
35
- from transformers import AutoModel, AutoTokenizer
36
- #from tqdm import tqdm as tqdm
37
 
38
- import transformers
39
- transformers.utils.logging.disable_progress_bar()
 
40
 
41
- model_name = "distilroberta-base"
42
- main_model = AutoModel.from_pretrained(model_name)
43
- main_tokenizer = AutoTokenizer.from_pretrained(model_name)
 
44
 
45
 
46
- from utils.utils import *
47
- import spacy
48
- #import en_core_web_sm
49
 
50
- import os
51
- os.system("python3 -m spacy download en")
 
 
52
 
53
- # Вообще, стоит найти pipeline, заточенный под научный текст.
54
- # Но этим займёмся потом, если будет время.
55
- main_nlp = spacy.load('en_core_web_sm')
 
 
 
 
56
 
57
- text = title + ". " + abstract
58
 
59
  if not text is None and len(text) > 0:
60
  #keywords = get_candidates(text, main_nlp)
61
  keywords = get_keywords(text, main_nlp, main_model, main_tokenizer)
62
  st.markdown(f"{keywords}")
63
  else:
64
- st.markdown("Please, try to enter something.")
 
1
  import streamlit as st
2
+ import spacy
3
+ import transformers
4
+ import os
5
+
6
+
7
+ from transformers import AutoModel, AutoTokenizer
8
+ from utils.utils import *
9
+
10
+ transformers.utils.logging.disable_progress_bar()
11
+ os.system("python3 -m spacy download en")
12
 
13
  st.markdown("""### TL;DR: give me the keywords!
14
  Here you can get the keywords and topic of the article based on its title or abstract.
15
 
16
  The only supported language is English.""")
17
 
18
+ st.markdown("<p style=\"text-align:center\"><img width=90% src='https://c.tenor.com/IKt-6tAk9CUAAAAd/thats-a-lot-of-words-lots-of-words.gif'></p>", unsafe_allow_html=True)
19
 
20
  #from transformers import pipeline
21
 
 
25
  title = st.text_area("Title:", value="How to cook a neural network", height=16, help="Title of the article")
26
  abstract = st.text_area("Abstract:",
27
  value="""
28
+ My dad fits hellish models in general.
29
+ Well, this is about an average recipe, because there are a lot of variations.
30
+ The model is taken, it is not finetuned, finetuning is not about my dad.
31
+ He takes this model, dumps it into the tensorboard and starts frying it.
32
+ Adds a huge amount of noize, convolutions, batch and spectral normalization DROPOUT! for regularization, maxpooling on top.
33
+ All this is fitted to smoke.
34
+ Then the computer is removed from the fire and cools on the balcony.
35
+ Then dad brings it in and generously sprinkles it with crossvalidation and starts predicting.
36
+ At the same time, he gets data from the web, scraping it with a fork.
37
+ Predicts and sentences in a half-whisper oh god.
38
+ At the same time, he has sweat on his forehead.
39
+ Kindly offers me sometimes, but I refuse.
40
+ Do I need to talk about what the wildest overfitting then?
41
+ The overfitting is such that the val loss peels off the walls.
42
+ """,
43
  height=512, help="Abstract of the article")
44
 
45
+ # Spacy
 
46
 
47
# allow_output_mutation=True: a spaCy Language object is mutable and
# unhashable, so st.cache must not try to hash the cached output on
# every rerun (bare @st.cache raises/warns on such return values).
@st.cache(allow_output_mutation=True)
def get_nlp(nlp_name):
    """Load and cache the spaCy pipeline for the given model name.

    Parameters
    ----------
    nlp_name : str
        Name of an installed spaCy model, e.g. 'en_core_web_sm'.

    Returns
    -------
    spacy.language.Language
        The loaded pipeline, cached across Streamlit reruns.
    """
    return spacy.load(nlp_name)
50
 
51
+ # Вообще, стоит найти pipeline, заточенный под научный текст.
52
+ # Но этим займёмся потом, если будет время.
53
+ nlp_name = 'en_core_web_sm'
54
+ main_nlp = get_nlp(nlp_name)
55
 
56
 
57
+ # Получение модели.
 
 
58
 
59
# allow_output_mutation=True: the returned transformer model (a torch
# nn.Module) is mutable and unhashable, so st.cache must skip hashing
# the cached output on rerun (bare @st.cache raises/warns otherwise).
@st.cache(allow_output_mutation=True)
def get_model_and_tokenizer(model_name):
    """Download (or fetch from cache) a HF model and its tokenizer.

    Parameters
    ----------
    model_name : str
        Hugging Face model id, e.g. "distilroberta-base".

    Returns
    -------
    tuple
        (model, tokenizer) as returned by AutoModel / AutoTokenizer
        ``from_pretrained``, cached across Streamlit reruns.
    """
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return model, tokenizer
65
+
66
+ model_name = "distilroberta-base"
67
+ main_model, main_tokenizer = get_model_and_tokenizer(model_name)
68
+
69
+
70
+ # Обработка текста.
71
 
72
+ text = preprocess([title + ". " + abstract])[0]
73
 
74
  if not text is None and len(text) > 0:
75
  #keywords = get_candidates(text, main_nlp)
76
  keywords = get_keywords(text, main_nlp, main_model, main_tokenizer)
77
  st.markdown(f"{keywords}")
78
  else:
79
+ st.markdown("Please, try to enter something.")