j-higgins committed on
Commit
f505ec7
·
unverified ·
1 Parent(s): cacad38

Update app.py

Browse files

Removed threading that was causing issues on Render

Files changed (1) hide show
  1. app.py +38 -36
app.py CHANGED
@@ -1,12 +1,5 @@
1
  import dash
2
  import dash_bootstrap_components as dbc
3
-
4
- # Initialize Dash app with Bootstrap theme and Font Awesome
5
- app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, 'https://use.fontawesome.com/releases/v5.8.1/css/all.css'])
6
-
7
- # Create server variable
8
- server = app.server
9
-
10
  import pandas as pd
11
  from dash import dcc, html
12
  from dash.dash_table import DataTable
@@ -17,11 +10,17 @@ from sentence_transformers import SentenceTransformer
17
  from sklearn.metrics.pairwise import cosine_similarity
18
  from gliner_spacy.pipeline import GlinerSpacy
19
  import warnings
20
- import threading
21
- warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
22
  import os
23
 
24
- # At the top of your script, after imports
 
 
 
 
 
 
 
 
25
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
26
  CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
27
 
@@ -37,47 +36,31 @@ custom_spacy_config = {
37
  # Model variables
38
  nlp = None
39
  sentence_model = None
40
- model_lock = threading.Lock()
41
- models_loaded = threading.Event()
42
 
43
  # Function to load models
44
  def load_models():
45
  global nlp, sentence_model
46
- with model_lock:
47
- if nlp is None:
48
- nlp = spacy.blank("en")
49
- nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
50
- if sentence_model is None:
51
- sentence_model = SentenceTransformer('all-roberta-large-v1')
52
- models_loaded.set()
53
 
54
- # Start loading models in a separate thread
55
- threading.Thread(target=load_models).start()
56
-
57
- # Function to ensure models are loaded
58
- def ensure_models_loaded():
59
- models_loaded.wait()
60
 
61
  # Function to perform NER using GLiNER with spaCy
62
  def perform_ner(text):
63
- ensure_models_loaded()
64
  doc = nlp(text)
65
  return [(ent.text, ent.label_) for ent in doc.ents]
66
 
67
  # Function to extract entities using GLiNER with spaCy
68
  def extract_entities(text):
69
- ensure_models_loaded()
70
  doc = nlp(text)
71
  entities = [(ent.text, ent.label_) for ent in doc.ents]
72
  return entities if entities else ["No specific entities found"]
73
 
74
- # Load Google's content categories
75
- with open(CATEGORIES_FILE, 'r') as f:
76
- google_categories = [line.strip() for line in f]
77
-
78
  # Function to precompute category embeddings
79
  def compute_category_embeddings():
80
- ensure_models_loaded()
81
  return sentence_model.encode(google_categories)
82
 
83
  # Function to perform topic modeling using sentence transformers
@@ -163,7 +146,6 @@ def sort_by_keyword_feature(f):
163
 
164
  # Optimized batch processing of keywords
165
  def batch_process_keywords(keywords, batch_size=32):
166
- ensure_models_loaded()
167
  processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
168
 
169
  # Precompute keyword embeddings once
@@ -207,6 +189,7 @@ def batch_process_keywords(keywords, batch_size=32):
207
 
208
  # Main layout of the dashboard
209
  app.layout = dbc.Container([
 
210
  dbc.NavbarSimple(
211
  children=[
212
  dbc.NavItem(dbc.NavLink("About", href="#about")),
@@ -221,16 +204,22 @@ app.layout = dbc.Container([
221
 
222
  dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
223
 
224
- dbc.Row([
225
  dbc.Col([
 
 
 
 
 
 
226
  dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
227
  dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
228
- dbc.Button('Submit', id='submit-button', color='primary', className='mb-3'),
229
  dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
230
  dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
231
  ], width=6)
232
  ], justify='center'),
233
-
234
  # Loading component
235
  dbc.Row([
236
  dbc.Col([
@@ -351,6 +340,19 @@ app.layout = dbc.Container([
351
 
352
  ], fluid=True)
353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  # Callback for smooth scrolling
355
  app.clientside_callback(
356
  """
 
1
  import dash
2
  import dash_bootstrap_components as dbc
 
 
 
 
 
 
 
3
  import pandas as pd
4
  from dash import dcc, html
5
  from dash.dash_table import DataTable
 
10
  from sklearn.metrics.pairwise import cosine_similarity
11
  from gliner_spacy.pipeline import GlinerSpacy
12
  import warnings
 
 
13
  import os
14
 
15
# Silence the noisy sentencepiece tokenizer warning raised during model import.
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")

# Initialize Dash app with Bootstrap theme and Font Awesome
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, 'https://use.fontawesome.com/releases/v5.8.1/css/all.css'])

# Create server variable — the WSGI entry point a host (e.g. Render/gunicorn) serves.
server = app.server

# Reference absolute file path so the categories file resolves regardless of the
# process's working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
26
 
 
36
  # Model variables
37
  nlp = None
38
  sentence_model = None
 
 
39
 
40
# Function to load models
def load_models():
    """Initialise the global NLP pipeline and sentence-embedding model.

    Populates the module-level ``nlp`` (blank English spaCy pipeline with the
    GLiNER NER component) and ``sentence_model`` (SentenceTransformer) globals.

    Loading is idempotent: a global that is already set is left untouched, so
    a retriggered Dash callback does not re-instantiate the heavyweight models
    on every invocation (the pre-rewrite code had this ``is None`` guard; it
    was lost when the threading was removed).
    """
    global nlp, sentence_model
    if nlp is None:
        nlp = spacy.blank("en")
        nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
    if sentence_model is None:
        sentence_model = SentenceTransformer('all-roberta-large-v1')
 
 
 
 
46
 
47
# Load Google's content categories (one category name per line of the text
# file) into a module-level list used for topic matching.
with open(CATEGORIES_FILE, 'r') as f:
    google_categories = [line.strip() for line in f]
 
 
 
50
 
51
# Function to perform NER using GLiNER with spaCy
def perform_ner(text):
    """Run the GLiNER spaCy pipeline over *text*.

    Returns a list of ``(entity_text, entity_label)`` tuples, one per entity
    the pipeline detects; an empty list when nothing is found.
    """
    parsed = nlp(text)
    results = []
    for entity in parsed.ents:
        results.append((entity.text, entity.label_))
    return results
55
 
56
# Function to extract entities using GLiNER with spaCy
def extract_entities(text):
    """Extract GLiNER entities from *text*.

    Returns ``(entity_text, entity_label)`` tuples, or the one-element
    placeholder list ``["No specific entities found"]`` when the pipeline
    detects no entities at all.
    """
    found = [(ent.text, ent.label_) for ent in nlp(text).ents]
    if not found:
        return ["No specific entities found"]
    return found
61
 
 
 
 
 
62
# Function to precompute category embeddings
def compute_category_embeddings():
    """Encode every Google content category with the sentence model.

    Returns whatever ``sentence_model.encode`` produces for the module-level
    ``google_categories`` list, so similarity lookups can reuse one batch of
    embeddings instead of re-encoding per query.
    """
    embeddings = sentence_model.encode(google_categories)
    return embeddings
65
 
66
  # Function to perform topic modeling using sentence transformers
 
146
 
147
  # Optimized batch processing of keywords
148
  def batch_process_keywords(keywords, batch_size=32):
 
149
  processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
150
 
151
  # Precompute keyword embeddings once
 
189
 
190
  # Main layout of the dashboard
191
  app.layout = dbc.Container([
192
+ dcc.Store(id='models-loaded', data=False),
193
  dbc.NavbarSimple(
194
  children=[
195
  dbc.NavItem(dbc.NavLink("About", href="#about")),
 
204
 
205
  dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
206
 
207
+ dbc.Row([
208
  dbc.Col([
209
+ dbc.Alert(
210
+ "Models are loading. This may take a few minutes. Please wait...",
211
+ id="loading-alert",
212
+ color="info",
213
+ is_open=True,
214
+ ),
215
  dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
216
  dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
217
+ dbc.Button('Submit', id='submit-button', color='primary', className='mb-3', disabled=True),
218
  dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
219
  dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
220
  ], width=6)
221
  ], justify='center'),
222
+
223
  # Loading component
224
  dbc.Row([
225
  dbc.Col([
 
340
 
341
  ], fluid=True)
342
 
343
# Callback to load models and update the loading alert
@app.callback(
    [Output('models-loaded', 'data'),
     Output('loading-alert', 'is_open'),
     Output('submit-button', 'disabled')],
    [Input('models-loaded', 'data')]
)
def load_models_callback(loaded):
    """Load the NLP models once, then hide the alert and enable Submit.

    The ``models-loaded`` store is both an Input and an Output of this
    callback, so writing the store retriggers the callback. Returning
    ``dash.no_update`` once the models are loaded breaks that feedback
    loop instead of rewriting the same three values on every retrigger.
    """
    if loaded:
        # Already loaded: touch no outputs so the callback chain stops here.
        return dash.no_update, dash.no_update, dash.no_update
    # First invocation (store initialised to data=False): blocking model load.
    load_models()
    return True, False, False
355
+
356
  # Callback for smooth scrolling
357
  app.clientside_callback(
358
  """