Spaces:
Running
Running
Update app.py
Browse files
removed threading causing Render issues
app.py
CHANGED
@@ -1,12 +1,5 @@
|
|
1 |
import dash
|
2 |
import dash_bootstrap_components as dbc
|
3 |
-
|
4 |
-
# Initialize Dash app with Bootstrap theme and Font Awesome
|
5 |
-
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, 'https://use.fontawesome.com/releases/v5.8.1/css/all.css'])
|
6 |
-
|
7 |
-
# Create server variable
|
8 |
-
server = app.server
|
9 |
-
|
10 |
import pandas as pd
|
11 |
from dash import dcc, html
|
12 |
from dash.dash_table import DataTable
|
@@ -17,11 +10,17 @@ from sentence_transformers import SentenceTransformer
|
|
17 |
from sklearn.metrics.pairwise import cosine_similarity
|
18 |
from gliner_spacy.pipeline import GlinerSpacy
|
19 |
import warnings
|
20 |
-
import threading
|
21 |
-
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
|
22 |
import os
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
26 |
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
|
27 |
|
@@ -37,47 +36,31 @@ custom_spacy_config = {
|
|
37 |
# Model variables
|
38 |
nlp = None
|
39 |
sentence_model = None
|
40 |
-
model_lock = threading.Lock()
|
41 |
-
models_loaded = threading.Event()
|
42 |
|
43 |
# Function to load models
|
44 |
def load_models():
|
45 |
global nlp, sentence_model
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
|
50 |
-
if sentence_model is None:
|
51 |
-
sentence_model = SentenceTransformer('all-roberta-large-v1')
|
52 |
-
models_loaded.set()
|
53 |
|
54 |
-
#
|
55 |
-
|
56 |
-
|
57 |
-
# Function to ensure models are loaded
|
58 |
-
def ensure_models_loaded():
|
59 |
-
models_loaded.wait()
|
60 |
|
61 |
# Function to perform NER using GLiNER with spaCy
|
62 |
def perform_ner(text):
|
63 |
-
ensure_models_loaded()
|
64 |
doc = nlp(text)
|
65 |
return [(ent.text, ent.label_) for ent in doc.ents]
|
66 |
|
67 |
# Function to extract entities using GLiNER with spaCy
|
68 |
def extract_entities(text):
|
69 |
-
ensure_models_loaded()
|
70 |
doc = nlp(text)
|
71 |
entities = [(ent.text, ent.label_) for ent in doc.ents]
|
72 |
return entities if entities else ["No specific entities found"]
|
73 |
|
74 |
-
# Load Google's content categories
|
75 |
-
with open(CATEGORIES_FILE, 'r') as f:
|
76 |
-
google_categories = [line.strip() for line in f]
|
77 |
-
|
78 |
# Function to precompute category embeddings
|
79 |
def compute_category_embeddings():
|
80 |
-
ensure_models_loaded()
|
81 |
return sentence_model.encode(google_categories)
|
82 |
|
83 |
# Function to perform topic modeling using sentence transformers
|
@@ -163,7 +146,6 @@ def sort_by_keyword_feature(f):
|
|
163 |
|
164 |
# Optimized batch processing of keywords
|
165 |
def batch_process_keywords(keywords, batch_size=32):
|
166 |
-
ensure_models_loaded()
|
167 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
|
168 |
|
169 |
# Precompute keyword embeddings once
|
@@ -207,6 +189,7 @@ def batch_process_keywords(keywords, batch_size=32):
|
|
207 |
|
208 |
# Main layout of the dashboard
|
209 |
app.layout = dbc.Container([
|
|
|
210 |
dbc.NavbarSimple(
|
211 |
children=[
|
212 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
|
@@ -221,16 +204,22 @@ app.layout = dbc.Container([
|
|
221 |
|
222 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
|
223 |
|
224 |
-
|
225 |
dbc.Col([
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
|
227 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
|
228 |
-
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3'),
|
229 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
|
230 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
|
231 |
], width=6)
|
232 |
], justify='center'),
|
233 |
-
|
234 |
# Loading component
|
235 |
dbc.Row([
|
236 |
dbc.Col([
|
@@ -351,6 +340,19 @@ app.layout = dbc.Container([
|
|
351 |
|
352 |
], fluid=True)
|
353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
# Callback for smooth scrolling
|
355 |
app.clientside_callback(
|
356 |
"""
|
|
|
1 |
import dash
|
2 |
import dash_bootstrap_components as dbc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import pandas as pd
|
4 |
from dash import dcc, html
|
5 |
from dash.dash_table import DataTable
|
|
|
10 |
from sklearn.metrics.pairwise import cosine_similarity
|
11 |
from gliner_spacy.pipeline import GlinerSpacy
|
12 |
import warnings
|
|
|
|
|
13 |
import os
|
14 |
|
15 |
+
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
|
16 |
+
|
17 |
+
# Initialize Dash app with Bootstrap theme and Font Awesome
|
18 |
+
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, 'https://use.fontawesome.com/releases/v5.8.1/css/all.css'])
|
19 |
+
|
20 |
+
# Create server variable
|
21 |
+
server = app.server
|
22 |
+
|
23 |
+
# Reference absolute file path
|
24 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
25 |
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
|
26 |
|
|
|
36 |
# Model variables
|
37 |
nlp = None
|
38 |
sentence_model = None
|
|
|
|
|
39 |
|
40 |
# Function to load models
|
41 |
def load_models():
    """Initialise the global ML models used by the dashboard.

    Populates the module-level ``sentence_model`` (SentenceTransformer used
    for intent/topic embeddings) and ``nlp`` (blank spaCy English pipeline
    carrying only the GLiNER NER component configured by
    ``custom_spacy_config``).
    """
    global nlp, sentence_model
    # Embedding model for cosine-similarity topic matching.
    sentence_model = SentenceTransformer('all-roberta-large-v1')
    # Blank pipeline: GLiNER is the only component we need for NER.
    nlp = spacy.blank("en")
    nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
# Load Google's content categories (one category per line of the file).
with open(CATEGORIES_FILE, 'r') as category_file:
    google_categories = [entry.strip() for entry in category_file]
|
|
|
|
|
|
|
50 |
|
51 |
# Function to perform NER using GLiNER with spaCy
|
52 |
def perform_ner(text):
    """Run the GLiNER spaCy pipeline over *text*.

    Returns a list of ``(entity_text, entity_label)`` tuples, one per
    recognised entity; empty list when nothing is recognised.
    """
    return [(span.text, span.label_) for span in nlp(text).ents]
|
55 |
|
56 |
# Function to extract entities using GLiNER with spaCy
|
57 |
def extract_entities(text):
    """Extract GLiNER entities from *text* for display.

    Returns ``(entity_text, entity_label)`` tuples, or the sentinel list
    ``["No specific entities found"]`` when the pipeline finds nothing.
    """
    found = [(span.text, span.label_) for span in nlp(text).ents]
    # An empty list is falsy, so `or` supplies the sentinel.
    return found or ["No specific entities found"]
|
61 |
|
|
|
|
|
|
|
|
|
62 |
# Function to precompute category embeddings
|
63 |
def compute_category_embeddings():
    """Encode every Google content category with the sentence model.

    Uses the module-level ``sentence_model``; yields one embedding per
    entry of ``google_categories``, for cosine-similarity topic matching.
    """
    category_vectors = sentence_model.encode(google_categories)
    return category_vectors
|
65 |
|
66 |
# Function to perform topic modeling using sentence transformers
|
|
|
146 |
|
147 |
# Optimized batch processing of keywords
|
148 |
def batch_process_keywords(keywords, batch_size=32):
|
|
|
149 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
|
150 |
|
151 |
# Precompute keyword embeddings once
|
|
|
189 |
|
190 |
# Main layout of the dashboard
|
191 |
app.layout = dbc.Container([
|
192 |
+
dcc.Store(id='models-loaded', data=False),
|
193 |
dbc.NavbarSimple(
|
194 |
children=[
|
195 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
|
|
|
204 |
|
205 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
|
206 |
|
207 |
+
dbc.Row([
|
208 |
dbc.Col([
|
209 |
+
dbc.Alert(
|
210 |
+
"Models are loading. This may take a few minutes. Please wait...",
|
211 |
+
id="loading-alert",
|
212 |
+
color="info",
|
213 |
+
is_open=True,
|
214 |
+
),
|
215 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
|
216 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
|
217 |
+
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3', disabled=True),
|
218 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
|
219 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
|
220 |
], width=6)
|
221 |
], justify='center'),
|
222 |
+
|
223 |
# Loading component
|
224 |
dbc.Row([
|
225 |
dbc.Col([
|
|
|
340 |
|
341 |
], fluid=True)
|
342 |
|
343 |
+
# Callback to load models and update the loading alert.
# BUG FIX: Dash refuses to register a callback whose Output and Input are
# the *same* component property ("Same output and input: models-loaded.data"),
# which is what the previous version did. Trigger off the dcc.Store's
# `modified_timestamp` property instead — the documented pattern for
# reacting to a Store — while still writing back to its `data`.
@app.callback(
    [Output('models-loaded', 'data'),
     Output('loading-alert', 'is_open'),
     Output('submit-button', 'disabled')],
    [Input('models-loaded', 'modified_timestamp')]
)
def load_models_callback(_timestamp):
    """Lazily load the ML models on first page load.

    Fires when the `models-loaded` dcc.Store initialises. If the model
    globals are still unset, loads them, then marks the store True, hides
    the loading alert, and enables the Submit button.

    NOTE(review): load_models() still runs synchronously inside this
    callback and can block the web worker for minutes on first load —
    confirm this is acceptable on the Render deployment.
    """
    # The module globals are the source of truth; the store only triggers us.
    if nlp is None or sentence_model is None:
        load_models()
    return True, False, False
|
355 |
+
|
356 |
# Callback for smooth scrolling
|
357 |
app.clientside_callback(
|
358 |
"""
|