Browse files · Removed threading causing Render issues
@@ -1,12 +1,5 @@
1 |
import dash
2 |
import dash_bootstrap_components as dbc
3 |
4 |
# Initialize Dash app with Bootstrap theme and Font Awesome
5 |
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, ''])
6 |
7 |
# Create server variable
8 |
server = app.server
9 |
10 |
import pandas as pd
11 |
from dash import dcc, html
12 |
from dash.dash_table import DataTable
@@ -17,11 +10,17 @@ from sentence_transformers import SentenceTransformer
17 |
from sklearn.metrics.pairwise import cosine_similarity
18 |
from gliner_spacy.pipeline import GlinerSpacy
19 |
import warnings
20 |
import threading
21 |
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
22 |
import os
23 |
24 |
25 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
26 |
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
27 |
@@ -37,47 +36,31 @@ custom_spacy_config = {
37 |
# Model variables
38 |
nlp = None
39 |
sentence_model = None
40 |
model_lock = threading.Lock()
41 |
models_loaded = threading.Event()
42 |
43 |
# Function to load models
44 |
def load_models():
45 |
global nlp, sentence_model
46 |
47 |
48 |
49 |
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
50 |
if sentence_model is None:
51 |
sentence_model = SentenceTransformer('all-roberta-large-v1')
52 |
53 |
54 |
55 |
56 |
57 |
# Function to ensure models are loaded
58 |
def ensure_models_loaded():
59 |
60 |
61 |
# Function to perform NER using GLiNER with spaCy
62 |
def perform_ner(text):
63 |
64 |
doc = nlp(text)
65 |
return [(ent.text, ent.label_) for ent in doc.ents]
66 |
67 |
# Function to extract entities using GLiNER with spaCy
68 |
def extract_entities(text):
69 |
70 |
doc = nlp(text)
71 |
entities = [(ent.text, ent.label_) for ent in doc.ents]
72 |
return entities if entities else ["No specific entities found"]
73 |
74 |
# Load Google's content categories
75 |
with open(CATEGORIES_FILE, 'r') as f:
76 |
google_categories = [line.strip() for line in f]
77 |
78 |
# Function to precompute category embeddings
79 |
def compute_category_embeddings():
80 |
81 |
return sentence_model.encode(google_categories)
82 |
83 |
# Function to perform topic modeling using sentence transformers
@@ -163,7 +146,6 @@ def sort_by_keyword_feature(f):
163 |
164 |
# Optimized batch processing of keywords
165 |
def batch_process_keywords(keywords, batch_size=32):
166 |
167 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
168 |
169 |
# Precompute keyword embeddings once
@@ -207,6 +189,7 @@ def batch_process_keywords(keywords, batch_size=32):
207 |
208 |
# Main layout of the dashboard
209 |
app.layout = dbc.Container([
210 |
211 |
212 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
@@ -221,16 +204,22 @@ app.layout = dbc.Container([
221 |
222 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
223 |
224 |
225 |
226 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
227 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
228 |
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3'),
229 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
230 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
231 |
], width=6)
232 |
], justify='center'),
233 |
234 |
# Loading component
235 |
236 |
@@ -351,6 +340,19 @@ app.layout = dbc.Container([
351 |
352 |
], fluid=True)
353 |
354 |
# Callback for smooth scrolling
355 |
356 |
1 |
import dash
2 |
import dash_bootstrap_components as dbc
3 |
import pandas as pd
4 |
from dash import dcc, html
5 |
from dash.dash_table import DataTable
10 |
from sklearn.metrics.pairwise import cosine_similarity
11 |
from gliner_spacy.pipeline import GlinerSpacy
12 |
import warnings
13 |
import os
14 |
15 |
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
16 |
17 |
# Initialize Dash app with Bootstrap theme and Font Awesome
18 |
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, ''])
19 |
20 |
# Create server variable
21 |
server = app.server
22 |
23 |
# Reference absolute file path
24 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25 |
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
26 |
36 |
# Model variables
37 |
nlp = None
38 |
sentence_model = None
39 |
40 |
# Function to load models
41 |
def load_models():
    """Load the NER pipeline and the sentence-embedding model into globals.

    Populates the module-level ``nlp`` (a blank English spaCy pipeline with
    the ``gliner_spacy`` component configured by ``custom_spacy_config``) and
    ``sentence_model`` (``SentenceTransformer('all-roberta-large-v1')``).

    The function is idempotent: a model that is already loaded is left
    untouched, so repeated calls (for example one per page load triggered by
    the client-side ``models-loaded`` store) do not re-instantiate the models.
    """
    global nlp, sentence_model

    if nlp is None:
        # Build the pipeline in a local first so that a failure in add_pipe
        # never leaves a half-configured pipeline in the global.
        pipeline = spacy.blank("en")
        pipeline.add_pipe("gliner_spacy", config=custom_spacy_config)
        nlp = pipeline

    if sentence_model is None:
        sentence_model = SentenceTransformer('all-roberta-large-v1')
46 |
47 |
# Load Google's content categories
48 |
# The encoding is pinned so the parsed categories do not depend on the
# platform's default locale, and blank lines are skipped so an empty string
# is never treated as a category.
with open(CATEGORIES_FILE, 'r', encoding='utf-8') as f:
    google_categories = [name for name in map(str.strip, f) if name]
50 |
51 |
# Function to perform NER using GLiNER with spaCy
52 |
def perform_ner(text):
    """Run the module-level ``nlp`` pipeline on *text* and list its entities.

    Returns a list of ``(entity_text, entity_label)`` tuples, one for each
    item in the processed document's ``ents``; the list is empty when the
    pipeline reports no entities.

    ``nlp`` starts out as ``None`` and is populated by ``load_models()``, so
    that function must have run before this one is called.
    """
    return [(span.text, span.label_) for span in nlp(text).ents]
55 |
56 |
# Function to extract entities using GLiNER with spaCy
57 |
def extract_entities(text):
    """Return the entities found in *text*, or a placeholder when none exist.

    Processes *text* with the module-level ``nlp`` pipeline and collects a
    ``(entity_text, entity_label)`` tuple for each item in ``ents``. When no
    entity is found, the single-element list
    ``["No specific entities found"]`` is returned instead of an empty list.

    ``nlp`` starts out as ``None`` and is populated by ``load_models()``, so
    that function must have run before this one is called.
    """
    parsed = nlp(text)
    found = []
    for span in parsed.ents:
        found.append((span.text, span.label_))
    # An empty list is falsy, so the placeholder is used only when nothing
    # was recognised.
    return found or ["No specific entities found"]
61 |
62 |
# Function to precompute category embeddings
63 |
def compute_category_embeddings():
    """Encode every entry of ``google_categories`` with ``sentence_model``.

    Returns whatever ``sentence_model.encode`` produces for the full category
    list. ``sentence_model`` starts out as ``None`` and is populated by
    ``load_models()``, so that function must have run first.
    """
    category_vectors = sentence_model.encode(google_categories)
    return category_vectors
65 |
66 |
# Function to perform topic modeling using sentence transformers
146 |
147 |
# Optimized batch processing of keywords
148 |
def batch_process_keywords(keywords, batch_size=32):
149 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
150 |
151 |
# Precompute keyword embeddings once
189 |
190 |
# Main layout of the dashboard
191 |
app.layout = dbc.Container([
192 |
dcc.Store(id='models-loaded', data=False),
193 |
194 |
195 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
204 |
205 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
206 |
207 |
208 |
209 |
210 |
"Models are loading. This may take a few minutes. Please wait...",
211 |
212 |
213 |
214 |
215 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
216 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
217 |
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3', disabled=True),
218 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
219 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
220 |
], width=6)
221 |
], justify='center'),
222 |
223 |
# Loading component
224 |
225 |
340 |
341 |
], fluid=True)
342 |
343 |
# Callback to load models and update the loading alert
344 |
345 |
[Output('models-loaded', 'data'),
346 |
Output('loading-alert', 'is_open'),
347 |
Output('submit-button', 'disabled')],
348 |
[Input('models-loaded', 'data')]
349 |
350 |
def load_models_callback(loaded):
351 |
if not loaded:
352 |
353 |
return True, False, False
354 |
return loaded, False, False
355 |
356 |
# Callback for smooth scrolling
357 |
358 |