3lv27 committed on
Commit
5dad55e
·
verified ·
1 Parent(s): c326442

Add BERTopic model

Browse files
Files changed (4) hide show
  1. README.md +74 -0
  2. config.json +16 -0
  3. topic_embeddings.safetensors +3 -0
  4. topics.json +461 -0
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - bertopic
5
+ library_name: bertopic
6
+ pipeline_tag: text-classification
7
+ ---
8
+
9
+ # rag-topic-model
10
+
11
+ This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
12
+ BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
13
+
14
+ ## Usage
15
+
16
+ To use this model, please install BERTopic:
17
+
18
+ ```
19
+ pip install -U bertopic
20
+ ```
21
+
22
+ You can use the model as follows:
23
+
24
+ ```python
25
+ from bertopic import BERTopic
26
+ topic_model = BERTopic.load("3lv27/rag-topic-model")
27
+
28
+ topic_model.get_topic_info()
29
+ ```
30
+
31
+ ## Topic overview
32
+
33
+ * Number of topics: 5
34
+ * Number of training documents: 201
35
+
36
+ <details>
37
+ <summary>Click here for an overview of all topics.</summary>
38
+
39
+ | Topic ID | Topic Keywords | Topic Frequency | Label |
40
+ |----------|----------------|-----------------|-------|
41
+ | -1 | my - for - was - payment - it | 41 | -1_my_for_was_payment |
42
+ | 0 | refund - nike - my - store - for | 72 | 0_refund_nike_my_store |
43
+ | 1 | my - the - payment - app - balance | 37 | 1_my_the_payment_app |
44
+ | 2 | to - email - my - account - the | 34 | 2_to_email_my_account |
45
+ | 3 | card - klarna - details - to - do | 17 | 3_card_klarna_details_to |
46
+
47
+ </details>
48
+
49
+ ## Training hyperparameters
50
+
51
+ * calculate_probabilities: False
52
+ * language: None
53
+ * low_memory: False
54
+ * min_topic_size: 10
55
+ * n_gram_range: (1, 1)
56
+ * nr_topics: None
57
+ * seed_topic_list: None
58
+ * top_n_words: 10
59
+ * verbose: False
60
+ * zeroshot_min_similarity: 0.7
61
+ * zeroshot_topic_list: None
62
+
63
+ ## Framework versions
64
+
65
+ * Numpy: 2.0.2
66
+ * HDBSCAN: 0.8.40
67
+ * UMAP: 0.5.7
68
+ * Pandas: 2.2.3
69
+ * Scikit-Learn: 1.6.1
70
+ * Sentence-transformers: 3.1.1
71
+ * Transformers: 4.45.2
72
+ * Numba: 0.60.0
73
+ * Plotly: 6.0.0
74
+ * Python: 3.9.6
config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "calculate_probabilities": false,
3
+ "language": null,
4
+ "low_memory": false,
5
+ "min_topic_size": 10,
6
+ "n_gram_range": [
7
+ 1,
8
+ 1
9
+ ],
10
+ "nr_topics": null,
11
+ "seed_topic_list": null,
12
+ "top_n_words": 10,
13
+ "verbose": false,
14
+ "zeroshot_min_similarity": 0.7,
15
+ "zeroshot_topic_list": null
16
+ }
topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27b5e6df023ab7f9dedbc8f97404369bc96ccf7f2c9db8d46cf3db2d1de6a2c
3
+ size 7768
topics.json ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "my",
6
+ 0.08775779807973855
7
+ ],
8
+ [
9
+ "for",
10
+ 0.06628866977236048
11
+ ],
12
+ [
13
+ "was",
14
+ 0.0505320178930812
15
+ ],
16
+ [
17
+ "payment",
18
+ 0.05018037484356471
19
+ ],
20
+ [
21
+ "it",
22
+ 0.04942005954395189
23
+ ],
24
+ [
25
+ "to",
26
+ 0.04929466805752189
27
+ ],
28
+ [
29
+ "account",
30
+ 0.045642695509656085
31
+ ],
32
+ [
33
+ "klarna",
34
+ 0.04515498165514218
35
+ ],
36
+ [
37
+ "and",
38
+ 0.044068215235971195
39
+ ],
40
+ [
41
+ "why",
42
+ 0.04301226356649548
43
+ ]
44
+ ],
45
+ "0": [
46
+ [
47
+ "refund",
48
+ 0.1006554344591509
49
+ ],
50
+ [
51
+ "nike",
52
+ 0.0794181762787875
53
+ ],
54
+ [
55
+ "my",
56
+ 0.06926034602939844
57
+ ],
58
+ [
59
+ "store",
60
+ 0.06749123928041868
61
+ ],
62
+ [
63
+ "for",
64
+ 0.06711649471397088
65
+ ],
66
+ [
67
+ "returned",
68
+ 0.06633459185101365
69
+ ],
70
+ [
71
+ "to",
72
+ 0.058642683878398376
73
+ ],
74
+ [
75
+ "credit",
76
+ 0.05670040240711006
77
+ ],
78
+ [
79
+ "week",
80
+ 0.05335338509330132
81
+ ],
82
+ [
83
+ "but",
84
+ 0.04873599925968905
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "my",
90
+ 0.08025131359001461
91
+ ],
92
+ [
93
+ "the",
94
+ 0.0758021017209259
95
+ ],
96
+ [
97
+ "payment",
98
+ 0.06589064412833702
99
+ ],
100
+ [
101
+ "app",
102
+ 0.05766680545629508
103
+ ],
104
+ [
105
+ "balance",
106
+ 0.05754731914082683
107
+ ],
108
+ [
109
+ "for",
110
+ 0.057264512525379244
111
+ ],
112
+ [
113
+ "to",
114
+ 0.05711262691361558
115
+ ],
116
+ [
117
+ "klarna",
118
+ 0.05701146261277651
119
+ ],
120
+ [
121
+ "it",
122
+ 0.0540769103187452
123
+ ],
124
+ [
125
+ "pay",
126
+ 0.050427115620566655
127
+ ]
128
+ ],
129
+ "2": [
130
+ [
131
+ "to",
132
+ 0.09120172610267988
133
+ ],
134
+ [
135
+ "email",
136
+ 0.0872841246047435
137
+ ],
138
+ [
139
+ "my",
140
+ 0.0826176984779253
141
+ ],
142
+ [
143
+ "account",
144
+ 0.06494229437231862
145
+ ],
146
+ [
147
+ "the",
148
+ 0.06384869337262605
149
+ ],
150
+ [
151
+ "im",
152
+ 0.061524278879038684
153
+ ],
154
+ [
155
+ "klarna",
156
+ 0.059826403579282345
157
+ ],
158
+ [
159
+ "and",
160
+ 0.05775189684191339
161
+ ],
162
+ [
163
+ "cant",
164
+ 0.05541270860895539
165
+ ],
166
+ [
167
+ "log",
168
+ 0.0497815437815026
169
+ ]
170
+ ],
171
+ "3": [
172
+ [
173
+ "card",
174
+ 0.18303800934636058
175
+ ],
176
+ [
177
+ "klarna",
178
+ 0.13514451506438238
179
+ ],
180
+ [
181
+ "details",
182
+ 0.1131565718428251
183
+ ],
184
+ [
185
+ "to",
186
+ 0.09166646868868897
187
+ ],
188
+ [
189
+ "do",
190
+ 0.08879272720550069
191
+ ],
192
+ [
193
+ "what",
194
+ 0.0880444369842009
195
+ ],
196
+ [
197
+ "it",
198
+ 0.08011763650544722
199
+ ],
200
+ [
201
+ "my",
202
+ 0.07926421256184715
203
+ ],
204
+ [
205
+ "call",
206
+ 0.078074226723468
207
+ ],
208
+ [
209
+ "gave",
210
+ 0.078074226723468
211
+ ]
212
+ ]
213
+ },
214
+ "topics": [
215
+ 2,
216
+ -1,
217
+ 2,
218
+ 1,
219
+ 2,
220
+ -1,
221
+ 0,
222
+ 0,
223
+ -1,
224
+ 1,
225
+ 1,
226
+ 2,
227
+ 2,
228
+ 0,
229
+ -1,
230
+ -1,
231
+ -1,
232
+ -1,
233
+ 2,
234
+ 0,
235
+ 2,
236
+ -1,
237
+ -1,
238
+ 1,
239
+ -1,
240
+ -1,
241
+ 2,
242
+ 1,
243
+ 0,
244
+ 0,
245
+ 0,
246
+ 2,
247
+ -1,
248
+ 1,
249
+ 1,
250
+ 0,
251
+ 0,
252
+ 0,
253
+ 1,
254
+ 2,
255
+ 2,
256
+ 3,
257
+ 0,
258
+ 0,
259
+ 0,
260
+ -1,
261
+ 2,
262
+ 1,
263
+ 2,
264
+ -1,
265
+ 3,
266
+ 1,
267
+ 3,
268
+ 2,
269
+ -1,
270
+ 0,
271
+ 0,
272
+ 0,
273
+ 0,
274
+ 0,
275
+ -1,
276
+ 0,
277
+ 0,
278
+ 0,
279
+ 0,
280
+ -1,
281
+ 1,
282
+ 0,
283
+ 0,
284
+ 0,
285
+ 0,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 0,
290
+ 0,
291
+ 0,
292
+ 0,
293
+ 0,
294
+ 0,
295
+ 0,
296
+ 0,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0,
303
+ 0,
304
+ 0,
305
+ 0,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 0,
310
+ 0,
311
+ 0,
312
+ 0,
313
+ 0,
314
+ 3,
315
+ 1,
316
+ -1,
317
+ 3,
318
+ -1,
319
+ -1,
320
+ 0,
321
+ -1,
322
+ 0,
323
+ -1,
324
+ 2,
325
+ 2,
326
+ 1,
327
+ 1,
328
+ -1,
329
+ 3,
330
+ 1,
331
+ -1,
332
+ 3,
333
+ 0,
334
+ 1,
335
+ 1,
336
+ 1,
337
+ 1,
338
+ -1,
339
+ 3,
340
+ 0,
341
+ 3,
342
+ 0,
343
+ 2,
344
+ 0,
345
+ 2,
346
+ -1,
347
+ 0,
348
+ 1,
349
+ 2,
350
+ 0,
351
+ 0,
352
+ 2,
353
+ 0,
354
+ -1,
355
+ 0,
356
+ 1,
357
+ 2,
358
+ -1,
359
+ -1,
360
+ 1,
361
+ 0,
362
+ -1,
363
+ 1,
364
+ 1,
365
+ 3,
366
+ 1,
367
+ 2,
368
+ 0,
369
+ -1,
370
+ 3,
371
+ 3,
372
+ 2,
373
+ -1,
374
+ 0,
375
+ -1,
376
+ -1,
377
+ -1,
378
+ 0,
379
+ 2,
380
+ -1,
381
+ 0,
382
+ 0,
383
+ -1,
384
+ 1,
385
+ 1,
386
+ 1,
387
+ 1,
388
+ 1,
389
+ 1,
390
+ 1,
391
+ 1,
392
+ 2,
393
+ 3,
394
+ 1,
395
+ 1,
396
+ -1,
397
+ 1,
398
+ 1,
399
+ 2,
400
+ 2,
401
+ 2,
402
+ 2,
403
+ 2,
404
+ 2,
405
+ 2,
406
+ 2,
407
+ 2,
408
+ 0,
409
+ -1,
410
+ -1,
411
+ 3,
412
+ 3,
413
+ 3,
414
+ 3,
415
+ -1
416
+ ],
417
+ "topic_sizes": {
418
+ "2": 34,
419
+ "-1": 41,
420
+ "1": 37,
421
+ "0": 72,
422
+ "3": 17
423
+ },
424
+ "topic_mapper": [
425
+ [
426
+ -1,
427
+ -1,
428
+ -1
429
+ ],
430
+ [
431
+ 0,
432
+ 0,
433
+ 0
434
+ ],
435
+ [
436
+ 1,
437
+ 1,
438
+ 1
439
+ ],
440
+ [
441
+ 2,
442
+ 2,
443
+ 2
444
+ ],
445
+ [
446
+ 3,
447
+ 3,
448
+ 3
449
+ ]
450
+ ],
451
+ "topic_labels": {
452
+ "-1": "-1_my_for_was_payment",
453
+ "0": "0_refund_nike_my_store",
454
+ "1": "1_my_the_payment_app",
455
+ "2": "2_to_email_my_account",
456
+ "3": "3_card_klarna_details_to"
457
+ },
458
+ "custom_labels": null,
459
+ "_outliers": 1,
460
+ "topic_aspects": {}
461
+ }