romain125 committed on
Commit
24a9a04
·
verified ·
1 Parent(s): 98b1c93

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,690 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: intfloat/multilingual-e5-base
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy
6
+ - cosine_accuracy_threshold
7
+ - cosine_f1
8
+ - cosine_f1_threshold
9
+ - cosine_precision
10
+ - cosine_recall
11
+ - cosine_ap
12
+ - cosine_mcc
13
+ pipeline_tag: sentence-similarity
14
+ tags:
15
+ - sentence-transformers
16
+ - sentence-similarity
17
+ - feature-extraction
18
+ - generated_from_trainer
19
+ - dataset_size:2467
20
+ - loss:MultipleNegativesRankingLoss
21
+ widget:
22
+ - source_sentence: 'Date de début: non précisée
23
+
24
+ Date de fin (clôture): non précisée
25
+
26
+ Date de début de la future campagne: non précisée'
27
+ sentences:
28
+ - '''Aménageurs privés'':entité|INTERVIENT_POUR|''Établissements publics territoriaux
29
+ franciliens'':entité'
30
+ - '''Commission permanente du Conseil régional'':groupe|DÉSIGNE|''Projets retenus'':__inferred__'
31
+ - '''Date de fin'':concept|EST|''non précisée'':__inferred__'
32
+ - source_sentence: 'Procédures et démarches: Deux
33
+
34
+ appels à projets sont lancés chaque année. Le candidat doit prendre contact avec
35
+ la direction de
36
+
37
+ l’aménagement durable du territoire avant la date de dépôt afin de préciser
38
+
39
+ son projet et de s’assurer de son éligibilité (via votre interlocuteur habituel
40
+
41
+ ou [email protected]). Le dossier de candidature est à remplir sur mesdemarches.iledefrance.fr.  Un
42
+
43
+ jury d’élus et de personnalités qualifiées se réunit pour examiner les dossiers
44
+
45
+ et proposer des lauréats. L''attribution définitive des aides est votée en
46
+
47
+ commission permanente. Ce
48
+
49
+ dispositif d’aide peut être cumulable avec le Fonds Vert mis en place par l’Etat.
50
+
51
+ Les conditions d’éligibilité et d’intervention propres à chacun des dispositifs
52
+
53
+ ainsi que les contacts et liens utiles sont présentés dans le document "Tableau
54
+ AAP Friches 2023" en annexe de cette page.
55
+
56
+ Bénéficiaires: Collectivité ou institution - Autre (GIP, copropriété, EPA...),
57
+ Collectivité ou institution - Communes de 10 000 à 20 000 hab, Collectivité ou
58
+ institution - Communes de 2000 à 10 000 hab, Collectivité ou institution - Communes
59
+ de < 2000 hab, Collectivité ou institution - Communes de > 20 000 hab, Collectivité
60
+ ou institution - Département, Collectivité ou institution - EPT / Métropole du
61
+ Grand Paris, Collectivité ou institution - EPCI'
62
+ sentences:
63
+ - '''Fonds Vert'':programme|MIS_EN_PLACE_PAR|''Etat'':organisation'
64
+ - '''démonstration et initiation sportive'':activité|ENCADRÉ_PAR|''Ambassadrice
65
+ et Ambassadeur du Sport'':personne'
66
+ - '''Association'':entité|EST|''Bénéficiaires'':__inferred__'
67
+ - source_sentence: 'Procédures et démarches: Dépôt du dossier de candidature sur la
68
+ plateforme des aides régionales (mesdemarches.iledefrance.fr).
69
+
70
+ Bénéficiaires: Collectivité ou institution - Communes de < 2000 hab, Collectivité
71
+ ou institution - Communes de 2000 à 10 000 hab, Collectivité ou institution -
72
+ Communes de 10 000 à 20 000 hab, Collectivité ou institution - Communes de > 20
73
+ 000 hab, Collectivité ou institution - EPCI, Collectivité ou institution - EPT
74
+ / Métropole du Grand Paris, Collectivité ou institution - Département, Collectivité
75
+ ou institution - Bailleurs sociaux, Collectivité ou institution - Autre (GIP,
76
+ copropriété, EPA...)
77
+
78
+ Précision sure les bénéficiaires: Toutes les structures de droit public ou de
79
+ droit privé'
80
+ sentences:
81
+ - '''mesdemarches.iledefrance.fr'':plateforme|ACCEPTE_DEMANDE|''Collectivité ou
82
+ institution - Communes de 10 000 à 20 000 hab'':organisation'
83
+ - '''plateforme des aides régionales'':plateforme|CIBLE|''Collectivité ou institution
84
+ - EPT / Métropole du Grand Paris'':organisation'
85
+ - '''projets éligibles'':projet|AMÉLIORE_CONDITIONS_VIE|''résidents'':personne'
86
+ - source_sentence: 'Procédures et démarches: Les demandes d’aide devront être déposées
87
+ sur mesdemarches.iledefrance.fr, la plateforme des aides régionales.
88
+
89
+ Bénéficiaires: Particulier - Francilien, Professionnel - Culture, Professionnel
90
+ - Patrimoine, Association - Fondation, Association - ONG, Association - Régie
91
+ par la loi de 1901, Collectivité ou institution - Autre (GIP, copropriété, EPA...),
92
+ Collectivité ou institution - Bailleurs sociaux, Collectivité ou institution -
93
+ Communes de 10 000 à 20 000 hab, Collectivité ou institution - Communes de 2000
94
+ à 10 000 hab, Collectivité ou institution - Communes de < 2000 hab, Collectivité
95
+ ou institution - Communes de > 20 000 hab, Collectivité ou institution - Département,
96
+ Collectivité ou institution - EPCI, Collectivité ou institution - EPT / Métropole
97
+ du Grand Paris
98
+
99
+ Précision sure les bénéficiaires: Sont éligibles les propriétaires publics et
100
+ privés de maisons ou d’ateliers d’artistes.Les aménageurs mandatés par les collectivités
101
+ territoriales peuvent être bénéficiaires. Une convention de délégation de maîtrise
102
+ d’ouvrage doit avoir été signée entre la collectivité et l’aménageur.L’établissement
103
+ doit avoir fait l’objet d’un projet culturel et bénéficier d’une expertise scientifique.
104
+ La présence, le témoignage ou la trace tangibles de l’artiste ayant vécu sur place
105
+ doivent être attestés.Les établissements bénéficiant du label délivré par la DRAC
106
+ « Maisons des illustres » sont également concernés par le dispositif.'
107
+ sentences:
108
+ - '''mesdemarches.iledefrance.fr'':plateforme|ACCEPTE_DOSSIERS|''Collectivité ou
109
+ institution - Communes de 10 000 à 20 000 hab'':organisation'
110
+ - '''mesdemarches.iledefrance.fr'':plateforme|ACCEPTE_DEMANDE|''établissements avec
111
+ projet culturel et expertise scientifique'':bénéficiaire'
112
+ - '''plateforme des aides régionales'':plateforme|CIBLE|''Collectivité ou institution
113
+ - Communes de 2000 à 10 000 hab'':organisation'
114
+ - source_sentence: 'Procédures et démarches: Déposez sur mesdemarches.iledefrance.fr votre dossier
115
+ de demande de subvention présentant le projet de manière précise et comportant
116
+ toutes les pièces permettant l’instruction du dossier, réputé complet, par les
117
+ services de la Région. Après examen du dossier, la demande de subvention sera
118
+ soumise à la Commission permanente régionale pour délibération. Le versement
119
+ de la subvention est subordonné à la signature préalable d’une convention.
120
+
121
+ Bénéficiaires: Collectivité ou institution - Communes de 10 000 à 20 000 hab,
122
+ Collectivité ou institution - Communes de 2000 à 10 000 hab, Collectivité ou institution
123
+ - Communes de < 2000 hab, Collectivité ou institution - Communes de > 20 000 hab,
124
+ Collectivité ou institution - EPCI, Collectivité ou institution - EPT / Métropole
125
+ du Grand Paris
126
+
127
+ Précision sure les bénéficiaires: Pour les PEMR et aires de covoiturage : État,
128
+ Départements, EPCI, Communes, Syndicats mixtes,Ville de Paris.Pour les voies réservées :
129
+ État, Départements, EPCI.'
130
+ sentences:
131
+ - '''Date de début de la future campagne'':concept|EST|''non précisée'':__inferred__'
132
+ - '''prêt d''amorçage'':aide|FINANCE|''besoin en fonds de roulement'':concept'
133
+ - '''subvention'':__inferred__|SUBORDONNÉ_À|''convention'':document'
134
+ model-index:
135
+ - name: SentenceTransformer based on intfloat/multilingual-e5-base
136
+ results:
137
+ - task:
138
+ type: binary-classification
139
+ name: Binary Classification
140
+ dataset:
141
+ name: BinaryClassifEval
142
+ type: BinaryClassifEval
143
+ metrics:
144
+ - type: cosine_accuracy
145
+ value: 0.9983766233766234
146
+ name: Cosine Accuracy
147
+ - type: cosine_accuracy_threshold
148
+ value: -0.09276729822158813
149
+ name: Cosine Accuracy Threshold
150
+ - type: cosine_f1
151
+ value: 0.9991876523151909
152
+ name: Cosine F1
153
+ - type: cosine_f1_threshold
154
+ value: -0.09276729822158813
155
+ name: Cosine F1 Threshold
156
+ - type: cosine_precision
157
+ value: 1.0
158
+ name: Cosine Precision
159
+ - type: cosine_recall
160
+ value: 0.9983766233766234
161
+ name: Cosine Recall
162
+ - type: cosine_ap
163
+ value: 0.9999999999999999
164
+ name: Cosine Ap
165
+ - type: cosine_mcc
166
+ value: 0.0
167
+ name: Cosine Mcc
168
+ ---
169
+
170
+ # SentenceTransformer based on intfloat/multilingual-e5-base
171
+
172
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
173
+
174
+ ## Model Details
175
+
176
+ ### Model Description
177
+ - **Model Type:** Sentence Transformer
178
+ - **Base model:** [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) <!-- at revision 835193815a3936a24a0ee7dc9e3d48c1fbb19c55 -->
179
+ - **Maximum Sequence Length:** 512 tokens
180
+ - **Output Dimensionality:** 768 dimensions
181
+ - **Similarity Function:** Cosine Similarity
182
+ - **Training Dataset:**
183
+ - json
184
+ <!-- - **Language:** Unknown -->
185
+ <!-- - **License:** Unknown -->
186
+
187
+ ### Model Sources
188
+
189
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
190
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
191
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
192
+
193
+ ### Full Model Architecture
194
+
195
+ ```
196
+ SentenceTransformer(
197
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
198
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
199
+ (2): Normalize()
200
+ )
201
+ ```
202
+
203
+ ## Usage
204
+
205
+ ### Direct Usage (Sentence Transformers)
206
+
207
+ First install the Sentence Transformers library:
208
+
209
+ ```bash
210
+ pip install -U sentence-transformers
211
+ ```
212
+
213
+ Then you can load this model and run inference.
214
+ ```python
215
+ from sentence_transformers import SentenceTransformer
216
+
217
+ # Download from the 🤗 Hub
218
+ model = SentenceTransformer("Lettria/grag-go-idf-mult_neg_rk_10-trial-3")
219
+ # Run inference
220
+ sentences = [
221
+ 'Procédures et démarches: Déposez sur\xa0mesdemarches.iledefrance.fr\xa0votre\xa0dossier de demande de subvention présentant le projet de manière précise et comportant toutes les pièces permettant l’instruction du dossier, réputé complet, par les services de la Région. Après examen du dossier, la demande de subvention sera soumise à la Commission permanente régionale pour délibération. Le versement de la subvention est subordonné à la signature préalable d’une convention.\nBénéficiaires: Collectivité ou institution - Communes de 10 000 à 20 000 hab, Collectivité ou institution - Communes de 2000 à 10 000 hab, Collectivité ou institution - Communes de < 2000 hab, Collectivité ou institution - Communes de > 20 000 hab, Collectivité ou institution - EPCI, Collectivité ou institution - EPT / Métropole du Grand Paris\nPrécision sure les bénéficiaires: Pour les PEMR et aires de covoiturage : État, Départements, EPCI, Communes, Syndicats mixtes,Ville de Paris.Pour les voies réservées\xa0: État, Départements, EPCI.',
222
+ "'subvention':__inferred__|SUBORDONNÉ_À|'convention':document",
223
+ "'Date de début de la future campagne':concept|EST|'non précisée':__inferred__",
224
+ ]
225
+ embeddings = model.encode(sentences)
226
+ print(embeddings.shape)
227
+ # [3, 768]
228
+
229
+ # Get the similarity scores for the embeddings
230
+ similarities = model.similarity(embeddings, embeddings)
231
+ print(similarities.shape)
232
+ # [3, 3]
233
+ ```
234
+
235
+ <!--
236
+ ### Direct Usage (Transformers)
237
+
238
+ <details><summary>Click to see the direct usage in Transformers</summary>
239
+
240
+ </details>
241
+ -->
242
+
243
+ <!--
244
+ ### Downstream Usage (Sentence Transformers)
245
+
246
+ You can finetune this model on your own dataset.
247
+
248
+ <details><summary>Click to expand</summary>
249
+
250
+ </details>
251
+ -->
252
+
253
+ <!--
254
+ ### Out-of-Scope Use
255
+
256
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
257
+ -->
258
+
259
+ ## Evaluation
260
+
261
+ ### Metrics
262
+
263
+ #### Binary Classification
264
+
265
+ * Dataset: `BinaryClassifEval`
266
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
267
+
268
+ | Metric | Value |
269
+ |:--------------------------|:--------|
270
+ | cosine_accuracy | 0.9984 |
271
+ | cosine_accuracy_threshold | -0.0928 |
272
+ | cosine_f1 | 0.9992 |
273
+ | cosine_f1_threshold | -0.0928 |
274
+ | cosine_precision | 1.0 |
275
+ | cosine_recall | 0.9984 |
276
+ | **cosine_ap** | **1.0** |
277
+ | cosine_mcc | 0.0 |
278
+
279
+ <!--
280
+ ## Bias, Risks and Limitations
281
+
282
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
283
+ -->
284
+
285
+ <!--
286
+ ### Recommendations
287
+
288
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
289
+ -->
290
+
291
+ ## Training Details
292
+
293
+ ### Training Dataset
294
+
295
+ #### json
296
+
297
+ * Dataset: json
298
+ * Size: 2,467 training samples
299
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
300
+ * Approximate statistics based on the first 1000 samples:
301
+ | | sentence1 | sentence2 | label |
302
+ |:--------|:-------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-----------------------------|
303
+ | type | string | string | int |
304
+ | details | <ul><li>min: 26 tokens</li><li>mean: 191.64 tokens</li><li>max: 429 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 31.2 tokens</li><li>max: 72 tokens</li></ul> | <ul><li>1: 100.00%</li></ul> |
305
+ * Samples:
306
+ | sentence1 | sentence2 | label |
307
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|:---------------|
308
+ | <code>Type de project: L’excès de précipitations tout au long de l’année a conduit à une chute spectaculaire des rendements des céréales d’été et des protéagineux (blé, orge, pois, féverole, etc.) que produisent 90% des agriculteurs d’Île-de-France, historique grenier à blé du pays. Tributaires naturels du fleurissement des cultures, les apiculteurs professionnels de la région ont également souffert de ces dérèglements climatiques.La Région accompagne les exploitations concernées en leur apportant une aide exceptionnelle.</code> | <code>'excès de précipitations':phénomène|DIMINUE|'rendements des protéagineux':concept</code> | <code>1</code> |
309
+ | <code>Type de project: Dans le cadre de sa stratégie « Impact 2028 », la Région s’engage dans la défense de la souveraineté industrielle en renforçant son soutien à une industrie circulaire et décarbonée, porteuse d’innovations et créatrice d’emplois. PM'up Jeunes pousses industrielles soutient les projets d’implantation d’une première usine tournée vers la décarbonation, l’efficacité énergétique et la circularité des processus de production. Ces projets peuvent prendre l'une de ces formes : Une première unité de production industrielle, après une phase de prototypage,Une ligne pilote de production industrielle, en interne ou chez un tiers situé en Île-de-France, à condition que sa production soit destinée à de premières commercialisations,La transformation d’une unité de production pilote à une unité de production industrielle</code> | <code>'Région Île-de-France':organisation|soutient|'industrie décarbonée':concept</code> | <code>1</code> |
310
+ | <code>Procédures et démarches: Le dépôt des demandes de subvention se fait en ligne sur la plateforme régionale mesdemarches.iledefrance.fr : Session de dépôt unique pour les nouvelles demandes : du 30 septembre au 4 novembre 2024 (11 heures) pour des festivals qui se déroulent entre le 1er mars 2025 et le 28 février 2026 (vote à la CP de mars 2025). Pour les demandes de renouvellement, un mail est envoyé aux structures concernées par le service du Spectacle vivant en amont de chaque session de dépôt.<br>Bénéficiaires: Professionnel - Culture, Association - Fondation, Association - Régie par la loi de 1901, Association - ONG, Collectivité ou institution - Communes de 10 000 à 20 000 hab, Collectivité ou institution - Autre (GIP, copropriété, EPA...), Collectivité ou institution - Communes de 2000 à 10 000 hab, Collectivité ou institution - Communes de < 2000 hab, Collectivité ou institution - Communes de > 20 000 hab, Collectivité ou institution - Département, Collectivité ou institution - EPC...</code> | <code>'Collectivité ou institution - EPCI':bénéficiaire|PEUT_BÉNÉFICIER|'demandes de subvention':procédure</code> | <code>1</code> |
311
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
312
+ ```json
313
+ {
314
+ "scale": 20.0,
315
+ "similarity_fct": "cos_sim"
316
+ }
317
+ ```
318
+
319
+ ### Evaluation Dataset
320
+
321
+ #### json
322
+
323
+ * Dataset: json
324
+ * Size: 616 evaluation samples
325
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
326
+ * Approximate statistics based on the first 616 samples:
327
+ | | sentence1 | sentence2 | label |
328
+ |:--------|:-------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------|
329
+ | type | string | string | int |
330
+ | details | <ul><li>min: 24 tokens</li><li>mean: 188.12 tokens</li><li>max: 394 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 31.2 tokens</li><li>max: 133 tokens</li></ul> | <ul><li>1: 100.00%</li></ul> |
331
+ * Samples:
332
+ | sentence1 | sentence2 | label |
333
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------|
334
+ | <code>Type de project: Le programme propose des rencontres le samedi après-midi dans une université ou une grande école réputée, entre les professionnels bénévoles et les lycéens et collégiens sous la forme d'atelier thématiques. Ces moments de rencontre touchent à une grande multitude de domaines d’activités. L'objectif est de donner l’opportunité aux jeunes les plus enclavés d’échanger avec des intervenants professionnels aux parcours atypiques et inspirants. Les intervenants suscitent les ambitions et élargissent les perspectives des élèves.</code> | <code>'rencontres':événement|impliquent|'professionnels bénévoles':groupe</code> | <code>1</code> |
335
+ | <code>Précision sure les bénéficiaires: Communes,Établissements publics de coopération intercommunale (avec ou sans fiscalité propre),Établissements publics territoriaux franciliens,Départements,Aménageurs publics et privés (lorsque ces derniers interviennent à la demande ou pour le compte d'une collectivité précitée).</code> | <code>'Aménageurs privés':entité|INTERVIENT_POUR|'Départements':entité</code> | <code>1</code> |
336
+ | <code>Date de début: non précisée<br>Date de fin (clôture): non précisée<br>Date de début de la future campagne: non précisée</code> | <code>'Date de fin':concept|EST|'non précisée':__inferred__</code> | <code>1</code> |
337
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
338
+ ```json
339
+ {
340
+ "scale": 20.0,
341
+ "similarity_fct": "cos_sim"
342
+ }
343
+ ```
344
+
345
+ ### Training Hyperparameters
346
+ #### Non-Default Hyperparameters
347
+
348
+ - `eval_strategy`: epoch
349
+ - `per_device_train_batch_size`: 4
350
+ - `per_device_eval_batch_size`: 4
351
+ - `gradient_accumulation_steps`: 2
352
+ - `learning_rate`: 2.044202693407718e-05
353
+ - `num_train_epochs`: 20
354
+ - `lr_scheduler_type`: cosine
355
+ - `warmup_steps`: 124
356
+ - `bf16`: True
357
+ - `tf32`: True
358
+ - `load_best_model_at_end`: True
359
+ - `optim`: adamw_torch_fused
360
+ - `hub_model_id`: Lettria/grag-go-idf-mult_neg_rk_10-trial-3
361
+ - `batch_sampler`: no_duplicates
362
+
363
+ #### All Hyperparameters
364
+ <details><summary>Click to expand</summary>
365
+
366
+ - `overwrite_output_dir`: False
367
+ - `do_predict`: False
368
+ - `eval_strategy`: epoch
369
+ - `prediction_loss_only`: True
370
+ - `per_device_train_batch_size`: 4
371
+ - `per_device_eval_batch_size`: 4
372
+ - `per_gpu_train_batch_size`: None
373
+ - `per_gpu_eval_batch_size`: None
374
+ - `gradient_accumulation_steps`: 2
375
+ - `eval_accumulation_steps`: None
376
+ - `torch_empty_cache_steps`: None
377
+ - `learning_rate`: 2.044202693407718e-05
378
+ - `weight_decay`: 0.0
379
+ - `adam_beta1`: 0.9
380
+ - `adam_beta2`: 0.999
381
+ - `adam_epsilon`: 1e-08
382
+ - `max_grad_norm`: 1.0
383
+ - `num_train_epochs`: 20
384
+ - `max_steps`: -1
385
+ - `lr_scheduler_type`: cosine
386
+ - `lr_scheduler_kwargs`: {}
387
+ - `warmup_ratio`: 0.0
388
+ - `warmup_steps`: 124
389
+ - `log_level`: passive
390
+ - `log_level_replica`: warning
391
+ - `log_on_each_node`: True
392
+ - `logging_nan_inf_filter`: True
393
+ - `save_safetensors`: True
394
+ - `save_on_each_node`: False
395
+ - `save_only_model`: False
396
+ - `restore_callback_states_from_checkpoint`: False
397
+ - `no_cuda`: False
398
+ - `use_cpu`: False
399
+ - `use_mps_device`: False
400
+ - `seed`: 42
401
+ - `data_seed`: None
402
+ - `jit_mode_eval`: False
403
+ - `use_ipex`: False
404
+ - `bf16`: True
405
+ - `fp16`: False
406
+ - `fp16_opt_level`: O1
407
+ - `half_precision_backend`: auto
408
+ - `bf16_full_eval`: False
409
+ - `fp16_full_eval`: False
410
+ - `tf32`: True
411
+ - `local_rank`: 0
412
+ - `ddp_backend`: None
413
+ - `tpu_num_cores`: None
414
+ - `tpu_metrics_debug`: False
415
+ - `debug`: []
416
+ - `dataloader_drop_last`: False
417
+ - `dataloader_num_workers`: 0
418
+ - `dataloader_prefetch_factor`: None
419
+ - `past_index`: -1
420
+ - `disable_tqdm`: False
421
+ - `remove_unused_columns`: True
422
+ - `label_names`: None
423
+ - `load_best_model_at_end`: True
424
+ - `ignore_data_skip`: False
425
+ - `fsdp`: []
426
+ - `fsdp_min_num_params`: 0
427
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
428
+ - `fsdp_transformer_layer_cls_to_wrap`: None
429
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
430
+ - `deepspeed`: None
431
+ - `label_smoothing_factor`: 0.0
432
+ - `optim`: adamw_torch_fused
433
+ - `optim_args`: None
434
+ - `adafactor`: False
435
+ - `group_by_length`: False
436
+ - `length_column_name`: length
437
+ - `ddp_find_unused_parameters`: None
438
+ - `ddp_bucket_cap_mb`: None
439
+ - `ddp_broadcast_buffers`: False
440
+ - `dataloader_pin_memory`: True
441
+ - `dataloader_persistent_workers`: False
442
+ - `skip_memory_metrics`: True
443
+ - `use_legacy_prediction_loop`: False
444
+ - `push_to_hub`: False
445
+ - `resume_from_checkpoint`: None
446
+ - `hub_model_id`: Lettria/grag-go-idf-mult_neg_rk_10-trial-3
447
+ - `hub_strategy`: every_save
448
+ - `hub_private_repo`: None
449
+ - `hub_always_push`: False
450
+ - `gradient_checkpointing`: False
451
+ - `gradient_checkpointing_kwargs`: None
452
+ - `include_inputs_for_metrics`: False
453
+ - `include_for_metrics`: []
454
+ - `eval_do_concat_batches`: True
455
+ - `fp16_backend`: auto
456
+ - `push_to_hub_model_id`: None
457
+ - `push_to_hub_organization`: None
458
+ - `mp_parameters`:
459
+ - `auto_find_batch_size`: False
460
+ - `full_determinism`: False
461
+ - `torchdynamo`: None
462
+ - `ray_scope`: last
463
+ - `ddp_timeout`: 1800
464
+ - `torch_compile`: False
465
+ - `torch_compile_backend`: None
466
+ - `torch_compile_mode`: None
467
+ - `dispatch_batches`: None
468
+ - `split_batches`: None
469
+ - `include_tokens_per_second`: False
470
+ - `include_num_input_tokens_seen`: False
471
+ - `neftune_noise_alpha`: None
472
+ - `optim_target_modules`: None
473
+ - `batch_eval_metrics`: False
474
+ - `eval_on_start`: False
475
+ - `use_liger_kernel`: False
476
+ - `eval_use_gather_object`: False
477
+ - `average_tokens_across_devices`: False
478
+ - `prompts`: None
479
+ - `batch_sampler`: no_duplicates
480
+ - `multi_dataset_batch_sampler`: proportional
481
+
482
+ </details>
483
+
484
+ ### Training Logs
485
+ <details><summary>Click to expand</summary>
486
+
487
+ | Epoch | Step | Training Loss | Validation Loss | BinaryClassifEval_cosine_ap |
488
+ |:--------:|:--------:|:-------------:|:---------------:|:---------------------------:|
489
+ | 0.1621 | 50 | 1.4756 | - | - |
490
+ | 0.3241 | 100 | 0.6024 | - | - |
491
+ | 0.4862 | 150 | 0.5528 | - | - |
492
+ | 0.6483 | 200 | 0.3826 | - | - |
493
+ | 0.8104 | 250 | 0.3344 | - | - |
494
+ | 0.9724 | 300 | 0.355 | - | - |
495
+ | 1.0 | 309 | - | 0.1723 | 1.0 |
496
+ | 1.1329 | 350 | 0.2415 | - | - |
497
+ | 1.2950 | 400 | 0.1983 | - | - |
498
+ | 1.4571 | 450 | 0.2042 | - | - |
499
+ | 1.6191 | 500 | 0.1614 | - | - |
500
+ | 1.7812 | 550 | 0.245 | - | - |
501
+ | 1.9433 | 600 | 0.1246 | - | - |
502
+ | 2.0 | 618 | - | 0.1204 | 1.0 |
503
+ | 2.1037 | 650 | 0.1493 | - | - |
504
+ | 2.2658 | 700 | 0.1097 | - | - |
505
+ | 2.4279 | 750 | 0.0856 | - | - |
506
+ | 2.5900 | 800 | 0.0781 | - | - |
507
+ | 2.7520 | 850 | 0.1151 | - | - |
508
+ | 2.9141 | 900 | 0.1528 | - | - |
509
+ | 3.0 | 927 | - | 0.1297 | 1.0 |
510
+ | 3.0746 | 950 | 0.0552 | - | - |
511
+ | 3.2366 | 1000 | 0.0563 | - | - |
512
+ | 3.3987 | 1050 | 0.0625 | - | - |
513
+ | 3.5608 | 1100 | 0.0516 | - | - |
514
+ | 3.7229 | 1150 | 0.0674 | - | - |
515
+ | 3.8849 | 1200 | 0.129 | - | - |
516
+ | 4.0 | 1236 | - | 0.1648 | 1.0 |
517
+ | 4.0454 | 1250 | 0.0445 | - | - |
518
+ | 4.2075 | 1300 | 0.0603 | - | - |
519
+ | 4.3695 | 1350 | 0.0874 | - | - |
520
+ | 4.5316 | 1400 | 0.0353 | - | - |
521
+ | 4.6937 | 1450 | 0.064 | - | - |
522
+ | 4.8558 | 1500 | 0.0612 | - | - |
523
+ | 5.0 | 1545 | - | 0.2055 | 1.0 |
524
+ | 5.0162 | 1550 | 0.0554 | - | - |
525
+ | 5.1783 | 1600 | 0.0319 | - | - |
526
+ | 5.3404 | 1650 | 0.0698 | - | - |
527
+ | 5.5024 | 1700 | 0.0651 | - | - |
528
+ | 5.6645 | 1750 | 0.0555 | - | - |
529
+ | 5.8266 | 1800 | 0.122 | - | - |
530
+ | 5.9887 | 1850 | 0.0266 | - | - |
531
+ | 6.0 | 1854 | - | 0.1933 | 1.0 |
532
+ | 6.1491 | 1900 | 0.0636 | - | - |
533
+ | 6.3112 | 1950 | 0.0158 | - | - |
534
+ | 6.4733 | 2000 | 0.0156 | - | - |
535
+ | 6.6353 | 2050 | 0.0445 | - | - |
536
+ | 6.7974 | 2100 | 0.071 | - | - |
537
+ | 6.9595 | 2150 | 0.0318 | - | - |
538
+ | 7.0 | 2163 | - | 0.1893 | 1.0 |
539
+ | 7.1199 | 2200 | 0.046 | - | - |
540
+ | 7.2820 | 2250 | 0.0353 | - | - |
541
+ | 7.4441 | 2300 | 0.071 | - | - |
542
+ | 7.6062 | 2350 | 0.0373 | - | - |
543
+ | 7.7682 | 2400 | 0.0784 | - | - |
544
+ | 7.9303 | 2450 | 0.0684 | - | - |
545
+ | 8.0 | 2472 | - | 0.1226 | 1.0 |
546
+ | 8.0908 | 2500 | 0.0573 | - | - |
547
+ | 8.2528 | 2550 | 0.0146 | - | - |
548
+ | 8.4149 | 2600 | 0.0208 | - | - |
549
+ | 8.5770 | 2650 | 0.0143 | - | - |
550
+ | 8.7391 | 2700 | 0.0779 | - | - |
551
+ | 8.9011 | 2750 | 0.0312 | - | - |
552
+ | 9.0 | 2781 | - | 0.1612 | 1.0 |
553
+ | 9.0616 | 2800 | 0.034 | - | - |
554
+ | 9.2237 | 2850 | 0.0163 | - | - |
555
+ | 9.3857 | 2900 | 0.046 | - | - |
556
+ | 9.5478 | 2950 | 0.0745 | - | - |
557
+ | 9.7099 | 3000 | 0.0313 | - | - |
558
+ | 9.8720 | 3050 | 0.0238 | - | - |
559
+ | 10.0 | 3090 | - | 0.1342 | 1.0 |
560
+ | 10.0324 | 3100 | 0.028 | - | - |
561
+ | 10.1945 | 3150 | 0.0084 | - | - |
562
+ | 10.3566 | 3200 | 0.051 | - | - |
563
+ | 10.5186 | 3250 | 0.0118 | - | - |
564
+ | 10.6807 | 3300 | 0.032 | - | - |
565
+ | 10.8428 | 3350 | 0.0679 | - | - |
566
+ | 11.0 | 3399 | - | 0.1355 | 1.0 |
567
+ | 11.0032 | 3400 | 0.0084 | - | - |
568
+ | 11.1653 | 3450 | 0.0112 | - | - |
569
+ | 11.3274 | 3500 | 0.0228 | - | - |
570
+ | 11.4895 | 3550 | 0.0119 | - | - |
571
+ | 11.6515 | 3600 | 0.0511 | - | - |
572
+ | 11.8136 | 3650 | 0.0363 | - | - |
573
+ | 11.9757 | 3700 | 0.0161 | - | - |
574
+ | 12.0 | 3708 | - | 0.1345 | 1.0 |
575
+ | 12.1361 | 3750 | 0.0054 | - | - |
576
+ | 12.2982 | 3800 | 0.0142 | - | - |
577
+ | 12.4603 | 3850 | 0.0045 | - | - |
578
+ | 12.6224 | 3900 | 0.0272 | - | - |
579
+ | 12.7844 | 3950 | 0.0064 | - | - |
580
+ | 12.9465 | 4000 | 0.023 | - | - |
581
+ | **13.0** | **4017** | **-** | **0.1177** | **1.0** |
582
+ | 13.1070 | 4050 | 0.0234 | - | - |
583
+ | 13.2690 | 4100 | 0.0067 | - | - |
584
+ | 13.4311 | 4150 | 0.019 | - | - |
585
+ | 13.5932 | 4200 | 0.0051 | - | - |
586
+ | 13.7553 | 4250 | 0.0117 | - | - |
587
+ | 13.9173 | 4300 | 0.0244 | - | - |
588
+ | 14.0 | 4326 | - | 0.1225 | 1.0 |
589
+ | 14.0778 | 4350 | 0.0268 | - | - |
590
+ | 14.2399 | 4400 | 0.0041 | - | - |
591
+ | 14.4019 | 4450 | 0.0165 | - | - |
592
+ | 14.5640 | 4500 | 0.0028 | - | - |
593
+ | 14.7261 | 4550 | 0.0156 | - | - |
594
+ | 14.8882 | 4600 | 0.007 | - | - |
595
+ | 15.0 | 4635 | - | 0.1199 | 1.0000 |
596
+ | 15.0486 | 4650 | 0.0178 | - | - |
597
+ | 15.2107 | 4700 | 0.004 | - | - |
598
+ | 15.3728 | 4750 | 0.0063 | - | - |
599
+ | 15.5348 | 4800 | 0.0161 | - | - |
600
+ | 15.6969 | 4850 | 0.0119 | - | - |
601
+ | 15.8590 | 4900 | 0.0138 | - | - |
602
+ | 16.0 | 4944 | - | 0.1232 | 1.0 |
603
+ | 16.0194 | 4950 | 0.0154 | - | - |
604
+ | 16.1815 | 5000 | 0.0201 | - | - |
605
+ | 16.3436 | 5050 | 0.0135 | - | - |
606
+ | 16.5057 | 5100 | 0.0285 | - | - |
607
+ | 16.6677 | 5150 | 0.0395 | - | - |
608
+ | 16.8298 | 5200 | 0.0011 | - | - |
609
+ | 16.9919 | 5250 | 0.0104 | - | - |
610
+ | 17.0 | 5253 | - | 0.1274 | 1.0 |
611
+ | 17.1524 | 5300 | 0.0158 | - | - |
612
+ | 17.3144 | 5350 | 0.0502 | - | - |
613
+ | 17.4765 | 5400 | 0.0183 | - | - |
614
+ | 17.6386 | 5450 | 0.0052 | - | - |
615
+ | 17.8006 | 5500 | 0.054 | - | - |
616
+ | 17.9627 | 5550 | 0.0273 | - | - |
617
+ | 18.0 | 5562 | - | 0.1217 | 1.0 |
618
+ | 18.1232 | 5600 | 0.0102 | - | - |
619
+ | 18.2853 | 5650 | 0.0086 | - | - |
620
+ | 18.4473 | 5700 | 0.0012 | - | - |
621
+ | 18.6094 | 5750 | 0.0084 | - | - |
622
+ | 18.7715 | 5800 | 0.0178 | - | - |
623
+ | 18.9335 | 5850 | 0.0089 | - | - |
624
+ | 19.0 | 5871 | - | 0.1205 | 1.0000 |
625
+ | 19.0940 | 5900 | 0.0133 | - | - |
626
+ | 19.2561 | 5950 | 0.0173 | - | - |
627
+ | 19.4182 | 6000 | 0.0129 | - | - |
628
+ | 19.5802 | 6050 | 0.009 | - | - |
629
+ | 19.7423 | 6100 | 0.0019 | - | - |
630
+ | 19.9044 | 6150 | 0.0186 | - | - |
631
+ | 19.9368 | 6160 | - | 0.1177 | 1.0000 |
632
+
633
+ * The bold row denotes the saved checkpoint.
634
+ </details>
635
+
636
+ ### Framework Versions
637
+ - Python: 3.11.9
638
+ - Sentence Transformers: 3.4.1
639
+ - Transformers: 4.48.3
640
+ - PyTorch: 2.3.0
641
+ - Accelerate: 1.1.0
642
+ - Datasets: 3.3.2
643
+ - Tokenizers: 0.21.0
644
+
645
+ ## Citation
646
+
647
+ ### BibTeX
648
+
649
+ #### Sentence Transformers
650
+ ```bibtex
651
+ @inproceedings{reimers-2019-sentence-bert,
652
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
653
+ author = "Reimers, Nils and Gurevych, Iryna",
654
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
655
+ month = "11",
656
+ year = "2019",
657
+ publisher = "Association for Computational Linguistics",
658
+ url = "https://arxiv.org/abs/1908.10084",
659
+ }
660
+ ```
661
+
662
+ #### MultipleNegativesRankingLoss
663
+ ```bibtex
664
+ @misc{henderson2017efficient,
665
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
666
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
667
+ year={2017},
668
+ eprint={1705.00652},
669
+ archivePrefix={arXiv},
670
+ primaryClass={cs.CL}
671
+ }
672
+ ```
673
+
674
+ <!--
675
+ ## Glossary
676
+
677
+ *Clearly define terms in order to be accessible across audiences.*
678
+ -->
679
+
680
+ <!--
681
+ ## Model Card Authors
682
+
683
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
684
+ -->
685
+
686
+ <!--
687
+ ## Model Card Contact
688
+
689
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
690
+ -->
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "intfloat/multilingual-e5-base",
3
+ "architectures": [
4
+ "XLMRobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.48.3",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 250002
28
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.48.3",
5
+ "pytorch": "2.3.0"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
eval/binary_classification_evaluation_BinaryClassifEval_results.csv ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,cosine_accuracy,cosine_accuracy_threshold,cosine_f1,cosine_precision,cosine_recall,cosine_f1_threshold,cosine_ap,cosine_mcc
2
+ 1.0,78,0.9983766233766234,0.2822584807872772,0.9991876523151909,1.0,0.9983766233766234,0.2822584807872772,1.0,0.0
3
+ 2.0,156,0.9983766233766234,0.1545478105545044,0.9991876523151909,1.0,0.9983766233766234,0.1545478105545044,1.0,0.0
4
+ 3.0,234,0.9983766233766234,0.13613104820251465,0.9991876523151909,1.0,0.9983766233766234,0.13613104820251465,1.0,0.0
5
+ 4.0,312,0.9983766233766234,0.11390984058380127,0.9991876523151909,1.0,0.9983766233766234,0.11390984058380127,1.0,0.0
6
+ 5.0,390,0.9983766233766234,0.1044171154499054,0.9991876523151909,1.0,0.9983766233766234,0.1044171154499054,0.9999999999999999,0.0
7
+ 6.0,468,0.9983766233766234,0.13716590404510498,0.9991876523151909,1.0,0.9983766233766234,0.13716590404510498,0.9999999999999999,0.0
8
+ 7.0,546,0.9983766233766234,0.07607868313789368,0.9991876523151909,1.0,0.9983766233766234,0.07607868313789368,0.9999999999999999,0.0
9
+ 8.0,624,0.9983766233766234,0.07861983776092529,0.9991876523151909,1.0,0.9983766233766234,0.07861983776092529,0.9999999999999999,0.0
10
+ 9.0,702,0.9983766233766234,0.03543373942375183,0.9991876523151909,1.0,0.9983766233766234,0.03543373942375183,0.9999999999999999,0.0
11
+ 10.0,780,0.9983766233766234,0.07437282800674438,0.9991876523151909,1.0,0.9983766233766234,0.07437282800674438,0.9999999999999999,0.0
12
+ 11.0,858,0.9983766233766234,0.06978517770767212,0.9991876523151909,1.0,0.9983766233766234,0.06978517770767212,0.9999999999999999,0.0
13
+ 12.0,936,0.9983766233766234,0.04839390516281128,0.9991876523151909,1.0,0.9983766233766234,0.04839390516281128,1.0,0.0
14
+ 13.0,1014,0.9983766233766234,0.07242611050605774,0.9991876523151909,1.0,0.9983766233766234,0.07242611050605774,0.9999999999999999,0.0
15
+ 14.0,1092,0.9983766233766234,0.05985987186431885,0.9991876523151909,1.0,0.9983766233766234,0.05985987186431885,0.9999999999999999,0.0
16
+ 15.0,1170,0.9983766233766234,0.040712565183639526,0.9991876523151909,1.0,0.9983766233766234,0.040712565183639526,0.9999999999999999,0.0
17
+ 16.0,1248,0.9983766233766234,0.036619722843170166,0.9991876523151909,1.0,0.9983766233766234,0.036619722843170166,1.0,0.0
18
+ 17.0,1326,0.9983766233766234,0.04132872819900513,0.9991876523151909,1.0,0.9983766233766234,0.04132872819900513,0.9999999999999999,0.0
19
+ 18.0,1404,0.9983766233766234,0.0414009690284729,0.9991876523151909,1.0,0.9983766233766234,0.0414009690284729,0.9999999999999999,0.0
20
+ 19.0,1482,0.9983766233766234,0.045636147260665894,0.9991876523151909,1.0,0.9983766233766234,0.045636147260665894,0.9999999999999999,0.0
21
+ 19.750809061488674,1540,0.9983766233766234,0.03946185111999512,0.9991876523151909,1.0,0.9983766233766234,0.03946185111999512,1.0,0.0
22
+ 19.750809061488674,1540,0.9983766233766234,0.06978517770767212,0.9991876523151909,1.0,0.9983766233766234,0.06978517770767212,0.9999999999999999,0.0
23
+ 1.0,155,0.9983766233766234,0.9999970197677612,0.9991876523151909,1.0,0.9983766233766234,0.9999970197677612,0.9999999999999999,0.0
24
+ 2.0,310,0.9983766233766234,0.9999969005584717,0.9991876523151909,1.0,0.9983766233766234,0.9999969005584717,1.0,0.0
25
+ 3.0,465,0.9983766233766234,0.9999979734420776,0.9991876523151909,1.0,0.9983766233766234,0.9999979734420776,0.9999999999999999,0.0
26
+ 4.0,620,0.9983766233766234,0.9999972581863403,0.9991876523151909,1.0,0.9983766233766234,0.9999972581863403,1.0,0.0
27
+ 5.0,775,0.9983766233766234,0.9999974966049194,0.9991876523151909,1.0,0.9983766233766234,0.9999974966049194,0.9999999999999999,0.0
28
+ 6.0,930,0.9983766233766234,0.9999969005584717,0.9991876523151909,1.0,0.9983766233766234,0.9999969005584717,1.0,0.0
29
+ 7.0,1085,0.9983766233766234,0.999997079372406,0.9991876523151909,1.0,0.9983766233766234,0.999997079372406,1.0,0.0
30
+ 8.0,1240,0.9983766233766234,0.999997615814209,0.9991876523151909,1.0,0.9983766233766234,0.999997615814209,0.9999999999999999,0.0
31
+ 9.0,1395,0.9983766233766234,0.9999971389770508,0.9991876523151909,1.0,0.9983766233766234,0.9999971389770508,1.0,0.0
32
+ 10.0,1550,0.9983766233766234,0.9999961853027344,0.9991876523151909,1.0,0.9983766233766234,0.9999961853027344,1.0,0.0
33
+ 11.0,1705,0.9983766233766234,0.9999973177909851,0.9991876523151909,1.0,0.9983766233766234,0.9999973177909851,0.9999999999999999,0.0
34
+ 12.0,1860,0.9983766233766234,0.9999964237213135,0.9991876523151909,1.0,0.9983766233766234,0.9999964237213135,1.0,0.0
35
+ 13.0,2015,0.9983766233766234,0.9999970197677612,0.9991876523151909,1.0,0.9983766233766234,0.9999970197677612,0.9999999999999999,0.0
36
+ 14.0,2170,0.9983766233766234,0.9999979734420776,0.9991876523151909,1.0,0.9983766233766234,0.9999979734420776,1.0,0.0
37
+ 15.0,2325,0.9983766233766234,0.999996542930603,0.9991876523151909,1.0,0.9983766233766234,0.999996542930603,1.0,0.0
38
+ 16.0,2480,0.9983766233766234,0.9999969005584717,0.9991876523151909,1.0,0.9983766233766234,0.9999969005584717,1.0,0.0
39
+ 17.0,2635,0.9983766233766234,0.9999967813491821,0.9991876523151909,1.0,0.9983766233766234,0.9999967813491821,0.9999999999999999,0.0
40
+ 18.0,2790,0.9983766233766234,0.9999974370002747,0.9991876523151909,1.0,0.9983766233766234,0.9999974370002747,1.0,0.0
41
+ 19.0,2945,0.9983766233766234,0.9999974966049194,0.9991876523151909,1.0,0.9983766233766234,0.9999974966049194,1.0,0.0
42
+ 19.875202593192867,3080,0.9983766233766234,0.9999974370002747,0.9991876523151909,1.0,0.9983766233766234,0.9999974370002747,1.0,0.0
43
+ 19.875202593192867,3080,0.9983766233766234,0.9999979734420776,0.9991876523151909,1.0,0.9983766233766234,0.9999979734420776,0.9999999999999999,0.0
44
+ 1.0,78,0.9983766233766234,0.25744861364364624,0.9991876523151909,1.0,0.9983766233766234,0.25744861364364624,1.0,0.0
45
+ 2.0,156,0.9983766233766234,0.2896910607814789,0.9991876523151909,1.0,0.9983766233766234,0.2896910607814789,0.9999999999999999,0.0
46
+ 3.0,234,0.9983766233766234,0.5955407619476318,0.9991876523151909,1.0,0.9983766233766234,0.5955407619476318,1.0,0.0
47
+ 4.0,312,0.9983766233766234,0.9999868869781494,0.9991876523151909,1.0,0.9983766233766234,0.9999868869781494,0.9999999999999999,0.0
48
+ 5.0,390,0.9983766233766234,0.9999954104423523,0.9991876523151909,1.0,0.9983766233766234,0.9999954104423523,0.9999999999999999,0.0
49
+ 6.0,468,0.9983766233766234,0.9999943971633911,0.9991876523151909,1.0,0.9983766233766234,0.9999943971633911,0.9999999999999999,0.0
50
+ 7.0,546,0.9983766233766234,0.9999949932098389,0.9991876523151909,1.0,0.9983766233766234,0.9999949932098389,1.0,0.0
51
+ 8.0,624,0.9983766233766234,0.9999951124191284,0.9991876523151909,1.0,0.9983766233766234,0.9999951124191284,1.0,0.0
52
+ 9.0,702,0.9983766233766234,0.9999951124191284,0.9991876523151909,1.0,0.9983766233766234,0.9999951124191284,1.0,0.0
53
+ 10.0,780,0.9983766233766234,0.9999953508377075,0.9991876523151909,1.0,0.9983766233766234,0.9999953508377075,1.0,0.0
54
+ 11.0,858,0.9983766233766234,0.9999960064888,0.9991876523151909,1.0,0.9983766233766234,0.9999960064888,1.0,0.0
55
+ 12.0,936,0.9983766233766234,0.9999961853027344,0.9991876523151909,1.0,0.9983766233766234,0.9999961853027344,0.9999999999999999,0.0
56
+ 13.0,1014,0.9983766233766234,0.9999964237213135,0.9991876523151909,1.0,0.9983766233766234,0.9999964237213135,1.0,0.0
57
+ 14.0,1092,0.9983766233766234,0.9999960064888,0.9991876523151909,1.0,0.9983766233766234,0.9999960064888,0.9999999999999999,0.0
58
+ 15.0,1170,0.9983766233766234,0.9999955892562866,0.9991876523151909,1.0,0.9983766233766234,0.9999955892562866,1.0,0.0
59
+ 16.0,1248,0.9983766233766234,0.9999949932098389,0.9991876523151909,1.0,0.9983766233766234,0.9999949932098389,0.9999999999999999,0.0
60
+ 17.0,1326,0.9983766233766234,0.9999959468841553,0.9991876523151909,1.0,0.9983766233766234,0.9999959468841553,1.0,0.0
61
+ 18.0,1404,0.9983766233766234,0.9999958276748657,0.9991876523151909,1.0,0.9983766233766234,0.9999958276748657,1.0,0.0
62
+ 19.0,1482,0.9983766233766234,0.9999954700469971,0.9991876523151909,1.0,0.9983766233766234,0.9999954700469971,1.0,0.0
63
+ 19.750809061488674,1540,0.9983766233766234,0.9999950528144836,0.9991876523151909,1.0,0.9983766233766234,0.9999950528144836,1.0,0.0
64
+ 19.750809061488674,1540,0.9983766233766234,0.25744861364364624,0.9991876523151909,1.0,0.9983766233766234,0.25744861364364624,1.0,0.0
65
+ 1.0,309,0.9983766233766234,0.17514750361442566,0.9991876523151909,1.0,0.9983766233766234,0.17514750361442566,1.0,0.0
66
+ 2.0,618,0.9983766233766234,-0.0026617348194122314,0.9991876523151909,1.0,0.9983766233766234,-0.0026617348194122314,1.0,0.0
67
+ 3.0,927,0.9983766233766234,0.025045007467269897,0.9991876523151909,1.0,0.9983766233766234,0.025045007467269897,1.0,0.0
68
+ 4.0,1236,0.9983766233766234,-0.05146974325180054,0.9991876523151909,1.0,0.9983766233766234,-0.05146974325180054,1.0,0.0
69
+ 5.0,1545,0.9983766233766234,-0.11910206079483032,0.9991876523151909,1.0,0.9983766233766234,-0.11910206079483032,1.0,0.0
70
+ 6.0,1854,0.9983766233766234,-0.16000312566757202,0.9991876523151909,1.0,0.9983766233766234,-0.16000312566757202,1.0,0.0
71
+ 7.0,2163,0.9983766233766234,-0.12202149629592896,0.9991876523151909,1.0,0.9983766233766234,-0.12202149629592896,1.0,0.0
72
+ 8.0,2472,0.9983766233766234,0.0393373966217041,0.9991876523151909,1.0,0.9983766233766234,0.0393373966217041,1.0,0.0
73
+ 9.0,2781,0.9983766233766234,-0.05355536937713623,0.9991876523151909,1.0,0.9983766233766234,-0.05355536937713623,1.0,0.0
74
+ 10.0,3090,0.9983766233766234,-0.007649779319763184,0.9991876523151909,1.0,0.9983766233766234,-0.007649779319763184,1.0,0.0
75
+ 11.0,3399,0.9983766233766234,-0.08037316799163818,0.9991876523151909,1.0,0.9983766233766234,-0.08037316799163818,1.0,0.0
76
+ 12.0,3708,0.9983766233766234,-0.16691505908966064,0.9991876523151909,1.0,0.9983766233766234,-0.16691505908966064,1.0,0.0
77
+ 13.0,4017,0.9983766233766234,-0.09276729822158813,0.9991876523151909,1.0,0.9983766233766234,-0.09276729822158813,0.9999999999999999,0.0
78
+ 14.0,4326,0.9983766233766234,-0.04847198724746704,0.9991876523151909,1.0,0.9983766233766234,-0.04847198724746704,1.0,0.0
79
+ 15.0,4635,0.9983766233766234,-0.04383087158203125,0.9991876523151909,1.0,0.9983766233766234,-0.04383087158203125,0.9999999999999999,0.0
80
+ 16.0,4944,0.9983766233766234,-0.05775594711303711,0.9991876523151909,1.0,0.9983766233766234,-0.05775594711303711,1.0,0.0
81
+ 17.0,5253,0.9983766233766234,-0.0806085467338562,0.9991876523151909,1.0,0.9983766233766234,-0.0806085467338562,1.0,0.0
82
+ 18.0,5562,0.9983766233766234,-0.09134870767593384,0.9991876523151909,1.0,0.9983766233766234,-0.09134870767593384,1.0,0.0
83
+ 19.0,5871,0.9983766233766234,-0.08964043855667114,0.9991876523151909,1.0,0.9983766233766234,-0.08964043855667114,0.9999999999999999,0.0
84
+ 19.936790923824958,6160,0.9983766233766234,-0.08734750747680664,0.9991876523151909,1.0,0.9983766233766234,-0.08734750747680664,0.9999999999999999,0.0
85
+ 19.936790923824958,6160,0.9983766233766234,-0.09276729822158813,0.9991876523151909,1.0,0.9983766233766234,-0.09276729822158813,0.9999999999999999,0.0
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e247978881213cfb8ac564583c112c40379c174865b4a382d0c04bfb2e880bff
3
+ size 1112197096
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
runs/Mar03_07-58-47_algo-1/events.out.tfevents.1740988728.algo-1.72.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda55d24558497360a4cd4fc86bbf5e2aeee2e1dca246edf3907af7a97e7716d
3
+ size 29950
runs/Mar03_07-58-47_algo-1/events.out.tfevents.1740989658.algo-1.72.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817dbfc854b9e3a384b569170e9b37092b6b24d85275bf251f88871c8e699343
3
+ size 1031
runs/Mar03_08-15-26_algo-1/events.out.tfevents.1740989727.algo-1.72.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:500b3dfa40a566db00dd5f0f084ce6b7f0e00ce4b5fef09f8d0d5613370358fd
3
+ size 36504
runs/Mar03_08-15-26_algo-1/events.out.tfevents.1740991057.algo-1.72.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f6948a2fa9b09983c174a1455e31fe6a69864c4ffcec51f339f07c54fcea21
3
+ size 1031
runs/Mar03_08-39-59_algo-1/events.out.tfevents.1740991200.algo-1.72.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:799f4f65c4ace597846f8054e034e765c8ad549fceb969de634e2a2540e4cc76
3
+ size 29950
runs/Mar03_08-39-59_algo-1/events.out.tfevents.1740992135.algo-1.72.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ad6fe8bdb4762d1992eb27f2c768d3a5356eb51c412fc3c3ba3940407c6bf8
3
+ size 1031
runs/Mar03_08-57-05_algo-1/events.out.tfevents.1740992226.algo-1.72.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cfbd2ca324043bb2bc7a37a0f3ed6f3a518f6f077a61310cd0934090531b918
3
+ size 49587
runs/Mar03_08-57-05_algo-1/events.out.tfevents.1740993626.algo-1.72.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f799d336b2e9972fe2e8d8b66a909761771daaa976b4b048fabbf7deb62f4067
3
+ size 1031
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe845694a4a95bd25d4ceec0a2aea0ff8a7a128e57525a2ca3d1d6ccde7c04c
3
+ size 5624