Update README.md
README.md CHANGED
@@ -22,7 +22,7 @@ tags:
 - code
 ---

-# EuroBERT-
+# EuroBERT-210m
 <div>
   <img src="img/banner.png" width="100%" alt="EuroBERT" />
 </div>
@@ -52,7 +52,7 @@ For more information about EuroBERT, please check our [blog](https://huggingface
 ```python
 from transformers import AutoTokenizer, AutoModelForMaskedLM

-model_id = "EuroBERT/EuroBERT-
+model_id = "EuroBERT/EuroBERT-210m"

 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForMaskedLM.from_pretrained(model_id, trust_remote_code=True)
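Once the truncated `model_id` above is completed to a concrete checkpoint, the loaded masked-LM head can be exercised end to end. A minimal sketch, not from the model card: the input sentence is illustrative, and the mask token is read from the tokenizer rather than hardcoded, since EuroBERT need not use BERT's `[MASK]` string.

```python
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

model_id = "EuroBERT/EuroBERT-210m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id, trust_remote_code=True)

# Use the tokenizer's own mask token rather than assuming BERT's "[MASK]" string.
text = f"The capital of France is {tokenizer.mask_token}."
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Locate the masked position and take the highest-scoring vocabulary entry there.
mask_pos = (inputs.input_ids == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
predicted_id = logits[0, mask_pos].argmax(dim=-1)
print(tokenizer.decode(predicted_id))  # plausibly "Paris"
```

The `trust_remote_code=True` flag is needed because EuroBERT's modeling code is fetched from the Hub rather than shipped inside `transformers`.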
@@ -120,37 +120,37 @@ If you plan to fine-tune this model on some downstream tasks, you can follow the

 #### Task-Specific Learning Rates

+##### Retrieval:
+| Dataset                                 | EuroBERT-210m  | EuroBERT-610m  | EuroBERT-2.1B  |
+|-----------------------------------------|----------------|----------------|----------------|
+| MIRACL                                  | 4.6e-05        | 3.6e-05        | 2.8e-05        |
+| MLDR                                    | 2.8e-05        | 2.2e-05        | 4.6e-05        |
+| CC-News                                 | 4.6e-05        | 4.6e-05        | 3.6e-05        |
+| Wikipedia                               | 2.8e-05        | 3.6e-05        | 2.8e-05        |
+| CodeSearchNet                           | 4.6e-05        | 2.8e-05        | 3.6e-05        |
+| DupStackMath                            | 4.6e-05        | 2.8e-05        | 3.6e-05        |
+| MathFormula                             | 1.7e-05        | 3.6e-05        | 3.6e-05        |
+
 ##### Sequence Classification:

 | Dataset                              | EuroBERT-210m  | EuroBERT-610m  | EuroBERT-2.1B  |
 |--------------------------------------|----------------|----------------|----------------|
 | XNLI                                 | 3.6e-05        | 3.6e-05        | 2.8e-05        |
 | PAWS-X                               | 3.6e-05        | 4.6e-05        | 3.6e-05        |
-| QAM                                  | 3.6e-05        | 2.8e-05        | 2.2e-05        |
 | AmazonReviews                        | 3.6e-05        | 2.8e-05        | 3.6e-05        |
 | MassiveIntent                        | 6.0e-05        | 4.6e-05        | 2.8e-05        |
-| CodeDefect                           | 3.6e-05        | 2.8e-05        | 1.3e-05        |
 | CodeComplexity                       | 3.6e-05        | 3.6e-05        | 1.0e-05        |
+| CodeDefect                           | 3.6e-05        | 2.8e-05        | 1.3e-05        |
 | MathShepherd                         | 7.7e-05        | 2.8e-05        | 1.7e-05        |

 ##### Sequence Regression:

 | Dataset                  | EuroBERT-210m  | EuroBERT-610m  | EuroBERT-2.1B  |
 |--------------------------|----------------|----------------|----------------|
+| WMT (Ref-based)          | 2.8e-05        | 2.8e-05        | 1.3e-05        |
+| WMT (Ref-free)           | 2.8e-05        | 2.8e-05        | 1.3e-05        |
 | SeaHorse                 | 3.6e-05        | 3.6e-05        | 2.8e-05        |
-| SummevalMultilingual     | 3.6e-05        | 2.8e-05        | 3.6e-05        |
-| WMT                      | 2.8e-05        | 2.8e-05        | 1.3e-05        |

-##### Retrieval:
-| Dataset                                 | EuroBERT-210m  | EuroBERT-610m  | EuroBERT-2.1B  |
-|-----------------------------------------|----------------|----------------|----------------|
-| MIRACL                                  | 4.6e-05        | 3.6e-05        | 2.8e-05        |
-| MLDR                                    | 2.8e-05        | 2.2e-05        | 4.6e-05        |
-| CC-News                                 | 4.6e-05        | 4.6e-05        | 3.6e-05        |
-| Wikipedia                               | 2.8e-05        | 3.6e-05        | 2.8e-05        |
-| CodeSearchNet                           | 4.6e-05        | 2.8e-05        | 3.6e-05        |
-| CqaDupStackMath                         | 4.6e-05        | 2.8e-05        | 3.6e-05        |
-| MathFormula                             | 1.7e-05        | 3.6e-05        | 3.6e-05        |

 ## License

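The learning rates in these tables are per-task fine-tuning defaults. A rough sketch of wiring one of them up for XNLI with EuroBERT-210m follows; only the 3.6e-05 learning rate comes from the table, while the dataset plumbing, batch size, and epoch count are illustrative assumptions, not values from the model card.

```python
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

model_id = "EuroBERT/EuroBERT-210m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# XNLI is three-way NLI: entailment / neutral / contradiction.
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=3, trust_remote_code=True
)

xnli = load_dataset("xnli", "en")  # any XNLI language config works here

def tokenize(batch):
    return tokenizer(batch["premise"], batch["hypothesis"], truncation=True)

encoded = xnli.map(tokenize, batched=True)

args = TrainingArguments(
    output_dir="eurobert-210m-xnli",
    learning_rate=3.6e-5,            # XNLI / EuroBERT-210m rate from the table
    per_device_train_batch_size=32,  # assumption, not from the model card
    num_train_epochs=3,              # assumption, not from the model card
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,  # enables dynamic padding via the default collator
)
trainer.train()
```

For the other tasks, the same pattern applies with the corresponding head (e.g. a regression head with `num_labels=1` for the Sequence Regression datasets) and the matching rate from the tables.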