Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -204,20 +204,6 @@ shared_state = State()
|
|
| 204 |
def daten_laden(name):
|
| 205 |
return load_dataset('alexkueck/tis', 'alexkueck/tis')
|
| 206 |
|
| 207 |
-
|
| 208 |
-
def group_texts(examples):
|
| 209 |
-
# Concatenate all texts.
|
| 210 |
-
concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
|
| 211 |
-
total_length = len(concatenated_examples[list(examples.keys())[0]])
|
| 212 |
-
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
|
| 213 |
-
# customize this part to your needs.
|
| 214 |
-
total_length = (total_length // block_size) * block_size
|
| 215 |
-
# Split by chunks of max_len.
|
| 216 |
-
result = {
|
| 217 |
-
k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
| 218 |
-
for k, t in concatenated_examples.items()
|
| 219 |
-
}
|
| 220 |
-
result["labels"] = result["input_ids"].copy()
|
| 221 |
-
return result
|
| 222 |
|
| 223 |
|
|
|
|
| 204 |
def daten_laden(name):
|
| 205 |
return load_dataset('alexkueck/tis', 'alexkueck/tis')
|
| 206 |
|
| 207 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
|