Spaces: Runtime error
Fix errors in load data
Browse files
- spanish_medica_llm.py +10 -12
spanish_medica_llm.py
CHANGED
|
@@ -682,14 +682,13 @@ def run_training_process():
|
|
| 682 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
| 683 |
os.environ['WANDB_DISABLED'] = 'true'
|
| 684 |
tokenizer = loadSpanishTokenizer()
|
| 685 |
-
medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
|
| 686 |
-
medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
|
| 687 |
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
|
|
|
| 693 |
train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
|
| 694 |
|
| 695 |
base_model = loadBaseModel(MISTRAL_BASE_MODEL_ID)
|
|
@@ -702,11 +701,10 @@ def run_finnetuning_process():
|
|
| 702 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
| 703 |
os.environ['WANDB_DISABLED'] = 'true'
|
| 704 |
tokenizer = loadSpanishTokenizer()
|
| 705 |
-
medicalSpanishDataset =
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
base_model = loadBaseModel(HUB_MODEL_ID)
|
| 711 |
|
| 712 |
configAndRunFineTuning(base_model,train_dataset, eval_dataset, tokenizer)
|
|
|
|
| 682 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
| 683 |
os.environ['WANDB_DISABLED'] = 'true'
|
| 684 |
tokenizer = loadSpanishTokenizer()
|
|
|
|
|
|
|
| 685 |
|
| 686 |
+
medicalSpanishDataset = loadSpanishDataset()
|
| 687 |
+
train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid(
|
| 688 |
+
getTokenizedDataset( medicalSpanishDataset, tokenizer)
|
| 689 |
+
)
|
| 690 |
+
|
| 691 |
+
|
| 692 |
train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
|
| 693 |
|
| 694 |
base_model = loadBaseModel(MISTRAL_BASE_MODEL_ID)
|
|
|
|
| 701 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
| 702 |
os.environ['WANDB_DISABLED'] = 'true'
|
| 703 |
tokenizer = loadSpanishTokenizer()
|
| 704 |
+
medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
|
| 705 |
+
medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
|
| 706 |
+
train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
|
| 707 |
+
|
|
|
|
| 708 |
base_model = loadBaseModel(HUB_MODEL_ID)
|
| 709 |
|
| 710 |
configAndRunFineTuning(base_model,train_dataset, eval_dataset, tokenizer)
|