Spaces:

MisterAI
/

Testing_BrokenSpace

Running

App Files Community

MisterAI committed 7 days ago

Commit

50cfc1c

verified ·

1 Parent(s): f5a62d4

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -13

app.py CHANGED Viewed

@@ -48,24 +48,15 @@ def generate_response(input_text):
 #    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 def fine_tune_model(dataset_path, dataset_file, epochs, batch_size, prefix):
     import json  # Assurez-vous que json est importé
     # Récupération du fichier à partir de l'URL fournie
-    response = requests.get(dataset_path)
     dataset_lines = response.text.strip().split('\n')
-    # Vérifier si le fichier est au format JSONL ou JSON
-    if dataset_path.endswith('.jsonl'):
-        # Convertir les lignes en dictionnaires pour JSONL
-        dataset_dict = [json.loads(line) for line in dataset_lines if line]
-    elif dataset_path.endswith('.json'):
-        # Charger le fichier JSON directement
-        dataset_dict = json.loads(response.text)
-    else:
-        raise ValueError("Format de fichier non supporté. Utilisez .jsonl ou .json.")
     # Créer un Dataset Hugging Face
     dataset = Dataset.from_dict({
@@ -89,7 +80,10 @@ def fine_tune_model(dataset_path, dataset_file, epochs, batch_size, prefix):
     training_args = TrainingArguments(
         output_dir=f"./{prefix}_{model_name.split('/')[-1]}",
         num_train_epochs=epochs,
-        per_device_train_batch_size=batch_size,
         save_steps=10_000,
         save_total_limit=2,
         push_to_hub=True,
@@ -124,6 +118,10 @@ def fine_tune_model(dataset_path, dataset_file, epochs, batch_size, prefix):
 # Interface Gradio
 with gr.Blocks() as demo:
     with gr.Tab("Chatbot"):

 #    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
 def fine_tune_model(dataset_path, dataset_file, epochs, batch_size, prefix):
     import json  # Assurez-vous que json est importé
     # Récupération du fichier à partir de l'URL fournie
+    response = requests.get(f"{dataset_path}/resolve/main/{dataset_file}")
     dataset_lines = response.text.strip().split('\n')
+    # Convertir les lignes en dictionnaires pour JSONL
+    dataset_dict = [json.loads(line) for line in dataset_lines if line]
     # Créer un Dataset Hugging Face
     dataset = Dataset.from_dict({
     training_args = TrainingArguments(
         output_dir=f"./{prefix}_{model_name.split('/')[-1]}",
         num_train_epochs=epochs,
+        per_device_train_batch_size=batch_sl"
+    )
+    return "Fine-tuning terminé et modèle sauvegardé."ize,
         save_steps=10_000,
         save_total_limit=2,
         push_to_hub=True,
 # Interface Gradio
 with gr.Blocks() as demo:
     with gr.Tab("Chatbot"):