Spaces:

Merwan611
/

classification-text

Sleeping

Merwan6 committed on Jun 16

Commit

2cbc3d1

1 Parent(s): 11204e4

metric

Files changed (3) hide show

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

scripts/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

scripts/metric.py CHANGED Viewed

@@ -8,6 +8,7 @@ from inference import (
     few_shot_inference,
     base_model_inference,
 )
 # Dictionnaire des fonctions à évaluer
 models_to_evaluate = {
@@ -18,8 +19,9 @@ models_to_evaluate = {
 label_map = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
-# Charger un sous-ensemble du jeu de test AG News
-dataset = load_dataset("ag_news", split="test[:3]")
 def evaluate_model(name, inference_func):
     print(f"\n🔍 Évaluation du modèle : {name}")
@@ -40,7 +42,10 @@ def evaluate_model(name, inference_func):
             print(f"⚠️ Erreur sur un exemple : {e}")
             continue
-        # Scores pour les 4 classes dans le même ordre
         prob_dist = [scores.get(c, 0.0) for c in label_map.values()]
         pred_index = list(label_map.values()).index(pred_class)
@@ -58,11 +63,11 @@ def evaluate_model(name, inference_func):
     loss = log_loss(true_labels, all_probs, labels=[0, 1, 2, 3])
     print(f"✅ Résultats {name} :")
-    print(f"- Accuracy : {acc:.4f}")
-    print(f"- F1 Score : {f1:.4f}")
-    print(f"- Precision : {prec:.4f}")
-    print(f"- Recall : {rec:.4f}")
-    print(f"- Log Loss : {loss:.4f}")
     print(f"- Runtime : {runtime:.2f} sec\n")
     return {
@@ -82,4 +87,5 @@ for name, func in models_to_evaluate.items():
 # Affichage résumé
 df = pd.DataFrame(results)
 print(df)

     few_shot_inference,
     base_model_inference,
 )
+from datasets import load_dataset
 # Dictionnaire des fonctions à évaluer
 models_to_evaluate = {
 label_map = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
+# Load the full AG News test split, then keep a seeded (seed=42) random sample of 500 examples
+dataset = load_dataset("ag_news", split="test")
+dataset = dataset.shuffle(seed=42).select(range(500))
 def evaluate_model(name, inference_func):
     print(f"\n🔍 Évaluation du modèle : {name}")
             print(f"⚠️ Erreur sur un exemple : {e}")
             continue
+        if pred_class not in label_map.values():
+            print(f"⚠️ Classe prédite inconnue : '{pred_class}', exemple ignoré.")
+            continue
         prob_dist = [scores.get(c, 0.0) for c in label_map.values()]
         pred_index = list(label_map.values()).index(pred_class)
     loss = log_loss(true_labels, all_probs, labels=[0, 1, 2, 3])
     print(f"✅ Résultats {name} :")
+    print(f"- Accuracy : {acc:.2f}")
+    print(f"- F1 Score : {f1:.2f}")
+    print(f"- Precision : {prec:.2f}")
+    print(f"- Recall : {rec:.2f}")
+    print(f"- Log Loss : {loss:.2f}")
     print(f"- Runtime : {runtime:.2f} sec\n")
     return {
 # Affichage résumé
 df = pd.DataFrame(results)
+df["loss"] = df["loss"].round(4)
 print(df)