"""Evaluate a linear SVM on merged per-vertebra features from two pipelines.

Features from two Excel workbooks are joined on the 'Vertebra' column, the
train/test rows drive 5-fold stratified cross-validation, and each fold's
model is scored on the fixed validation split.
"""

import os

import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             precision_score, recall_score)
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


def evaluate_svm(file1, file2, features, output_txt='evaluation_results.txt'):
    # Load the per-vertebra feature tables from both Excel workbooks.
    data1 = pd.read_excel(file1)
    data2 = pd.read_excel(file2)
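    # Assumed schema (inferred from the merge and column accesses below): both
    # workbooks carry a 'Vertebra' key plus the columns named in `features`,
    # and shared 'Dataset' and 'Label' columns describing the split and class.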

    # Suffix the second file's feature columns so both sets survive the merge.
    rename_dict = {f: f"{f}_2" for f in features}
    data2.rename(columns=rename_dict, inplace=True)

    # Inner-join on the vertebra identifier; the shared 'Dataset' and 'Label'
    # columns pick up pandas' default '_x'/'_y' suffixes.
    combined_data = pd.merge(data1, data2, on="Vertebra")
    print(combined_data)

    # Cross-validation pool (train + test rows) and the fixed validation set.
    train_test_data = combined_data[combined_data['Dataset_x'].isin(['train', 'test'])]
    val_data = combined_data[combined_data['Dataset_x'] == 'val']

    # Stack both feature sets side by side into one design matrix.
    combined_features = features + [f"{f}_2" for f in features]
    X_train_test = train_test_data[combined_features]
    y_train_test = train_test_data['Label_x']
    X_val = val_data[combined_features]
    y_val = val_data['Label_x']
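    # Optional guard (a sketch, not in the original pipeline): catch an empty
    # join or an empty validation split early rather than failing downstream.
    # assert not train_test_data.empty, "no train/test rows after the merge"
    # assert not val_data.empty, "no validation rows after the merge"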

    # Fit the scaler on the CV pool only and reuse its statistics for the
    # validation set, so no validation information leaks into training.
    scaler = StandardScaler()
    X_train_test_scaled = scaler.fit_transform(X_train_test)
    X_val_scaled = scaler.transform(X_val)

    # Linear SVM; 'balanced' reweights classes inversely to their frequency.
    svm_classifier = SVC(kernel='linear', class_weight='balanced')

    # 5-fold stratified CV preserves the class ratio within every fold.
    skf = StratifiedKFold(n_splits=5)
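    # Note: StratifiedKFold does not shuffle by default, so folds follow the
    # row order of the merged table. A shuffled variant (an assumption, not
    # part of the original setup) would be:
    # skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)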

    results = []

    for train_index, test_index in skf.split(X_train_test_scaled, y_train_test):
        # Train on the fold's training split. Each fold's model is scored on
        # the fixed validation set, not on the fold's held-out split, so the
        # five results measure stability across refits; test_index is unused.
        X_train = X_train_test_scaled[train_index]
        # .iloc is needed: train_test_data keeps combined_data's non-contiguous
        # index after filtering, so labels must be selected by position.
        y_train = y_train_test.iloc[train_index]

        svm_classifier.fit(X_train, y_train)
        y_pred_val = svm_classifier.predict(X_val_scaled)

        # Macro averaging weights each class equally regardless of support.
        cm = confusion_matrix(y_val, y_pred_val)
        f1 = f1_score(y_val, y_pred_val, average='macro')
        precision = precision_score(y_val, y_pred_val, average='macro')
        recall = recall_score(y_val, y_pred_val, average='macro')
        accuracy = accuracy_score(y_val, y_pred_val)

        results.append((cm, f1, precision, recall, accuracy))
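
    # If per-fold generalization were wanted as well, each fold's held-out
    # split could be scored too; a minimal sketch (an assumption, not the
    # original behavior):
    # X_test = X_train_test_scaled[test_index]
    # y_test = y_train_test.iloc[test_index]
    # y_pred_test = svm_classifier.predict(X_test)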

    # Write per-fold results, then the averages, to a single report file.
    with open(output_txt, 'w') as file:
        for i, (cm, f1, precision, recall, accuracy) in enumerate(results):
            file.write(f"Fold {i+1}:\n")
            file.write("Confusion Matrix:\n")
            file.write(f"{cm}\n")
            file.write(f"F1 Score: {f1:.3f}, Precision: {precision:.3f}, "
                       f"Recall: {recall:.3f}, Accuracy: {accuracy:.3f}\n")
            file.write("\n")

        # Averages over the five folds, written while the file is still open.
        average_f1 = np.mean([r[1] for r in results])
        average_precision = np.mean([r[2] for r in results])
        average_recall = np.mean([r[3] for r in results])
        average_accuracy = np.mean([r[4] for r in results])

        file.write("Average Scores:\n")
        file.write(f"Average F1 Score: {average_f1:.3f}\n")
        file.write(f"Average Precision: {average_precision:.3f}\n")
        file.write(f"Average Recall: {average_recall:.3f}\n")
        file.write(f"Average Accuracy: {average_accuracy:.3f}\n")

    print(f"Results saved to {output_txt}")


def main():
    result_folder = 'RHLV_quantification'
    grading_folder = 'classification_metric'
    os.makedirs(grading_folder, exist_ok=True)

    file_1 = os.path.join(result_folder, 'fine.xlsx')
    file_2 = 'twostage_output.xlsx'
    features = ['Pre RHLV', 'Mid RHLV', 'Post RHLV']

    output_txt_path = os.path.join(grading_folder, 'test.txt')
    evaluate_svm(file_1, file_2, features, output_txt_path)


if __name__ == "__main__":
    main()