Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| import sys | |
| script_dir = os.path.dirname(os.path.abspath(__file__)) | |
| sys.path.append('..') | |
| sys.path.append('.') | |
def save_similarity_output(output_dict, method_name, leaderboard_path="./data/leaderboard_results.csv", similarity_path="./data/similarity_results.csv"):
    """Record similarity scores for one method in the similarity and leaderboard CSVs.

    For every dataset in ('sparse', '200', '500') and aspect in
    ('MF', 'BP', 'CC'), the keys ``"{dataset}_{aspect}_correlation"`` and
    ``"{dataset}_{aspect}_pvalue"`` are looked up in ``output_dict``. Each key
    that is present is written to the similarity table under the same column
    name and to the leaderboard table under that name prefixed with ``sim_``.
    When all three aspects of a dataset are present, their arithmetic mean is
    stored as ``"{dataset}_Ave_correlation"`` / ``"{dataset}_Ave_pvalue"``.

    Args:
        output_dict: Mapping from score keys (see above) to numeric values.
            Keys that are absent are skipped; previously stored values for
            them are left untouched.
        method_name: Value of the ``Method`` column identifying the row to
            update. The row is appended to either table if it is missing.
        leaderboard_path: CSV file holding the leaderboard table.
        similarity_path: CSV file holding the detailed similarity table.

    Returns:
        0 after both CSV files have been written.
    """
    tables = []
    for path in (leaderboard_path, similarity_path):
        table = pd.read_csv(path) if os.path.exists(path) else pd.DataFrame(columns=['Method'])
        # Files written without a 'Method' column would otherwise break the lookup below.
        if 'Method' not in table.columns:
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, similarity_df = tables

    # Boolean row selectors; they stay valid because only columns are added below.
    leaderboard_rows = leaderboard_df['Method'] == method_name
    similarity_rows = similarity_df['Method'] == method_name

    def record(column, value):
        # .loc (not .at) is required here: the row selector is a boolean mask,
        # and .at only accepts a single scalar row label.
        similarity_df.loc[similarity_rows, column] = value
        leaderboard_df.loc[leaderboard_rows, f"sim_{column}"] = value

    for dataset in ['sparse', '200', '500']:
        correlation_values = []
        pvalue_values = []

        for aspect in ['MF', 'BP', 'CC']:
            correlation_key = f"{dataset}_{aspect}_correlation"
            pvalue_key = f"{dataset}_{aspect}_pvalue"

            if correlation_key in output_dict:
                correlation_values.append(output_dict[correlation_key])
                record(correlation_key, output_dict[correlation_key])

            if pvalue_key in output_dict:
                pvalue_values.append(output_dict[pvalue_key])
                record(pvalue_key, output_dict[pvalue_key])

        # An average is only meaningful when all three aspects were supplied.
        if len(correlation_values) == 3:
            record(f"{dataset}_Ave_correlation", sum(correlation_values) / 3)
        if len(pvalue_values) == 3:
            record(f"{dataset}_Ave_pvalue", sum(pvalue_values) / 3)

    leaderboard_df.to_csv(leaderboard_path, index=False)
    similarity_df.to_csv(similarity_path, index=False)
    return 0
def save_function_output(model_output, method_name, func_results_path="./data/function_results.csv", leaderboard_path="./data/leaderboard_results.csv"):
    """Record function-prediction metrics for one method in two CSV tables.

    Each entry of ``model_output`` is indexed positionally: element 0 is a key
    of the form ``"{aspect}_{dataset1}_{dataset2}"`` and elements 1, 4, 7 and
    10 are read as accuracy, F1, precision and recall respectively. The four
    values are written to the function-results table in columns named
    ``"{key}_accuracy"``, ``"{key}_F1"``, ``"{key}_precision"`` and
    ``"{key}_recall"``.

    For the leaderboard table, the values are averaged per aspect
    (``"func_{aspect}_{metric}"``) and, when all of BP, CC and MF have at least
    one entry, the mean of the three per-aspect averages is stored as
    ``"func_Ave_{metric}"``.

    Args:
        model_output: Iterable of indexable entries laid out as described
            above. NOTE(review): the meaning of the skipped positions
            (2, 3, 5, 6, 8, 9) is not visible here.
        method_name: Value of the ``Method`` column identifying the row to
            update. The row is appended to either table if it is missing.
        func_results_path: CSV file holding the detailed function results.
        leaderboard_path: CSV file holding the leaderboard table.

    Returns:
        0 after both CSV files have been written.

    Raises:
        ValueError: If an entry key does not split into exactly three
            ``_``-separated parts.
        KeyError: If the aspect parsed from a key is not BP, CC or MF.
    """
    metric_names = ['accuracy', 'F1', 'precision', 'recall']
    aspects = ['BP', 'CC', 'MF']

    tables = []
    for path in (func_results_path, leaderboard_path):
        table = pd.read_csv(path) if os.path.exists(path) else pd.DataFrame(columns=['Method'])
        # Files written without a 'Method' column would otherwise break the lookup below.
        if 'Method' not in table.columns:
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    func_results_df, leaderboard_df = tables

    # Boolean row selectors; they stay valid because only columns are added below.
    func_rows = func_results_df['Method'] == method_name
    leaderboard_rows = leaderboard_df['Method'] == method_name

    # Per-metric, per-aspect value lists used for the leaderboard averages.
    collected = {metric: {aspect: [] for aspect in aspects} for metric in metric_names}

    for entry in model_output:
        key = entry[0]
        values = (entry[1], entry[4], entry[7], entry[10])
        # The unpacking enforces the three-part key format; only the aspect is used further.
        aspect, dataset1, dataset2 = key.split('_')

        for metric, value in zip(metric_names, values):
            # .loc (not .at) because the row selector is a boolean mask.
            func_results_df.loc[func_rows, f"{aspect}_{dataset1}_{dataset2}_{metric}"] = value
            collected[metric][aspect].append(value)

    for metric in metric_names:
        aspect_averages = {}
        for aspect in aspects:
            values = collected[metric][aspect]
            if values:
                aspect_averages[aspect] = sum(values) / len(values)
                # Written to this method's row rather than to row label 0, so
                # another method's leaderboard entry is never overwritten.
                leaderboard_df.loc[leaderboard_rows, f"func_{aspect}_{metric}"] = aspect_averages[aspect]

        # Overall average only when every aspect contributed at least one value.
        if len(aspect_averages) == len(aspects):
            overall_average = sum(aspect_averages[aspect] for aspect in aspects) / 3
            leaderboard_df.loc[leaderboard_rows, f"func_Ave_{metric}"] = overall_average

    func_results_df.to_csv(func_results_path, index=False)
    leaderboard_df.to_csv(leaderboard_path, index=False)
    return 0
def save_family_output(model_output, method_name, leaderboard_path="./data/leaderboard_results.csv", family_results_path="./data/family_results.csv"):
    """Record family-prediction metrics for one method in two CSV tables.

    ``model_output`` is iterated as ``{dataset: {metric: values}}``. For each
    dataset/metric pair the mean of ``values`` is written to the leaderboard
    table as ``"fam_{dataset}_{metric}_ave"`` (``None`` when ``values`` is
    empty), and every individual value is written to the family-results table
    as ``"{dataset}_{metric}_{i}"`` where ``i`` is its position in ``values``.

    Args:
        model_output: Nested mapping as described above.
        method_name: Value of the ``Method`` column identifying the row to
            update. The row is appended to either table if it is missing.
        leaderboard_path: CSV file holding the leaderboard table.
        family_results_path: CSV file holding the per-value family results.

    Returns:
        Tuple ``(leaderboard_df, family_results_df)`` with the updated tables,
        which have also been written to their CSV files.
    """
    tables = []
    for path in (leaderboard_path, family_results_path):
        table = pd.read_csv(path) if os.path.exists(path) else pd.DataFrame(columns=['Method'])
        # Files written without a 'Method' column would otherwise break the lookup below.
        if 'Method' not in table.columns:
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, family_results_df = tables

    # Boolean row selectors; they stay valid because only columns are added below.
    leaderboard_rows = leaderboard_df['Method'] == method_name
    family_rows = family_results_df['Method'] == method_name

    for dataset, metrics in model_output.items():
        for metric, values in metrics.items():
            avg_value = sum(values) / len(values) if values else None
            # .loc (not .at) because the row selector is a boolean mask.
            leaderboard_df.loc[leaderboard_rows, f"fam_{dataset}_{metric}_ave"] = avg_value

            for i, value in enumerate(values):
                family_results_df.loc[family_rows, f"{dataset}_{metric}_{i}"] = value

    leaderboard_df.to_csv(leaderboard_path, index=False)
    family_results_df.to_csv(family_results_path, index=False)
    return leaderboard_df, family_results_df
def save_affinity_output(model_output, method_name, leaderboard_path="./data/leaderboard_results.csv", affinity_results_path="./data/affinity_results.csv"):
    """Record affinity-prediction metrics for one method in two CSV tables.

    From ``model_output['summary']`` (if present and non-empty) the values of
    ``'val_mse_error'``, ``'val_mae_error'`` and ``'validation_corr'`` are
    written to the leaderboard columns ``'aff_mse_ave'``, ``'aff_mae_ave'``
    and ``'aff_corr_ave'``; a missing summary key stores ``None``.

    From ``model_output['detail']`` (if present and non-empty) the sequences
    under ``'val_mse_errors'``, ``'val_mae_errors'`` and ``'validation_corrs'``
    are written element by element to the affinity-results columns
    ``'mse_{i}'``, ``'mae_{i}'`` and ``'corr_{i}'``. Every element actually
    supplied is stored, so the number of folds is not fixed.

    Args:
        model_output: Mapping with optional ``'summary'`` and ``'detail'``
            sub-mappings as described above.
        method_name: Value of the ``Method`` column identifying the row to
            update. The row is appended to either table if it is missing.
        leaderboard_path: CSV file holding the leaderboard table.
        affinity_results_path: CSV file holding the per-fold affinity results.

    Returns:
        0 after both CSV files have been written.
    """
    tables = []
    for path in (leaderboard_path, affinity_results_path):
        table = pd.read_csv(path) if os.path.exists(path) else pd.DataFrame(columns=['Method'])
        # Files written without a 'Method' column would otherwise break the lookup below.
        if 'Method' not in table.columns:
            table['Method'] = None
        if method_name not in table['Method'].values:
            table = pd.concat([table, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
        tables.append(table)
    leaderboard_df, affinity_results_df = tables

    # Boolean row selectors; they stay valid because only columns are added below.
    leaderboard_rows = leaderboard_df['Method'] == method_name
    affinity_rows = affinity_results_df['Method'] == method_name

    summary = model_output.get('summary', {})
    if summary:
        # .loc (not .at) because the row selector is a boolean mask.
        leaderboard_df.loc[leaderboard_rows, 'aff_mse_ave'] = summary.get('val_mse_error')
        leaderboard_df.loc[leaderboard_rows, 'aff_mae_ave'] = summary.get('val_mae_error')
        leaderboard_df.loc[leaderboard_rows, 'aff_corr_ave'] = summary.get('validation_corr')

    detail = model_output.get('detail', {})
    if detail:
        fold_columns = (
            ('val_mse_errors', 'mse'),
            ('val_mae_errors', 'mae'),
            ('validation_corrs', 'corr'),
        )
        # Use the real number of folds instead of assuming exactly ten, so
        # shorter runs do not raise IndexError and longer runs are not truncated.
        fold_count = max(
            (len(detail[key]) for key, _ in fold_columns if key in detail),
            default=0,
        )
        # Fold-major order keeps the column layout mse_i, mae_i, corr_i.
        for i in range(fold_count):
            for key, prefix in fold_columns:
                if key in detail and i < len(detail[key]):
                    affinity_results_df.loc[affinity_rows, f"{prefix}_{i}"] = detail[key][i]

    leaderboard_df.to_csv(leaderboard_path, index=False)
    affinity_results_df.to_csv(affinity_results_path, index=False)
    return 0