Spaces:
Running
Running
| import yaml | |
| import pandas as pd | |
| import tqdm | |
| from . import semantic_similarity_infer as ssi | |
| from . import target_family_classifier as tfc | |
| from . import function_predictor as fp | |
| from . import binding_affinity_estimator as bae | |
def load_representation(multi_col_representation_vector_file_path):
    """Load a multi-column representation CSV into an Entry/Vector frame.

    The file is read with ``pandas.read_csv``. The ``Entry`` column supplies
    the row identifiers, and every column after the first one is treated as
    one component of that row's representation vector.

    Args:
        multi_col_representation_vector_file_path: Path (or any other input
            accepted by ``pandas.read_csv``) of the representation file.

    Returns:
        A ``pandas.DataFrame`` with one row per CSV row and two columns:
        ``Entry`` (the identifier read from the CSV) and ``Vector`` (a Python
        list of floats built from the remaining columns).

    Raises:
        KeyError: If the CSV has no ``Entry`` column.
        ValueError: If a vector component cannot be converted with ``float``.
    """
    table = pd.read_csv(multi_col_representation_vector_file_path)
    components = table.iloc[:, 1:]

    # Collect every vector first and build the frame once at the end; growing
    # a DataFrame one ``.loc`` row at a time reallocates it on every row.
    # ``name=None`` yields plain tuples, which also works for very wide files.
    vectors = [
        [float(value) for value in row]
        for row in tqdm.tqdm(
            components.itertuples(index=False, name=None),
            total=len(components),
        )
    ]

    # Entries are taken straight from the column, so they stay aligned with
    # the vectors regardless of what the frame's index labels are.
    return pd.DataFrame(
        {
            'Entry': pd.Series(table['Entry'].tolist(), dtype='object'),
            'Vector': pd.Series(vectors, dtype='object'),
        }
    )
def run_probe(
    benchmarks,
    representation_name,
    representation_file_human,
    representation_file_affinity,
    similarity_tasks=["Sparse","200","500"],
    function_prediction_aspect="All_Aspects",
    function_prediction_dataset="All_Data_Sets",
    family_prediction_dataset=["nc","uc50","uc30","mm15"],
    detailed_output=False,
):
    """Run the requested PROBE benchmarks for one protein representation.

    Each benchmark is configured by assigning module-level attributes on its
    implementation module (``ssi``, ``fp``, ``tfc``, ``bae``) and then calling
    that module's entry function. Every result is printed to stdout.

    Args:
        benchmarks: Iterable of benchmark names among ``"similarity"``,
            ``"function"``, ``"family"`` and ``"affinity"``. ``"all"`` selects
            all four. A single string is also accepted; it is split on commas
            and whitespace into names.
        representation_name: Name forwarded to every benchmark module.
        representation_file_human: Path of the representation CSV used by the
            similarity, function and family benchmarks.
        representation_file_affinity: Path forwarded to the affinity benchmark
            as ``bae.skempi_vectors_path``.
        similarity_tasks: Forwarded as ``ssi.similarity_tasks``. The default
            list object is shared between calls and must not be mutated.
        function_prediction_aspect: Forwarded as ``fp.aspect_type``.
        function_prediction_dataset: Forwarded as ``fp.dataset_type``.
        family_prediction_dataset: Dataset names; ``tfc.score_protein_rep`` is
            called once for each of them.
        detailed_output: Forwarded to the similarity, function and family
            benchmark modules.

    Returns:
        Always ``0``.
    """
    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")

    # A bare string would be scanned character by character by the membership
    # tests below, so turn it into a list of names first.
    if isinstance(benchmarks, str):
        requested = benchmarks.replace(",", " ").split()
    else:
        requested = list(benchmarks)

    # "all" is only a shorthand; expand it so that every branch below sees the
    # concrete benchmark names instead of silently skipping.
    if "all" in requested:
        requested.extend(["similarity", "function", "family", "affinity"])

    # NOTE(review): the family branch only hands the file *path* to ``tfc``,
    # yet the vectors are still loaded for it here, as before — confirm
    # whether that load is actually needed.
    if any(name in ("similarity", "function", "family") for name in requested):
        print("\nRepresentation vectors are loading...\n")
        human_representation_dataframe = load_representation(representation_file_human)

    if "similarity" in requested:
        print("\nSemantic similarity Inference Benchmark is running...\n")
        ssi.representation_dataframe = human_representation_dataframe
        ssi.representation_name = representation_name
        ssi.protein_names = ssi.representation_dataframe['Entry'].tolist()
        ssi.similarity_tasks = similarity_tasks
        ssi.detailed_output = detailed_output
        similarity_result = ssi.calculate_all_correlations()
        print("Similarity Result:")
        print(similarity_result)

    if "function" in requested:
        print("\n\nOntology-based protein function prediction benchmark is running...\n")
        fp.aspect_type = function_prediction_aspect
        fp.dataset_type = function_prediction_dataset
        fp.representation_dataframe = human_representation_dataframe
        fp.representation_name = representation_name
        fp.detailed_output = detailed_output
        function_results = fp.pred_output()
        print("Function results:")
        print(function_results)

    if "family" in requested:
        print("\n\nDrug target protein family classification benchmark is running...\n")
        tfc.representation_path = representation_file_human
        tfc.representation_name = representation_name
        tfc.detailed_output = detailed_output
        for dataset in family_prediction_dataset:
            family_result = tfc.score_protein_rep(dataset)
            print(f"Family results for {dataset}:")
            print(family_result)

    if "affinity" in requested:
        print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
        bae.skempi_vectors_path = representation_file_affinity
        bae.representation_name = representation_name
        affinity_result = bae.predict_affinities_and_report_results()
        print("Affinity Results:")
        print(affinity_result)

    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
    return 0