FusOn-pLM / fuson_plm /benchmarking /caid /plot.py

caid benchmark

bae913a about 2 months ago

46.9 kB

	import matplotlib.pyplot as plt
	import seaborn as sns
	import pandas as pd
	import numpy as np
	import os
	import matplotlib.colors as mcolors
	import matplotlib.patches as mpatches
	from matplotlib import font_manager
	import matplotlib.patches as patches
	from sklearn.metrics import roc_curve, auc, r2_score

	from fuson_plm.utils.visualizing import set_font

	# AUROC values for a selection of CAID-2 competition entries, one row per model.
	# Rows carry the 'caid2_competition' model type and no epoch so they can be
	# concatenated with results tables that use the same four columns.
	caid2_winners = pd.DataFrame(
	    [
	        ('Dispredict3', 0.838),
	        ('flDPnn2', 0.836),
	        ('flDPnn', 0.833),
	        ('flDPlr', 0.827),
	        ('flDPlr2', 0.821),
	        ('DisoPred', 0.821),
	        ('IDP-Fusion', 0.818),
	        ('ESpritz-D', 0.802),
	        ('DeepIDP-2L', 0.800),
	        ('disomine', 0.797),
	        ('DISOPRED3-diso', 0.692),
	        ('IUPred3', 0.755),
	        ('AlphaFold-rsa', 0.747),
	        ('AlphaFold-pLDDT', 0.695),
	    ],
	    columns=['Model Name', 'AUROC'],
	)
	caid2_winners['Model Type'] = 'caid2_competition'
	caid2_winners['Model Epoch'] = np.nan

	# Rank shown next to each model name when rankings are requested in the ROC plot.
	# The first ten entries are ranked 1-10 in order; the remaining four have
	# non-contiguous ranks (presumably their overall CAID-2 placement - TODO confirm).
	caid2_model_rankings = {
	    model: rank
	    for rank, model in enumerate(
	        [
	            'Dispredict3', 'flDPnn2', 'flDPnn', 'flDPlr', 'flDPlr2',
	            'DisoPred', 'IDP-Fusion', 'ESpritz-D', 'DeepIDP-2L', 'disomine',
	        ],
	        start=1,
	    )
	}
	caid2_model_rankings.update({
	    'DISOPRED3-diso': 35,
	    'IUPred3': 21,
	    'AlphaFold-rsa': 24,
	    'AlphaFold-pLDDT': 34,
	})

	# Method for lengthening the model name
	def lengthen_model_name(row):
	    """
	    Build the full (unique) model name for one results row.

	    `row` must provide 'Model Type', 'Model Name' and 'Model Epoch'.
	    Names containing 'esm' or 'puncta', and rows of type 'caid2_competition',
	    are returned unchanged; every other name gets an '_e<epoch>' suffix.
	    """
	    kind, full_name, epoch_tag = row['Model Type'], row['Model Name'], row['Model Epoch']

	    keep_as_is = (
	        any(marker in full_name for marker in ('esm', 'puncta'))
	        or kind == 'caid2_competition'
	    )
	    return full_name if keep_as_is else f'{full_name}_e{epoch_tag}'

	# Method for shortening the model name for display
	def shorten_model_name(row):
	    """
	    Build a compact display name for one results row.

	    `row` must provide 'Model Type', 'Model Name' and 'Model Epoch'.

	    Returns:
	        'ESM-2-650M' for any name containing 'esm'; the unchanged name for
	        'caid2_competition' rows; otherwise
	        '<snp|uni>_<layers>L_<tag>_mask<rate>_<datetime>_e<epoch>' parsed from a
	        name shaped like '..._<N>layers_<tag>_..._mask<rate>-<datetime>'.

	    Raises:
	        ValueError: if the name is neither ESM nor a competition entry and does
	            not follow the expected naming pattern.
	    """
	    model_type = row['Model Type']
	    name = row['Model Name']
	    epoch = row['Model Epoch']

	    if 'esm' in name:
	        return 'ESM-2-650M'
	    if model_type == 'caid2_competition':
	        return name

	    if 'snp_' in name:
	        prob_type = 'snp'
	    elif 'uniform_' in name:
	        prob_type = 'uni'
	    else:
	        # Previously this fell through and crashed later with an unbound local.
	        raise ValueError(
	            f"Cannot shorten model name '{name}': expected it to contain 'snp_' or 'uniform_'"
	        )

	    try:
	        layers = name.split('layers')[0].split('_')[-1]
	        kqv_tag = name.split('layers_')[1].split('_')[0]
	        # Everything after 'mask' is '<rate>-<datetime>'; split on the first dash only
	        maskrate, dt = name.split('mask')[1].split('-', 1)
	    except (IndexError, ValueError) as err:
	        raise ValueError(
	            f"Cannot shorten model name '{name}': expected "
	            "'..._<N>layers_<tag>_..._mask<rate>-<datetime>'"
	        ) from err

	    return f'{prob_type}_{layers}L_{kqv_tag}_mask{maskrate}_{dt}_e{epoch}'

	def make_heatmap(df, results_dir='.', gold_standard_model_name="esm2_t33_650M_UR50D",split="test",thresh=None,ax=None):
	    """
	    Plot a one-column AUROC heatmap comparing every model in `df` to a gold standard.

	    Cells are colored by the AUROC difference from the gold-standard row and
	    annotated with the raw AUROC (bold when it beats the gold standard). The
	    figure is saved to
	    `{results_dir}/{split tag}_heatmap_vs_{gold_standard_model_name}.png`.

	    Args:
	        df: DataFrame with 'Model Type', 'Model Name', 'Model Epoch' and 'AUROC'
	            columns. It is not modified.
	        results_dir: directory the PNG is written to.
	        gold_standard_model_name: full model name of the reference row.
	        split: "testing", "training" or "benchmark"; "test" and "train" are
	            accepted as aliases. For every split except "benchmark" the rows of
	            `caid2_winners` are appended before plotting.
	        thresh: unused; kept so existing callers keep working.
	        ax: optional matplotlib Axes to draw on. A new figure is created when None.

	    Raises:
	        ValueError: if `split` is not recognized or the gold-standard model is
	            not present in the data.
	    """
	    split_fname_dict = {
	        "testing": "CAID2_test",
	        "training": "CAID2_train",
	        "benchmark": "FusionPDB_pLDDT_disorder"
	    }
	    split_title_dict = {
	        "testing": "CAID-2 Disorder Prediction",
	        "training": "CAID-2 Disorder Prediction",
	        "benchmark": "FusionPDB_pLDDT Disorder Prediction"
	    }
	    # The default value "test" is not a key of the dictionaries above, so map the
	    # short spellings onto the canonical ones and validate before doing any work.
	    split = {"test": "testing", "train": "training"}.get(split, split)
	    if split not in split_fname_dict:
	        raise ValueError(
	            f"Unknown split '{split}'; expected one of {sorted(split_fname_dict)} (or 'test'/'train')"
	        )

	    # Apply the project font settings
	    set_font()

	    # Declare columns to compare: metrics
	    columns_to_compare = ['AUROC']

	    # Append the CAID-2 competition values unless this is the fusion benchmark.
	    # Either way we end up with our own copy so the caller's frame is untouched.
	    if split != "benchmark":
	        df = pd.concat([df, caid2_winners], ignore_index=True)
	    else:
	        df = df.copy()

	    # Create Short Model Name and Full Model Name columns for later use
	    df['Model Epoch'] = df['Model Epoch'].apply(lambda x: '' if pd.isna(x) else str(int(x)))
	    df['Short Model Name'] = df.apply(shorten_model_name, axis=1)
	    df['Full Model Name'] = df.apply(lengthen_model_name, axis=1)

	    # Isolate gold standard row for later comparison
	    is_gold = df['Full Model Name'] == gold_standard_model_name
	    if not is_gold.any():
	        raise ValueError(f"Gold standard model '{gold_standard_model_name}' not found in the results")
	    gold_standard = df[is_gold].iloc[0]
	    gold_standard_short_model_name = gold_standard['Short Model Name']

	    # Create a new dataframe for the heatmap; sort by model type and place gold standard on top
	    heatmap_data = df[['Model Type', 'Short Model Name', 'Full Model Name'] + columns_to_compare].copy()
	    heatmap_data['is_gold_standard'] = (heatmap_data['Full Model Name'] == gold_standard_model_name).astype(int)
	    heatmap_data = (
	        heatmap_data
	        .sort_values(by=['is_gold_standard', 'Model Type', 'AUROC'], ascending=[False, True, False])
	        .reset_index(drop=True)
	        .drop(columns=['is_gold_standard'])
	    )
	    # Save the original values before calculating differences so we can use them for annotation
	    original_values = heatmap_data[columns_to_compare].copy()

	    # Calculate differences from the gold standard
	    for col in columns_to_compare:
	        heatmap_data[col] = heatmap_data[col] - gold_standard[col]

	    # Diverging colormap; center=0 below puts "equal to gold standard" at the midpoint
	    cmap = sns.color_palette("coolwarm", as_cmap=True)

	    ### Make the plot
	    # can plot on a bigger plot, or make it an individual plot
	    if ax is None:
	        tallsize = max(8, 8 + .25 * (len(heatmap_data) - 26))
	        fig, ax = plt.subplots(1, 1, figsize=(8, tallsize), dpi=300)
	    else:
	        fig = ax.figure

	    # Draw on the requested axes. yticklabels=True forces one tick per row so the
	    # per-row label styling below indexes the right label.
	    sns.heatmap(heatmap_data.set_index('Short Model Name').drop(columns=['Model Type', 'Full Model Name']),
	                annot=False, fmt='', cmap=cmap, center=0, yticklabels=True,
	                cbar_kws={'label': 'Difference from Gold Standard'}, ax=ax)

	    # Explicitly set tick labels to prevent them from being messed up
	    ax.set_yticklabels(heatmap_data['Short Model Name'], rotation=0, fontsize=12)
	    # Add padding to the y-axis label
	    ax.set_ylabel("Short Model Name", labelpad=20)

	    # Annotate every cell with its raw value; bold any value that exceeds the gold standard
	    for i in range(original_values.shape[0]):
	        for j, col in enumerate(columns_to_compare):
	            value = original_values.iloc[i, j]
	            text_kwargs = {'fontweight': 'bold'} if value > gold_standard[col] else {}
	            ax.text(j + 0.5, i + 0.5, f'{value:.3f}', ha='center', va='center', color='black', **text_kwargs)

	    # Add a thick white line wherever the model type changes between adjacent rows
	    model_type_series = heatmap_data['Model Type'].values
	    for i in range(1, len(model_type_series)):
	        if model_type_series[i] != model_type_series[i - 1]:
	            ax.axhline(i, color='white', linewidth=8)

	    # Highlight the gold standard label in gold and every model that beats it in red
	    tick_labels = ax.get_yticklabels()
	    for ytick, model_name in enumerate(heatmap_data['Short Model Name']):
	        if model_name == gold_standard_short_model_name:
	            tick_labels[ytick].set_bbox(dict(facecolor='gold', alpha=0.5, edgecolor='gold'))
	        elif original_values.loc[ytick, 'AUROC'] > gold_standard['AUROC']:
	            tick_labels[ytick].set_bbox(dict(facecolor='red', alpha=0.3, edgecolor='red'))

	    # Make legend
	    gold_patch = mpatches.Patch(color='gold', alpha=0.5, label='Gold Standard')
	    red_patch = mpatches.Patch(color='red', alpha=0.5, label='Winner')
	    ax.legend(handles=[gold_patch, red_patch], loc='best', bbox_to_anchor=(0, 0))

	    ax.set_title(split_title_dict[split])

	    # Rotate the color bar label
	    cbar = ax.collections[0].colorbar
	    cbar.ax.yaxis.set_label_position('right')
	    cbar.ax.yaxis.set_ticks_position('right')
	    cbar.set_label('Difference from Gold Standard', rotation=270, labelpad=20)

	    # Leave extra room on the right side to fit the rotated colorbar label
	    fig.tight_layout(rect=[0, 0, 0.95, 1])

	    fig.savefig(f"{results_dir}/{split_fname_dict[split]}_heatmap_vs_{gold_standard_model_name}.png")

	# Plot AUROC curve of ONE model of interest on its fusion pdb performance
	def make_benchmark_auroc_curve(results_dir='.', seq_label_dict=None, path_to_results_of_interest='', model_alias=None):
	    """
	    Plot the ROC curve of a single model and save it as a PNG.

	    Args:
	        results_dir: directory the figure is written to.
	        seq_label_dict: mapping from sequence to its per-residue label string
	            (one digit per residue).
	        path_to_results_of_interest: CSV with a 'sequence' column and a 'prob_1'
	            column of comma-separated per-residue probabilities. The path must
	            contain 'trained_models/'; the components after it identify the model.
	        model_alias: legend/file name for the model; derived with
	            `shorten_model_name` when None.
	    """
	    # Identify the model from the path components after 'trained_models/'
	    path_parts = path_to_results_of_interest.split('trained_models/')[1].split('/')
	    model_type = path_parts[0]
	    if len(path_parts) == 5:
	        model_name = path_parts[1]
	        model_epoch = path_parts[2].split('epoch')[1]
	        _hyperparams = path_parts[3]  # parsed but not used below
	    else:
	        model_name = path_parts[0]
	        model_epoch = np.nan
	        _hyperparams = path_parts[1]  # parsed but not used below

	    if model_alias is None:
	        model_row = pd.DataFrame(data={
	            'Model Type': [model_type], 'Model Name': [model_name], 'Model Epoch': [model_epoch]
	        })
	        model_alias = model_row.apply(shorten_model_name, axis=1).iloc[0]

	    color_map = {model_alias: 'black'}
	    # Only keep the entry when a path was actually supplied
	    method_results = {}
	    if path_to_results_of_interest not in [None, '']:
	        method_results[model_alias] = path_to_results_of_interest

	    set_font()
	    plt.figure(figsize=(10, 6), dpi=300)

	    # Collect (method, fpr, tpr, auroc) for every result file
	    roc_data = []
	    for method, path in method_results.items():
	        preds = pd.read_csv(path)

	        # Flatten per-sequence probabilities and labels into per-residue lists
	        joined_probs = ",".join(preds['prob_1'].tolist())
	        preds['labels'] = preds['sequence'].apply(lambda seq: seq_label_dict[seq])
	        joined_labels = "".join(preds['labels'].tolist())
	        prob_1 = [float(p) for p in joined_probs.split(",")]
	        labels = [int(c) for c in joined_labels]
	        sequences = "".join(preds['sequence'].tolist())
	        assert len(prob_1)==len(labels)==len(sequences)

	        fpr, tpr, _ = roc_curve(labels, prob_1)
	        roc_data.append((method, fpr, tpr, auc(fpr, tpr)))

	    # Highest AUROC first
	    roc_data.sort(key=lambda entry: entry[3], reverse=True)

	    for method, fpr, tpr, roc_auc in roc_data:
	        # The model of interest gets a thicker, fully opaque line
	        line_style = {'lw': 2} if method == model_alias else {'lw': 1, 'alpha': 0.7}
	        plt.plot(fpr, tpr, color=color_map[method], label=f'{method} ({roc_auc:0.3f})', **line_style)

	    # Axes limits, chance diagonal and labels
	    plt.xlim([0.0, 1.0])
	    plt.ylim([0.0, 1.05])
	    plt.plot([0, 1], [0, 1], color='darkgrey', lw=2, linestyle='--')
	    plt.xlabel('False Positive Rate')
	    plt.ylabel('True Positive Rate')
	    plt.title('Receiver Operating Characteristic (ROC) Curve')

	    # Legend to the right of the axes, with the model of interest in bold
	    handles, labels = plt.gca().get_legend_handles_labels()
	    legend = plt.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5))
	    for entry in legend.get_texts():
	        if model_alias in entry.get_text():
	            entry.set_fontweight('bold')

	    plt.tight_layout()
	    plt.savefig(f'{results_dir}/FusionPDB_pLDDT_disorder_{model_alias}_AUROC_curve.png')

	# Plot AUROC curve of ONE model of interest with all the CAID models
	def make_auroc_curve(results_dir='.', seq_label_dict=None, seq_ids_dict=None, path_to_results_of_interest='', model_alias=None, path_to_esm_results=None, with_rankings=False):
	    """
	    Plot the ROC curve of one model of interest together with the CAID-2 models.

	    Besides the PNG, two source-data CSVs are written to `results_dir`:
	    'CAID_prediction_source_data.csv' (per-sequence predictions of every method,
	    labels and ids) and 'CAID_fpr_tpr_source_data.csv' (ROC points per method).

	    Args:
	        results_dir: output directory.
	        seq_label_dict: mapping from sequence to its per-residue label string.
	        seq_ids_dict: mapping from sequence to its identifier.
	        path_to_results_of_interest: prediction CSV of the model of interest; the
	            path must contain 'trained_models/'. A
	            'caid_hyperparam_screen_test_metrics.csv' in the same directory
	            supplies the AUROC shown in the legend for this model.
	        model_alias: legend/file name for the model; derived with
	            `shorten_model_name` when None.
	        path_to_esm_results: optional prediction CSV plotted as 'ESM-2-650M'.
	        with_rankings: prefix legend entries with the rank from
	            `caid2_model_rankings` where one exists.
	    """
	    # Identify the model from the path components after 'trained_models/'
	    path_parts = path_to_results_of_interest.split('trained_models/')[1].split('/')
	    model_type = path_parts[0]
	    if len(path_parts) == 5:
	        model_name = path_parts[1]
	        model_epoch = path_parts[2].split('epoch')[1]
	        _hyperparams = path_parts[3]  # parsed but not used below
	    else:
	        model_name = path_parts[0]
	        model_epoch = np.nan
	        _hyperparams = path_parts[1]  # parsed but not used below

	    if model_alias is None:
	        model_row = pd.DataFrame(data={
	            'Model Type': [model_type], 'Model Name': [model_name], 'Model Epoch': [model_epoch]
	        })
	        model_alias = model_row.apply(shorten_model_name, axis=1).iloc[0]

	    color_map = {
	        'Dispredict3': '#d62727', #1
	        'flDPnn2': '#ff7f0f', #2
	        'flDPnn': '#1f77b4', #3
	        'flDPlr': '#bcbd21', #4
	        'flDPlr2': '#16becf', #5
	        'DisoPred': '#1f77b4', #6
	        'IDP-Fusion': '#d62727', #7
	        'ESpritz-D': '#8b564c', #8
	        'DeepIDP-2L': '#e377c2', #9
	        'disomine': '#e377c2', #10
	        'DISOPRED3-diso': '#ff892d',
	        'IUPred3': '#8b564c',
	        'AlphaFold-rsa': '#2ba02b',
	        'AlphaFold-pLDDT': '#ff892d',
	    }
	    color_map[model_alias] = 'black'

	    # Method name -> file stem of its CAID-2 result file. Two stems differ from the
	    # method name because that is how the raw downloads are named.
	    caid_file_stems = {
	        'Dispredict3': 'Dispredict3',
	        'flDPnn2': 'flDPnn2',
	        'flDPnn': 'flDPnn',
	        'flDPlr': 'flDPtr',
	        'flDPlr2': 'flDPlr2',
	        'DisoPred': 'DisoPred',
	        'IDP-Fusion': 'IDP-Fusion',
	        'ESpritz-D': 'ESpritz-D',
	        'DeepIDP-2L': 'DeepIDP-2L',
	        'disomine': 'disomine',
	        'DISOPRED3-diso': 'DISOPRED3-diso',
	        'AlphaFold-rsa': 'AlphaFold-rsa',
	        'AlphaFold-pLDDT': 'AlphaFold-disorder',
	        'IUPred3': 'IUPred3',
	    }
	    method_results = {
	        method: f'processed_data/caid2_competition_results/{stem}_CAID-2_Disorder_NOX.csv'
	        for method, stem in caid_file_stems.items()
	    }
	    method_results[model_alias] = path_to_results_of_interest
	    if path_to_esm_results is not None:
	        method_results['ESM-2-650M'] = path_to_esm_results
	        color_map['ESM-2-650M'] = 'black'

	    # Drop methods without a result path
	    method_results = {method: path for method, path in method_results.items() if path not in [None, '']}

	    set_font()
	    plt.figure(figsize=(12, 6), dpi=300)

	    # Accumulators for the source-data tables and the ROC curves
	    merged_preds = pd.DataFrame(data={'sequence': []})
	    merged_tpr_fpr = pd.DataFrame(data={'model': [], 'fpr': [], 'tpr': []})
	    roc_data = []
	    for method, path in method_results.items():
	        preds = pd.read_csv(path)
	        prob_col = f"{method}_prob_1"
	        merged_preds = pd.merge(
	            merged_preds,
	            preds.rename(columns={'prob_1': prob_col})[['sequence', prob_col]],
	            on=['sequence'], how='outer',
	        )

	        # Flatten per-sequence probabilities and labels into per-residue lists
	        joined_probs = ",".join(preds['prob_1'].tolist())
	        preds['labels'] = preds['sequence'].apply(lambda seq: seq_label_dict[seq])
	        joined_labels = "".join(preds['labels'].tolist())
	        prob_1 = [float(p) for p in joined_probs.split(",")]
	        residue_labels = [int(c) for c in joined_labels]
	        sequences = "".join(preds['sequence'].tolist())
	        assert len(prob_1)==len(residue_labels)==len(sequences)

	        # ROC points for this method, also kept as source data
	        fpr, tpr, _ = roc_curve(residue_labels, prob_1)
	        merged_tpr_fpr = pd.concat([
	            merged_tpr_fpr,
	            pd.DataFrame(data={'model': [method] * len(fpr), 'fpr': fpr, 'tpr': tpr}),
	        ])
	        roc_auc = auc(fpr, tpr)

	        if method == model_alias:
	            # For the model of interest, show the AUROC stored next to its predictions
	            path_to_og_metrics = path_to_results_of_interest.rsplit('/', 1)[0] + '/caid_hyperparam_screen_test_metrics.csv'
	            roc_auc = pd.read_csv(path_to_og_metrics)['AUROC'][0]

	        roc_data.append((method, fpr, tpr, roc_auc))

	    # Save the merged predictions (with labels and ids, without sequences) and ROC points
	    merged_preds['labels'] = merged_preds['sequence'].apply(lambda seq: seq_label_dict[seq])
	    merged_preds['labels'] = merged_preds['labels'].apply(lambda lab: ",".join([str(y) for y in lab]))
	    merged_preds['ids'] = merged_preds['sequence'].apply(lambda seq: seq_ids_dict[seq])
	    merged_preds.drop(columns=['sequence']).to_csv(f"{results_dir}/CAID_prediction_source_data.csv", index=False)
	    merged_tpr_fpr.to_csv(f"{results_dir}/CAID_fpr_tpr_source_data.csv", index=False)

	    # Highest AUROC first
	    roc_data.sort(key=lambda entry: entry[3], reverse=True)

	    # Legend names, optionally prefixed with the CAID-2 rank
	    legend_names = {
	        method: (
	            f"{caid2_model_rankings[method]}. {method}"
	            if with_rankings and method in caid2_model_rankings
	            else method
	        )
	        for method in method_results
	    }

	    for method, fpr, tpr, roc_auc in roc_data:
	        if method == 'ESM-2-650M' and path_to_esm_results is not None:
	            line_style = {'lw': 2, 'linestyle': '--'}
	        elif method == model_alias:
	            line_style = {'lw': 2}
	        else:
	            line_style = {'lw': 1, 'alpha': 0.7}
	        plt.plot(fpr, tpr, color=color_map[method],
	                 label=f'{legend_names[method]} ({roc_auc:0.3f})', **line_style)

	    # Axes limits, chance diagonal and labels
	    plt.xlim([0.0, 1.0])
	    plt.ylim([0.0, 1.05])
	    plt.xticks(fontsize=20)
	    plt.yticks(fontsize=20)
	    plt.plot([0, 1], [0, 1], color='darkgrey', lw=2, linestyle='--')
	    plt.xlabel('False Positive Rate', fontsize=22)
	    plt.ylabel('True Positive Rate', fontsize=22)
	    plt.title('CAID2 Disorder NOX Dataset: ROC Curve', fontsize=22)

	    # Legend to the right of the axes; bold the model of interest (and ESM when compared)
	    handles, legend_labels = plt.gca().get_legend_handles_labels()
	    legend = plt.legend(handles, legend_labels, loc="center left", bbox_to_anchor=(1.1, 0.5), fontsize=16)
	    for entry in legend.get_texts():
	        entry_text = entry.get_text()
	        if model_alias in entry_text or (path_to_esm_results is not None and "ESM-2-650M" in entry_text):
	            entry.set_fontweight('bold')

	    plt.tight_layout()
	    if path_to_esm_results is not None:
	        figpath = f'{results_dir}/CAID2_{model_alias}_with_ESM_AUROC_curve.png'
	    else:
	        figpath = f'{results_dir}/CAID2_{model_alias}_AUROC_curve.png'
	    plt.savefig(figpath)


	def plot_disorder_content_scatter(train_labels, test_labels, benchmark_labels, savepath='splits/disorder_content_scatter.png'):
	    """
	    Scatter sequence length against fraction of disordered residues for the
	    train, test and fusion benchmark sets, based on the TRUE labels.

	    Each labels argument is an iterable of label strings such as
	    ['11110000', '0001110', ...]. The figure is saved to `savepath`.
	    """

	    def length_and_fraction(label_vectors):
	        # One (length, fraction of 1s) pair per label string
	        lengths, fractions = [], []
	        for vec in label_vectors:
	            bits = [int(ch) for ch in vec]
	            lengths.append(len(bits))
	            fractions.append(sum(bits)/len(bits))
	        return lengths, fractions

	    train_lengths, train_frac_disorder = length_and_fraction(train_labels)
	    test_lengths, test_frac_disorder = length_and_fraction(test_labels)
	    benchmark_lengths, benchmark_frac_disorder = length_and_fraction(benchmark_labels)

	    set_font()
	    color_map = {
	        'train': '#0072B2',
	        'test': '#E69F00',
	        'fusion': 'purple'
	    }

	    fig, ax = plt.subplots(figsize=(10, 6))

	    # One point cloud per dataset, drawn in train / test / fusion order
	    point_clouds = [
	        (train_lengths, train_frac_disorder, 'train', 'Train'),
	        (test_lengths, test_frac_disorder, 'test', 'Test'),
	        (benchmark_lengths, benchmark_frac_disorder, 'fusion', 'Fusion'),
	    ]
	    for xs, ys, color_key, legend_name in point_clouds:
	        ax.scatter(xs, ys, color=color_map[color_key], label=legend_name, alpha=0.7)

	    ax.set_xlabel('Length')
	    ax.set_ylabel('Fraction of Disorder')
	    ax.set_title('Length vs. Fraction of Disorder for Train, Test, and Benchmark Datasets')
	    ax.legend()
	    plt.tight_layout()
	    plt.savefig(savepath)

	def plot_disorder_content_hist(labels, ids, title="data", color="black", savepath='splits/disorder_content_histograms.png'):
	    """
	    Plot a histogram of percent disordered residues for ONE dataset, based on the TRUE labels.

	    Args:
	        labels: iterable of label strings such as ['11110000', '0001110', ...].
	        ids: identifiers, one per entry of `labels`, written to the source-data file.
	        title: plot title.
	        color: histogram bar color.
	        savepath: where the figure is saved. The per-sequence percentages are
	            saved next to it as '<savepath without extension>_source_data.csv'.
	    """
	    set_font()

	    # Percent of residues labelled 1 in each sequence
	    frac_disorder = [100*sum(int(x) for x in vec)/len(vec) for vec in labels]

	    # Save the source data. Derive the CSV name from the extension-less path so it
	    # can never coincide with the figure path, whatever extension savepath has.
	    source_data = pd.DataFrame(data={
	        'ID': ids,
	        'Percent_Disordered': frac_disorder
	    })
	    source_data['Percent_Disordered'] = source_data['Percent_Disordered'].round(3)
	    savepath_root, _ = os.path.splitext(savepath)
	    source_data.to_csv(f"{savepath_root}_source_data.csv", index=False)

	    fig, ax = plt.subplots(1, 1, figsize=(20, 12))

	    # Plot the histogram
	    title_fontsize = 70
	    axislabel_fontsize = 70
	    tick_fontsize = 50
	    ax.hist(frac_disorder, bins=20, color=color, alpha=0.7)
	    ax.set_title(title, fontsize=title_fontsize)
	    ax.set_xlabel('% Disordered', fontsize=axislabel_fontsize)
	    ax.set_ylabel('Count', fontsize=axislabel_fontsize)
	    ax.grid(True)
	    ax.set_axisbelow(True)  # keep the grid behind the bars
	    ax.tick_params(axis='both', which='major', labelsize=tick_fontsize)

	    # Mark the mean (dashed) and median (solid) percent disorder
	    mean_coverage = np.mean(frac_disorder)
	    median_coverage = np.median(frac_disorder)
	    ax.axvline(mean_coverage, color='black', linestyle='--', linewidth=2, label=f'Mean: {mean_coverage:.1f}%')
	    ax.axvline(median_coverage, color='black', linestyle='-', linewidth=2, label=f'Median: {median_coverage:.1f}%')

	    ax.legend(fontsize=50, title_fontsize=50)

	    plt.tight_layout()
	    plt.savefig(savepath)

	def plot_group_disorder_content_hist(train_labels, test_labels, benchmark_labels, savepath='splits/disorder_content_histograms.png',orient='horizontal'):
	    """
	    Plot three histograms comparing the fraction of disordered residues in the
	    train, test and fusion benchmark sets, based on the TRUE labels.

	    Args:
	        train_labels, test_labels, benchmark_labels: iterables of label strings
	            such as ['11110000', '0001110', ...].
	        savepath: where the figure is saved.
	        orient: 'horizontal' for a 1x3 layout or 'vertical' for a 3x1 layout.

	    Raises:
	        ValueError: if `orient` is neither 'horizontal' nor 'vertical'.
	    """
	    if orient not in ('horizontal', 'vertical'):
	        raise ValueError(f"orient must be 'horizontal' or 'vertical', got {orient!r}")

	    def fraction_disordered(label_vectors):
	        # Fraction of residues labelled 1 in each label string
	        return [sum(int(x) for x in vec)/len(vec) for vec in label_vectors]

	    color_map = {
	        'train': '#0072B2',
	        'test': '#E69F00',
	        'fusion': 'mediumpurple'
	    }
	    # (title, data, color) for each panel, in plotting order
	    panels = [
	        ('CAID2 Train', fraction_disordered(train_labels), color_map['train']),
	        ('CAID2 Test', fraction_disordered(test_labels), color_map['test']),
	        ('Fusion Oncoproteins', fraction_disordered(benchmark_labels), color_map['fusion']),
	    ]

	    set_font()

	    # Create a 1x3 subplot (1 row, 3 columns) or 3x1
	    if orient == 'horizontal':
	        fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=False)
	    else:
	        fig, axes = plt.subplots(3, 1, figsize=(5, 15), sharey=False)

	    title_fontsize = 26
	    axislabel_fontsize = 26
	    tick_fontsize = 16
	    last_panel = len(panels) - 1
	    for position, (axis, (panel_title, fractions, panel_color)) in enumerate(zip(axes, panels)):
	        axis.hist(fractions, bins=20, color=panel_color, alpha=0.7)
	        axis.set_title(panel_title, fontsize=title_fontsize)
	        # Side by side every panel gets an x label; stacked, only the bottom one does
	        if orient == 'horizontal' or position == last_panel:
	            axis.set_xlabel('Fraction of Disorder', fontsize=axislabel_fontsize)
	        # Stacked every panel gets a y label; side by side, only the first one does
	        if orient == 'vertical' or position == 0:
	            axis.set_ylabel('Frequency', fontsize=axislabel_fontsize)
	        axis.grid(True)
	        axis.set_axisbelow(True)  # keep the grid behind the bars
	        axis.tick_params(axis='both', which='major', labelsize=tick_fontsize)

	    plt.tight_layout()
	    plt.savefig(savepath)

	def categorize_plddt(values):
	    """
	    Count pLDDT values per confidence band.

	    Args:
	        values: any iterable of numbers (lists and one-shot iterators both work,
	            since the input is traversed exactly once).

	    Returns:
	        dict with keys "<= 50", "50-70", "70-90", "> 90" (in that order) mapping
	        to the number of values in each band. Upper bounds are inclusive. Values
	        that compare False against every bound (NaN) are not counted.
	    """
	    categories = {"<= 50": 0, "50-70": 0, "70-90": 0, "> 90": 0}
	    for x in values:
	        if x <= 50:
	            categories["<= 50"] += 1
	        elif x <= 70:
	            categories["50-70"] += 1
	        elif x <= 90:
	            categories["70-90"] += 1
	        elif x > 90:
	            categories["> 90"] += 1
	    return categories


	def plot_fusion_sequence_pLDDT_left_to_right(fusion_structure_data, fusiongene, save_path=''):
	    """
	    Plot each amino acid of every structure of `fusiongene` as a colored bar based on its pLDDT.

	    One row is drawn per structure (sorted by 'Fusion_Length'). A circle to the
	    right of each row is colored by the structure's average pLDDT
	    ('Fusion_pLDDT'), and a black box marks the breakpoint region when head/tail
	    gene fusion indices are available for that row.

	    Args:
	        fusion_structure_data: DataFrame with the columns 'FusionGene', 'seq_id',
	            'Fusion_Length', 'Fusion_pLDDT', 'Fusion_AA_pLDDTs' (comma-separated
	            string) and the 'top_hg_*' / 'top_tg_*' UniProt columns.
	        fusiongene: value of 'FusionGene' to plot.
	        save_path: directory the source-data CSV and the PNG are written to.
	    """
	    set_font()
	    # Filter for specific fusion data and preprocess
	    df_of_interest = fusion_structure_data[fusion_structure_data['FusionGene'] == fusiongene].copy()
	    df_of_interest['Fusion_AA_pLDDTs'] = df_of_interest['Fusion_AA_pLDDTs'].apply(lambda x: [float(i) for i in x.split(',')])
	    df_of_interest['Label'] = df_of_interest['Fusion_Length'].astype(str) + 'AAs'

	    # Sort data by Fusion_Length; after the reset the index equals the row position
	    df_of_interest = df_of_interest.sort_values(by='Fusion_Length', ascending=True).reset_index(drop=True)
	    n_structures = len(df_of_interest)

	    # Define colors for each pLDDT range
	    category_colors = {"<= 50": "#f27842", "50-70": "#f8d514", "70-90": "#60c1e8", "> 90": "#004ecb"}

	    # Helper function to get color based on pLDDT
	    def get_color(pLDDT):
	        if pLDDT > 90:
	            return category_colors["> 90"]
	        elif pLDDT > 70:
	            return category_colors["70-90"]
	        elif pLDDT > 50:
	            return category_colors["50-70"]
	        else:
	            return category_colors["<= 50"]

	    # Helper function to parse a comma-separated index string; missing values (NaN) give None
	    def parse_indices(value):
	        if isinstance(value, str):
	            return [int(x) for x in value.split(',')]
	        return None

	    # Create exactly one figure; use a short one when there are only a few rows
	    fig, ax = plt.subplots(figsize=(10, 2) if n_structures < 3 else (10, 6))

	    df_of_interest['Fusion_AA_colors'] = df_of_interest['Fusion_AA_pLDDTs'].apply(lambda x: [get_color(plddt) for plddt in x])
	    df_of_interest['Fusion_pLDDT_color'] = df_of_interest['Fusion_pLDDT'].apply(get_color)
	    # just save the columns needed for the plot
	    # NOTE(review): this file name keeps the raw fusion gene name while the PNG
	    # below replaces "::" with "-"; left unchanged so existing consumers still find it.
	    df_of_interest[['FusionGene','seq_id','Fusion_Length','Fusion_pLDDT','Fusion_AA_pLDDTs','Fusion_AA_colors','Fusion_pLDDT_color',
	                    'top_hg_UniProtID','top_hg_UniProt_isoform','top_hg_UniProt_fus_indices',
	                    'top_tg_UniProtID','top_tg_UniProt_isoform','top_tg_UniProt_fus_indices']].to_csv(f"{save_path}/plddt_sequence_{fusiongene}_source_data.csv",index=False)

	    # Draw one row of per-residue bars per structure, centered at y = row index
	    bar_height = 0.7
	    for idx, row in df_of_interest.iterrows():
	        pLDDT_values = row['Fusion_AA_pLDDTs']
	        ax.bar(range(len(pLDDT_values)),
	               [bar_height] * len(pLDDT_values), color=row['Fusion_AA_colors'], edgecolor='none',
	               bottom=idx - bar_height / 2)

	    # Marker size depends only on how many rows there are
	    if n_structures > 10:
	        markersize = 10
	    elif n_structures > 5:
	        markersize = 16
	    else:
	        markersize = 12
	    # Markers sit just right of the longest sequence
	    marker_x = 1.02 * df_of_interest['Fusion_Length'].max()

	    for idx, row in df_of_interest.iterrows():
	        # Average-pLDDT marker, read from this row so equal-length structures stay distinct
	        ax.plot(marker_x, idx, marker='o', color="black", markersize=markersize,
	                markerfacecolor=get_color(row['Fusion_pLDDT']), markeredgewidth=2)

	        # Add breakpoint box - make sure we actually HAVE at least one of the index lists
	        hg_indices = parse_indices(row['top_hg_UniProt_fus_indices'])
	        tg_indices = parse_indices(row['top_tg_UniProt_fus_indices'])
	        print(hg_indices, tg_indices)

	        if (hg_indices is not None) and (tg_indices is not None):
	            box_start = min(hg_indices[-1], tg_indices[0])
	            box_end = max(hg_indices[-1], tg_indices[0])
	        elif hg_indices is not None:
	            box_start, box_end = hg_indices[-1], hg_indices[-1]
	        elif tg_indices is not None:
	            box_start, box_end = tg_indices[0], tg_indices[0]
	        else:
	            # No breakpoint information for this structure: draw no box rather
	            # than reusing the previous row's coordinates
	            continue

	        print(f"box indices for structure {idx}, fusion gene {fusiongene}", box_start, box_end)

	        # Outline the breakpoint region over the bar row
	        rect = patches.Rectangle((box_start, idx - bar_height / 2), box_end-box_start, bar_height, linewidth=2, edgecolor='black', facecolor='none')
	        ax.add_patch(rect)

	    # Customize plot
	    ax.set_yticks([]) # Hide y-axis ticks
	    ax.set_yticklabels([]) # Hide y-axis labels
	    ax.set_ylim(-0.5, n_structures - 0.5) # reduce white space at top
	    ax.set_xlim(left=0) # Start x-axis at 0 to make bars flush left
	    ax.set_xlabel("Amino Acid Sequence (ordered)", fontsize=14)
	    ax.tick_params(axis='x', labelsize=30)

	    ax.set_title(f"{fusiongene} pLDDT Distribution by Amino Acid Sequence", fontsize=16)
	    fig.tight_layout()

	    # Save figure
	    fusiongene_savename = fusiongene.replace("::","-")
	    fig.savefig(f"{save_path}/plddt_sequence_{fusiongene_savename}.png", dpi=300)
	    plt.show()

	def plot_favorite_fusion_pLDDT_distribution(fusion_structure_data, fusiongene, save_path=''):
	    """
	    Make a horizontal stacked bar chart of the pLDDT distribution of every
	    structure of `fusiongene`.

	    Each structure (sorted by 'Fusion_Length') gets one bar whose segments count
	    the residues in the pLDDT bands "<= 50", "50-70", "70-90" and "> 90". A square
	    marker at the end of each bar is colored by the structure's average pLDDT
	    ('Fusion_pLDDT'). The figure is saved to
	    `{save_path}/plddt_dist_<fusiongene with '::' replaced by '-'>.png`.

	    Args:
	        fusion_structure_data: DataFrame with 'FusionGene', 'Fusion_Length',
	            'Fusion_pLDDT' and 'Fusion_AA_pLDDTs' (comma-separated string) columns.
	        fusiongene: value of 'FusionGene' to plot.
	        save_path: output directory.
	    """
	    set_font()
	    # Filter for the requested fusion and preprocess
	    df_of_interest = fusion_structure_data[fusion_structure_data['FusionGene'] == fusiongene].copy()
	    df_of_interest['Fusion_AA_pLDDTs'] = df_of_interest['Fusion_AA_pLDDTs'].apply(lambda x: [float(i) for i in x.split(',')])
	    # Sort data by Fusion_Length
	    df_of_interest = df_of_interest.sort_values(by='Fusion_Length', ascending=True).reset_index(drop=True)
	    n_structures = len(df_of_interest)

	    # Count residues per pLDDT band for each structure. Rows are handled by
	    # position (not by a length-based label) so equal-length structures are kept apart.
	    categories = ["<= 50", "50-70", "70-90", "> 90"]
	    category_colors = {"<= 50": "#f27842", "50-70": "#f8d514", "70-90": "#60c1e8", "> 90": "#004ecb"}
	    per_structure = [categorize_plddt(plddt_values) for plddt_values in df_of_interest['Fusion_AA_pLDDTs']]
	    counts = {cat: np.array([band_counts[cat] for band_counts in per_structure], dtype=float) for cat in categories}

	    # Create exactly one figure; use a short one when there are only a few bars
	    fig, ax = plt.subplots(figsize=(10, 2) if n_structures < 3 else (10, 6))

	    # Stack each category horizontally
	    y_positions = np.arange(n_structures)
	    bottom = np.zeros(n_structures)
	    for cat in categories:
	        ax.barh(y_positions, counts[cat], label=cat, color=category_colors[cat], left=bottom)
	        bottom += counts[cat] # Update the left position for the next stack

	    # Marker size depends only on how many bars there are
	    if n_structures > 10:
	        markersize = 10
	    elif n_structures > 5:
	        markersize = 16
	    else:
	        markersize = 12
	    # Markers are offset from the end of each bar by 2% of the longest sequence
	    marker_offset = .02 * df_of_interest['Fusion_Length'].max()

	    # Mark each bar with a square colored by the average pLDDT band
	    for idx, avg_plddt_value in enumerate(df_of_interest['Fusion_pLDDT']):
	        if avg_plddt_value > 90:
	            color = category_colors["> 90"]
	        elif avg_plddt_value > 70:
	            color = category_colors["70-90"]
	        elif avg_plddt_value > 50:
	            color = category_colors["50-70"]
	        else:
	            color = category_colors["<= 50"]
	        ax.plot(bottom[idx] + marker_offset, idx, marker='s', color="black", markersize=markersize, markerfacecolor=color, markeredgewidth=2)

	    # Large x tick labels; y ticks and labels are hidden
	    ax.tick_params(axis='x', labelsize=30)
	    ax.tick_params(axis='y', left=False, labelleft=False)

	    fig.tight_layout()
	    fusiongene_savename = fusiongene.replace("::","-")
	    fig.savefig(f"{save_path}/plddt_dist_{fusiongene_savename}.png",dpi=300)

	def make_all_favorite_fusion_pLDDT_plots(favorite_fusions,left_to_right=True):
	    """
	    Draw one pLDDT figure for every fusion gene in ``favorite_fusions``.

	    The FusionPDB structure table is loaded, each row is given a ``seq_id`` by
	    looking up its ``Fusion_Seq`` among the ``aa_seq`` values of fuson_db, and the
	    SwissProt top-alignment table is left-joined on ``seq_id``. The combined
	    table is then handed to the chosen plotting function once per fusion gene.

	    Args:
	        favorite_fusions: iterable of fusion gene names; each one is forwarded
	            unchanged to the plotting function.
	        left_to_right: when truthy, plot with
	            ``plot_fusion_sequence_pLDDT_left_to_right``; otherwise plot with
	            ``plot_favorite_fusion_pLDDT_distribution``.
	    """
	    figure_dir = 'processed_data/figures/fusion_disorder'

	    structures = pd.read_csv('processed_data/fusionpdb/FusionPDB_level2-3_cleaned_structure_info.csv')
	    top_alignments = pd.read_csv("../../data/blast/blast_outputs/swissprot_top_alignments.csv")
	    fuson_db = pd.read_csv("../../data/fuson_db.csv")

	    # Sequences absent from fuson_db map to NaN and simply find no alignment
	    # partner in the left join below.
	    seq_to_id = dict(zip(fuson_db['aa_seq'], fuson_db['seq_id']))
	    structures['seq_id'] = structures['Fusion_Seq'].map(seq_to_id)
	    structures = structures.merge(top_alignments, on="seq_id", how="left")

	    if left_to_right:
	        draw = plot_fusion_sequence_pLDDT_left_to_right
	    else:
	        draw = plot_favorite_fusion_pLDDT_distribution

	    for fusion_name in favorite_fusions:
	        draw(structures, fusion_name, save_path=figure_dir)

	def _plddt_string_to_disorder_label(plddt_csv):
	    """Convert a comma-separated pLDDT string into a '0'/'1' string, one character per value."""
	    return ''.join(apply_plddt_thresh(float(score)) for score in plddt_csv.split(','))


	def prep_data_for_ht_disorder_comparison():
	    """
	    Build one table pairing each fusion structure with its head and tail structures.

	    Reads the head/tail structure table, the FusionPDB structure table and
	    ``fuson_ht_db.csv``. Fusions are matched to fuson_ht_db on their amino-acid
	    sequence; the comma-separated ``hgUniProt`` / ``tgUniProt`` entries are
	    expanded to one row per (head ID, tail ID) combination; and only rows whose
	    head AND tail IDs both occur in the head/tail structure table are kept.
	    Head and tail structure columns are then joined in with ``hg_`` / ``tg_``
	    prefixed names.

	    Returns:
	        pandas.DataFrame with the merged columns plus three string columns,
	        ``hg_label``, ``tg_label`` and ``fusion_label``, each holding one
	        '0'/'1' character per residue as produced by ``apply_plddt_thresh``.
	    """
	    ht_structure_data = pd.read_csv('processed_data/fusionpdb/heads_tails_structural_data.csv')
	    fusion_structure_data = pd.read_csv('processed_data/fusionpdb/FusionPDB_level2-3_cleaned_structure_info.csv')
	    fuson_ht_db = pd.read_csv('../../data/blast/fuson_ht_db.csv')[['seq_id','aa_seq','fusiongenes','hgUniProt','tgUniProt']]

	    all_hts_with_structures = ht_structure_data['UniProtID'].unique().tolist()

	    merge = pd.merge(
	        fuson_ht_db.rename(columns={'aa_seq':'Fusion_Seq'}),
	        fusion_structure_data[['FusionGID', 'Fusion_Seq','Fusion_pLDDT','Fusion_AA_pLDDTs']],
	        on='Fusion_Seq',
	        how='right'
	    )

	    # The right join keeps fusion structures that have no fuson_ht_db entry; their
	    # UniProt columns are NaN and would crash the string split below. They could
	    # never pass the "has a structure" filter anyway, so drop them here.
	    merge = merge.dropna(subset=['hgUniProt', 'tgUniProt']).reset_index(drop=True)

	    # Expand comma-separated UniProt IDs to one row per (head, tail) combination
	    merge['hgUniProt'] = merge['hgUniProt'].apply(lambda x: x.split(','))
	    merge['tgUniProt'] = merge['tgUniProt'].apply(lambda x: x.split(','))
	    merge = merge.explode('hgUniProt')
	    merge = merge.explode('tgUniProt')
	    merge = merge.loc[
	        merge['hgUniProt'].isin(all_hts_with_structures) &
	        merge['tgUniProt'].isin(all_hts_with_structures)
	    ].reset_index(drop=True)

	    # Attach head structure info, then tail structure info
	    merge = pd.merge(
	        merge,
	        ht_structure_data.rename(columns={
	            'UniProtID': 'hgUniProt',
	            'Avg pLDDT': 'hg_pLDDT',
	            'All pLDDTs': 'hg_AA_pLDDTs',
	            'Seq': 'hg_seq'}),
	        on='hgUniProt',
	        how='inner'
	    )
	    merge = pd.merge(
	        merge,
	        ht_structure_data.rename(columns={
	            'UniProtID': 'tgUniProt',
	            'Avg pLDDT': 'tg_pLDDT',
	            'All pLDDTs': 'tg_AA_pLDDTs',
	            'Seq': 'tg_seq'}),
	        on='tgUniProt',
	        how='inner'
	    )
	    merge = merge.loc[merge['hg_AA_pLDDTs'].notna()]
	    merge = merge.loc[merge['tg_AA_pLDDTs'].notna()].reset_index(drop=True)

	    # Finally, calculate the per-residue label strings
	    label_sources = (
	        ('hg_label', 'hg_AA_pLDDTs'),
	        ('tg_label', 'tg_AA_pLDDTs'),
	        ('fusion_label', 'Fusion_AA_pLDDTs'),
	    )
	    for label_col, plddt_col in label_sources:
	        merge[label_col] = merge[plddt_col].apply(_plddt_string_to_disorder_label)

	    return merge

	def apply_plddt_thresh(y, threshold=68.8):
	    """
	    Binarize a single pLDDT value against a cutoff.

	    Args:
	        y: numeric pLDDT value.
	        threshold: cutoff to compare against. Defaults to 68.8, the value that
	            was previously hard-coded here (its provenance is not documented in
	            this file).

	    Returns:
	        '1' when ``y`` is strictly below ``threshold``, otherwise '0'. A value
	        equal to the threshold gives '0', and so does NaN, because every
	        comparison with NaN is False.
	    """
	    return '1' if y < threshold else '0'

	def plot_fusion_stats_boxplots(data, save_path="fusion_disorder_boxplots.png"):
	    """
	    Draw one pink box plot per column of ``data`` and save the figure.

	    Args:
	        data: pandas DataFrame; each column becomes one box, labelled with the
	            column name. NaN entries are dropped per column before plotting.
	        save_path: file path the figure is written to at 300 dpi.
	    """
	    set_font()
	    plt.figure(figsize=(6, 5))

	    # For ones that are 100% disordered, AUROC was NaN, so drop these
	    box = plt.boxplot(
	        [data[col].dropna() for col in data.columns],
	        labels=data.columns,
	        patch_artist=True,
	    )

	    # Fill and outline every box in the same pink; keep the medians black
	    for patch in box['boxes']:
	        patch.set_facecolor('#ff68b4')
	        patch.set_edgecolor('#ff68b4')
	    for median in box['medians']:
	        median.set_color('black')

	    # n in the title is the number of rows, before the per-column NaN drop
	    plt.title(f"Per-Residue Disorder (n={len(data)})",fontsize=22)
	    plt.xticks(rotation=20,fontsize=22)
	    plt.yticks(fontsize=22)

	    plt.tight_layout()
	    # Save BEFORE show: with an interactive backend the figure is gone once the
	    # window opened by show() is closed, so saving afterwards writes a blank image.
	    plt.savefig(save_path,dpi=300)
	    plt.show()

	def plot_fusion_frac_disorder_r2(actual_values, predicted_values, save_path="fusion_pred_disorder_r2.png"):
	    """
	    Scatter predicted against actual values, annotate R^2, and save the figure.

	    Args:
	        actual_values: sequence of reference values, plotted on the x axis
	            (axis label 'AlphaFold-pLDDT').
	        predicted_values: sequence of predicted values, plotted on the y axis
	            (axis label 'FusOn-pLM-Diso'); must align element-wise with
	            ``actual_values``.
	        save_path: file path the figure is written to at 300 dpi.
	    """
	    set_font()
	    plt.figure(figsize=(6, 6))
	    r2 = r2_score(actual_values, predicted_values)

	    plt.scatter(actual_values, predicted_values, alpha=0.5, label="Predictions", color="#ff68b4")

	    # Dashed y = x reference line spanning the range of the actual values
	    lowest, highest = min(actual_values), max(actual_values)
	    plt.plot([lowest, highest], [lowest, highest], 'k--', label='Ideal Fit')

	    # Annotation is placed at fixed data coordinates (x=0, y=92)
	    plt.text(0, 92, f"$R^2$={r2:.2f}", fontsize=32)

	    plt.xlabel('AlphaFold-pLDDT',size=32)
	    plt.ylabel('FusOn-pLM-Diso',size=32)
	    plt.title(f"% Disordered (n={len(actual_values)})",size=32)
	    plt.xticks(fontsize=24)
	    plt.yticks(fontsize=24)
	    plt.legend(prop={'size': 16})
	    plt.tight_layout()

	    # Save BEFORE show: with an interactive backend the figure is gone once the
	    # window opened by show() is closed, so saving afterwards writes a blank image.
	    plt.savefig(save_path, dpi=300)
	    plt.show()

	def main():
	    """
	    Produce the benchmark figures in sequence.

	    Steps, in order: draw the AUROC curve from the results under
	    ``results/final``; build the fusion/head/tail comparison table; write one
	    disorder-content histogram each for the CAID2 test split, the head
	    proteins, the tail proteins and the fusion oncoproteins; and finally draw
	    the per-fusion pLDDT plots for four selected fusion genes.
	    """
	    set_font()
	    # Switch to "results/test" to plot a trial run instead
	    output_dir = "results/final"

	    test_df = pd.read_csv('splits/test_df.csv')
	    test_sequences = test_df['Sequence']
	    seq_ids_dict = dict(zip(test_sequences, test_df['IDs']))
	    seq_label_dict = dict(zip(test_sequences, test_df['Label']))

	    # NOTE(review): this table is loaded but not referenced again in main
	    best_caid_model_results = pd.read_csv(f"{output_dir}/best_caid_model_results.csv")

	    make_auroc_curve(
	        results_dir=output_dir,
	        seq_label_dict=seq_label_dict,
	        seq_ids_dict=seq_ids_dict,
	        path_to_results_of_interest="trained_models/fuson_plm/best/caid_hyperparam_screen_test_probs.csv",
	        model_alias="FusOn-pLM",
	        path_to_esm_results="trained_models/esm2_t33_650M_UR50D/best/caid_hyperparam_screen_test_probs.csv",
	        with_rankings=True,
	    )

	    # CAID2 test split
	    all_splits = pd.read_csv("splits/splits.csv")
	    caid2_test_data = all_splits.loc[all_splits['Split']=='Test']
	    caid2_test_labels = caid2_test_data['Label'].tolist()
	    caid2_test_ids = caid2_test_data['IDs'].tolist()

	    # Fusions, heads, and tails: keep one row per distinct sequence of each kind
	    fusion_ht_data = prep_data_for_ht_disorder_comparison()
	    os.makedirs("processed_data/figures",exist_ok=True)

	    def one_row_per(sequence_column):
	        return fusion_ht_data.drop_duplicates([sequence_column]).reset_index(drop=True)

	    head_data = one_row_per('hg_seq')
	    head_labels = head_data['hg_label'].tolist()
	    head_ids = head_data['hgUniProt'].tolist()

	    tail_data = one_row_per('tg_seq')
	    tail_labels = tail_data['tg_label'].tolist()
	    tail_ids = tail_data['tgUniProt'].tolist()

	    fusion_data = one_row_per('Fusion_Seq')
	    fusion_labels = fusion_data['fusion_label'].tolist()
	    fusion_ids = fusion_data['seq_id'].tolist()

	    plt.rc('text', usetex=False)
	    math_part = r"$n$"

	    os.makedirs("processed_data/figures/histograms",exist_ok=True)
	    # (labels, ids, group name for the title, bar color, output file)
	    histogram_specs = (
	        (caid2_test_labels, caid2_test_ids, "CAID2 Disorder-NOX", "black",
	         'processed_data/figures/histograms/disorder_nox_histogram.png'),
	        (head_labels, head_ids, "Head Proteins", "#df8385",
	         'processed_data/figures/histograms/heads_histogram.png'),
	        (tail_labels, tail_ids, "Tail Proteins", "#6ea4da",
	         'processed_data/figures/histograms/tails_histogram.png'),
	        (fusion_labels, fusion_ids, "Fusion Oncoproteins", "mediumpurple",
	         'processed_data/figures/histograms/fusions_histogram.png'),
	    )
	    for group_labels, group_ids, group_name, bar_color, out_file in histogram_specs:
	        plot_disorder_content_hist(
	            group_labels,
	            group_ids,
	            title=f"{group_name} ({math_part}={len(group_labels):,})",
	            color=bar_color,
	            savepath=out_file,
	        )

	    os.makedirs("processed_data/figures/fusion_disorder",exist_ok=True)
	    selected_fusions = ["EWSR1::FLI1", "PAX3::FOXO1", "EML4::ALK", "SS18::SSX1"]
	    make_all_favorite_fusion_pLDDT_plots(selected_fusions, left_to_right=True)

	if __name__ == "__main__":
	    main()