ChatterjeeLab
/

FusOn-pLM

Inference Endpoints

Model card Files Files and versions Community

FusOn-pLM / fuson_plm /benchmarking /idr_prediction /config.py

svincoff's picture

fixed READMEs and added IDR Prediction benchmark

e048d40 about 2 months ago

history blame contribute delete

2.52 kB

	from fuson_plm.utils.logging import CustomParams

	# Clustering configuration (MMSeqs2).
	# NOTE(review): the original note here read "Need to be stacked, because there
	# are 4 properties" -- presumably the four property datasets are combined into
	# the single INPUT_PATH csv before clustering; confirm against the data-prep script.
	CLUSTER = CustomParams(
	    # --- MMSeqs2 parameters: see the MMSeqs2 GitHub / Wiki for guidance ---
	    MIN_SEQ_ID=0.3,  # minimum sequence identity, given as a fraction (0.3 = 30%)
	    C=0.5,  # sequence length overlap (coverage), given as a fraction (0.5 = 50%)
	    # cov-mode: 0 = bidirectional, 1 = target coverage, 2 = query coverage,
	    # 3 = target-in-query length coverage.
	    COV_MODE=1,
	    # cluster-mode; NOTE(review): undocumented in the original -- look up the
	    # meaning of mode 2 in the MMSeqs2 docs before changing it.
	    CLUSTER_MODE=2,
	    # --- File paths ---
	    INPUT_PATH='processed_data/all_albatross_seqs_and_properties.csv',
	    PATH_TO_MMSEQS='../../mmseqs',  # path to where you installed MMSeqs2
	)

	# Cluster-based data splitting configuration.
	# NOTE(review): the previous header said only the train set is split into
	# train and val and that test is left untouched, but the active parameters
	# below describe a two-stage split (all data -> train/other, then
	# other -> val/test). Confirm which behavior the split script implements.
	SPLIT = CustomParams(
	    IDR_DB_PATH='processed_data/all_albatross_seqs_and_properties.csv',
	    CLUSTER_OUTPUT_PATH='clustering/mmseqs_full_results.csv',
	    # Legacy single-split settings, currently disabled:
	    #   RANDOM_STATE = 7, VAL_SIZE = 0.10 (fraction of clusters placed in val)
	    # --- Stage 1: all data -> train / other ---
	    RANDOM_STATE_1=2,  # random state for splitting all data into train & other
	    # Fraction of clusters sent to "other",
	    # e.g. 0.20 means 80% clusters in train, 20% clusters in other.
	    TEST_SIZE_1=0.21,
	    # --- Stage 2: other -> val / test ---
	    RANDOM_STATE_2=6,  # random state for splitting "other" into val and test
	    # Fraction of "other" clusters sent to test,
	    # e.g. 0.50 means 50% clusters in val, 50% clusters in test.
	    TEST_SIZE_2=0.50,
	)

	# Benchmark run configuration: model selection, GPU selection, overwrite guards.
	TRAIN = CustomParams(
	    # --- Which models to benchmark ---
	    BENCHMARK_FUSONPLM=True,
	    # Either a dictionary (key = run name, values = epochs) or the string "FusOn-pLM".
	    FUSONPLM_CKPTS="FusOn-pLM",
	    BENCHMARK_ESM=True,
	    # --- GPU configs ---
	    CUDA_VISIBLE_DEVICES="0",
	    # --- Overwriting configs ---
	    # If False, script will halt if it believes these embeddings have already been made.
	    PERMISSION_TO_OVERWRITE_EMBEDDINGS=False,
	    # If False, script will presumably halt if it believes these models have
	    # already been made (the original comment repeated the "embeddings" wording,
	    # which looks like a copy-paste slip -- confirm).
	    PERMISSION_TO_OVERWRITE_MODELS=False,
	)