Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import pandas as pd | |
| import pdb | |
| import json | |
| from rdkit import Chem | |
| def count_substructures(radius,molecule): | |
| """Helper function for get the information of molecular signature of a | |
| metabolite. The relaxed signature requires the number of each substructure | |
| to construct a matrix for each molecule. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| molecule : Molecule | |
| a molecule object create by RDkit (e.g. Chem.MolFromInchi(inchi_code) | |
| or Chem.MolToSmiles(smiles_code)) | |
| Returns | |
| ------- | |
| dict | |
| dictionary of molecular signature for a molecule, | |
| {smiles: molecular_signature} | |
| """ | |
| m = molecule | |
| smi_count = dict() | |
| atomList = [atom for atom in m.GetAtoms()] | |
| for i in range(len(atomList)): | |
| env = Chem.FindAtomEnvironmentOfRadiusN(m,radius,i) | |
| atoms=set() | |
| for bidx in env: | |
| atoms.add(m.GetBondWithIdx(bidx).GetBeginAtomIdx()) | |
| atoms.add(m.GetBondWithIdx(bidx).GetEndAtomIdx()) | |
| # only one atom is in this environment, such as O in H2O | |
| if len(atoms) == 0: | |
| atoms = {i} | |
| smi = Chem.MolFragmentToSmiles(m,atomsToUse=list(atoms), | |
| bondsToUse=env,canonical=True) | |
| if smi in smi_count: | |
| smi_count[smi] = smi_count[smi] + 1 | |
| else: | |
| smi_count[smi] = 1 | |
| return smi_count | |
| def decompse_ac(db_smiles,radius=1): | |
| non_decomposable = [] | |
| decompose_vector = dict() | |
| for cid in db_smiles: | |
| # print cid | |
| smiles_pH7 = db_smiles[cid] | |
| try: | |
| mol = Chem.MolFromSmiles(smiles_pH7) | |
| mol = Chem.RemoveHs(mol) | |
| # Chem.RemoveStereochemistry(mol) | |
| smi_count = count_substructures(radius,mol) | |
| decompose_vector[cid] = smi_count | |
| except Exception as e: | |
| non_decomposable.append(cid) | |
| with open('./data/decompose_vector_ac.json','w') as fp: | |
| json.dump(decompose_vector,fp) | |
| def get_rxn_rule(): | |
| """calculate reaction rules based on the relaxed molecular signatures. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| Returns | |
| ------- | |
| None | |
| All of the reaction rules are saved in files (csv file) | |
| """ | |
| reaction_dict = json.load(open('./data/optstoic_v3_Sji_dict.json')) | |
| molecular_signature = json.load(open('./data/decompose_vector_ac.json')) | |
| molsigna_df = pd.DataFrame.from_dict(molecular_signature).fillna(0) | |
| all_mets = molsigna_df.columns.tolist() | |
| all_mets.append("C00080") | |
| all_mets.append("C00282") | |
| rule_df = pd.DataFrame(index=molsigna_df.index) | |
| for rid, value in list(reaction_dict.items()): | |
| # skip the reactions with missing metabolites | |
| mets = list(value.keys()) | |
| flag = False | |
| for met in mets: | |
| if met not in all_mets: | |
| flag = True | |
| break | |
| if flag: continue | |
| rule_df[rid] = 0 | |
| for met, stoic in list(value.items()): | |
| if met == "C00080" or met == "C00282": | |
| continue # hydogen is zero | |
| rule_df[rid] += molsigna_df[met] * stoic | |
| rule_df.to_csv("./data/reaction_rule.csv", index=True) | |
| def get_rxn_rule_no_stero(): | |
| """calculate reaction rules based on the relaxed molecular signatures. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| Returns | |
| ------- | |
| None | |
| All of the reaction rules are saved in files (csv file) | |
| """ | |
| reaction_dict = json.load(open('./data/optstoic_v3_Sji_dict.json')) | |
| molecular_signature = json.load(open('./data/decompose_vector_ac_nostereo.json')) | |
| molsigna_df = pd.DataFrame.from_dict(molecular_signature).fillna(0) | |
| all_mets = molsigna_df.columns.tolist() | |
| all_mets.append("C00080") | |
| all_mets.append("C00282") | |
| rule_df = pd.DataFrame(index=molsigna_df.index) | |
| for rid, value in list(reaction_dict.items()): | |
| # skip the reactions with missing metabolites | |
| mets = list(value.keys()) | |
| flag = False | |
| for met in mets: | |
| if met not in all_mets: | |
| flag = True | |
| break | |
| if flag: continue | |
| rule_df[rid] = 0 | |
| for met, stoic in list(value.items()): | |
| if met == "C00080" or met == "C00282": | |
| continue # hydogen is zero | |
| rule_df[rid] += molsigna_df[met] * stoic | |
| rule_df.to_csv("./data/reaction_rule_no_stero.csv", index=True) | |
| def get_rxn_rule_remove_TECRDB_mets(): | |
| """calculate reaction rules based on the relaxed molecular signatures. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| Returns | |
| ------- | |
| None | |
| All of the reaction rules are saved in files (csv file) | |
| """ | |
| reaction_dict = json.load(open('./data/optstoic_v3_Sji_dict.json')) | |
| molecular_signature = json.load(open('./data/decompose_vector_ac.json')) | |
| molsigna_df = pd.DataFrame.from_dict(molecular_signature).fillna(0) | |
| all_mets = molsigna_df.columns.tolist() | |
| all_mets.append("C00080") | |
| all_mets.append("C00282") | |
| mets_TECRDB_df = pd.read_csv('./data/TECRBD_mets.txt',header=None) | |
| mets_TECRDB = mets_TECRDB_df[0].tolist() | |
| # pdb.set_trace() | |
| all_mets = list(set(all_mets + mets_TECRDB)) | |
| rule_df = pd.DataFrame(index=molsigna_df.index) | |
| for rid, value in list(reaction_dict.items()): | |
| # skip the reactions with missing metabolites | |
| mets = list(value.keys()) | |
| flag = False | |
| for met in mets: | |
| if met not in all_mets: | |
| flag = True | |
| break | |
| if flag: continue | |
| rule_df[rid] = 0 | |
| for met, stoic in list(value.items()): | |
| if met in mets_TECRDB: | |
| continue # hydogen is zero | |
| rule_df[rid] += molsigna_df[met] * stoic | |
| rule_df.to_csv("./data/reaction_rule_remove_TECRDB_mets.csv", index=True) | |
| def get_rxn_rule_no_stero_remove_TECRDB_mets(): | |
| """calculate reaction rules based on the relaxed molecular signatures. | |
| Parameters | |
| ---------- | |
| radius : int | |
| the radius is bond-distance that defines how many neighbor atoms should | |
| be considered in a reaction center. | |
| Returns | |
| ------- | |
| None | |
| All of the reaction rules are saved in files (csv file) | |
| """ | |
| reaction_dict = json.load(open('./data/optstoic_v3_Sji_dict.json')) | |
| molecular_signature = json.load(open('./data/decompose_vector_ac_nostereo.json')) | |
| molsigna_df = pd.DataFrame.from_dict(molecular_signature).fillna(0) | |
| all_mets = molsigna_df.columns.tolist() | |
| all_mets.append("C00080") | |
| all_mets.append("C00282") | |
| mets_TECRDB_df = pd.read_csv('./data/TECRBD_mets.txt',header=None) | |
| mets_TECRDB = mets_TECRDB_df[0].tolist() | |
| # pdb.set_trace() | |
| all_mets = list(set(all_mets + mets_TECRDB)) | |
| rule_df = pd.DataFrame(index=molsigna_df.index) | |
| for rid, value in list(reaction_dict.items()): | |
| # skip the reactions with missing metabolites | |
| mets = list(value.keys()) | |
| flag = False | |
| for met in mets: | |
| if met not in all_mets: | |
| flag = True | |
| break | |
| if flag: continue | |
| rule_df[rid] = 0 | |
| for met, stoic in list(value.items()): | |
| if met in mets_TECRDB: | |
| continue # hydogen is zero | |
| rule_df[rid] += molsigna_df[met] * stoic | |
| rule_df.to_csv("./data/reaction_rule_nostereo_remove_TECRDB_mets.csv", index=True) | |
| if __name__ == '__main__': | |
| # db = pd.read_csv('./data/cache_compounds_20160818.csv',index_col='compound_id') | |
| # db_smiles = db['smiles_pH7'].to_dict() | |
| # decompse_ac(db_smiles) | |
| # get_rxn_rule() | |
| # get_rxn_rule_remove_TECRDB_mets() | |
| get_rxn_rule_no_stero_remove_TECRDB_mets() |