# jtvae-demo / metrics.py
# Author: Trương Gia Bảo
# Commit 1cf9b3e: "Update version 1.01" (13.4 kB)
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import RDConfig
import os
import sys
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
sys.path.append(os.path.join(RDConfig.RDContribDir, 'NP_Score'))
import sascorer
import npscorer
import pandas as pd
import numpy as np
from scopy.ScoFH import fh_filter
class InvalidSmilesError(ValueError, AttributeError):
    """Raised by ``Metrics`` when a SMILES string cannot be parsed.

    It is a ``ValueError`` because the input value is bad. It is also an
    ``AttributeError`` because earlier versions of ``Metrics`` failed with an
    ``AttributeError`` (missing ``mol`` attribute) for the same input, so
    callers that already catch that exception keep working.
    """


class Metrics():
    """Drug-likeness descriptors, rules, scores and filters for one molecule.

    The molecule is given as a SMILES string. Basic descriptors (logP,
    molecular weight, TPSA, H-bond acceptors/donors) are computed once in the
    constructor; everything else is computed on demand by the methods.
    """

    # A local copy of the WEHI PAINS table is preferred; otherwise the table
    # is read from the RDKit repository.
    _PAINS_LOCAL_PATH = './wehi_pains.csv'
    _PAINS_REMOTE_URL = 'https://raw.githubusercontent.com/rdkit/rdkit/master/Data/Pains/wehi_pains.csv'

    # Shared across instances: both resources are independent of the molecule
    # and expensive to load, so they are loaded at most once per process.
    _pains_filters_cache = None
    _np_model_cache = None

    @classmethod
    def _load_pains_filters(cls):
        """Return the PAINS SMARTS patterns, reading the CSV on first use only."""
        if Metrics._pains_filters_cache is None:
            if os.path.isfile(cls._PAINS_LOCAL_PATH):
                source = cls._PAINS_LOCAL_PATH
            else:
                source = cls._PAINS_REMOTE_URL
            table = pd.read_csv(source, names=['smarts', 'names'])
            Metrics._pains_filters_cache = [
                Chem.MolFromSmarts(smarts) for smarts in table['smarts'].values
            ]
        return Metrics._pains_filters_cache

    @classmethod
    def _load_np_model(cls):
        """Return the natural-product score model, reading it on first use only."""
        if Metrics._np_model_cache is None:
            Metrics._np_model_cache = npscorer.readNPModel()
        return Metrics._np_model_cache

    def __init__(self, smiles):
        """
        Parse the SMILES and compute the basic descriptors.

        Parameters
        ----------
        smiles : str
            SMILES for a molecule.

        Raises
        ------
        InvalidSmilesError
            If RDKit cannot parse ``smiles`` (``Chem.MolFromSmiles`` returns
            ``None``).
        """
        # Load filters and scores (cached at class level after the first call).
        # Each instance gets its own list object, as before.
        self._pains_filters = list(self._load_pains_filters())
        self.fscore = self._load_np_model()

        # Descriptors
        self.smiles = smiles
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail here with a clear message instead of continuing without
            # ``self.mol`` and crashing on the first descriptor below.
            raise InvalidSmilesError(f'SMILES is not valid: {smiles!r}')
        self.mol = mol
        # The substructure filters are run on the molecule with explicit Hs.
        self.h_mol = Chem.AddHs(mol)

        self.logp = Descriptors.MolLogP(mol)
        self.mw = Descriptors.ExactMolWt(mol)
        self.tpsa = Descriptors.TPSA(mol)
        self.n_hba = Descriptors.NumHAcceptors(mol)
        self.n_hbd = Descriptors.NumHDonors(mol)

    def ro5(self):
        """
        Count how many rule-of-five style conditions the molecule fulfills.

        The conditions checked are: MW <= 500, HBA <= 10, HBD <= 5,
        logP <= 5 and TPSA <= 140.

        Returns
        -------
        int
            Number of conditions fulfilled (0 to 5).
        """
        conditions = (
            self.mw <= 500,
            self.n_hba <= 10,
            self.n_hbd <= 5,
            self.logp <= 5,
            self.tpsa <= 140,
        )
        return sum(conditions)

    def pfizer_rule_passed(self):
        """
        Test if the molecule passes the Pfizer rule.

        The molecule fails only when logP > 3 and TPSA < 75 both hold.

        Returns
        -------
        bool
            True if passed, False if both conditions hold.
        """
        flagged = self.logp > 3 and self.tpsa < 75
        return not flagged

    def gsk_rule_passed(self):
        """
        Test if the molecule fulfills the GSK rule (MW <= 400 and logP <= 4).

        Returns
        -------
        bool
            True if both conditions are fulfilled.
        """
        return bool(self.mw <= 400 and self.logp <= 4)

    def goldentriangle_rule_passed(self):
        """
        Test if the molecule fulfills the GoldenTriangle rule
        (200 <= MW <= 450 and -2 <= logP <= 5).

        Returns
        -------
        bool
            True if both conditions are fulfilled.
        """
        return bool(200 <= self.mw <= 450 and -2 <= self.logp <= 5)

    def qed(self):
        """
        Calculate QED.

        Returns
        -------
        float
            QED for the molecule.
        """
        # ``rdkit.Chem.QED`` is a submodule that ``from rdkit import Chem``
        # does not load by itself, so import it explicitly instead of
        # relying on some other module having imported it already.
        from rdkit.Chem import QED
        return QED.qed(self.mol)

    def qed_passed(self):
        """
        Test if the molecule is 'attractive' (QED > 0.67).

        Returns
        -------
        bool
            True if attractive.
        """
        return self.qed() > 0.67

    def sascore(self):
        """
        Calculate the synthetic accessibility score.

        Returns
        -------
        float
            SAscore for the molecule.
        """
        return sascorer.calculateScore(self.mol)

    def sascore_passed(self):
        """
        Test if the molecule is easy to synthesize (SAscore <= 6).

        Returns
        -------
        bool
            True if the condition is fulfilled.
        """
        return self.sascore() <= 6

    def fsp3(self):
        """
        Calculate Fsp3 (fraction of sp3 carbons).

        Returns
        -------
        float
            Fsp3 for the molecule.
        """
        return rdMolDescriptors.CalcFractionCSP3(self.mol)

    def fsp3_passed(self):
        """
        Test if the molecule has a suitable Fsp3 value (Fsp3 >= 0.42).

        Returns
        -------
        bool
            True if the condition is fulfilled.
        """
        return self.fsp3() >= 0.42

    def _run_filter(self, check, detail):
        """Run one scopy ``fh_filter`` check on the H-added molecule.

        The check is always run with ``detail = True``; ``detail`` only
        selects whether the whole report or just its ``'Disposed'`` entry is
        returned.
        """
        report = check(self.h_mol, detail = True)
        if detail:
            return report
        return report['Disposed']

    def pains_filter(self, detail=False):
        """
        PAINS filter for the molecule.

        Parameters
        ----------
        detail : bool
            If True return the full report from ``fh_filter.Check_PAINS``,
            otherwise only its ``'Disposed'`` entry.

        Returns
        -------
        object
            The report, or its ``'Disposed'`` entry.
            NOTE(review): earlier revisions compared ``'Disposed'`` against
            ``'Accepted'`` and read ``'MatchedAtoms'`` / ``'MatchedNames'``
            from the report - presumably the scopy schema; confirm.
        """
        return self._run_filter(fh_filter.Check_PAINS, detail)

    def mce18(self):
        """
        Calculate MCE-18.

        Computed as
        ``(AR + NAR + CHIRAL + SPIRO + (SP3 + CYC - ACYC) / (1 + SP3)) * Q1``
        where AR, NAR, CHIRAL and SPIRO are presence flags (aromatic ring,
        aliphatic ring, chiral center, spiro atom), SP3 is Fsp3, CYC / ACYC
        are the fractions of all carbons that are sp3 and in / not in a ring,
        and ``Q1 = 3 - 2*N + M/2`` with N the atom count and M the sum of
        squared atom degrees.

        Returns
        -------
        float
            MCE-18 for the molecule.
        """
        mol = self.mol

        # Presence flags (each contributes 0 or 1 to the sum below).
        has_aromatic_ring = rdMolDescriptors.CalcNumAromaticRings(mol) > 0
        has_aliphatic_ring = rdMolDescriptors.CalcNumAliphaticRings(mol) > 0
        chiral_centers = Chem.FindMolChiralCenters(mol, force = True, includeUnassigned = True)
        has_chiral_center = len(chiral_centers) > 0
        has_spiro_atom = rdMolDescriptors.CalcNumSpiroAtoms(mol) > 0
        sp3_fraction = self.fsp3()

        # Count carbons, and sp3 carbons split by ring membership.
        sp3_type = Chem.HybridizationType.SP3
        n_carbon = 0
        n_sp3_ring = 0
        n_sp3_chain = 0
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() != 6:
                continue
            n_carbon += 1
            if atom.GetHybridization() == sp3_type:
                if atom.IsInRing():
                    n_sp3_ring += 1
                else:
                    n_sp3_chain += 1

        # Both fractions stay 0 for a molecule without carbon.
        cyc = 0
        acyc = 0
        if n_carbon > 0:
            cyc = n_sp3_ring / n_carbon
            acyc = n_sp3_chain / n_carbon

        # Q1 from the atom count and the sum of squared atom degrees.
        degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
        degree_square_sum = sum(np.array(degrees) ** 2)
        n_atoms = mol.GetNumAtoms()
        q1 = 3 - 2 * n_atoms + degree_square_sum / 2.0

        flags = has_aromatic_ring + has_aliphatic_ring + has_chiral_center + has_spiro_atom
        return (flags + (sp3_fraction + cyc - acyc) / (1 + sp3_fraction)) * q1

    def mce18_passed(self):
        """
        Test if the molecule is 'interesting' (MCE-18 >= 45).

        Returns
        -------
        bool
            True if the condition is fulfilled.
        """
        return self.mce18() >= 45

    def npscore(self):
        """
        Calculate the natural-product likeness score of the molecule.

        Returns
        -------
        float
            NPscore for the molecule.
        """
        return npscorer.scoreMol(self.mol, self.fscore)

    def alarm_nmr_filter(self, detail=False):
        """
        ALARM NMR filter for the molecule.

        Parameters
        ----------
        detail : bool
            If True return the full report from ``fh_filter.Check_Alarm_NMR``,
            otherwise only its ``'Disposed'`` entry.

        Returns
        -------
        object
            The report, or its ``'Disposed'`` entry (presumably
            ``'Accepted'`` / ``'Rejected'`` - confirm against scopy).
        """
        return self._run_filter(fh_filter.Check_Alarm_NMR, detail)

    def bms_filter(self, detail=False):
        """
        BMS filter for the molecule.

        Parameters
        ----------
        detail : bool
            If True return the full report from ``fh_filter.Check_BMS``,
            otherwise only its ``'Disposed'`` entry.

        Returns
        -------
        object
            The report, or its ``'Disposed'`` entry (presumably
            ``'Accepted'`` / ``'Rejected'`` - confirm against scopy).
        """
        return self._run_filter(fh_filter.Check_BMS, detail)

    def chelator_filter(self, detail=False):
        """
        Chelator filter for the molecule.

        Parameters
        ----------
        detail : bool
            If True return the full report from ``fh_filter.Check_Chelating``,
            otherwise only its ``'Disposed'`` entry.

        Returns
        -------
        object
            The report, or its ``'Disposed'`` entry (presumably
            ``'Accepted'`` / ``'Rejected'`` - confirm against scopy).
        """
        return self._run_filter(fh_filter.Check_Chelating, detail)

    def calculate_all(self, descriptors = True,rules=True,scores = True,scores_passed = True,filters = True,detail = True):
        """
        Calculate every selected group of metrics for the molecule.

        Parameters
        ----------
        descriptors : bool
            Include logp, mw, tpsa, n_hba and n_hbd. Default is True.
        rules : bool
            Include ro5 and the Pfizer / GSK / GoldenTriangle rules.
            Default is True.
        scores : bool
            Include qed, sascore, fsp3, mce18 and npscore. Default is True.
        scores_passed : bool
            Include the pass/fail verdicts of the scores. Default is True.
        filters : bool
            Include the PAINS, ALARM NMR, BMS and chelator filters.
            Default is True.
        detail : bool
            Passed on to every filter method. Default is True.

        Returns
        -------
        dict
            Metric name -> value, in the group order listed above.
        """
        result = {}

        if descriptors:
            result['logp'] = self.logp
            result['mw'] = self.mw
            result['tpsa'] = self.tpsa
            result['n_hba'] = self.n_hba
            result['n_hbd'] = self.n_hbd

        # Each group is (flag, ((result key, method), ...)); a method is only
        # looked up and called when its group is enabled.
        if rules:
            for key, method in (
                ('ro5', self.ro5),
                ('pfizer_rule_passed', self.pfizer_rule_passed),
                ('gsk_rule_passed', self.gsk_rule_passed),
                ('goldentriangle_rule', self.goldentriangle_rule_passed),
            ):
                result[key] = method()

        if scores:
            for key, method in (
                ('qed', self.qed),
                ('sascore', self.sascore),
                ('fsp3', self.fsp3),
                ('mce18', self.mce18),
                ('npscore', self.npscore),
            ):
                result[key] = method()

        if scores_passed:
            for key, method in (
                ('qed_passed', self.qed_passed),
                ('sascore_passed', self.sascore_passed),
                ('fsp3_passed', self.fsp3_passed),
                ('mce18_passed', self.mce18_passed),
            ):
                result[key] = method()

        if filters:
            for key, method in (
                ('pains_filter', self.pains_filter),
                ('alarm_nmr_filter', self.alarm_nmr_filter),
                ('bms_filter', self.bms_filter),
                ('chelator_filter', self.chelator_filter),
            ):
                result[key] = method(detail=detail)

        return result