Spaces:

buchijw
/

jtvae-demo

Sleeping

App Files Files

Trương Gia Bảo committed on Jun 17, 2023

Commit

1cf9b3e

1 Parent(s): 97ff519

Update version 1.01

Browse files

Files changed (4) hide show

README.md +13 -3
app.py +185 -8
metrics.py +423 -0
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -1,12 +1,22 @@
 ---
-title: Jtvae Demo
 emoji: 💻
 colorFrom: red
 colorTo: purple
 sdk: streamlit
-sdk_version: 1.21.0
 app_file: app.py
-pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Molecular Optimization with JTVAE
 emoji: 💻
 colorFrom: red
 colorTo: purple
 sdk: streamlit
+sdk_version: 1.22.0
 app_file: app.py
+pinned: true
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Changelog
+## 1.01
+* Add `Compare` feature in `Optimize a molecule` page.
+* Add `What's new?` in sidebar.
+* Add `Changelog` in README.
+## 1.00
+* Initial release.

app.py CHANGED Viewed

@@ -11,12 +11,18 @@ import rdkit
 import rdkit.Chem as Chem
 from rdkit.Chem.Draw import MolToImage
 from rdkit.Chem import Descriptors
 import sascorer
 import networkx as nx
 from stqdm import stqdm
 import base64, io
 import pandas as pd
 import streamlit_ext as ste
 os.environ['KMP_DUPLICATE_LIB_OK']='True'
@@ -25,6 +31,16 @@ from mol_tree import Vocab, MolTree
 from jtprop_vae import JTPropVAE
 from molbloom import buy
 css='''
 [data-testid="metric-container"] {
     width: fit-content;
@@ -141,14 +157,14 @@ def df_to_file(df):
     df.to_csv(s_buff)
     return s_buff.getvalue().encode()
-def download_df(df,id):
-    with st.expander(':arrow_down: Download this dataframe'):
-        st.markdown("<h4 style='color:tomato;'>Select column(s) to save:</h4>",unsafe_allow_html=True)
-        for col in df.columns:
-            st.checkbox(col,key=str(id)+'_col_'+str(col))
-        st.text_input('File name (.csv):','dataframe',key=str(id)+'_file_name')
-        ste.download_button('Download',df_to_file(df[[col for col in df.columns if st.session_state[str(id)+'_col_'+str(col)]]]),st.session_state[str(id)+'_file_name']+'.csv')
 lg = rdkit.RDLogger.logger()
 lg.setLevel(rdkit.RDLogger.CRITICAL)
@@ -195,6 +211,39 @@ def load_model():
         model.load_state_dict(torch.load(model_path,map_location=torch.device('cpu')))
     return model
 from streamlit_lottie import st_lottie
 import requests
@@ -228,6 +277,9 @@ def oam_sidebar(step):
     st.markdown("<h4 style='color: "+color_ls[2]+"'>Optimizing a molecule</h4>",unsafe_allow_html=True)
     st.markdown('|')
     st.markdown("<h4 style='color: "+color_ls[3]+"'>Finished</h4>",unsafe_allow_html=True)
 def oab_sidebar(step):
     st.title('**Optimize a batch**')
@@ -255,7 +307,15 @@ def oab_sidebar(step):
     st.markdown("<h4 style='color: "+color_ls[4]+"'>Optimizing a batch</h4>",unsafe_allow_html=True)
     st.markdown('|')
     st.markdown("<h4 style='color: "+color_ls[5]+"'>Finished</h4>",unsafe_allow_html=True)
 # @st.cache_data(experimental_allow_widgets=True)
 # if 'sidebar_con' not in st.session_state:
@@ -319,6 +379,12 @@ We seek to automate the design of molecules based on specific chemical propertie
     img_caption = '''
 Figure 3. Overview of our method: A molecular graph G is first decomposed into its junction tree TG, where each colored node in the tree represents a substructure in the molecule. We then encode both the tree and graph into their latent embeddings zT and zG. To decode the molecule, we first reconstruct junction tree from zT , and then assemble nodes in the tree back to the original molecule.'''
     with st.expander(':four_leaf_clover: About the author',expanded=True):
         st.markdown('')
         st.markdown("<h3 style='text-align:center;'>Gia-Bao Truong</h3>",unsafe_allow_html=True)
@@ -398,6 +464,8 @@ def Optimize_a_molecule():
         st.session_state.single_optimized = False
     if 'smiles_checked' not in st.session_state:
         st.session_state.smiles_checked = False
     # with oab_sel_container.container():
     ls_opt = ['-','Sorafenib','Pazopanib','Sunitinib']
     sample_mode = {
@@ -502,6 +570,7 @@ def Optimize_a_molecule():
         else: check_single_con.empty()
     optim_single_con = st.empty()
     if st.session_state.smiles_checked:
         if optim_single_butt:
             # sidebar_con.empty()
@@ -560,6 +629,113 @@ def Optimize_a_molecule():
                                 # st.write('THIS MOLECULE DOES NOT EXIST!')
                                 st.markdown("<h3 style='text-align: center; color: mediumseagreen;'>THIS MOLECULE DOES NOT EXIST!</h3>",unsafe_allow_html=True)
                         st.markdown("<p style='text-align: center; color: grey;'>Checked using molbloom</p>",unsafe_allow_html=True)
             with sidebar_con.container():
                     set_step(3)
                     oam_sidebar(3)
@@ -858,6 +1034,7 @@ def reset_oam_state():
     st.session_state.checked_single = 'NO'
     st.session_state.smiles_checked = False
     st.session_state.single_optimized = False
     set_step(0)
 def reset_oab_state():

 import rdkit.Chem as Chem
 from rdkit.Chem.Draw import MolToImage
 from rdkit.Chem import Descriptors
+from rdkit.Chem import RDConfig
+from rdkit.Chem.Draw import rdMolDraw2D
+import os
+import sys
+sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
 import sascorer
 import networkx as nx
 from stqdm import stqdm
 import base64, io
 import pandas as pd
 import streamlit_ext as ste
+from metrics import Metrics
 os.environ['KMP_DUPLICATE_LIB_OK']='True'
 from jtprop_vae import JTPropVAE
 from molbloom import buy
+# Markdown release notes for the current version; each sidebar builder
+# renders this string inside its "What's new?" expander.
+what_new = '''
+### Version 1.01
+* Add `Compare` feature in `Optimize a molecule` page.
+* Add `What's new?` in sidebar.
+* Add `Changelog` in README.
+'''
 css='''
 [data-testid="metric-container"] {
     width: fit-content;
     df.to_csv(s_buff)
     return s_buff.getvalue().encode()
+# def download_df(df,id):
+#     with st.expander(':arrow_down: Download this dataframe'):
+#         st.markdown("<h4 style='color:tomato;'>Select column(s) to save:</h4>",unsafe_allow_html=True)
+#         for col in df.columns:
+#             st.checkbox(col,key=str(id)+'_col_'+str(col))
+#         st.text_input('File name (.csv):','dataframe',key=str(id)+'_file_name')
+#         ste.download_button('Download',df_to_file(df[[col for col in df.columns if st.session_state[str(id)+'_col_'+str(col)]]]),st.session_state[str(id)+'_file_name']+'.csv')
 lg = rdkit.RDLogger.logger()
 lg.setLevel(rdkit.RDLogger.CRITICAL)
         model.load_state_dict(torch.load(model_path,map_location=torch.device('cpu')))
     return model
+# Label tables for the "Compare" view.  In every table the key is an entry of
+# the dict returned by Metrics.calculate_all() and the value is the row label
+# shown to the user.  Iteration order is the display order.
+#
+# Basic descriptors; the Compare view prints 'n_hba'/'n_hbd' as integers and
+# the other entries with two decimals.
+descrip_dict ={
+    'logp':'LogP',
+    'mw':'MW',
+    'tpsa':'TPSA',
+    'n_hba':'nHA',
+    'n_hbd':'nHD'
+}
+# Rule checks.  'ro5' is shown as a count; the other entries are shown as
+# Passed/Failed.  NOTE: 'goldentriangle_rule' has no '_passed' suffix, unlike
+# its neighbours -- it must stay in sync with the key used in
+# Metrics.calculate_all().
+rule_dict = {
+    'ro5':'RO5',
+    'pfizer_rule_passed':'PFIZER Rule',
+    'gsk_rule_passed':'GSK Rule',
+    'goldentriangle_rule':'GOLDENTRIANGLE Rule'
+}
+# Numeric scores.  For every entry except 'npscore' the Compare view also
+# looks up '<key>_passed' to show a Good/Bad verdict next to the value.
+score_dict ={
+    'qed':'QED',
+    'sascore' : 'SA score',
+    'fsp3' : 'Fsp3',
+    'mce18' : 'MCE-18',
+    'npscore' : 'NP score'
+}
+# Labels for the pass/fail verdict of each score.  Only the commented-out
+# loop in the Compare view iterates this table; the live code builds the
+# '<score>_passed' key directly.
+score_pass_dict = {
+    'qed_passed' : 'QED Passed',
+    'sascore_passed' : 'SA Passed',
+    'fsp3_passed' : 'Fsp3 Passed',
+    'mce18_passed' : 'MCE-18 Passed'
+}
+# Structural-alert filters.  Each result is rendered as an expander whose
+# title carries the label and the filter's 'Disposed' verdict.
+filter_dict = {
+    'pains_filter' : 'PAINS Filter',
+    'alarm_nmr_filter' : 'ALARM NMR Filter',
+    'bms_filter' : 'BMS Filter',
+    'chelator_filter' : 'Chelator Filter'
+}
 from streamlit_lottie import st_lottie
 import requests
     st.markdown("<h4 style='color: "+color_ls[2]+"'>Optimizing a molecule</h4>",unsafe_allow_html=True)
     st.markdown('|')
     st.markdown("<h4 style='color: "+color_ls[3]+"'>Finished</h4>",unsafe_allow_html=True)
+    st.markdown("""---""")
+    with st.expander("# **:green[What's new?]**"):
+        st.markdown(what_new)
 def oab_sidebar(step):
     st.title('**Optimize a batch**')
     st.markdown("<h4 style='color: "+color_ls[4]+"'>Optimizing a batch</h4>",unsafe_allow_html=True)
     st.markdown('|')
     st.markdown("<h4 style='color: "+color_ls[5]+"'>Finished</h4>",unsafe_allow_html=True)
+    st.markdown("""---""")
+    with st.expander("# **:green[What's new?]**"):
+        st.markdown(what_new)
+def ab_sidebar():
+    """Draw the About page sidebar: title, divider and the release notes."""
+    st.title('**About**')
+    st.markdown('---')
+    # Release notes live in a collapsible box so the sidebar stays compact.
+    whats_new_box = st.expander("# **:green[What's new?]**")
+    with whats_new_box:
+        st.markdown(what_new)
 # @st.cache_data(experimental_allow_widgets=True)
 # if 'sidebar_con' not in st.session_state:
     img_caption = '''
 Figure 3. Overview of our method: A molecular graph G is first decomposed into its junction tree TG, where each colored node in the tree represents a substructure in the molecule. We then encode both the tree and graph into their latent embeddings zT and zG. To decode the molecule, we first reconstruct junction tree from zT , and then assemble nodes in the tree back to the original molecule.'''
+    with st.sidebar:
+        sidebar_con = st.empty()
+    # sidebar_con.empty()
+    with sidebar_con.container():
+            ab_sidebar()
     with st.expander(':four_leaf_clover: About the author',expanded=True):
         st.markdown('')
         st.markdown("<h3 style='text-align:center;'>Gia-Bao Truong</h3>",unsafe_allow_html=True)
         st.session_state.single_optimized = False
     if 'smiles_checked' not in st.session_state:
         st.session_state.smiles_checked = False
+    if 'compared' not in st.session_state:
+        st.session_state.compared = False
     # with oab_sel_container.container():
     ls_opt = ['-','Sorafenib','Pazopanib','Sunitinib']
     sample_mode = {
         else: check_single_con.empty()
     optim_single_con = st.empty()
+    compare_single_con = st.empty()
     if st.session_state.smiles_checked:
         if optim_single_butt:
             # sidebar_con.empty()
                                 # st.write('THIS MOLECULE DOES NOT EXIST!')
                                 st.markdown("<h3 style='text-align: center; color: mediumseagreen;'>THIS MOLECULE DOES NOT EXIST!</h3>",unsafe_allow_html=True)
                         st.markdown("<p style='text-align: center; color: grey;'>Checked using molbloom</p>",unsafe_allow_html=True)
+                        if st.button('Compare',use_container_width=True):
+                            st.session_state.compared = True
+                        if st.session_state.compared:
+                                compare_single_con.empty()
+                                with compare_single_con.container():
+                                    com_col = st.columns(3)
+                                    com_col[1].markdown("<h4 style='text-align: center;'>Original</h4>",unsafe_allow_html=True)
+                                    com_col[2].markdown("<h4 style='text-align: center;'>New</h4>",unsafe_allow_html=True)
+                                    imgByteArr.seek(0)
+                                    MolToImage(Chem.MolFromSmiles(st.session_state.canon_smiles),size=(400,200)).save(imgByteArr,format='PNG')
+                                    old_mol = base64.b64encode(imgByteArr.getvalue()).decode()
+                                    imgByteArr.seek(0)
+                                    MolToImage(Chem.MolFromSmiles(st.session_state.new_smiles),size=(400,200)).save(imgByteArr,format='PNG')
+                                    new_mol = base64.b64encode(imgByteArr.getvalue()).decode()
+                                    com_col[1].markdown("<p style='text-align: center;'>"+
+                                        f"<img src='data:image/png;base64,{old_mol}' class='img-fluid'>"+
+                                        "</p>", unsafe_allow_html=True)
+                                    com_col[2].markdown("<p style='text-align: center;'>"+
+                                        f"<img src='data:image/png;base64,{new_mol}' class='img-fluid'>"+
+                                        "</p>", unsafe_allow_html=True)
+                                    old_mol_metrics = Metrics(st.session_state.canon_smiles).calculate_all()
+                                    new_mol_metrics = Metrics(st.session_state.new_smiles).calculate_all()
+                                    value_com_col = st.columns(3)
+                                    for met,met_name in descrip_dict.items():
+                                        value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
+                                        if met not in ['n_hba','n_hbd']:
+                                            value_com_col[1].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
+                                            value_com_col[2].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
+                                        else:
+                                            value_com_col[1].markdown("<p style='text-align: center;'>%d</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
+                                            value_com_col[2].markdown("<p style='text-align: center;'>%d</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
+                                    for met,met_name in rule_dict.items():
+                                        value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
+                                        old_passed = old_mol_metrics[met]
+                                        new_passed = new_mol_metrics[met]
+                                        if met == 'ro5':
+                                            value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed > 3 else 'tomato',old_passed),unsafe_allow_html=True)
+                                            value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed > 3 else 'tomato',new_passed),unsafe_allow_html=True)
+                                        else:
+                                            value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato','Passed' if old_passed else 'Failed'),unsafe_allow_html=True)
+                                            value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato','Passed' if new_passed else 'Failed'),unsafe_allow_html=True)
+                                    score_col_old = value_com_col[1].columns(2)
+                                    score_col_new = value_com_col[2].columns(2)
+                                    for met,met_name in score_dict.items():
+                                        value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
+                                        if met != 'npscore':
+                                            score_col_old[0].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
+                                            score_col_new[0].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
+                                            old_passed = old_mol_metrics[met+'_passed']
+                                            new_passed = new_mol_metrics[met+'_passed']
+                                            score_col_old[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato','Good' if old_passed else 'Bad'),unsafe_allow_html=True)
+                                            score_col_new[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato','Good' if new_passed else 'Bad'),unsafe_allow_html=True)
+                                        else:
+                                            value_com_col[1].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
+                                            value_com_col[2].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
+                                    # for met,met_name in score_pass_dict.items():
+                                    #     value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
+                                    #     old_passed = old_mol_metrics[met]
+                                    #     new_passed = new_mol_metrics[met]
+                                    #     value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato',old_passed),unsafe_allow_html=True)
+                                    #     value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato',new_passed),unsafe_allow_html=True)
+                                    for met,met_name in filter_dict.items():
+                                        # value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
+                                        old_passed = old_mol_metrics[met]
+                                        new_passed = new_mol_metrics[met]
+                                        with value_com_col[1].expander("%s :%s[%s]"%(met_name,'green' if old_passed['Disposed'] == 'Accepted' else 'red',old_passed['Disposed'])):
+                                            st.markdown('Matched name(s):')
+                                            # st.write(old_passed['MatchedNames'])
+                                            # st.markdown('Matched atom(s):')
+                                            if old_passed['MatchedNames'] != ['-']:
+                                                for idx,patt in enumerate(old_passed['MatchedAtoms']):
+                                                    st.code(old_passed['MatchedNames'][idx])
+                                                    # st.markdown(patt)
+                                                    drawer = rdMolDraw2D.MolDraw2DSVG(300,200)
+                                                    # drawer.drawOptions().fillHighlights = False
+                                                    matches = sum(patt, ())
+                                                    drawer.DrawMolecule(mol, highlightAtoms=matches)
+                                                    drawer.FinishDrawing()
+                                                    svg = drawer.GetDrawingText()
+                                                    imgByteArr.seek(0)
+                                                    st.markdown("<p style='text-align: center;'>"+
+                                                            f"<img src='data:image/svg+xml;base64,{base64.b64encode(svg.encode('utf-8')).decode('utf-8')}' class='img-fluid'>"+
+                                                            "</p>", unsafe_allow_html=True)
+                                            else:
+                                                st.markdown("No matched pattern")
+                                            # st.write(old_passed['MatchedAtoms'])
+                                        with value_com_col[2].expander("%s :%s[%s]"%(met_name,'green' if new_passed['Disposed'] == 'Accepted' else 'red',new_passed['Disposed'])):
+                                            st.markdown('Matched name(s):')
+                                            # st.write(new_passed['MatchedNames'])
+                                            # st.markdown('Matched atom(s):')
+                                            # st.write(new_passed['MatchedAtoms'])
+                                            if new_passed['MatchedNames'] != ['-']:
+                                                for idx,patt in enumerate(new_passed['MatchedAtoms']):
+                                                    st.code(new_passed['MatchedNames'][idx])
+                                                    drawer = rdMolDraw2D.MolDraw2DSVG(300,200)
+                                                    # drawer.drawOptions().fillHighlights = False
+                                                    matches = sum(patt, ())
+                                                    drawer.DrawMolecule(mol, highlightAtoms=matches)
+                                                    drawer.FinishDrawing()
+                                                    svg = drawer.GetDrawingText()
+                                                    imgByteArr.seek(0)
+                                                    st.markdown("<p style='text-align: center;'>"+
+                                                            f"<img src='data:image/svg+xml;base64,{base64.b64encode(svg.encode('utf-8')).decode('utf-8')}' class='img-fluid'>"+
+                                                            "</p>", unsafe_allow_html=True)
+                                            else:
+                                                st.markdown("No matched pattern")
             with sidebar_con.container():
                     set_step(3)
                     oam_sidebar(3)
     st.session_state.checked_single = 'NO'
     st.session_state.smiles_checked = False
     st.session_state.single_optimized = False
+    st.session_state.compared = False
     set_step(0)
 def reset_oab_state():

metrics.py ADDED Viewed

	@@ -0,0 +1,423 @@

+from rdkit import Chem
+from rdkit.Chem import Descriptors
+from rdkit.Chem import rdMolDescriptors
+from rdkit.Chem import RDConfig
+import os
+import sys
+sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
+sys.path.append(os.path.join(RDConfig.RDContribDir, 'NP_Score'))
+import sascorer
+import npscorer
+import pandas as pd
+import numpy as np
+from scopy.ScoFH import fh_filter
+class Metrics():
+    """Descriptors, drug-likeness rules, scores and structural-alert filters for one molecule.
+
+    The SMILES is parsed once in the constructor and the five basic
+    descriptors (logP, exact molecular weight, TPSA, H-bond acceptors and
+    donors) are computed eagerly and stored as attributes.  Everything else
+    is computed on demand by the individual methods; ``calculate_all``
+    collects the results in a single dict.
+    """
+    # Location of the WEHI PAINS SMARTS table: the local copy is preferred,
+    # the RDKit repository is the fallback.
+    _PAINS_CSV_LOCAL = './wehi_pains.csv'
+    _PAINS_CSV_URL = 'https://raw.githubusercontent.com/rdkit/rdkit/master/Data/Pains/wehi_pains.csv'
+    # Class-level caches: both resources are identical for every molecule, so
+    # they are loaded once instead of on every instantiation.
+    _pains_filters_cache = None
+    _np_model_cache = None
+    def __init__(self,smiles):
+        """
+        Parse the molecule and compute its basic descriptors.
+        Parameters
+        ----------
+        smiles : str
+            SMILES for a molecule.
+        Raises
+        ------
+        ValueError
+            If RDKit cannot parse ``smiles``.
+        """
+        # Validate first so that an invalid SMILES fails with a clear error
+        # instead of an AttributeError on the missing ``self.mol`` later on.
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError('SMILES is not valid!')
+        self.smiles = smiles
+        self.mol = mol
+        # Hydrogen-added copy; this is the molecule handed to the scopy filters.
+        self.h_mol = Chem.AddHs(self.mol)
+        # Shared resources.  ``_pains_filters`` is not read by any method of
+        # this class; it is kept so existing attribute access keeps working.
+        self._pains_filters = self._load_pains_filters()
+        self.fscore = self._load_np_model()
+        # Descriptors
+        self.logp = Descriptors.MolLogP(self.mol)
+        self.mw = Descriptors.ExactMolWt(self.mol)
+        self.tpsa = Descriptors.TPSA(self.mol)
+        self.n_hba = Descriptors.NumHAcceptors(self.mol)
+        self.n_hbd = Descriptors.NumHDonors(self.mol)
+    @classmethod
+    def _load_pains_filters(cls):
+        """Return the PAINS SMARTS patterns as RDKit query molecules, loading the table once."""
+        if cls._pains_filters_cache is None:
+            if os.path.isfile(cls._PAINS_CSV_LOCAL):
+                source = cls._PAINS_CSV_LOCAL
+            else:
+                source = cls._PAINS_CSV_URL
+            pains = pd.read_csv(source,names=['smarts', 'names'])
+            cls._pains_filters_cache = [Chem.MolFromSmarts(x) for x in pains['smarts'].values]
+        return cls._pains_filters_cache
+    @classmethod
+    def _load_np_model(cls):
+        """Return the model read by ``npscorer.readNPModel()``, reading it once."""
+        if cls._np_model_cache is None:
+            cls._np_model_cache = npscorer.readNPModel()
+        return cls._np_model_cache
+    def _run_filter(self, check, detail):
+        """
+        Run one scopy ``fh_filter`` check on the hydrogen-added molecule.
+        Parameters
+        ----------
+        check : callable
+            One of the ``fh_filter.Check_*`` functions.
+        detail : bool
+            Return the whole report if True, only its 'Disposed' entry otherwise.
+        """
+        # The check is always run with detail=True so that 'Disposed' is
+        # available in both modes.
+        report = check(self.h_mol, detail = True)
+        if detail:
+            return report
+        return report['Disposed']
+    def ro5(self):
+        """
+        Count how many rule-of-five style conditions the molecule fulfils.
+        Five conditions are checked: MW <= 500, H-bond acceptors <= 10,
+        H-bond donors <= 5, logP <= 5 and TPSA <= 140.
+        Returns
+        -------
+        int
+            Number of conditions fulfilled (0 to 5).
+        """
+        conditions = [self.mw <= 500, self.n_hba <= 10, self.n_hbd <= 5, self.logp <= 5, self.tpsa <= 140]
+        return sum(conditions)
+    def pfizer_rule_passed(self):
+        """
+        Test if the molecule passes the Pfizer rule.
+        Returns
+        -------
+        bool
+            False when both logP > 3 and TPSA < 75 hold (flagged as toxic),
+            True otherwise.
+        """
+        flagged = self.logp > 3 and self.tpsa < 75
+        return not flagged
+    def gsk_rule_passed(self):
+        """
+        Test if the molecule passes the GSK rule.
+        Returns
+        -------
+        bool
+            True when MW <= 400 and logP <= 4.
+        """
+        return self.mw <= 400 and self.logp <= 4
+    def goldentriangle_rule_passed(self):
+        """
+        Test if the molecule passes the GoldenTriangle rule.
+        Returns
+        -------
+        bool
+            True when 200 <= MW <= 450 and -2 <= logP <= 5.
+        """
+        return 200 <= self.mw <= 450 and -2 <= self.logp <= 5
+    def qed(self):
+        """
+        Calculate QED
+        Returns
+        -------
+        float
+            QED for input molecule
+        """
+        # Import the submodule explicitly: ``from rdkit import Chem`` alone
+        # does not guarantee that ``Chem.QED`` has been loaded.
+        from rdkit.Chem import QED
+        return QED.qed(self.mol)
+    def qed_passed(self):
+        """
+        Test if the molecule is 'attractive' according to QED.
+        Returns
+        -------
+        bool
+            True when QED > 0.67.
+        """
+        return self.qed() > 0.67
+    def sascore(self):
+        """
+        Calculate sascore
+        Returns
+        -------
+        float
+            SAscore for input molecule
+        """
+        return sascorer.calculateScore(self.mol)
+    def sascore_passed(self):
+        """
+        Test if the molecule is easy to synthesize.
+        Returns
+        -------
+        bool
+            True when SAscore <= 6.
+        """
+        return self.sascore() <= 6
+    def fsp3(self):
+        """
+        Calculate Fsp3
+        Returns
+        -------
+        float
+            Fsp3 for input molecule
+        """
+        return rdMolDescriptors.CalcFractionCSP3(self.mol)
+    def fsp3_passed(self):
+        """
+        Test if the molecule has a suitable Fsp3 value.
+        Returns
+        -------
+        bool
+            True when Fsp3 >= 0.42.
+        """
+        return self.fsp3() >= 0.42
+    def pains_filter(self, detail=False):
+        """
+        PAINS filter for the molecule.
+        Parameters
+        ----------
+        detail : bool
+            Return the full report instead of only the verdict.
+        Returns
+        -------
+        The report produced by ``fh_filter.Check_PAINS`` when ``detail`` is
+        True (callers read its 'Disposed', 'MatchedNames' and 'MatchedAtoms'
+        entries), otherwise only the 'Disposed' entry ('Accepted' means no
+        alert was hit).
+        """
+        return self._run_filter(fh_filter.Check_PAINS, detail)
+    def mce18(self):
+        """
+        Calculate MCE-18
+        Returns
+        -------
+        float
+            MCE-18 for input molecule
+        """
+        # Presence flags (booleans, used as 0/1 in the final sum)
+        AR = rdMolDescriptors.CalcNumAromaticRings(self.mol) > 0
+        NAR = rdMolDescriptors.CalcNumAliphaticRings(self.mol) > 0
+        CHIRAL = len(Chem.FindMolChiralCenters(self.mol, force = True, includeUnassigned = True)) > 0
+        SPIRO = rdMolDescriptors.CalcNumSpiroAtoms(self.mol) > 0
+        SP3 = self.fsp3()
+        # Fractions of carbons that are sp3 and in a ring (CYC) or outside
+        # any ring (ACYC)
+        C_total = 0
+        Csp3_cyclic = 0
+        Csp3_acyclic = 0
+        for atom in self.mol.GetAtoms():
+            if atom.GetAtomicNum() != 6:
+                continue
+            C_total += 1
+            if atom.GetHybridization() == Chem.HybridizationType.SP3:
+                if atom.IsInRing():
+                    Csp3_cyclic += 1
+                else:
+                    Csp3_acyclic += 1
+        # A molecule without carbon gets 0 for both fractions
+        CYC = Csp3_cyclic/C_total if C_total>0 else 0
+        ACYC = Csp3_acyclic/C_total if C_total>0 else 0
+        # Q1 = 3 - 2*N + M/2, with N the atom count and M the sum of the
+        # squared atom degrees
+        deltas=[x.GetDegree() for x in self.mol.GetAtoms()]
+        M = sum(np.array(deltas)**2)
+        N = self.mol.GetNumAtoms()
+        Q1 = 3-2*N+M/2.0
+        return (AR + NAR + CHIRAL + SPIRO + (SP3 + CYC - ACYC)/(1 + SP3))*Q1
+    def mce18_passed(self):
+        """
+        Test if the molecule is 'interesting' according to MCE-18.
+        Returns
+        -------
+        bool
+            True when MCE-18 >= 45.
+        """
+        return self.mce18() >= 45
+    def npscore(self):
+        """
+        Calculate NPscore of molecule.
+        Returns
+        -------
+        float
+            NPscore for input molecule (per the original author's note:
+            range [-5,5], higher is more natural-product-like).
+        """
+        return npscorer.scoreMol(self.mol, self.fscore)
+    def alarm_nmr_filter(self,detail=False):
+        """
+        ALARM NMR filter for the molecule.
+        Parameters
+        ----------
+        detail : bool
+            Return the full report instead of only the verdict.
+        Returns
+        -------
+        The report produced by ``fh_filter.Check_Alarm_NMR`` when ``detail``
+        is True, otherwise only its 'Disposed' entry.
+        """
+        return self._run_filter(fh_filter.Check_Alarm_NMR, detail)
+    def bms_filter(self,detail=False):
+        """
+        BMS filter for the molecule.
+        Parameters
+        ----------
+        detail : bool
+            Return the full report instead of only the verdict.
+        Returns
+        -------
+        The report produced by ``fh_filter.Check_BMS`` when ``detail`` is
+        True, otherwise only its 'Disposed' entry.
+        """
+        return self._run_filter(fh_filter.Check_BMS, detail)
+    def chelator_filter(self, detail=False):
+        """
+        Chelator filter for the molecule.
+        Parameters
+        ----------
+        detail : bool
+            Return the full report instead of only the verdict.
+        Returns
+        -------
+        The report produced by ``fh_filter.Check_Chelating`` when ``detail``
+        is True, otherwise only its 'Disposed' entry.
+        """
+        return self._run_filter(fh_filter.Check_Chelating, detail)
+    def calculate_all(self, descriptors = True,rules=True,scores = True,scores_passed = True,filters = True,detail = True):
+        """
+        Calculate every selected group of metrics for the molecule.
+        Parameters
+        ----------
+        descriptors : bool
+            Include 'logp', 'mw', 'tpsa', 'n_hba', 'n_hbd'. Default is True.
+        rules : bool
+            Include 'ro5', 'pfizer_rule_passed', 'gsk_rule_passed',
+            'goldentriangle_rule'. Default is True.
+        scores : bool
+            Include 'qed', 'sascore', 'fsp3', 'mce18', 'npscore'. Default is True.
+        scores_passed : bool
+            Include 'qed_passed', 'sascore_passed', 'fsp3_passed',
+            'mce18_passed'. Default is True.
+        filters : bool
+            Include 'pains_filter', 'alarm_nmr_filter', 'bms_filter',
+            'chelator_filter'. Default is True.
+        detail : bool
+            Passed on to every filter method. Default is True.
+        Returns
+        -------
+        dict
+            Metric name -> value, in the group order listed above.
+        """
+        # Descriptors are plain values already computed in __init__; every
+        # other table maps a result key to the bound method producing it.
+        descriptor_values = {
+            'logp':self.logp,
+            'mw':self.mw,
+            'tpsa':self.tpsa,
+            'n_hba':self.n_hba,
+            'n_hbd':self.n_hbd
+        }
+        # NOTE: 'goldentriangle_rule' (no '_passed' suffix) is the key the
+        # callers index by, so it must not be renamed here.
+        rule_funcs = {
+            'ro5':self.ro5,
+            'pfizer_rule_passed':self.pfizer_rule_passed,
+            'gsk_rule_passed':self.gsk_rule_passed,
+            'goldentriangle_rule':self.goldentriangle_rule_passed
+        }
+        score_funcs = {
+            'qed':self.qed,
+            'sascore' : self.sascore,
+            'fsp3' : self.fsp3,
+            'mce18' : self.mce18,
+            'npscore' : self.npscore
+        }
+        score_pass_funcs = {
+            'qed_passed' : self.qed_passed,
+            'sascore_passed' : self.sascore_passed,
+            'fsp3_passed' : self.fsp3_passed,
+            'mce18_passed' : self.mce18_passed
+        }
+        filter_funcs = {
+            'pains_filter' : self.pains_filter,
+            'alarm_nmr_filter' : self.alarm_nmr_filter,
+            'bms_filter' : self.bms_filter,
+            'chelator_filter' : self.chelator_filter
+        }
+        result = dict()
+        if descriptors:
+            result.update(descriptor_values)
+        # Only the selected groups are evaluated, so a disabled group costs
+        # nothing.
+        for enabled, funcs in ((rules, rule_funcs), (scores, score_funcs), (scores_passed, score_pass_funcs)):
+            if enabled:
+                for name, func in funcs.items():
+                    result[name] = func()
+        if filters:
+            for name, func in filter_funcs.items():
+                result[name] = func(detail=detail)
+        return result

requirements.txt CHANGED Viewed

@@ -9,4 +9,5 @@ stqdm
 pandas
 streamlit_ext
 streamlit_lottie
-requests

 pandas
 streamlit_ext
 streamlit_lottie
+requests
+scopy