Trương Gia Bảo committed on
Commit
1cf9b3e
·
1 Parent(s): 97ff519

Update version 1.01

Browse files
Files changed (4) hide show
  1. README.md +13 -3
  2. app.py +185 -8
  3. metrics.py +423 -0
  4. requirements.txt +2 -1
README.md CHANGED
@@ -1,12 +1,22 @@
1
  ---
2
- title: Jtvae Demo
3
  emoji: 💻
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: streamlit
7
- sdk_version: 1.21.0
8
  app_file: app.py
9
- pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Molecular Optimization with JTVAE
3
  emoji: 💻
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: streamlit
7
+ sdk_version: 1.22.0
8
  app_file: app.py
9
+ pinned: true
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ # Changelog
15
+
16
+ ## 1.01
17
+ * Add `Compare` feature in `Optimize a molecule` page.
18
+ * Add `What's new?` in sidebar.
19
+ * Add `Changelog` in README.
20
+
21
+ ## 1.00
22
+ * Initial release.
app.py CHANGED
@@ -11,12 +11,18 @@ import rdkit
11
  import rdkit.Chem as Chem
12
  from rdkit.Chem.Draw import MolToImage
13
  from rdkit.Chem import Descriptors
 
 
 
 
 
14
  import sascorer
15
  import networkx as nx
16
  from stqdm import stqdm
17
  import base64, io
18
  import pandas as pd
19
  import streamlit_ext as ste
 
20
 
21
  os.environ['KMP_DUPLICATE_LIB_OK']='True'
22
 
@@ -25,6 +31,16 @@ from mol_tree import Vocab, MolTree
25
  from jtprop_vae import JTPropVAE
26
  from molbloom import buy
27
 
 
 
 
 
 
 
 
 
 
 
28
  css='''
29
  [data-testid="metric-container"] {
30
  width: fit-content;
@@ -141,14 +157,14 @@ def df_to_file(df):
141
  df.to_csv(s_buff)
142
  return s_buff.getvalue().encode()
143
 
144
- def download_df(df,id):
145
- with st.expander(':arrow_down: Download this dataframe'):
146
- st.markdown("<h4 style='color:tomato;'>Select column(s) to save:</h4>",unsafe_allow_html=True)
147
- for col in df.columns:
148
- st.checkbox(col,key=str(id)+'_col_'+str(col))
149
- st.text_input('File name (.csv):','dataframe',key=str(id)+'_file_name')
150
 
151
- ste.download_button('Download',df_to_file(df[[col for col in df.columns if st.session_state[str(id)+'_col_'+str(col)]]]),st.session_state[str(id)+'_file_name']+'.csv')
152
 
153
  lg = rdkit.RDLogger.logger()
154
  lg.setLevel(rdkit.RDLogger.CRITICAL)
@@ -195,6 +211,39 @@ def load_model():
195
  model.load_state_dict(torch.load(model_path,map_location=torch.device('cpu')))
196
  return model
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  from streamlit_lottie import st_lottie
199
  import requests
200
 
@@ -228,6 +277,9 @@ def oam_sidebar(step):
228
  st.markdown("<h4 style='color: "+color_ls[2]+"'>Optimizing a molecule</h4>",unsafe_allow_html=True)
229
  st.markdown('|')
230
  st.markdown("<h4 style='color: "+color_ls[3]+"'>Finished</h4>",unsafe_allow_html=True)
 
 
 
231
 
232
  def oab_sidebar(step):
233
  st.title('**Optimize a batch**')
@@ -255,7 +307,15 @@ def oab_sidebar(step):
255
  st.markdown("<h4 style='color: "+color_ls[4]+"'>Optimizing a batch</h4>",unsafe_allow_html=True)
256
  st.markdown('|')
257
  st.markdown("<h4 style='color: "+color_ls[5]+"'>Finished</h4>",unsafe_allow_html=True)
258
-
 
 
 
 
 
 
 
 
259
  # @st.cache_data(experimental_allow_widgets=True)
260
 
261
  # if 'sidebar_con' not in st.session_state:
@@ -319,6 +379,12 @@ We seek to automate the design of molecules based on specific chemical propertie
319
  img_caption = '''
320
  Figure 3. Overview of our method: A molecular graph G is first decomposed into its junction tree TG, where each colored node in the tree represents a substructure in the molecule. We then encode both the tree and graph into their latent embeddings zT and zG. To decode the molecule, we first reconstruct junction tree from zT , and then assemble nodes in the tree back to the original molecule.'''
321
 
 
 
 
 
 
 
322
  with st.expander(':four_leaf_clover: About the author',expanded=True):
323
  st.markdown('')
324
  st.markdown("<h3 style='text-align:center;'>Gia-Bao Truong</h3>",unsafe_allow_html=True)
@@ -398,6 +464,8 @@ def Optimize_a_molecule():
398
  st.session_state.single_optimized = False
399
  if 'smiles_checked' not in st.session_state:
400
  st.session_state.smiles_checked = False
 
 
401
  # with oab_sel_container.container():
402
  ls_opt = ['-','Sorafenib','Pazopanib','Sunitinib']
403
  sample_mode = {
@@ -502,6 +570,7 @@ def Optimize_a_molecule():
502
  else: check_single_con.empty()
503
 
504
  optim_single_con = st.empty()
 
505
  if st.session_state.smiles_checked:
506
  if optim_single_butt:
507
  # sidebar_con.empty()
@@ -560,6 +629,113 @@ def Optimize_a_molecule():
560
  # st.write('THIS MOLECULE DOES NOT EXIST!')
561
  st.markdown("<h3 style='text-align: center; color: mediumseagreen;'>THIS MOLECULE DOES NOT EXIST!</h3>",unsafe_allow_html=True)
562
  st.markdown("<p style='text-align: center; color: grey;'>Checked using molbloom</p>",unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  with sidebar_con.container():
564
  set_step(3)
565
  oam_sidebar(3)
@@ -858,6 +1034,7 @@ def reset_oam_state():
858
  st.session_state.checked_single = 'NO'
859
  st.session_state.smiles_checked = False
860
  st.session_state.single_optimized = False
 
861
  set_step(0)
862
 
863
  def reset_oab_state():
 
11
  import rdkit.Chem as Chem
12
  from rdkit.Chem.Draw import MolToImage
13
  from rdkit.Chem import Descriptors
14
+ from rdkit.Chem import RDConfig
15
+ from rdkit.Chem.Draw import rdMolDraw2D
16
+ import os
17
+ import sys
18
+ sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
19
  import sascorer
20
  import networkx as nx
21
  from stqdm import stqdm
22
  import base64, io
23
  import pandas as pd
24
  import streamlit_ext as ste
25
+ from metrics import Metrics
26
 
27
  os.environ['KMP_DUPLICATE_LIB_OK']='True'
28
 
 
31
  from jtprop_vae import JTPropVAE
32
  from molbloom import buy
33
 
34
+
35
+ what_new = '''
36
+ ### Version 1.01
37
+ * Add `Compare` feature in `Optimize a molecule` page.
38
+ * Add `What's new?` in sidebar.
39
+ * Add `Changelog` in README.
40
+ '''
41
+
42
+
43
+
44
  css='''
45
  [data-testid="metric-container"] {
46
  width: fit-content;
 
157
  df.to_csv(s_buff)
158
  return s_buff.getvalue().encode()
159
 
160
+ # def download_df(df,id):
161
+ # with st.expander(':arrow_down: Download this dataframe'):
162
+ # st.markdown("<h4 style='color:tomato;'>Select column(s) to save:</h4>",unsafe_allow_html=True)
163
+ # for col in df.columns:
164
+ # st.checkbox(col,key=str(id)+'_col_'+str(col))
165
+ # st.text_input('File name (.csv):','dataframe',key=str(id)+'_file_name')
166
 
167
+ # ste.download_button('Download',df_to_file(df[[col for col in df.columns if st.session_state[str(id)+'_col_'+str(col)]]]),st.session_state[str(id)+'_file_name']+'.csv')
168
 
169
  lg = rdkit.RDLogger.logger()
170
  lg.setLevel(rdkit.RDLogger.CRITICAL)
 
211
  model.load_state_dict(torch.load(model_path,map_location=torch.device('cpu')))
212
  return model
213
 
214
+ descrip_dict ={
215
+ 'logp':'LogP',
216
+ 'mw':'MW',
217
+ 'tpsa':'TPSA',
218
+ 'n_hba':'nHA',
219
+ 'n_hbd':'nHD'
220
+ }
221
+ rule_dict = {
222
+ 'ro5':'RO5',
223
+ 'pfizer_rule_passed':'PFIZER Rule',
224
+ 'gsk_rule_passed':'GSK Rule',
225
+ 'goldentriangle_rule':'GOLDENTRIANGLE Rule'
226
+ }
227
+ score_dict ={
228
+ 'qed':'QED',
229
+ 'sascore' : 'SA score',
230
+ 'fsp3' : 'Fsp3',
231
+ 'mce18' : 'MCE-18',
232
+ 'npscore' : 'NP score'
233
+ }
234
+ score_pass_dict = {
235
+ 'qed_passed' : 'QED Passed',
236
+ 'sascore_passed' : 'SA Passed',
237
+ 'fsp3_passed' : 'Fsp3 Passed',
238
+ 'mce18_passed' : 'MCE-18 Passed'
239
+ }
240
+ filter_dict = {
241
+ 'pains_filter' : 'PAINS Filter',
242
+ 'alarm_nmr_filter' : 'ALARM NMR Filter',
243
+ 'bms_filter' : 'BMS Filter',
244
+ 'chelator_filter' : 'Chelator Filter'
245
+ }
246
+
247
  from streamlit_lottie import st_lottie
248
  import requests
249
 
 
277
  st.markdown("<h4 style='color: "+color_ls[2]+"'>Optimizing a molecule</h4>",unsafe_allow_html=True)
278
  st.markdown('|')
279
  st.markdown("<h4 style='color: "+color_ls[3]+"'>Finished</h4>",unsafe_allow_html=True)
280
+ st.markdown("""---""")
281
+ with st.expander("# **:green[What's new?]**"):
282
+ st.markdown(what_new)
283
 
284
  def oab_sidebar(step):
285
  st.title('**Optimize a batch**')
 
307
  st.markdown("<h4 style='color: "+color_ls[4]+"'>Optimizing a batch</h4>",unsafe_allow_html=True)
308
  st.markdown('|')
309
  st.markdown("<h4 style='color: "+color_ls[5]+"'>Finished</h4>",unsafe_allow_html=True)
310
+ st.markdown("""---""")
311
+ with st.expander("# **:green[What's new?]**"):
312
+ st.markdown(what_new)
313
+
314
def ab_sidebar():
    """Render the 'About' sidebar: a title, a divider and the "What's new?" expander."""
    st.title('**About**')
    st.markdown('---')
    # The expander shows the module-level `what_new` changelog text.
    whats_new_box = st.expander("# **:green[What's new?]**")
    with whats_new_box:
        st.markdown(what_new)
319
  # @st.cache_data(experimental_allow_widgets=True)
320
 
321
  # if 'sidebar_con' not in st.session_state:
 
379
  img_caption = '''
380
  Figure 3. Overview of our method: A molecular graph G is first decomposed into its junction tree TG, where each colored node in the tree represents a substructure in the molecule. We then encode both the tree and graph into their latent embeddings zT and zG. To decode the molecule, we first reconstruct junction tree from zT , and then assemble nodes in the tree back to the original molecule.'''
381
 
382
+ with st.sidebar:
383
+ sidebar_con = st.empty()
384
+ # sidebar_con.empty()
385
+ with sidebar_con.container():
386
+ ab_sidebar()
387
+
388
  with st.expander(':four_leaf_clover: About the author',expanded=True):
389
  st.markdown('')
390
  st.markdown("<h3 style='text-align:center;'>Gia-Bao Truong</h3>",unsafe_allow_html=True)
 
464
  st.session_state.single_optimized = False
465
  if 'smiles_checked' not in st.session_state:
466
  st.session_state.smiles_checked = False
467
+ if 'compared' not in st.session_state:
468
+ st.session_state.compared = False
469
  # with oab_sel_container.container():
470
  ls_opt = ['-','Sorafenib','Pazopanib','Sunitinib']
471
  sample_mode = {
 
570
  else: check_single_con.empty()
571
 
572
  optim_single_con = st.empty()
573
+ compare_single_con = st.empty()
574
  if st.session_state.smiles_checked:
575
  if optim_single_butt:
576
  # sidebar_con.empty()
 
629
  # st.write('THIS MOLECULE DOES NOT EXIST!')
630
  st.markdown("<h3 style='text-align: center; color: mediumseagreen;'>THIS MOLECULE DOES NOT EXIST!</h3>",unsafe_allow_html=True)
631
  st.markdown("<p style='text-align: center; color: grey;'>Checked using molbloom</p>",unsafe_allow_html=True)
632
+ if st.button('Compare',use_container_width=True):
633
+ st.session_state.compared = True
634
+ if st.session_state.compared:
635
+ compare_single_con.empty()
636
+ with compare_single_con.container():
637
+ com_col = st.columns(3)
638
+ com_col[1].markdown("<h4 style='text-align: center;'>Original</h4>",unsafe_allow_html=True)
639
+ com_col[2].markdown("<h4 style='text-align: center;'>New</h4>",unsafe_allow_html=True)
640
+ imgByteArr.seek(0)
641
+ MolToImage(Chem.MolFromSmiles(st.session_state.canon_smiles),size=(400,200)).save(imgByteArr,format='PNG')
642
+ old_mol = base64.b64encode(imgByteArr.getvalue()).decode()
643
+ imgByteArr.seek(0)
644
+ MolToImage(Chem.MolFromSmiles(st.session_state.new_smiles),size=(400,200)).save(imgByteArr,format='PNG')
645
+ new_mol = base64.b64encode(imgByteArr.getvalue()).decode()
646
+ com_col[1].markdown("<p style='text-align: center;'>"+
647
+ f"<img src='data:image/png;base64,{old_mol}' class='img-fluid'>"+
648
+ "</p>", unsafe_allow_html=True)
649
+ com_col[2].markdown("<p style='text-align: center;'>"+
650
+ f"<img src='data:image/png;base64,{new_mol}' class='img-fluid'>"+
651
+ "</p>", unsafe_allow_html=True)
652
+ old_mol_metrics = Metrics(st.session_state.canon_smiles).calculate_all()
653
+ new_mol_metrics = Metrics(st.session_state.new_smiles).calculate_all()
654
+ value_com_col = st.columns(3)
655
+ for met,met_name in descrip_dict.items():
656
+ value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
657
+ if met not in ['n_hba','n_hbd']:
658
+ value_com_col[1].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
659
+ value_com_col[2].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
660
+ else:
661
+ value_com_col[1].markdown("<p style='text-align: center;'>%d</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
662
+ value_com_col[2].markdown("<p style='text-align: center;'>%d</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
663
+ for met,met_name in rule_dict.items():
664
+ value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
665
+ old_passed = old_mol_metrics[met]
666
+ new_passed = new_mol_metrics[met]
667
+ if met == 'ro5':
668
+ value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed > 3 else 'tomato',old_passed),unsafe_allow_html=True)
669
+ value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed > 3 else 'tomato',new_passed),unsafe_allow_html=True)
670
+ else:
671
+ value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato','Passed' if old_passed else 'Failed'),unsafe_allow_html=True)
672
+ value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato','Passed' if new_passed else 'Failed'),unsafe_allow_html=True)
673
+ score_col_old = value_com_col[1].columns(2)
674
+ score_col_new = value_com_col[2].columns(2)
675
+ for met,met_name in score_dict.items():
676
+ value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
677
+ if met != 'npscore':
678
+ score_col_old[0].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
679
+ score_col_new[0].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
680
+ old_passed = old_mol_metrics[met+'_passed']
681
+ new_passed = new_mol_metrics[met+'_passed']
682
+ score_col_old[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato','Good' if old_passed else 'Bad'),unsafe_allow_html=True)
683
+ score_col_new[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato','Good' if new_passed else 'Bad'),unsafe_allow_html=True)
684
+ else:
685
+ value_com_col[1].markdown("<p style='text-align: center;'>%.2f</p>"%(old_mol_metrics[met]),unsafe_allow_html=True)
686
+ value_com_col[2].markdown("<p style='text-align: center;'>%.2f</p>"%(new_mol_metrics[met]),unsafe_allow_html=True)
687
+ # for met,met_name in score_pass_dict.items():
688
+ # value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
689
+ # old_passed = old_mol_metrics[met]
690
+ # new_passed = new_mol_metrics[met]
691
+ # value_com_col[1].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if old_passed else 'tomato',old_passed),unsafe_allow_html=True)
692
+ # value_com_col[2].markdown("<p style='text-align: center; color: %s;'>%s</p>"%('mediumseagreen' if new_passed else 'tomato',new_passed),unsafe_allow_html=True)
693
+
694
+ for met,met_name in filter_dict.items():
695
+ # value_com_col[0].markdown(f"<p style='text-align: center;'>{met_name}</p>",unsafe_allow_html=True)
696
+ old_passed = old_mol_metrics[met]
697
+ new_passed = new_mol_metrics[met]
698
+ with value_com_col[1].expander("%s :%s[%s]"%(met_name,'green' if old_passed['Disposed'] == 'Accepted' else 'red',old_passed['Disposed'])):
699
+ st.markdown('Matched name(s):')
700
+ # st.write(old_passed['MatchedNames'])
701
+ # st.markdown('Matched atom(s):')
702
+ if old_passed['MatchedNames'] != ['-']:
703
+ for idx,patt in enumerate(old_passed['MatchedAtoms']):
704
+ st.code(old_passed['MatchedNames'][idx])
705
+ # st.markdown(patt)
706
+ drawer = rdMolDraw2D.MolDraw2DSVG(300,200)
707
+ # drawer.drawOptions().fillHighlights = False
708
+ matches = sum(patt, ())
709
+ drawer.DrawMolecule(mol, highlightAtoms=matches)
710
+ drawer.FinishDrawing()
711
+ svg = drawer.GetDrawingText()
712
+ imgByteArr.seek(0)
713
+ st.markdown("<p style='text-align: center;'>"+
714
+ f"<img src='data:image/svg+xml;base64,{base64.b64encode(svg.encode('utf-8')).decode('utf-8')}' class='img-fluid'>"+
715
+ "</p>", unsafe_allow_html=True)
716
+ else:
717
+ st.markdown("No matched pattern")
718
+ # st.write(old_passed['MatchedAtoms'])
719
+ with value_com_col[2].expander("%s :%s[%s]"%(met_name,'green' if new_passed['Disposed'] == 'Accepted' else 'red',new_passed['Disposed'])):
720
+ st.markdown('Matched name(s):')
721
+ # st.write(new_passed['MatchedNames'])
722
+ # st.markdown('Matched atom(s):')
723
+ # st.write(new_passed['MatchedAtoms'])
724
+ if new_passed['MatchedNames'] != ['-']:
725
+ for idx,patt in enumerate(new_passed['MatchedAtoms']):
726
+ st.code(new_passed['MatchedNames'][idx])
727
+ drawer = rdMolDraw2D.MolDraw2DSVG(300,200)
728
+ # drawer.drawOptions().fillHighlights = False
729
+ matches = sum(patt, ())
730
+ drawer.DrawMolecule(mol, highlightAtoms=matches)
731
+ drawer.FinishDrawing()
732
+ svg = drawer.GetDrawingText()
733
+ imgByteArr.seek(0)
734
+ st.markdown("<p style='text-align: center;'>"+
735
+ f"<img src='data:image/svg+xml;base64,{base64.b64encode(svg.encode('utf-8')).decode('utf-8')}' class='img-fluid'>"+
736
+ "</p>", unsafe_allow_html=True)
737
+ else:
738
+ st.markdown("No matched pattern")
739
  with sidebar_con.container():
740
  set_step(3)
741
  oam_sidebar(3)
 
1034
  st.session_state.checked_single = 'NO'
1035
  st.session_state.smiles_checked = False
1036
  st.session_state.single_optimized = False
1037
+ st.session_state.compared = False
1038
  set_step(0)
1039
 
1040
  def reset_oab_state():
metrics.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rdkit import Chem
2
+ from rdkit.Chem import Descriptors
3
+ from rdkit.Chem import rdMolDescriptors
4
+ from rdkit.Chem import RDConfig
5
+ import os
6
+ import sys
7
+ sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
8
+ sys.path.append(os.path.join(RDConfig.RDContribDir, 'NP_Score'))
9
+ import sascorer
10
+ import npscorer
11
+ import pandas as pd
12
+ import numpy as np
13
+ from scopy.ScoFH import fh_filter
14
+
15
+
16
class Metrics():
    """
    Descriptors, rule checks, scores and substructure filters for one molecule.

    Parameters
    ----------
    smiles : str
        SMILES for a molecule.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles``.
    """

    # Sources of the WEHI PAINS SMARTS table: the local copy is preferred,
    # the RDKit GitHub copy is the fallback.
    _PAINS_LOCAL_PATH = './wehi_pains.csv'
    _PAINS_REMOTE_URL = 'https://raw.githubusercontent.com/rdkit/rdkit/master/Data/Pains/wehi_pains.csv'

    # Class-level caches so the CSV (possibly fetched over the network) and
    # the NP-score model are loaded once per process instead of once per
    # instance.
    _pains_cache = None
    _np_model_cache = None

    def __init__(self,smiles):
        self.smiles = smiles
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail here with a clear error; previously the error was only
            # printed and the constructor then crashed with an AttributeError
            # on the missing ``self.mol``.
            raise ValueError('SMILES is not valid!')
        self.mol = mol
        self.h_mol = Chem.AddHs(self.mol)

        # Load filters and scores (cached on the class).
        # NOTE(review): ``_pains_filters`` is not read by any method of this
        # class; it is kept for backward compatibility.
        self._pains_filters = self._load_pains_filters()
        self.fscore = self._load_np_model()

        # Descriptors
        self.logp = Descriptors.MolLogP(self.mol)
        self.mw = Descriptors.ExactMolWt(self.mol)
        self.tpsa = Descriptors.TPSA(self.mol)
        self.n_hba = Descriptors.NumHAcceptors(self.mol)
        self.n_hbd = Descriptors.NumHDonors(self.mol)

    @classmethod
    def _load_pains_filters(cls):
        """Return the PAINS SMARTS patterns as RDKit query molecules (loaded once)."""
        if cls._pains_cache is None:
            if os.path.isfile(cls._PAINS_LOCAL_PATH):
                source = cls._PAINS_LOCAL_PATH
            else:
                source = cls._PAINS_REMOTE_URL
            table = pd.read_csv(source, names=['smarts', 'names'])
            cls._pains_cache = [Chem.MolFromSmarts(x) for x in table['smarts'].values]
        return cls._pains_cache

    @classmethod
    def _load_np_model(cls):
        """Return the model read by ``npscorer.readNPModel()`` (loaded once)."""
        if cls._np_model_cache is None:
            cls._np_model_cache = npscorer.readNPModel()
        return cls._np_model_cache

    def _apply_filter(self, check, detail):
        """Run one scopy ``fh_filter`` check on the H-added molecule.

        The check is always run with ``detail = True``; the full result is
        returned when ``detail`` is true, otherwise only its ``'Disposed'``
        entry.
        """
        outcome = check(self.h_mol, detail = True)
        if detail:
            return outcome
        return outcome['Disposed']

    def ro5(self):
        """
        Count how many rule-of-five style conditions the molecule fulfils.

        The conditions are MW <= 500, nHA <= 10, nHD <= 5, LogP <= 5 and
        TPSA <= 140.

        Returns
        -------
        int
            Number of conditions fulfilled (0 to 5).
        """
        conditions = [self.mw <= 500, self.n_hba <= 10, self.n_hbd <= 5, self.logp <= 5, self.tpsa <= 140]
        return sum(conditions)

    def pfizer_rule_passed(self):
        """
        Test if the molecule fulfils the Pfizer Rule.

        Returns
        -------
        bool
            False when both LogP > 3 and TPSA < 75 hold, True otherwise.
        """
        return not (self.logp > 3 and self.tpsa < 75)

    def gsk_rule_passed(self):
        """
        Test if the molecule fulfils the GSK Rule.

        Returns
        -------
        bool
            True when MW <= 400 and LogP <= 4.
        """
        return all([self.mw <= 400, self.logp <= 4])

    def goldentriangle_rule_passed(self):
        """
        Test if the molecule fulfils the GoldenTriangle Rule.

        Returns
        -------
        bool
            True when 200 <= MW <= 450 and -2 <= LogP <= 5.
        """
        return all([200 <= self.mw <= 450, -2 <= self.logp <= 5])

    def qed(self):
        """
        Calculate QED.

        Returns
        -------
        float
            QED for the molecule.
        """
        # Import the submodule explicitly: ``from rdkit import Chem`` alone
        # does not guarantee that ``Chem.QED`` has been loaded.
        from rdkit.Chem import QED
        return QED.qed(self.mol)

    def qed_passed(self):
        """
        Test if the molecule is 'attractive' according to QED.

        Returns
        -------
        bool
            True when QED > 0.67.
        """
        return self.qed() > 0.67

    def sascore(self):
        """
        Calculate the synthetic accessibility score.

        Returns
        -------
        float
            SAscore for the molecule.
        """
        return sascorer.calculateScore(self.mol)

    def sascore_passed(self):
        """
        Test if the molecule is easy to synthesize.

        Returns
        -------
        bool
            True when SAscore <= 6.
        """
        return self.sascore() <= 6

    def fsp3(self):
        """
        Calculate Fsp3.

        Returns
        -------
        float
            Fsp3 for the molecule.
        """
        return rdMolDescriptors.CalcFractionCSP3(self.mol)

    def fsp3_passed(self):
        """
        Test if the molecule has a suitable Fsp3 value.

        Returns
        -------
        bool
            True when Fsp3 >= 0.42.
        """
        return self.fsp3() >= 0.42

    def pains_filter(self, detail=False):
        """
        PAINS filter for the molecule.

        Parameters
        ----------
        detail : bool
            Return the full scopy result instead of only the verdict.

        Returns
        -------
        object
            With ``detail``: the result of ``fh_filter.Check_PAINS`` (callers
            read its 'Disposed', 'MatchedNames' and 'MatchedAtoms' entries).
            Without ``detail``: its 'Disposed' entry.
        """
        return self._apply_filter(fh_filter.Check_PAINS, detail)

    def mce18(self):
        """
        Calculate MCE-18.

        Returns
        -------
        float
            MCE-18 for the molecule.
        """
        mol = self.mol

        # Presence indicators (each contributes 0 or 1 to the sum below).
        AR = rdMolDescriptors.CalcNumAromaticRings(mol) > 0
        NAR = rdMolDescriptors.CalcNumAliphaticRings(mol) > 0
        CHIRAL = len(Chem.FindMolChiralCenters(mol, force = True, includeUnassigned = True)) > 0
        SPIRO = rdMolDescriptors.CalcNumSpiroAtoms(mol) > 0
        SP3 = self.fsp3()

        # Count carbons, and split the sp3 carbons into ring / non-ring ones.
        Csp3_cyclic = 0
        Csp3_acyclic = 0
        C_total = 0
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() != 6:
                continue
            C_total += 1
            if atom.GetHybridization() != Chem.HybridizationType.SP3:
                continue
            if atom.IsInRing():
                Csp3_cyclic += 1
            else:
                Csp3_acyclic += 1

        # Fractions stay 0 for molecules without carbon (avoids dividing by 0).
        CYC = 0
        ACYC = 0
        if C_total > 0:
            CYC = Csp3_cyclic/C_total
            ACYC = Csp3_acyclic/C_total

        # Calculate Q1 from the sum of squared atom degrees
        deltas = [x.GetDegree() for x in mol.GetAtoms()]
        M = sum(np.array(deltas)**2)
        N = mol.GetNumAtoms()
        Q1 = 3-2*N+M/2.0

        return (AR + NAR + CHIRAL + SPIRO + (SP3 + CYC - ACYC)/(1 + SP3))*Q1

    def mce18_passed(self):
        """
        Test if the molecule is 'interesting' according to MCE-18.

        Returns
        -------
        bool
            True when MCE-18 >= 45.
        """
        return self.mce18() >= 45

    def npscore(self):
        """
        Calculate the NPscore of the molecule.

        Returns
        -------
        float
            NPscore for the molecule, computed by ``npscorer.scoreMol``.
        """
        return npscorer.scoreMol(self.mol, self.fscore)

    def alarm_nmr_filter(self,detail=False):
        """
        ALARM NMR filter for the molecule.

        Parameters
        ----------
        detail : bool
            Return the full scopy result instead of only the verdict.

        Returns
        -------
        object
            With ``detail``: the result of ``fh_filter.Check_Alarm_NMR``.
            Without ``detail``: its 'Disposed' entry.
        """
        return self._apply_filter(fh_filter.Check_Alarm_NMR, detail)

    def bms_filter(self,detail=False):
        """
        BMS filter for the molecule.

        Parameters
        ----------
        detail : bool
            Return the full scopy result instead of only the verdict.

        Returns
        -------
        object
            With ``detail``: the result of ``fh_filter.Check_BMS``.
            Without ``detail``: its 'Disposed' entry.
        """
        return self._apply_filter(fh_filter.Check_BMS, detail)

    def chelator_filter(self, detail=False):
        """
        Chelator filter for the molecule.

        Parameters
        ----------
        detail : bool
            Return the full scopy result instead of only the verdict.

        Returns
        -------
        object
            With ``detail``: the result of ``fh_filter.Check_Chelating``.
            Without ``detail``: its 'Disposed' entry.
        """
        return self._apply_filter(fh_filter.Check_Chelating, detail)

    def calculate_all(self, descriptors = True,rules=True,scores = True,scores_passed = True,filters = True,detail = True):
        """
        Calculate the selected groups of metrics.

        Parameters
        ----------
        descriptors : bool
            Include 'logp', 'mw', 'tpsa', 'n_hba' and 'n_hbd'.
        rules : bool
            Include 'ro5', 'pfizer_rule_passed', 'gsk_rule_passed' and
            'goldentriangle_rule'.
        scores : bool
            Include 'qed', 'sascore', 'fsp3', 'mce18' and 'npscore'.
        scores_passed : bool
            Include 'qed_passed', 'sascore_passed', 'fsp3_passed' and
            'mce18_passed'.
        filters : bool
            Include 'pains_filter', 'alarm_nmr_filter', 'bms_filter' and
            'chelator_filter'.
        detail : bool
            Forwarded to every filter method.

        Returns
        -------
        dict
            Metric name -> value, for the selected groups only.
        """
        result = dict()
        if descriptors:
            # Descriptors were computed in the constructor; copy them as-is.
            result['logp'] = self.logp
            result['mw'] = self.mw
            result['tpsa'] = self.tpsa
            result['n_hba'] = self.n_hba
            result['n_hbd'] = self.n_hbd
        if rules:
            rule_funcs = (
                ('ro5', self.ro5),
                ('pfizer_rule_passed', self.pfizer_rule_passed),
                ('gsk_rule_passed', self.gsk_rule_passed),
                ('goldentriangle_rule', self.goldentriangle_rule_passed),
            )
            for name, func in rule_funcs:
                result[name] = func()
        if scores:
            score_funcs = (
                ('qed', self.qed),
                ('sascore', self.sascore),
                ('fsp3', self.fsp3),
                ('mce18', self.mce18),
                ('npscore', self.npscore),
            )
            for name, func in score_funcs:
                result[name] = func()
        if scores_passed:
            score_pass_funcs = (
                ('qed_passed', self.qed_passed),
                ('sascore_passed', self.sascore_passed),
                ('fsp3_passed', self.fsp3_passed),
                ('mce18_passed', self.mce18_passed),
            )
            for name, func in score_pass_funcs:
                result[name] = func()
        if filters:
            filter_funcs = (
                ('pains_filter', self.pains_filter),
                ('alarm_nmr_filter', self.alarm_nmr_filter),
                ('bms_filter', self.bms_filter),
                ('chelator_filter', self.chelator_filter),
            )
            for name, func in filter_funcs:
                result[name] = func(detail=detail)

        return result
422
+
423
+
requirements.txt CHANGED
@@ -9,4 +9,5 @@ stqdm
9
  pandas
10
  streamlit_ext
11
  streamlit_lottie
12
- requests
 
 
9
  pandas
10
  streamlit_ext
11
  streamlit_lottie
12
+ requests
13
+ scopy