test comparison with property
Browse files- all_data.pkl +3 -0
- app.py +55 -1
all_data.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4e1047789171080682f14601ea24aec533a15279591c85e996c4ca0a01b5236
|
| 3 |
+
size 91089
|
app.py
CHANGED
|
@@ -24,7 +24,8 @@ from rdkit.Chem import AllChem
|
|
| 24 |
from rdkit.Chem.Draw import rdMolDraw2D
|
| 25 |
import pandas as pd
|
| 26 |
from st_keyup import st_keyup
|
| 27 |
-
|
|
|
|
| 28 |
st.set_page_config(layout="wide", page_title="VaultChem")
|
| 29 |
|
| 30 |
|
|
@@ -415,6 +416,7 @@ def clear_session_state():
|
|
| 415 |
# Define global variables outside main function scope.
|
| 416 |
|
| 417 |
task_options = ["0", "1", "2", "3", "4", "5"]
|
|
|
|
| 418 |
task_mapping = {
|
| 419 |
"0": "HLM",
|
| 420 |
"1": "MDR-1-MDCK-ER",
|
|
@@ -423,6 +425,18 @@ task_mapping = {
|
|
| 423 |
"4": "Protein bind. rat",
|
| 424 |
"5": "RLM",
|
| 425 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
unit_mapping = {
|
| 427 |
"0": "(mL/min/kg)",
|
| 428 |
"1": " ",
|
|
@@ -431,6 +445,10 @@ unit_mapping = {
|
|
| 431 |
"4": " (%)",
|
| 432 |
"5": "(mL/min/kg)",
|
| 433 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
task_options = list(task_mapping.values())
|
| 435 |
|
| 436 |
# Create the dropdown menu
|
|
@@ -617,9 +635,45 @@ if __name__ == "__main__":
|
|
| 617 |
f"The Molecule {st.session_state['input_molecule']} has a {task_label} value of {value} {unit}"
|
| 618 |
)
|
| 619 |
st.toast("Session successfully completed!!!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
else:
|
| 621 |
st.warning("Check if FHE computation has been done.")
|
| 622 |
|
|
|
|
|
|
|
|
|
|
| 623 |
with st.container():
|
| 624 |
st.subheader(f"Step 6 : Reset to predict a new molecule")
|
| 625 |
reset_button = st.button("Reset app", on_click=clear_session_state)
|
|
|
|
| 24 |
from rdkit.Chem.Draw import rdMolDraw2D
|
| 25 |
import pandas as pd
|
| 26 |
from st_keyup import st_keyup
|
| 27 |
+
import pickle
|
| 28 |
+
import numpy as np
|
| 29 |
st.set_page_config(layout="wide", page_title="VaultChem")
|
| 30 |
|
| 31 |
|
|
|
|
| 416 |
# Define global variables outside main function scope.
|
| 417 |
|
| 418 |
task_options = ["0", "1", "2", "3", "4", "5"]
|
| 419 |
+
|
| 420 |
task_mapping = {
|
| 421 |
"0": "HLM",
|
| 422 |
"1": "MDR-1-MDCK-ER",
|
|
|
|
| 425 |
"4": "Protein bind. rat",
|
| 426 |
"5": "RLM",
|
| 427 |
}
|
| 428 |
+
|
| 429 |
+
task_mapping_2 = {
|
| 430 |
+
"0": "LOG HLM_CLint (mL/min/kg)",
|
| 431 |
+
"1":" LOG MDR1-MDCK ER (B-A/A-B)",
|
| 432 |
+
"2": "LOG SOLUBILITY PH 6.8 (ug/mL)",
|
| 433 |
+
"3": "LOG PLASMA PROTEIN BINDING (HUMAN) (% unbound)",
|
| 434 |
+
"4": "LOG PLASMA PROTEIN BINDING (RAT) (% unbound)",
|
| 435 |
+
"5": "LOG RLM_CLint (mL/min/kg)"
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
|
| 440 |
unit_mapping = {
|
| 441 |
"0": "(mL/min/kg)",
|
| 442 |
"1": " ",
|
|
|
|
| 445 |
"4": " (%)",
|
| 446 |
"5": "(mL/min/kg)",
|
| 447 |
}
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
|
| 452 |
task_options = list(task_mapping.values())
|
| 453 |
|
| 454 |
# Create the dropdown menu
|
|
|
|
| 635 |
f"The Molecule {st.session_state['input_molecule']} has a {task_label} value of {value} {unit}"
|
| 636 |
)
|
| 637 |
st.toast("Session successfully completed!!!")
|
| 638 |
+
|
| 639 |
+
|
| 640 |
+
st.markdown("Is this a large, average or small value for this property? 🤔 Find out by comparing with the property distribution of the training dataset")
|
| 641 |
+
# Load the training-dataset reference data from all_data.pkl.
|
| 642 |
+
with open("all_data.pkl", "rb") as f:
|
| 643 |
+
all_data = pickle.load(f)
|
| 644 |
+
import plotly.graph_objects as go
|
| 645 |
+
|
| 646 |
+
# Select the reference values for the chosen task: task_mapping_2 maps the task id to its key in all_data.
|
| 647 |
+
|
| 648 |
+
task_label_2 = task_mapping_2[st.session_state["task"]]
|
| 649 |
+
data = all_data[task_label_2]
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
# Create a histogram
|
| 653 |
+
fig = go.Figure(go.Histogram(x=data, nbinsx=20, marker_color='blue', opacity=0.5))
|
| 654 |
+
|
| 655 |
+
# Draw a vertical marker at the predicted value; its height is derived from the tallest histogram bar so the line spans the plot.
|
| 656 |
+
# The height comes from a 20-bin NumPy histogram of the data; Plotly may bin differently (nbinsx is a maximum), so treat it as an estimate.
|
| 657 |
+
max_y_value = np.max(np.histogram(data, bins=20)[0]) # Calculate the max height of the histogram bars
|
| 658 |
+
|
| 659 |
+
fig.add_trace(go.Scatter(x=[value, value], y=[0, max_y_value * 1.1], mode="lines", name="Threshold", line=dict(color="red", dash="dash")))
|
| 660 |
+
|
| 661 |
+
# Set the chart title, axis titles, and the gap between bars.
|
| 662 |
+
fig.update_layout(
|
| 663 |
+
title="Comparison of the molecule's value with the distribution of the ADME dataset",
|
| 664 |
+
xaxis_title=task_label_2,
|
| 665 |
+
yaxis_title="Count",
|
| 666 |
+
bargap=0.2, # Adjust the gap between bars
|
| 667 |
+
)
|
| 668 |
+
|
| 669 |
+
# Display the figure in the Streamlit app
|
| 670 |
+
st.plotly_chart(fig)
|
| 671 |
else:
|
| 672 |
st.warning("Check if FHE computation has been done.")
|
| 673 |
|
| 674 |
+
|
| 675 |
+
|
| 676 |
+
|
| 677 |
with st.container():
|
| 678 |
st.subheader(f"Step 6 : Reset to predict a new molecule")
|
| 679 |
reset_button = st.button("Reset app", on_click=clear_session_state)
|