Spaces:
Running
Running
File size: 5,407 Bytes
dfb48ba 633ef66 dfb48ba 633ef66 dfb48ba 603cd4e 15fdf16 603cd4e 15fdf16 dfb48ba 15fdf16 08d1954 633ef66 15fdf16 633ef66 15fdf16 633ef66 15fdf16 633ef66 15fdf16 633ef66 15fdf16 633ef66 08d1954 15fdf16 633ef66 15fdf16 633ef66 15fdf16 633ef66 15fdf16 633ef66 dfb48ba 15fdf16 633ef66 08d1954 633ef66 15fdf16 633ef66 dfb48ba 633ef66 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from functools import lru_cache

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
from pymatgen.core import Composition
# Composition featurizers, bundled into a single MultipleFeaturizer so that
# one call computes every feature set listed here.
_composition_featurizers = [
    cf.ElementProperty.from_preset(preset_name="magpie"),
    cf.Stoichiometry(),
    cf.ValenceOrbital(props=["frac"]),
    cf.IonProperty(fast=True),
    cf.BandCenter(),
    cf.ElementFraction(),
]
feature_calculators = MultipleFeaturizer(_composition_featurizers)
def generate_single(formula, ignore_errors=False):
    """
    Build the composition-based feature row for one chemical formula string.

    The formula is placed in a one-row DataFrame, converted with
    ``StrToComposition`` and then passed through the module-level
    ``feature_calculators``. Rows holding missing values after the conversion
    step are dropped, so the returned frame can be empty. A final ``NComp``
    column stores ``len()`` of each composition.

    ``ignore_errors`` is forwarded unchanged to both featurization calls.
    """
    frame = pd.DataFrame({"formula": [formula]})

    # Step 1: turn the formula string into a composition object.
    to_composition = StrToComposition()
    frame = to_composition.featurize_dataframe(
        frame, "formula", ignore_errors=ignore_errors
    )
    frame = frame.dropna()

    # Step 2: compute every configured feature from the "composition" column.
    frame = feature_calculators.featurize_dataframe(
        frame, col_id="composition", ignore_errors=ignore_errors
    )

    frame["NComp"] = frame["composition"].apply(len)
    return frame
def mlmdd_single(formula):
    """
    Summarise the per-element amounts of one chemical formula.

    Returns a one-row DataFrame with the columns ``MostAtoms`` (the
    composition's ``num_atoms``), ``MaxAtoms`` / ``MinAtoms`` / ``MeanAtoms``
    (largest, smallest and mean per-element amount), ``ReductionFactor``
    (the factor returned together with the reduced formula) and ``Var``
    (variance of the per-element amounts divided by ``num_atoms``).
    """
    composition = Composition(formula)
    _, reduction_factor = composition.get_reduced_formula_and_factor()
    total_atoms = composition.num_atoms
    amounts = np.array(list(composition.as_dict().values()))

    column_names = [
        "MostAtoms",
        "MaxAtoms",
        "MinAtoms",
        "MeanAtoms",
        "ReductionFactor",
        "Var",
    ]
    row = [
        total_atoms,
        max(amounts),
        min(amounts),
        amounts.mean(),
        reduction_factor,
        (amounts / total_atoms).var(),
    ]
    return pd.DataFrame([row], columns=column_names)
def get_features_single(formula):
    """
    Assemble the complete feature row for one chemical formula.

    The atom-count summary from ``mlmdd_single`` is appended column-wise to
    the right of the composition features from ``generate_single``.
    """
    atom_stats = mlmdd_single(formula)
    composition_features = generate_single(formula)
    return pd.concat([composition_features, atom_stats], axis="columns")
@lru_cache(maxsize=1)
def _load_prediction_artifacts():
    """Load the classifier, label encoder and layer-group mapping once per process."""
    # NOTE(security): joblib.load unpickles these files, which can execute
    # arbitrary code; they must only ever come from a trusted source.
    model = joblib.load("saved_model/lgbm_model.pkl")
    label_encoder = joblib.load("saved_model/label_encoder.pkl")
    layer_group_mapping = joblib.load("saved_model/layer_group_mapping.pkl")
    return model, label_encoder, layer_group_mapping


def predict_features(formula):
    """
    Predict the layer group of a material from its chemical formula.

    Args:
        formula: Chemical formula string, e.g. ``"BrIPtZrS2"``.

    Returns:
        On success, a dict with the keys ``"Formula"``,
        ``"Predicted Layer Group Number"``, ``"Predicted Layer Group Name"``
        and ``"Top 5 Predictions"`` (rows of
        ``[number, name, probability formatted as a percentage]``, most
        probable first).
        On failure, a dict with the single key ``"Error"`` holding a
        user-facing message. Exceptions raised while predicting are caught
        and reported this way instead of propagating.
    """
    invalid_formula = {
        "Error": "Invalid chemical formula. Please check your input and try again."
    }
    try:
        # Reject missing or blank input up front so the user gets the
        # invalid-formula message rather than a low-level library error.
        if formula is None or (isinstance(formula, str) and not formula.strip()):
            return invalid_formula

        # Parsing is the validation step: anything the parser rejects is
        # reported as an invalid formula.
        try:
            Composition(formula).get_reduced_formula_and_factor()
        except Exception:
            return invalid_formula

        # Generate features for the input formula
        df = get_features_single(formula)
        if df.empty:
            return {"Error": "Unable to generate features. Please check your input and try again."}

        # The first two columns are skipped; the rest are the model inputs.
        X_user = df.iloc[:, 2:].fillna(0)

        # Artifacts are cached after the first successful load, so requests
        # do not re-read the pickles from disk each time.
        loaded_model, label_encoder, layer_group_mapping = _load_prediction_artifacts()

        # Predict probabilities
        prediction_probs = loaded_model.predict_proba(X_user)[0]

        # Get top 5 predictions (highest probability first)
        top_5_indices = np.argsort(prediction_probs)[-5:][::-1]

        # predict_proba columns are ordered like model.classes_, so translate
        # column positions into the encoded labels before decoding. Fall back
        # to the raw positions when the model exposes no classes_ attribute.
        model_classes = getattr(loaded_model, "classes_", None)
        if model_classes is None:
            top_5_encoded = top_5_indices
        else:
            top_5_encoded = np.asarray(model_classes)[top_5_indices]

        top_5_numbers = label_encoder.inverse_transform(top_5_encoded)
        top_5_names = [
            layer_group_mapping.get(num, "Unknown Layer Group")
            for num in top_5_numbers
        ]
        top_5_probs = prediction_probs[top_5_indices]

        # Prepare top 5 results as list of lists
        top_5_results = [
            [num, name, f"{prob:.2%}"]
            for num, name, prob in zip(top_5_numbers, top_5_names, top_5_probs)
        ]

        # Predict using the loaded model
        prediction = loaded_model.predict(X_user)

        # Decode Layer Group Number
        decoded_prediction = label_encoder.inverse_transform(prediction)[0]

        # Map to Layer Group Name
        layer_group_name = layer_group_mapping.get(decoded_prediction, "Unknown Layer Group")

        return {
            "Formula": formula,
            "Predicted Layer Group Number": decoded_prediction,
            "Predicted Layer Group Name": layer_group_name,
            "Top 5 Predictions": top_5_results,
        }
    except Exception as e:
        # UI boundary: report the failure to the caller instead of raising.
        return {"Error": f"An unexpected error occurred: {str(e)}"}
# Gradio front end for the predictor
def gradio_ui():
    """
    Build and return the Gradio Blocks app.

    The page has a formula textbox with a "Predict" button, a read-only
    textbox for the predicted layer group name, and a read-only table for
    the top 5 predictions. Clicking the button runs ``predict_features`` on
    the entered formula and fills both outputs.
    """
    intro_markdown = (
        "\n"
        "# 🔬 2D Material Layer Group Predictor\n"
        "Enter a chemical formula of 2D Material below to get the predicted layer group information along with the top 5 probable groups.\n"
    )
    table_headers = ["Layer Group Number", "Layer Group Name", "Probability"]

    with gr.Blocks() as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            formula_input = gr.Textbox(
                label="Enter Chemical Formula",
                placeholder="E.g., BrIPtZrS2",
            )
            predict_button = gr.Button("Predict")

        with gr.Row():
            final_prediction = gr.Textbox(
                label="Final Prediction (Layer Group Name)",
                interactive=False,
            )

        with gr.Row():
            top_5_table = gr.Dataframe(headers=table_headers, interactive=False)

        def update_output(formula):
            # Translate the prediction dict into (textbox value, table rows).
            outcome = predict_features(formula)
            if "Error" not in outcome:
                group_name = outcome.get("Predicted Layer Group Name", "Unknown")
                table_rows = outcome.get("Top 5 Predictions", [])
                return group_name, table_rows
            return f"Error: {outcome['Error']}", []

        predict_button.click(
            fn=update_output,
            inputs=[formula_input],
            outputs=[final_prediction, top_5_table],
        )

    return demo
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    app = gradio_ui()
    app.launch()
|