# Spaces: Running
import functools
import logging

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
from pymatgen.core import Composition
# Composition featurizers, listed in the order in which they are applied.
# The order fixes the column layout of the generated feature table, so it
# must not be changed independently of whatever consumes those columns.
_composition_featurizers = [
    cf.ElementProperty.from_preset(preset_name="magpie"),
    cf.Stoichiometry(),
    cf.ValenceOrbital(props=["frac"]),
    cf.IonProperty(fast=True),
    cf.BandCenter(),
    cf.ElementFraction(),
]

# Single featurizer object that runs every entry above in sequence.
feature_calculators = MultipleFeaturizer(_composition_featurizers)
def generate_single(formula, ignore_errors=False):
    """
    Build the composition-based feature row for one chemical formula.

    The formula is placed in a one-row DataFrame, parsed into a
    ``composition`` column, featurized with ``feature_calculators`` and
    finally extended with an ``NComp`` column holding ``len(composition)``.

    Args:
        formula: Chemical formula string to featurize.
        ignore_errors: Forwarded unchanged to both ``featurize_dataframe``
            calls.

    Returns:
        A pandas DataFrame containing the ``formula`` and ``composition``
        columns followed by the generated feature columns and ``NComp``.
        Rows holding a missing value after the parsing step are dropped, so
        the frame may be empty.
    """
    frame = pd.DataFrame({"formula": [formula]})

    # Parse the formula string into a "composition" column and discard any
    # row for which parsing left a missing value.
    parser = StrToComposition()
    parsed = parser.featurize_dataframe(frame, "formula", ignore_errors=ignore_errors)
    parsed = parsed.dropna()

    featurized = feature_calculators.featurize_dataframe(
        parsed,
        col_id="composition",
        ignore_errors=ignore_errors,
    )
    featurized["NComp"] = featurized["composition"].apply(len)
    return featurized
def mlmdd_single(formula):
    """
    Compute atom-count statistics for a single chemical formula string.

    Args:
        formula: Chemical formula string, parsed with ``Composition``.

    Returns:
        A one-row pandas DataFrame with the columns

        * ``MostAtoms``       - ``comp.num_atoms`` of the parsed composition
        * ``MaxAtoms``        - largest per-element amount
        * ``MinAtoms``        - smallest per-element amount
        * ``MeanAtoms``       - mean per-element amount
        * ``ReductionFactor`` - factor returned by
          ``get_reduced_formula_and_factor``
        * ``Var``             - variance of the per-element amounts divided
          by ``MostAtoms``

    Raises:
        ValueError: If the formula yields a composition without any element,
            for which none of the statistics are defined.
    """
    comp = Composition(formula)
    redu = comp.get_reduced_formula_and_factor()[1]
    most = comp.num_atoms
    data = np.array(list(comp.as_dict().values()))

    # An element-free composition would otherwise fail further down with an
    # opaque "max() arg is an empty sequence" and a division by zero atoms.
    if data.size == 0:
        raise ValueError(f"Formula {formula!r} does not contain any elements.")

    max_val = np.max(data)
    min_val = np.min(data)
    mean_val = np.mean(data)
    var = np.var(data / most)
    return pd.DataFrame(
        [[most, max_val, min_val, mean_val, redu, var]],
        columns=["MostAtoms", "MaxAtoms", "MinAtoms", "MeanAtoms", "ReductionFactor", "Var"],
    )
def get_features_single(formula):
    """
    Assemble the full feature row for one chemical formula.

    The atom-count statistics from ``mlmdd_single`` are appended, column-wise,
    to the right of the composition features from ``generate_single``.

    Args:
        formula: Chemical formula string.

    Returns:
        A pandas DataFrame with the ``generate_single`` columns first and the
        ``mlmdd_single`` columns last.
    """
    atom_count_stats = mlmdd_single(formula)
    composition_features = generate_single(formula)
    return pd.concat([composition_features, atom_count_stats], axis=1)
_INVALID_FORMULA_ERROR = "Invalid chemical formula. Please check your input and try again."


@functools.lru_cache(maxsize=1)
def _load_artifacts():
    """Load the model, label encoder and layer-group mapping once per process.

    The result is cached so the files are read on the first successful call
    only; a failed load is not cached and is retried on the next call.

    Returns:
        Tuple ``(model, label_encoder, layer_group_mapping)``.
    """
    # NOTE(security): joblib.load unpickles arbitrary Python objects. These
    # paths must only ever point at trusted files shipped with the app.
    loaded_model = joblib.load("saved_model/lgbm_model.pkl")
    label_encoder = joblib.load("saved_model/label_encoder.pkl")
    layer_group_mapping = joblib.load("saved_model/layer_group_mapping.pkl")
    return loaded_model, label_encoder, layer_group_mapping


def predict_features(formula):
    """
    Predict the layer group of a material from its chemical formula.

    Args:
        formula: Chemical formula string entered by the user.

    Returns:
        On success, a dict with the keys ``"Formula"``,
        ``"Predicted Layer Group Number"``, ``"Predicted Layer Group Name"``
        and ``"Top 5 Predictions"``; the latter is a list of
        ``[number, name, probability]`` rows ordered from most to least
        probable, with the probability formatted as a percentage string.
        On failure, a dict with the single key ``"Error"`` describing the
        problem. This function does not raise; unexpected failures are logged
        and reported through the ``"Error"`` entry.
    """
    # Reject missing or blank input up front instead of letting it fail deep
    # inside the feature pipeline with an unrelated message.
    if formula is None or (isinstance(formula, str) and not formula.strip()):
        return {"Error": _INVALID_FORMULA_ERROR}

    try:
        try:
            comp = Composition(formula)
            comp.get_reduced_formula_and_factor()
        except Exception:
            return {"Error": _INVALID_FORMULA_ERROR}

        # A string without any element symbol can parse to a composition with
        # no entries; there is nothing to featurize in that case.
        if len(comp) == 0:
            return {"Error": _INVALID_FORMULA_ERROR}

        # Generate features for the input formula
        df = get_features_single(formula)
        if df.empty:
            return {"Error": "Unable to generate features. Please check your input and try again."}

        # The first two columns are the raw formula and the parsed
        # composition object; everything after them is model input.
        X_user = df.iloc[:, 2:].fillna(0)

        loaded_model, label_encoder, layer_group_mapping = _load_artifacts()

        # Predict probabilities for the single input row
        prediction_probs = loaded_model.predict_proba(X_user)[0]

        # Indices of the (up to) five highest probabilities, best first
        top_5_indices = np.argsort(prediction_probs)[-5:][::-1]
        top_5_numbers = label_encoder.inverse_transform(top_5_indices)
        top_5_names = [layer_group_mapping.get(num, "Unknown Layer Group") for num in top_5_numbers]
        top_5_probs = prediction_probs[top_5_indices]

        # Prepare top 5 results as list of lists
        top_5_results = [
            [num, name, f"{prob:.2%}"]
            for num, name, prob in zip(top_5_numbers, top_5_names, top_5_probs)
        ]

        # Final prediction: decode the predicted label, then map it to a name
        prediction = loaded_model.predict(X_user)
        decoded_prediction = label_encoder.inverse_transform(prediction)[0]
        layer_group_name = layer_group_mapping.get(decoded_prediction, "Unknown Layer Group")

        return {
            "Formula": formula,
            "Predicted Layer Group Number": decoded_prediction,
            "Predicted Layer Group Name": layer_group_name,
            "Top 5 Predictions": top_5_results,
        }
    except Exception as e:
        # Top-level boundary for the UI: keep the traceback in the logs and
        # hand the caller a readable message instead of raising.
        logging.getLogger(__name__).exception("Prediction failed for formula %r", formula)
        return {"Error": f"An unexpected error occurred: {str(e)}"}
# Gradio front end for the layer group predictor
def gradio_ui():
    """
    Build the Gradio Blocks application.

    The page consists of a heading, a formula text box with a "Predict"
    button, a read-only text box for the predicted layer group name and a
    read-only table for the top 5 predictions. Clicking the button runs
    ``predict_features`` on the entered formula and fills both outputs.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    table_headers = ["Layer Group Number", "Layer Group Name", "Probability"]

    def update_output(formula):
        # Translate the prediction dict into (text box value, table rows).
        outcome = predict_features(formula)
        if "Error" not in outcome:
            return (
                outcome.get("Predicted Layer Group Name", "Unknown"),
                outcome.get("Top 5 Predictions", []),
            )
        # On failure show the message in the text box and clear the table.
        return f"Error: {outcome['Error']}", []

    with gr.Blocks() as demo:
        gr.Markdown("""
# 🔬 2D Material Layer Group Predictor
Enter a chemical formula of 2D Material below to get the predicted layer group information along with the top 5 probable groups.
""")

        with gr.Row():
            formula_box = gr.Textbox(
                label="Enter Chemical Formula",
                placeholder="E.g., BrIPtZrS2",
            )
            run_button = gr.Button("Predict")

        with gr.Row():
            prediction_box = gr.Textbox(
                label="Final Prediction (Layer Group Name)",
                interactive=False,
            )

        with gr.Row():
            ranking_table = gr.Dataframe(headers=table_headers, interactive=False)

        run_button.click(
            fn=update_output,
            inputs=[formula_box],
            outputs=[prediction_box, ranking_table],
        )

    return demo
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    app = gradio_ui()
    app.launch()