# Au2Lap / app.py
# alan918727 - "Update app.py" (commit 603cd4e, verified)
import functools

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
from pymatgen.core import Composition
# Composition featurizers that are bundled into one MultipleFeaturizer and
# applied together to the 'composition' column in generate_single().
_COMPOSITION_FEATURIZERS = [
    cf.ElementProperty.from_preset(preset_name="magpie"),
    cf.Stoichiometry(),
    cf.ValenceOrbital(props=["frac"]),
    cf.IonProperty(fast=True),
    cf.BandCenter(),
    cf.ElementFraction(),
]
feature_calculators = MultipleFeaturizer(_COMPOSITION_FEATURIZERS)
def generate_single(formula, ignore_errors=False):
    """
    Build the composition-based feature row for one chemical formula string.

    The formula is wrapped in a one-row DataFrame, converted to a
    'composition' column with StrToComposition, rows containing NaN are
    dropped, and the module-level ``feature_calculators`` are then run on
    the 'composition' column. ``ignore_errors`` is forwarded to both
    featurization steps.

    Returns the resulting DataFrame, with an extra 'NComp' column holding
    ``len()`` of each composition object.
    """
    raw = pd.DataFrame({"formula": [formula]})

    # Step 1: parse the formula string into a composition object.
    to_composition = StrToComposition()
    parsed = to_composition.featurize_dataframe(
        raw, "formula", ignore_errors=ignore_errors
    )
    parsed = parsed.dropna()

    # Step 2: compute every bundled composition feature.
    featurized = feature_calculators.featurize_dataframe(
        parsed, col_id="composition", ignore_errors=ignore_errors
    )

    featurized["NComp"] = featurized["composition"].apply(len)
    return featurized
def mlmdd_single(formula):
    """
    Derive atom-count statistics for a single chemical formula string.

    Returns a one-row DataFrame with the columns:
      - MostAtoms: ``Composition.num_atoms`` of the formula
      - MaxAtoms / MinAtoms / MeanAtoms: max, min and mean of the
        per-element amounts
      - ReductionFactor: second item returned by
        ``get_reduced_formula_and_factor()``
      - Var: variance of the per-element amounts divided by ``num_atoms``
    """
    composition = Composition(formula)
    _, reduction_factor = composition.get_reduced_formula_and_factor()
    total_atoms = composition.num_atoms
    amounts = np.array(list(composition.as_dict().values()))

    # One single-element list per column -> a one-row frame, columns in this order.
    columns = {
        "MostAtoms": [total_atoms],
        "MaxAtoms": [max(amounts)],
        "MinAtoms": [min(amounts)],
        "MeanAtoms": [np.mean(amounts)],
        "ReductionFactor": [reduction_factor],
        "Var": [np.var(amounts / total_atoms)],
    }
    return pd.DataFrame(columns)
def get_features_single(formula):
    """
    Assemble the full feature row for a single chemical formula.

    Places the columns from generate_single() first and the atom-count
    statistics from mlmdd_single() after them, side by side.
    """
    # mlmdd_single runs first, exactly as before, so a bad formula fails
    # in the same place.
    count_stats = mlmdd_single(formula)
    composition_features = generate_single(formula)
    return pd.concat([composition_features, count_stats], axis="columns")
@functools.lru_cache(maxsize=None)
def _load_artifacts():
    """
    Load the model, label encoder and layer-group mapping once per process.

    The three files are read with joblib on the first call; later calls
    reuse the same objects instead of hitting the disk for every request.
    A failed load raises and is not cached, so the next call retries.

    NOTE: joblib.load unpickles its input, so these files must come from a
    trusted source.
    """
    model = joblib.load("saved_model/lgbm_model.pkl")
    label_encoder = joblib.load("saved_model/label_encoder.pkl")
    layer_group_mapping = joblib.load("saved_model/layer_group_mapping.pkl")
    return model, label_encoder, layer_group_mapping


def predict_features(formula):
    """
    Predict the layer group for a chemical formula.

    Args:
        formula: Chemical formula string entered by the user.

    Returns:
        On success, a dict with the keys "Formula",
        "Predicted Layer Group Number", "Predicted Layer Group Name" and
        "Top 5 Predictions" (a list of [number, name, "xx.xx%"] rows, most
        probable first). On any failure, a dict with the single key "Error"
        holding a human-readable message; this function does not raise.
    """
    invalid_formula = {
        "Error": "Invalid chemical formula. Please check your input and try again."
    }

    # Reject missing/blank input up front rather than letting it surface
    # later as an opaque "unexpected error".
    if formula is None or not str(formula).strip():
        return invalid_formula

    try:
        # Validate the formula before doing any expensive work.
        try:
            Composition(formula).get_reduced_formula_and_factor()
        except Exception:
            return invalid_formula

        # Generate features for the input formula
        df = get_features_single(formula)
        if df.empty:
            return {"Error": "Unable to generate features. Please check your input and try again."}

        # Skip the first two columns and zero-fill missing values so the
        # model receives no NaN.
        X_user = df.iloc[:, 2:].fillna(0)

        loaded_model, label_encoder, layer_group_mapping = _load_artifacts()

        # Predict probabilities and pick the five most probable columns,
        # highest probability first.
        prediction_probs = loaded_model.predict_proba(X_user)[0]
        top_5_indices = np.argsort(prediction_probs)[-5:][::-1]

        # predict_proba columns follow the estimator's classes_ order, so
        # translate column positions to encoded labels through classes_
        # when the estimator exposes it (falls back to the raw positions).
        model_classes = getattr(loaded_model, "classes_", None)
        if model_classes is None:
            top_5_encoded = top_5_indices
        else:
            top_5_encoded = np.asarray(model_classes)[top_5_indices]

        top_5_numbers = label_encoder.inverse_transform(top_5_encoded)
        top_5_names = [
            layer_group_mapping.get(num, "Unknown Layer Group")
            for num in top_5_numbers
        ]
        top_5_probs = prediction_probs[top_5_indices]

        # Prepare top 5 results as list of lists
        top_5_results = [
            [num, name, f"{prob:.2%}"]
            for num, name, prob in zip(top_5_numbers, top_5_names, top_5_probs)
        ]

        # Final prediction: decode the model output to the layer group
        # number, then map it to its name.
        prediction = loaded_model.predict(X_user)
        decoded_prediction = label_encoder.inverse_transform(prediction)[0]
        layer_group_name = layer_group_mapping.get(decoded_prediction, "Unknown Layer Group")

        return {
            "Formula": formula,
            "Predicted Layer Group Number": decoded_prediction,
            "Predicted Layer Group Name": layer_group_name,
            "Top 5 Predictions": top_5_results,
        }
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return {"Error": f"An unexpected error occurred: {e}"}
# Build the Gradio interface: formula input, final prediction, and top-5 table
def gradio_ui():
    """
    Assemble the Gradio Blocks interface for the predictor.

    Layout: a Markdown header, a row with the formula textbox and the
    "Predict" button, a read-only textbox for the final layer group name,
    and a read-only table for the top-5 predictions. Clicking the button
    runs predict_features() on the entered formula.

    Returns the gr.Blocks object; the caller is responsible for launching it.
    """
    intro_markdown = (
        "\n"
        "# 🔬 2D Material Layer Group Predictor\n"
        "Enter a chemical formula of 2D Material below to get the predicted layer group information along with the top 5 probable groups.\n"
    )
    table_headers = ["Layer Group Number", "Layer Group Name", "Probability"]

    def update_output(formula):
        # Returns (text for the final-prediction box, rows for the table).
        outcome = predict_features(formula)
        if "Error" not in outcome:
            best_name = outcome.get("Predicted Layer Group Name", "Unknown")
            ranking = outcome.get("Top 5 Predictions", [])
            return best_name, ranking
        return f"Error: {outcome['Error']}", []

    with gr.Blocks() as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            formula_box = gr.Textbox(
                label="Enter Chemical Formula",
                placeholder="E.g., BrIPtZrS2",
            )
            run_button = gr.Button("Predict")

        with gr.Row():
            name_box = gr.Textbox(
                label="Final Prediction (Layer Group Name)",
                interactive=False,
            )

        with gr.Row():
            ranking_table = gr.Dataframe(headers=table_headers, interactive=False)

        run_button.click(
            fn=update_output,
            inputs=[formula_box],
            outputs=[name_box, ranking_table],
        )

    return demo
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    demo = gradio_ui()
    demo.launch()