"""Gradio app that predicts the layer group of a 2D material from its formula.

Features are built from the chemical formula with matminer featurizers plus a
few stoichiometry statistics, then fed to a classifier loaded from
``saved_model/``.
"""

from functools import lru_cache

import gradio as gr
from pymatgen.core import Composition
import numpy as np
import pandas as pd
from matminer.featurizers.conversions import StrToComposition
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers import composition as cf
import joblib

# Locations of the persisted model artifacts (relative to the working dir).
MODEL_PATH = "saved_model/lgbm_model.pkl"
LABEL_ENCODER_PATH = "saved_model/label_encoder.pkl"
MAPPING_PATH = "saved_model/layer_group_mapping.pkl"

_INVALID_FORMULA_MESSAGE = (
    "Invalid chemical formula. Please check your input and try again."
)

# Define feature calculators
feature_calculators = MultipleFeaturizer([
    cf.ElementProperty.from_preset(preset_name="magpie"),
    cf.Stoichiometry(),
    cf.ValenceOrbital(props=['frac']),
    cf.IonProperty(fast=True),
    cf.BandCenter(),
    cf.ElementFraction(),
])


def generate_single(formula, ignore_errors=False):
    """Generate composition-based features for a single formula string.

    Args:
        formula: Chemical formula string.
        ignore_errors: Forwarded to both ``featurize_dataframe`` calls.

    Returns:
        A DataFrame holding the ``formula`` column, the ``composition`` column
        added by ``StrToComposition``, the columns produced by
        ``feature_calculators`` and an ``NComp`` column (``len`` of the
        composition). Rows containing NaN after the string-to-composition
        step are dropped, so the frame may be empty.
    """
    fake_df = pd.DataFrame({"formula": [formula]})
    fake_df = StrToComposition().featurize_dataframe(
        fake_df, "formula", ignore_errors=ignore_errors
    )
    fake_df = fake_df.dropna()
    fake_df = feature_calculators.featurize_dataframe(
        fake_df, col_id='composition', ignore_errors=ignore_errors
    )
    fake_df["NComp"] = fake_df["composition"].apply(len)
    return fake_df


def mlmdd_single(formula):
    """Compute stoichiometry statistics for a single formula string.

    Args:
        formula: Chemical formula string parsed with ``Composition``.

    Returns:
        A one-row DataFrame with the columns ``MostAtoms``
        (``Composition.num_atoms``), ``MaxAtoms``, ``MinAtoms`` and
        ``MeanAtoms`` (max/min/mean of the per-element amounts),
        ``ReductionFactor`` (second item of
        ``get_reduced_formula_and_factor()``) and ``Var`` (variance of the
        amounts divided by ``num_atoms``).
    """
    comp = Composition(formula)
    redu = comp.get_reduced_formula_and_factor()[1]
    most = comp.num_atoms
    data = np.array(list(comp.as_dict().values()))
    max_val = max(data)
    min_val = min(data)
    mean_val = np.mean(data)
    var = np.var(data / most)
    return pd.DataFrame(
        [[most, max_val, min_val, mean_val, redu, var]],
        columns=[
            "MostAtoms",
            "MaxAtoms",
            "MinAtoms",
            "MeanAtoms",
            "ReductionFactor",
            "Var",
        ],
    )


def get_features_single(formula):
    """Combine both feature sets for a single formula.

    Returns:
        The column-wise concatenation of ``generate_single(formula)`` and
        ``mlmdd_single(formula)``.
    """
    mlmd = mlmdd_single(formula)
    ext_mag = generate_single(formula)
    return pd.concat([ext_mag, mlmd], axis=1)


@lru_cache(maxsize=1)
def _load_artifacts():
    """Load the model, label encoder and layer-group mapping once.

    The result is cached so repeated predictions do not re-read the files.
    A failed load raises and is not cached, so a later call retries.

    Returns:
        Tuple ``(model, label_encoder, layer_group_mapping)``.
    """
    # NOTE(security): joblib.load unpickles its input and can execute
    # arbitrary code; only point these paths at trusted artifact files.
    model = joblib.load(MODEL_PATH)
    label_encoder = joblib.load(LABEL_ENCODER_PATH)
    layer_group_mapping = joblib.load(MAPPING_PATH)
    return model, label_encoder, layer_group_mapping


def predict_features(formula):
    """Predict the layer group for a chemical formula.

    Args:
        formula: Chemical formula string. Surrounding whitespace is ignored.

    Returns:
        On success, a dict with the keys ``"Formula"``,
        ``"Predicted Layer Group Number"``, ``"Predicted Layer Group Name"``
        and ``"Top 5 Predictions"`` (a list of
        ``[number, name, probability]`` rows, most probable first).
        On failure, a dict with the single key ``"Error"``. This function
        does not raise; every exception is converted into an error dict.
    """
    try:
        # Reject blank input up front so it is reported as an invalid
        # formula instead of failing later during feature generation.
        if isinstance(formula, str):
            formula = formula.strip()
        if formula is None or formula == "":
            return {"Error": _INVALID_FORMULA_MESSAGE}

        try:
            comp = Composition(formula)
            comp.get_reduced_formula_and_factor()
        except Exception:
            return {"Error": _INVALID_FORMULA_MESSAGE}

        # Generate features for the input formula
        df = get_features_single(formula)
        if df.empty:
            return {
                "Error": "Unable to generate features. "
                         "Please check your input and try again."
            }

        # The first two columns are "formula" and "composition"; everything
        # after them is fed to the model.
        X_user = df.iloc[:, 2:].fillna(0)

        loaded_model, label_encoder, layer_group_mapping = _load_artifacts()

        # Predict probabilities
        prediction_probs = loaded_model.predict_proba(X_user)[0]

        # Get top 5 predictions, most probable first
        top_5_indices = np.argsort(prediction_probs)[-5:][::-1]
        top_5_numbers = label_encoder.inverse_transform(top_5_indices)
        top_5_names = [
            layer_group_mapping.get(num, "Unknown Layer Group")
            for num in top_5_numbers
        ]
        top_5_probs = prediction_probs[top_5_indices]

        # Prepare top 5 results as list of lists
        top_5_results = [
            [num, name, f"{prob:.2%}"]
            for num, name, prob in zip(top_5_numbers, top_5_names, top_5_probs)
        ]

        # Predict using the loaded model and decode the layer group number
        prediction = loaded_model.predict(X_user)
        decoded_prediction = label_encoder.inverse_transform(prediction)[0]

        # Map to Layer Group Name
        layer_group_name = layer_group_mapping.get(
            decoded_prediction, "Unknown Layer Group"
        )

        return {
            "Formula": formula,
            "Predicted Layer Group Number": decoded_prediction,
            "Predicted Layer Group Name": layer_group_name,
            "Top 5 Predictions": top_5_results,
        }
    except Exception as e:
        return {"Error": f"An unexpected error occurred: {e}"}


def gradio_ui():
    """Build the Gradio Blocks interface.

    Returns:
        The ``gr.Blocks`` app: a formula textbox and a "Predict" button whose
        click fills a textbox with the predicted layer group name and a table
        with the top 5 predictions.
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            "# 🔬 2D Material Layer Group Predictor\n"
            "Enter a chemical formula of 2D Material below to get the "
            "predicted layer group information along with the top 5 "
            "probable groups.\n"
        )

        with gr.Row():
            formula_input = gr.Textbox(
                label="Enter Chemical Formula",
                placeholder="E.g., BrIPtZrS2",
            )
            predict_button = gr.Button("Predict")

        with gr.Row():
            final_prediction = gr.Textbox(
                label="Final Prediction (Layer Group Name)",
                interactive=False,
            )

        with gr.Row():
            top_5_table = gr.Dataframe(
                headers=[
                    "Layer Group Number",
                    "Layer Group Name",
                    "Probability",
                ],
                interactive=False,
            )

        def update_output(formula):
            """Return ``(text, table_rows)`` for the two output components."""
            result = predict_features(formula)
            if "Error" in result:
                return f"Error: {result['Error']}", []
            return (
                result.get("Predicted Layer Group Name", "Unknown"),
                result.get("Top 5 Predictions", []),
            )

        predict_button.click(
            fn=update_output,
            inputs=[formula_input],
            outputs=[final_prediction, top_5_table],
        )

    return demo


if __name__ == "__main__":
    gradio_ui().launch()