File size: 5,407 Bytes
dfb48ba
 
 
 
 
 
 
633ef66
dfb48ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633ef66
dfb48ba
 
 
603cd4e
15fdf16
603cd4e
 
15fdf16
 
 
 
dfb48ba
15fdf16
 
08d1954
633ef66
 
15fdf16
633ef66
 
 
15fdf16
633ef66
 
 
15fdf16
633ef66
 
 
15fdf16
633ef66
 
15fdf16
633ef66
 
 
08d1954
15fdf16
 
633ef66
 
 
 
 
15fdf16
633ef66
15fdf16
 
633ef66
15fdf16
 
633ef66
 
 
 
 
 
 
 
dfb48ba
15fdf16
633ef66
08d1954
633ef66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15fdf16
 
 
633ef66
 
 
 
dfb48ba
 
633ef66
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from functools import lru_cache

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
from pymatgen.core import Composition

# Define feature calculators
# Composition featurizers bundled into a single MultipleFeaturizer;
# generate_single() applies this bundle to the "composition" column.
# NOTE(review): the saved model presumably expects the feature columns in the
# order produced by this list -- confirm before adding, removing or reordering
# entries.
feature_calculators = MultipleFeaturizer([
    cf.ElementProperty.from_preset(preset_name="magpie"),
    cf.Stoichiometry(),
    cf.ValenceOrbital(props=['frac']),
    cf.IonProperty(fast=True),
    cf.BandCenter(),
    cf.ElementFraction(),
])

def generate_single(formula, ignore_errors=False):
    """
    Build a one-row feature table for a single chemical formula string.

    The formula is placed in a ``formula`` column and converted with
    ``StrToComposition``; rows containing missing values are then dropped.
    The module-level ``feature_calculators`` are applied to the
    ``composition`` column, and an ``NComp`` column holding ``len()`` of each
    composition is appended.  ``ignore_errors`` is forwarded unchanged to both
    featurization steps.

    Returns the resulting ``pandas.DataFrame``.
    """
    frame = pd.DataFrame({"formula": [formula]})

    # Step 1: formula string -> composition column, then discard rows with NaNs.
    to_composition = StrToComposition()
    frame = to_composition.featurize_dataframe(
        frame, "formula", ignore_errors=ignore_errors
    )
    frame = frame.dropna()

    # Step 2: run every configured composition featurizer.
    frame = feature_calculators.featurize_dataframe(
        frame, col_id='composition', ignore_errors=ignore_errors
    )

    # Step 3: record the length of each composition.
    frame["NComp"] = frame["composition"].apply(len)
    return frame

def mlmdd_single(formula):
    """
    Summarise the per-element amounts of a single chemical formula string.

    Returns a one-row ``pandas.DataFrame`` with the columns:

    * ``MostAtoms``       -- ``Composition.num_atoms`` of the formula
    * ``MaxAtoms``        -- largest per-element amount
    * ``MinAtoms``        -- smallest per-element amount
    * ``MeanAtoms``       -- mean of the per-element amounts
    * ``ReductionFactor`` -- second item returned by
      ``get_reduced_formula_and_factor()``
    * ``Var``             -- variance of the amounts divided by ``num_atoms``
    """
    composition = Composition(formula)
    _, reduction_factor = composition.get_reduced_formula_and_factor()
    total_atoms = composition.num_atoms

    # Per-element amounts as a flat array.
    amounts = np.array(list(composition.as_dict().values()))

    column_names = [
        "MostAtoms",
        "MaxAtoms",
        "MinAtoms",
        "MeanAtoms",
        "ReductionFactor",
        "Var",
    ]
    row = [
        total_atoms,
        max(amounts),
        min(amounts),
        np.mean(amounts),
        reduction_factor,
        np.var(amounts / total_atoms),
    ]
    return pd.DataFrame([row], columns=column_names)

def get_features_single(formula):
    """
    Assemble the full feature row for a single chemical formula string.

    The amount statistics from ``mlmdd_single`` are computed first, then the
    composition features from ``generate_single``; the two frames are joined
    column-wise with the composition features on the left.
    """
    amount_stats = mlmdd_single(formula)
    composition_features = generate_single(formula)
    return pd.concat([composition_features, amount_stats], axis=1)

_INVALID_FORMULA_ERROR = "Invalid chemical formula. Please check your input and try again."


@lru_cache(maxsize=1)
def _load_prediction_artifacts():
    """Load the model, label encoder and layer-group mapping once per process.

    The three files are read from ``saved_model/`` on the first call and the
    resulting tuple is cached, so later predictions do not hit the disk again.
    A failed load raises and is not cached, so the next call retries.

    Returns:
        Tuple ``(model, label_encoder, layer_group_mapping)``.
    """
    # NOTE: joblib.load unpickles arbitrary objects; these paths must only ever
    # point at trusted, locally shipped artifacts.
    loaded_model = joblib.load("saved_model/lgbm_model.pkl")
    label_encoder = joblib.load("saved_model/label_encoder.pkl")
    layer_group_mapping = joblib.load("saved_model/layer_group_mapping.pkl")
    return loaded_model, label_encoder, layer_group_mapping


def predict_features(formula):
    """Predict the layer group for a chemical formula.

    Args:
        formula: Chemical formula string, e.g. ``"BrIPtZrS2"``.

    Returns:
        On success, a dict with the keys ``"Formula"``,
        ``"Predicted Layer Group Number"``, ``"Predicted Layer Group Name"``
        and ``"Top 5 Predictions"`` (a list of ``[number, name, probability]``
        rows, most probable first, probability formatted as a percentage).
        On failure, a dict with the single key ``"Error"``.  This function
        never raises; every exception is converted into an ``"Error"`` entry.
    """
    # Reject missing/blank input up front so it gets the "invalid formula"
    # message instead of whatever the downstream libraries raise for it.
    if formula is None or (isinstance(formula, str) and not formula.strip()):
        return {"Error": _INVALID_FORMULA_ERROR}

    try:
        # Validate the formula before doing any expensive featurization.
        try:
            comp = Composition(formula)
            comp.get_reduced_formula_and_factor()
        except Exception:
            return {"Error": _INVALID_FORMULA_ERROR}

        # Generate features for the input formula
        df = get_features_single(formula)
        if df.empty:
            return {"Error": "Unable to generate features. Please check your input and try again."}

        # Skip the first two columns (the "formula" and "composition" columns
        # created in generate_single); the remainder is the model input.
        X_user = df.iloc[:, 2:].fillna(0)

        loaded_model, label_encoder, layer_group_mapping = _load_prediction_artifacts()

        # Predict probabilities
        prediction_probs = loaded_model.predict_proba(X_user)[0]

        # Get top 5 predictions (highest probability first)
        top_5_indices = np.argsort(prediction_probs)[-5:][::-1]

        # predict_proba columns follow the model's ``classes_`` order, so map
        # column indices to encoded labels through it rather than assuming the
        # classes are exactly 0..n-1.  Fall back to the raw indices when the
        # model does not expose ``classes_``.
        model_classes = getattr(loaded_model, "classes_", None)
        if model_classes is None:
            top_5_encoded = top_5_indices
        else:
            top_5_encoded = np.asarray(model_classes)[top_5_indices]

        top_5_numbers = label_encoder.inverse_transform(top_5_encoded)
        top_5_names = [layer_group_mapping.get(num, "Unknown Layer Group") for num in top_5_numbers]
        top_5_probs = prediction_probs[top_5_indices]

        # Prepare top 5 results as list of lists
        top_5_results = [
            [num, name, f"{prob:.2%}"]
            for num, name, prob in zip(top_5_numbers, top_5_names, top_5_probs)
        ]

        # Predict using the loaded model
        prediction = loaded_model.predict(X_user)

        # Decode Layer Group Number
        decoded_prediction = label_encoder.inverse_transform(prediction)[0]

        # Map to Layer Group Name
        layer_group_name = layer_group_mapping.get(decoded_prediction, "Unknown Layer Group")

        return {
            "Formula": formula,
            "Predicted Layer Group Number": decoded_prediction,
            "Predicted Layer Group Name": layer_group_name,
            "Top 5 Predictions": top_5_results
        }
    except Exception as e:
        # Top-level boundary for the UI: report the problem instead of crashing.
        return {"Error": f"An unexpected error occurred: {str(e)}"}


# Build the Gradio Blocks front end for the predictor
def gradio_ui():
    """
    Construct and return the Gradio ``Blocks`` app.

    The layout has three rows: a formula textbox with a "Predict" button, a
    read-only textbox for the predicted layer group name, and a read-only
    table for the top 5 predictions.  Clicking the button runs
    ``predict_features`` on the textbox content and fills both outputs.
    """

    def update_output(formula):
        """Translate a ``predict_features`` result into the two UI outputs."""
        outcome = predict_features(formula)
        if "Error" not in outcome:
            return (
                outcome.get("Predicted Layer Group Name", "Unknown"),
                outcome.get("Top 5 Predictions", []),
            )
        # Failure: show the message in the textbox and clear the table.
        return f"Error: {outcome['Error']}", []

    table_headers = ["Layer Group Number", "Layer Group Name", "Probability"]

    with gr.Blocks() as demo:
        gr.Markdown("""
        # 🔬 2D Material Layer Group Predictor
        Enter a chemical formula of 2D Material below to get the predicted layer group information along with the top 5 probable groups.
        """)

        # Row 1: input and trigger.
        with gr.Row():
            formula_box = gr.Textbox(label="Enter Chemical Formula", placeholder="E.g., BrIPtZrS2")
            run_button = gr.Button("Predict")

        # Row 2: headline prediction.
        with gr.Row():
            name_box = gr.Textbox(label="Final Prediction (Layer Group Name)", interactive=False)

        # Row 3: ranked alternatives.
        with gr.Row():
            ranking_table = gr.Dataframe(headers=table_headers, interactive=False)

        run_button.click(
            fn=update_output,
            inputs=[formula_box],
            outputs=[name_box, ranking_table],
        )

    return demo

# Script entry point: when this file is executed directly (not imported),
# build the Blocks UI and call launch() on it.
if __name__ == "__main__":
    gradio_ui().launch()