# prompt: import pandas and basic machine learning models for regression

import os
import random
from itertools import combinations

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from scipy.special import comb
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import KFold, train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from tabpfn import TabPFNRegressor

from inference import TabPFNEnsemblePredictor  # local module: TabPFN ensemble wrapper


class EagleBlendPredictor:
    def __init__(self, model_sources='./Models'):
        """
        model_sources: str
            Path to the directory containing the saved model and transformer
            artifacts ('linear_model_poly_target_1.joblib', etc.) listed in
            self.saved_files_map.
        """
        self.home = model_sources
        self.saved_files_map = {
            1: {
                "model": 'linear_model_poly_target_1.joblib',
                "transform": 'poly1_features.joblib',
            },
            2: {
                "model": 'linear_model_poly_target_2.joblib',
                "transform": 'poly2_features.joblib',
            },
            5: {
                "model": 'tabpfn_model_target_5.joblib',
                "transform": 'poly5_features.joblib',
            },
            6: {
                "model": 'linear_model_poly_target_6.joblib',
                "transform": 'poly6_features.joblib',
            },
            7: {
                "model": 'tabpfn_model_target_7.joblib',
                # For Property 7 the transformation is mixture-feature generation,
                # not a saved object like PolynomialFeatures: apply
                # generate_mixture_features at prediction time.
                "transform_function": "generate_mixture_features",
            },
            8: {
                # For Property 8 the initial prediction comes from the joint
                # TabPFN model (properties 3/4/8/9); the correction is the
                # piecewise function defined by these parameters and threshold.
                "params": 'piecewise_params_prop8.joblib',
                "threshold": 'piecewise_threshold_prop8.joblib',
                "correction_function": "piecewise_model",
            },
            10: {
                "model": 'linear_model_poly_target_10.joblib',
                "transform": 'poly10_features.joblib',
            },
        }
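
        # Expected contents of the model directory, per the map above (paths
        # relative to model_sources, './Models' by default):
        #   linear_model_poly_target_{1,2,6,10}.joblib + poly{1,2,6,10}_features.joblib
        #   tabpfn_model_target_{5,7}.joblib, poly5_features.joblib
        #   piecewise_params_prop8.joblib, piecewise_threshold_prop8.joblib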


        # Load models and transformers.
        self.model_1 = joblib.load(os.path.join(self.home, self.saved_files_map[1]["model"]))
        self.poly_1 = joblib.load(os.path.join(self.home, self.saved_files_map[1]["transform"]))

        self.model_2 = joblib.load(os.path.join(self.home, self.saved_files_map[2]["model"]))
        self.poly_2 = joblib.load(os.path.join(self.home, self.saved_files_map[2]["transform"]))

        self.model_5 = joblib.load(os.path.join(self.home, self.saved_files_map[5]["model"]))
        self.poly_5 = joblib.load(os.path.join(self.home, self.saved_files_map[5]["transform"]))

        self.model_6 = joblib.load(os.path.join(self.home, self.saved_files_map[6]["model"]))
        self.poly_6 = joblib.load(os.path.join(self.home, self.saved_files_map[6]["transform"]))

        # No saved transform for model_7; generate_mixture_features is applied
        # at prediction time instead.
        self.model_7 = joblib.load(os.path.join(self.home, self.saved_files_map[7]["model"]))

        # Piecewise correction for Property 8 (applied via piecewise_model).
        self.piecewise_params_8 = joblib.load(os.path.join(self.home, self.saved_files_map[8]["params"]))
        self.piecewise_threshold_8 = joblib.load(os.path.join(self.home, self.saved_files_map[8]["threshold"]))

        self.model_10 = joblib.load(os.path.join(self.home, self.saved_files_map[10]["model"]))
        self.poly_10 = joblib.load(os.path.join(self.home, self.saved_files_map[10]["transform"]))

        # Joint TabPFN ensemble covering BlendProperty3, 4, 8, and 9.
        self.model_3489 = TabPFNEnsemblePredictor(model_dir=self.home)


    def piecewise_model(self, x, boundaries=np.linspace(-0.2, 0.2, 10 + 1)[1:-1]):
        """
        Piecewise-linear correction for BlendProperty8.

        x: a single float value.
        boundaries: 9 values that divide x into 10 regions.
        The 20 fitted parameters [A1, B1, A2, B2, ..., A10, B10] are taken
        from self.piecewise_params_8.
        """
        params = self.piecewise_params_8
        # Unpack parameters into 10 (A, B) segments.
        segments = [(params[i], params[i + 1]) for i in range(0, 20, 2)]

        # Return A*x + B for the first region whose upper boundary exceeds x.
        for i, bound in enumerate(boundaries):
            if x < bound:
                A, B = segments[i]
                return A * x + B
        # x is beyond all boundaries: use the last segment.
        A, B = segments[-1]
        return A * x + B
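
    # A minimal sketch of how the correction behaves, with dummy parameters
    # (the fitted ones come from piecewise_params_prop8.joblib; these values
    # are illustrative only):
    #
    #   params = [1.0, 0.0] * 10                       # every segment is y = x
    #   boundaries = np.linspace(-0.2, 0.2, 11)[1:-1]  # 9 interior cut points
    #   # x = -0.05 falls in the first region whose boundary exceeds it, so
    #   # that segment's (A, B) pair gives the corrected value A*x + B.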

    def predict_BlendProperty1(self, data, full=True):
        # full=True: raw blend data that still needs column selection via
        # _transform1; full=False: data is already the 10 selected columns.
        if full:
            features = self._transform1(data)
            features = self.poly_1.transform(features)
        else:
            features = self.poly_1.transform(data)
        res_df = self.model_1.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty1'])


    def predict_BlendProperty2(self, data, full=True):
        if full:
            features = self._transform2(data)
            features = self.poly_2.transform(features)
        else:
            features = self.poly_2.transform(data)
        res_df = self.model_2.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty2'])


    def predict_BlendProperty3489(self, df):
        _, result_df = self.model_3489.custom_predict(df)
        # Explicit copy so the corrections below do not modify result_df.
        ans_df = result_df[['BlendProperty3', 'BlendProperty4', 'BlendProperty8', 'BlendProperty9']].copy()

        # Near-zero predictions are corrected: BlendProperty8 goes through the
        # fitted piecewise model, and small BlendProperty9 values are zeroed.
        small_8 = ans_df['BlendProperty8'].abs() < 0.2
        ans_df.loc[small_8, 'BlendProperty8'] = ans_df.loc[small_8, 'BlendProperty8'].apply(self.piecewise_model)
        ans_df.loc[ans_df['BlendProperty9'].abs() < 0.1, 'BlendProperty9'] = 0

        return ans_df

    def predict_BlendProperty5(self, data, full=True):
        if full:
            features = self._transform5(data)
            features = self.poly_5.transform(features)
        else:
            features = self.poly_5.transform(data)
        res_df = self.model_5.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty5'])


    def predict_BlendProperty6(self, data, full=True):
        if full:
            features = self._transform6(data)
            features = self.poly_6.transform(features)
        else:
            features = self.poly_6.transform(data)
        res_df = self.model_6.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty6'])


    def predict_BlendProperty7(self, data, full=True) -> pd.DataFrame:
        if full:
            features = self._transform7(data)
        else:
            raise ValueError("BlendProperty7 prediction requires full data.")
        res_df = self.model_7.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty7'])


    def predict_BlendProperty10(self, data, full=False) -> pd.DataFrame:
        if full:
            features = self._transform10(data)
            features = self.poly_10.transform(features)
        else:
            features = self.poly_10.transform(data)
        res_df = self.model_10.predict(features)
        return pd.DataFrame(res_df, columns=['BlendProperty10'])


    def predict_all(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Generates predictions for all blend properties using the individual prediction methods.

        Args:
            df: Input DataFrame containing the features.

        Returns:
            DataFrame with predicted blend properties from 'BlendProperty1' to 'BlendProperty10'.
        """
        predictions_list = []

        # Properties with dedicated per-target models.
        predictions_list.append(self.predict_BlendProperty1(df, full=True))
        predictions_list.append(self.predict_BlendProperty2(df, full=True))

        # BlendProperty3, 4, 8, and 9 come jointly from the TabPFN ensemble.
        predictions_3489_df = self.predict_BlendProperty3489(df)
        for col in ['BlendProperty3', 'BlendProperty4', 'BlendProperty8', 'BlendProperty9']:
            predictions_list.append(predictions_3489_df[[col]])

        predictions_list.append(self.predict_BlendProperty5(df, full=True))
        predictions_list.append(self.predict_BlendProperty6(df, full=True))
        predictions_list.append(self.predict_BlendProperty7(df, full=True))
        predictions_list.append(self.predict_BlendProperty10(df, full=True))

        # Concatenate the single-column DataFrames and enforce column order.
        predictions_df = pd.concat(predictions_list, axis=1)
        ordered_cols = [f'BlendProperty{i}' for i in range(1, 11)]
        predictions_df = predictions_df.reindex(columns=ordered_cols)

        return predictions_df

    # Feature-selection transforms: each picks the 5 fraction columns plus the
    # 5 per-component columns for its target property.
    def _transform1(self, data):
        """
        Transforms input data (DataFrame or NumPy array) to features for BlendProperty1 prediction.

        If input is a DataFrame, selects 'ComponentX_fraction' (X=1-5) and 'ComponentX_Property1' (X=1-5).
        If input is a NumPy array, assumes the columns are already in the correct order:
        Component1-5_fraction, Component1-5_Property1, Component1-5_Property2, ..., Component1-5_Property10
        and selects the relevant columns for Property1.
        Args:
            data: pandas DataFrame or numpy array.

        Returns:
            DataFrame or numpy array of the selected feature columns.
        """
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]
        property_cols = [f'Component{i+1}_Property1' for i in range(5)]
        required_cols = fraction_cols + property_cols

        if isinstance(data, pd.DataFrame):
            # Select the required columns from the DataFrame
            # Ensure columns exist to avoid KeyError
            try:
                features = data[required_cols]
            except KeyError as e:
                missing_col = str(e).split("'")[1]
                raise ValueError(f"Input DataFrame is missing required column: {missing_col}") from e

        elif isinstance(data, np.ndarray):
            # Assume the documented column order and take the 5 fractions (0-4)
            # plus the 5 Property1 columns (5-9).
            if data.shape[1] < 10:
                raise ValueError("Input NumPy array must have at least 10 columns for this transformation.")
            features = data[:, :10]

        else:
            raise TypeError("Input data must be a pandas DataFrame or a numpy array.")

        # PolynomialFeatures.transform accepts either a DataFrame or an array,
        # so return the selected features as-is.
        return features
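
    # Example of the assumed flat NumPy layout (one row, 55 columns):
    #   [frac_1..frac_5,
    #    Comp1_Prop1..Comp5_Prop1, Comp1_Prop2..Comp5_Prop2, ...,
    #    Comp1_Prop10..Comp5_Prop10]
    # so _transform1 keeps columns 0-9, _transform2 keeps 0-4 and 10-14,
    # _transform5 keeps 0-4 and 25-29, and _transform10 keeps 0-4 and 50-54.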

    def _transform2(self, data):
        """
        Transforms input data (DataFrame or NumPy array) to features for BlendProperty2 prediction.
        """
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]
        property_cols = [f'Component{i+1}_Property2' for i in range(5)]
        required_cols = fraction_cols + property_cols

        if isinstance(data, pd.DataFrame):
            try:
                features = data[required_cols]
            except KeyError as e:
                missing_col = str(e).split("'")[1]
                raise ValueError(f"Input DataFrame is missing required column: {missing_col}") from e

        elif isinstance(data, np.ndarray):
            # Assume the documented column order and take the 5 fractions (0-4)
            # plus the 5 Property2 columns (10-14).
            if data.shape[1] < 15:
                raise ValueError("Input NumPy array must have at least 15 columns for this transformation.")
            features = np.concatenate([data[:, :5], data[:, 10:15]], axis=1)

        else:
            raise TypeError("Input data must be a pandas DataFrame or a numpy array.")

        return features.values if isinstance(features, pd.DataFrame) else features

    # Properties 3 and 4 are predicted jointly by the TabPFN ensemble
    # (see predict_BlendProperty3489), so no standalone transform is needed.
    def _transform3(self, data): return None

    def _transform4(self, data): return None

    def _transform5(self, data):
        """
        Transforms input data (DataFrame or NumPy array) to features for BlendProperty5 prediction.
        Args:
            data: pandas DataFrame or numpy array.

        Returns:
            DataFrame or numpy array of the selected feature columns.
        """
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]
        property_cols = [f'Component{i+1}_Property5' for i in range(5)]
        required_cols = fraction_cols + property_cols

        if isinstance(data, pd.DataFrame):
            try:
                features = data[required_cols]
            except KeyError as e:
                missing_col = str(e).split("'")[1]
                raise ValueError(f"Input DataFrame is missing required column: {missing_col}") from e

        elif isinstance(data, np.ndarray):
            # Assume the documented column order and take the 5 fractions (0-4)
            # plus the 5 Property5 columns (25-29).
            if data.shape[1] < 30:
                raise ValueError("Input NumPy array must have at least 30 columns for this transformation.")
            features = np.concatenate([data[:, :5], data[:, 25:30]], axis=1)

        else:
            raise TypeError("Input data must be a pandas DataFrame or a numpy array.")

        return features


    def _transform6(self, data):
        """
        Transforms input data (DataFrame or NumPy array) to features for BlendProperty6 prediction.

        Args:
            data: pandas DataFrame or numpy array.

        Returns:
            DataFrame or numpy array of the selected feature columns.
        """
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]
        property_cols = [f'Component{i+1}_Property6' for i in range(5)]
        required_cols = fraction_cols + property_cols

        if isinstance(data, pd.DataFrame):
            try:
                features = data[required_cols]
            except KeyError as e:
                missing_col = str(e).split("'")[1]
                raise ValueError(f"Input DataFrame is missing required column: {missing_col}") from e

        elif isinstance(data, np.ndarray):
            # Assume the documented column order and take the 5 fractions (0-4)
            # plus the 5 Property6 columns (30-34).
            if data.shape[1] < 35:
                raise ValueError("Input NumPy array must have at least 35 columns for this transformation.")
            features = np.concatenate([data[:, :5], data[:, 30:35]], axis=1)

        else:
            raise TypeError("Input data must be a pandas DataFrame or a numpy array.")

        return features



    def _transform7(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transformation for BlendProperty7 prediction: mixture features built
        from the fractions and Property7 columns, concatenated with the raw
        ComponentX_PropertyY columns for every other property.

        Args:
            df: Input DataFrame containing the features.

        Returns:
            DataFrame with generated features for BlendProperty7 prediction.
        """
        tn = 7   # target property number
        fn = tn  # feature property number (same as the target here)

        property_tn = [f'Component{i+1}_Property{fn}' for i in range(5)]
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]

        # Generate mixture features
        df_prop7 = df[fraction_cols + property_tn].reset_index(drop=True) # Reset index here
        # Call the class's generate_mixture_features method
        mixture_features = self.generate_mixture_features(df_prop7)

        # Columns to concatenate: all ComponentX_PropertyY where Y != 7.
        other_property_cols = [f"Component{i}_Property{j}" for j in range(1, 11) for i in range(1, 6) if j != 7]

        # Select these columns from the input DataFrame
        try:
            # Use .loc to preserve the original index when selecting columns, then reset index
            other_features_df = df.loc[:, other_property_cols].reset_index(drop=True) # Reset index here
        except KeyError as e:
            missing_col = str(e).split("'")[1]
            raise ValueError(f"Input DataFrame for _transform7 is missing required column: {missing_col}") from e


        # Concatenate along columns (axis=1). Indices should now be aligned after resetting.
        combined_features = pd.concat([mixture_features, other_features_df], axis=1)

        return combined_features
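
    # _transform7's output combines the mixture features (built from the 5
    # fractions and 5 Property7 columns) with the 45 remaining
    # ComponentX_PropertyY columns (9 other properties x 5 components).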

    # Properties 8 and 9 are likewise covered by the joint TabPFN ensemble,
    # with their corrections applied in predict_BlendProperty3489.
    def _transform8(self, row): return None
    def _transform9(self, row): return None

    def _transform10(self, data):
        """
        Transforms input data (DataFrame or NumPy array) to features for BlendProperty10 prediction.

        If input is a DataFrame, selects 'ComponentX_fraction' (X=1-5) and 'ComponentX_Property10' (X=1-5).
        If input is a NumPy array, assumes the columns are already in the correct order:
        Component1-5_fraction, Component1-5_Property1, Component1-5_Property2, ..., Component1-5_Property10
        and selects the relevant columns for Property10.

        Args:
            data: pandas DataFrame or numpy array.

        Returns:
            DataFrame or numpy array of the selected feature columns.
        """
        fraction_cols = [f'Component{i+1}_fraction' for i in range(5)]
        property_cols = [f'Component{i+1}_Property10' for i in range(5)]
        required_cols = fraction_cols + property_cols

        if isinstance(data, pd.DataFrame):
            try:
                features = data[required_cols]
            except KeyError as e:
                missing_col = str(e).split("'")[1]
                raise ValueError(f"Input DataFrame is missing required column: {missing_col}") from e

        elif isinstance(data, np.ndarray):
            # Assume the documented column order and take the 5 fractions (0-4)
            # plus the 5 Property10 columns (50-54).
            if data.shape[1] < 55:
                raise ValueError("Input NumPy array must have at least 55 columns for this transformation.")
            features = np.concatenate([data[:, :5], data[:, 50:55]], axis=1)

        else:
            raise TypeError("Input data must be a pandas DataFrame or a numpy array.")

        return features



    def generate_mixture_features(self, data):
        """
        Generate symmetric and weighted nonlinear interactions between fuel
        weights and properties. The input must hold the 5 weights in the first
        5 columns/elements and the 5 properties in the next 5.

        :param data: np.ndarray, pd.DataFrame, or list of shape (n_samples, 10) or (10,)
        :return: pd.DataFrame with generated features.
        """
        # Convert input to numpy array and handle single row/list input
        if isinstance(data, pd.DataFrame):
            data_array = data.values
        elif isinstance(data, list):
            data_array = np.array(data)
        elif isinstance(data, np.ndarray):
            data_array = data
        else:
            raise TypeError("Input data must be a pandas DataFrame, numpy array, or list.")

        # Reshape single row/list input to 2D array
        if data_array.ndim == 1:
            data_array = data_array.reshape(1, -1)

        # Ensure the input has 10 columns (5 weights + 5 properties)
        if data_array.shape[1] != 10:
            raise ValueError("Input data must have 10 columns/elements (5 weights and 5 properties).")

        # Separate weights and properties
        W = data_array[:, :5]
        P = data_array[:, 5:]


        n_samples, n_fuels = W.shape
        features = {}

        # Original weights and properties
        for i in range(n_fuels):
            features[f'w{i+1}'] = W[:, i]
            features[f'p{i+1}'] = P[:, i]
            features[f'w{i+1}_p{i+1}'] = W[:, i] * P[:, i]  # weighted property

        # --- 1. Weighted sum of properties ---
        features['weighted_sum'] = np.sum(W * P, axis=1)

        # --- 2. Weighted square of properties ---
        features['weighted_sum_sq'] = np.sum(W * P**2, axis=1)

        # --- 3. Weighted tanh of properties ---
        features['weighted_tanh'] = np.sum(W * np.tanh(P), axis=1)

        # --- 4. Weighted exponential (clip P to avoid overflow) ---
        safe_exp = np.exp(np.clip(P, a_min=None, a_max=50))  # 50 is a safe upper bound
        features['weighted_exp'] = np.sum(W * safe_exp, axis=1)

        # --- 5. Weighted logarithm (clip to avoid -inf) ---
        features['weighted_log'] = np.sum(W * np.log(np.clip(P, 1e-6, None)), axis=1)


        # --- 6. Pairwise interactions (symmetric, weighted) ---
        for i, j in combinations(range(n_fuels), 2):
            pij = P[:, i] * P[:, j]
            wij = W[:, i] * W[:, j]
            features[f'pair_p{i+1}p{j+1}'] = pij
            features[f'weighted_pair_p{i+1}p{j+1}'] = pij * wij

        # --- 7. Triple interactions (weighted & symmetric) ---
        for i, j, k in combinations(range(n_fuels), 3):
            pijk = P[:, i] * P[:, j] * P[:, k]
            wijk = W[:, i] * W[:, j] * W[:, k]
            features[f'triplet_p{i+1}{j+1}{k+1}'] = pijk
            features[f'weighted_triplet_p{i+1}{j+1}{k+1}'] = pijk * wijk

        # --- 8. Power series + weight modulated ---
        for power in [2, 3, 4]:
            features[f'power_sum_{power}'] = np.sum(W * P**power, axis=1)

        # --- 9. Log-weighted property (prevent log(0)) ---
        logW = np.log(np.clip(W, 1e-6, None))
        features['log_weighted_p'] = np.sum(logW * P, axis=1)

        # --- 10. Symmetric polynomial combinations (elementary symmetric) ---
        # Up to degree 5, since there are 5 fuels.
        for r in range(1, 6):
            key = f'e_sym_poly_r{r}'
            val = np.zeros(n_samples)
            for idx in combinations(range(n_fuels), r):
                prod_p = np.prod(P[:, idx], axis=1)
                val += prod_p
            features[key] = val

        # --- 11. Weighted interaction difference (symmetry in differences) ---
        for i, j in combinations(range(n_fuels), 2):
            diff = P[:, i] - P[:, j]
            wdiff = W[:, i] * W[:, j]
            features[f'weighted_diff_p{i+1}{j+1}'] = diff * wdiff

        # --- 12. Mean, max, min (weighted) ---
        total_weight = np.sum(W, axis=1, keepdims=True)
        weighted_mean = np.sum(W * P, axis=1) / np.clip(total_weight.squeeze(), 1e-6, None)
        features['weighted_mean'] = weighted_mean
        features['max_prop'] = np.max(P, axis=1)
        features['min_prop'] = np.min(P, axis=1)

        # --- 13. Weighted cross-log terms ---
        for i, j in combinations(range(n_fuels), 2):
            log_mix = np.log(np.clip(P[:, i] + P[:, j], 1e-6, None))
            wij = W[:, i] * W[:, j]
            features[f'logsum_p{i+1}{j+1}'] = log_mix * wij

        # --- 14. Weighted inverse ---
        features['inv_prop_sum'] = np.sum(W / np.clip(P, 1e-6, None), axis=1)


        # --- 15. Weighted relu (max(p, 0)) ---
        relu = np.maximum(P, 0)
        features['weighted_relu'] = np.sum(W * relu, axis=1)

        # --- 16. Weighted sin/cos transforms ---
        features['weighted_sin'] = np.sum(W * np.sin(P), axis=1)
        features['weighted_cos'] = np.sum(W * np.cos(P), axis=1)

        # --- 17. Normalized properties ---
        prop_sum = np.sum(P, axis=1, keepdims=True)
        normalized_P = P / np.clip(prop_sum, 1e-6, None)
        for i in range(n_fuels):
            features[f'norm_p{i+1}'] = normalized_P[:, i]

        # --- 18. Product of all p's and all w's ---
        features['total_product_p'] = np.prod(P, axis=1)
        features['total_product_w'] = np.prod(W, axis=1)

        # --- 19. Mixed entropic form (currently disabled) ---
        # features['entropy_weights'] = -np.sum(W * np.log(np.clip(W, 1e-6, None)), axis=1)

        # Convert to DataFrame
        df = pd.DataFrame(features)

        return df
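

# A minimal usage sketch, not part of the class. It assumes the joblib
# artifacts listed in saved_files_map exist under ./Models and that
# 'sample_blends.csv' (a hypothetical file) contains the Component{i}_fraction
# and Component{i}_Property{j} columns the transforms expect.
if __name__ == "__main__":
    # generate_mixture_features uses no instance state, so it can be exercised
    # without any saved models; self is passed as None deliberately.
    rng = np.random.default_rng(0)
    demo = rng.random((3, 10))  # 3 samples: 5 weights followed by 5 properties
    print(EagleBlendPredictor.generate_mixture_features(None, demo).shape)

    if os.path.isdir("./Models"):
        predictor = EagleBlendPredictor("./Models")
        sample_df = pd.read_csv("sample_blends.csv")  # hypothetical input path
        print(predictor.predict_all(sample_df).head())
    else:
        print("./Models directory not found; skipping the end-to-end demo.")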