Spaces:

TabPFN
/

TabPFNPrediction

Runtime error

App Files Community

TabPFN committed on Sep 24, 2022

Commit

d213847

1 Parent(s): 0f0db0b

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -40

app.py CHANGED Viewed

@@ -4,22 +4,25 @@ sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c84002
 from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier
 import numpy as np
 import pandas as pd
 import torch
 import gradio as gr
 import openml
 def compute(table: np.array):
-    vfunc = np.vectorize(lambda s: len(s))
     non_empty_row_mask = (vfunc(table).sum(1) != 0)
     table = table[non_empty_row_mask]
-    empty_mask = table == ''
     empty_inds = np.where(empty_mask)
     if not len(empty_inds[0]):
-        return "**Please leave at least one field blank for prediction.**", None
     if not np.all(empty_inds[1][0] == empty_inds[1]):
-        return "**Please only leave fields of one column blank for prediction.**", None
     y_column = empty_inds[1][0]
     eval_lines = empty_inds[0]
@@ -32,66 +35,117 @@ def compute(table: np.array):
         y_train = train_table[:, y_column]
     except ValueError:
-        return "**Please only add numbers (to the inputs) or leave fields empty.**", None
     classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
     classifier.fit(x_train, y_train)
     y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)
     # print(file, type(file))
-    out_table = table.copy().astype(str)
-    out_table[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
-    return None, out_table
-def upload_file(file):
     if file.name.endswith('.arff'):
         dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
         X_, _, categorical_indicator_, attribute_names_ = dataset.get_data(
             dataset_format="array"
         )
         df = pd.DataFrame(X_, columns=attribute_names_)
-        return df
     elif file.name.endswith('.csv') or file.name.endswith('.data'):
-        df = pd.read_csv(file.name, header=None)
-        df.columns = np.arange(len(df.columns))
-        return df
-example = \
-    [
-        [1, 2, 1],
-        [2, 1, 1],
-        [1, 1, 1],
-        [2, 2, 2],
-        [3, 4, 2],
-        [3, 2, 2],
-        [2, 3, '']
-    ]
 with gr.Blocks() as demo:
-    gr.Markdown("""This demo allows you to play with the **TabPFN**.
-The TabPFN will classify the values for all empty cells in the label column.
-Please, provide everything but the label column as numeric values.
-You can also upload datasets to fill the table automatically.
-    """)
-    inp_table = gr.DataFrame(type='numpy', value=example, headers=[''] * 3)
-    upload_file('iris.csv')
-    btn = gr.Button("Predict Empty Table Cells")
-    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])
-    out_text = gr.Markdown()
-    out_table = gr.DataFrame()
     examples = gr.Examples(examples=['iris.csv', 'balance-scale.arff'],
                            inputs=[inp_file],
                            outputs=[inp_table],
                            fn=upload_file,
                            cache_examples=True)
-    inp_file = gr.File(
-        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.')
     inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)
-demo.launch()

 from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier
 import numpy as np
+from pathlib import Path
 import pandas as pd
 import torch
 import gradio as gr
 import openml
+import os
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
 def compute(table: np.array):
+    vfunc = np.vectorize(lambda s: len(str(s)))
     non_empty_row_mask = (vfunc(table).sum(1) != 0)
     table = table[non_empty_row_mask]
+    empty_mask = table == '(predict)'
     empty_inds = np.where(empty_mask)
     if not len(empty_inds[0]):
+        return "⚠️ **ERROR: Please leave at least one field blank for prediction.**", None, None
     if not np.all(empty_inds[1][0] == empty_inds[1]):
+        return "⚠️ **Please only leave fields of one column blank for prediction.**", None, None
     y_column = empty_inds[1][0]
     eval_lines = empty_inds[0]
         y_train = train_table[:, y_column]
     except ValueError:
+        return "⚠️ **Please only add numbers (to the inputs) or leave fields empty.**", None, None
     classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
     classifier.fit(x_train, y_train)
     y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)
     # print(file, type(file))
+    out_table = pd.DataFrame(table.copy().astype(str))
+    out_table.iloc[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
+    out_table = out_table.iloc[eval_lines, :]
+    out_table.columns = headers
+    # PLOTTING
+    fig = plt.figure(figsize=(10,10))
+    ax = fig.add_subplot(111)
+    cm = plt.cm.RdBu
+    cm_bright = ListedColormap(["#FF0000", "#0000FF"])
+    # Plot the training points
+    vfunc = np.vectorize(lambda x : np.where(classifier.classes_ == x)[0])
+    y_train_index = vfunc(y_train)
+    y_train_index = y_train_index == 0
+    y_train = y_train_index
+    #x_train = x_train[y_train_index <= 1]
+    #y_train = y_train[y_train_index <= 1]
+    #y_train_index = y_train_index[y_train_index <= 1]
+    ax.scatter(x_train[:, 0], x_train[:, 1], c=y_train_index, cmap=cm_bright)
+    classifier = TabPFNClassifier(device='cpu', base_path='/home/hollmann/',
+                                model_string=model_string, N_ensemble_configurations=1
+                          , no_preprocess_mode=False, i=i, feature_shift_decoder=True, multiclass_decoder='permutation')
+    classifier.fit(x_train[:, 0:2], y_train)
+    DecisionBoundaryDisplay.from_estimator(
+        classifier, x_train[:, 0:2], alpha=0.6, ax=ax, eps=2.0, grid_resolution=100, response_method="predict_proba"
+    )
+    plt.xlabel(headers[0])
+    plt.ylabel(headers[1])
+    return None, out_table, fig
def upload_file(file, remove_entries=10):
    """Load an uploaded dataset and mark its first rows for prediction.

    Args:
        file: A filesystem path (``str`` or ``os.PathLike``) or any object
            whose ``.name`` attribute holds the path of the file on disk.
        remove_entries: Number of leading rows whose last-column value is
            replaced by the ``'(predict)'`` placeholder.

    Returns:
        A ``pandas.DataFrame`` with the file contents, where the first
        ``remove_entries`` cells of the last column read ``'(predict)'``.

    Raises:
        ValueError: If the file name does not end in ``.arff``, ``.csv`` or
            ``.data``.

    Side effects:
        Rebinds the module-level ``headers`` to the loaded column labels.
    """
    global headers

    # Accept plain paths as well as upload wrappers that expose ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    if path.endswith('.arff'):
        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
        X_, _, _, attribute_names_ = dataset.get_data(dataset_format="array")
        df = pd.DataFrame(X_, columns=attribute_names_)
    elif path.endswith(('.csv', '.data')):
        df = pd.read_csv(path, header='infer')
    else:
        # Previously this fell through and crashed on an unbound ``df``.
        raise ValueError(
            f"Unsupported file type: {path!r} (expected .arff, .csv or .data)"
        )

    headers = df.columns
    if df.shape[1] == 0:
        return df

    # Cast the label column to object before inserting the string
    # placeholder, so a numeric column is not upcast implicitly.
    label = df.iloc[:, -1].astype(object)
    label.iloc[0:remove_entries] = '(predict)'
    return pd.concat([df.iloc[:, :-1], label], axis=1)
def update_table(table):
    """Normalise the editable input table after a user edit.

    Rows in which every cell is the empty string are dropped, every remaining
    blank cell (``''``) is replaced by the ``'(predict)'`` placeholder, and
    the module-level ``headers`` are re-applied as column labels.

    Args:
        table: Table contents in any form accepted by ``pandas.DataFrame``.

    Returns:
        A ``pandas.DataFrame`` holding the cleaned table.
    """
    table = pd.DataFrame(table)
    if table.empty:
        return table

    # ``otypes`` keeps np.vectorize from probing the first element itself.
    cell_len = np.vectorize(lambda s: len(str(s)), otypes=[int])
    non_empty_row_mask = cell_len(table.to_numpy()).sum(axis=1) != 0
    # Copy so later edits never write into a slice of the caller's frame.
    table = table[non_empty_row_mask].copy()

    # Replace each blank cell where it is. The previous code wrote the
    # placeholder into the first blank's column for every row with a blank,
    # which overwrote real data when blanks were spread over several columns.
    blank = table == ''
    if blank.to_numpy().any():
        table = table.mask(blank, '(predict)')

    # Re-apply the column labels on every path, but only when they fit, so a
    # stale or not-yet-initialised ``headers`` cannot raise here.
    if len(headers) == table.shape[1]:
        table.columns = headers
    return table
# Column labels of the most recently loaded dataset; rebound by upload_file()
# and read by update_table() and compute().
headers = []

with gr.Blocks() as demo:
    # The intro text must be created inside the Blocks context; a component
    # instantiated outside of it is never attached to the rendered app.
    gr.Markdown("""This demo allows you to play with the **TabPFN**.
            The TabPFN will classify the values for all empty cells in the label column.
            Please, provide everything but the label column as numeric values.
            You can also upload datasets to fill the table automatically.
                """)
    with gr.Tab("Enter Input Data"):
        # NOTE(review): the label says "without header", but upload_file()
        # reads CSVs with header='infer' — confirm which one is intended.
        inp_file = gr.File(
            label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.')
        # Pre-fill the table with the bundled iris data, hiding the first
        # ten labels so there is something to predict right away.
        inp_table = gr.DataFrame(type='numpy', value=upload_file(Path('iris.csv'), remove_entries=10), headers=[''] * 3)
        # Turn freshly blanked cells into the '(predict)' placeholder.
        inp_table.change(fn=update_table, inputs=inp_table, outputs=inp_table)
    with gr.Tab("Run Predictions"):
        btn = gr.Button("Start")
        out_text = gr.Markdown()
        out_table = gr.DataFrame()
        out_plot = gr.Plot()
    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table, out_plot])
    examples = gr.Examples(examples=['iris.csv', 'balance-scale.arff'],
                           inputs=[inp_file],
                           outputs=[inp_table],
                           fn=upload_file,
                           cache_examples=True)
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)

demo.launch()