File size: 1,004 Bytes
1507360 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import pandas as pd
from transformers import pipeline
from transformers.pipelines import PIPELINE_REGISTRY
from rutabert_pipeline.model import BertForClassification
from rutabert_pipeline.pipeline import ColumnTypeAnnotationPipeline
if __name__ == "__main__":
    # Demo script: register the custom column-type-annotation pipeline,
    # build its input frame from an example CSV, run it and print the result.

    # Register the task name so that transformers' `pipeline()` factory can
    # resolve "column-type-annotation" to the project's pipeline/model classes.
    PIPELINE_REGISTRY.register_pipeline(
        "column-type-annotation",
        pipeline_class=ColumnTypeAnnotationPipeline,
        pt_model=BertForClassification,
    )

    # The first CSV row is treated as the header (column names).
    # NOTE(review): the path is relative, so it is resolved against the
    # current working directory, not against this file's location.
    table = pd.read_csv("../rutabert_pipeline/data/example.csv", header=0)

    # Presumably placeholders: no ground-truth labels are known for the
    # example table -- confirm against ColumnTypeAnnotationPipeline.
    label_id = 0
    label = "none"

    # One record per table column; the column's cells are stringified,
    # stripped of surrounding whitespace and joined with single spaces.
    data_list = [
        [
            "example.csv",
            col_idx,
            label_id,
            label,
            " ".join(str(x).strip() for x in table[col_idx]),
        ]
        for col_idx in table.columns
    ]
    df = pd.DataFrame(
        data_list,
        columns=["table_id", "column_id", "label_id", "label", "column_data"],
    )

    # Bind the instance to its own name: the original rebound `pipeline`,
    # shadowing the imported factory function and making it unusable afterwards.
    annotator = pipeline("column-type-annotation", model="sti-team/rutabert-base")
    output = annotator(df)
    print(output)
|