File size: 643 Bytes
5db7813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import pandas as pd
from sklearn.model_selection import train_test_split

from utils import get_datasetdict_object


def get_dataset(train_path: str, test_path: str, *, val_size: float = 0.23805,
                seed: int = 42, label_column: str = "#3_country_label"):
    """Load train/test TSV files and split a stratified validation set off train.

    Args:
        train_path: Path to the tab-separated training file. It must contain
            ``label_column``.
        test_path: Path to the tab-separated file used as the test split.
        val_size: Passed to ``train_test_split`` as ``test_size``; a float is
            the fraction of the training rows moved into the validation set.
            The default is the value the original code hard-coded (its
            rationale is not visible here -- TODO confirm with the author).
        seed: ``random_state`` for the split, so the partition is reproducible.
        label_column: Column of the training frame whose values are used to
            stratify the train/validation split.

    Returns:
        The object produced by ``get_datasetdict_object`` from the
        (train, validation, test) DataFrames, in that order.
    """
    full_train = pd.read_csv(train_path, sep="\t")

    # Stratify so both partitions keep the label distribution of the full
    # training file.
    df_train, df_val = train_test_split(
        full_train,
        test_size=val_size,
        random_state=seed,
        stratify=full_train[label_column],
    )

    # The test file is used as-is; it is never split.
    df_test = pd.read_csv(test_path, sep="\t")

    return get_datasetdict_object(df_train, df_val, df_test)


def main():
    """Build the dataset from the labeled train/dev TSV files and print it."""
    train_file = "data/DA_train_labeled.tsv"
    dev_file = "data/DA_dev_labeled.tsv"  # handed to get_dataset as the test split
    splits = get_dataset(train_file, dev_file)
    print(splits)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()