Spaces:
Sleeping
Sleeping
Petr Tsvetkov
committed on
Commit
·
0dd349d
1
Parent(s):
a4dedae
Update the labeling app to use new data
Browse files
- app.py +2 -1
- data_loader.py +9 -22
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
import random
|
3 |
import uuid
|
@@ -56,7 +57,7 @@ def update_commit_view(sample_ind):
|
|
56 |
|
57 |
record = data[sample_ind]
|
58 |
|
59 |
-
diff_view = get_diff2html_view(convert_diff_to_unified(record['mods']))
|
60 |
|
61 |
repo_val = record['repo']
|
62 |
hash_val = record['hash']
|
|
|
1 |
+
import json
|
2 |
import os
|
3 |
import random
|
4 |
import uuid
|
|
|
57 |
|
58 |
record = data[sample_ind]
|
59 |
|
60 |
+
diff_view = get_diff2html_view(convert_diff_to_unified(json.loads(record['mods'])))
|
61 |
|
62 |
repo_val = record['repo']
|
63 |
hash_val = record['hash']
|
data_loader.py
CHANGED
@@ -1,29 +1,16 @@
|
|
|
|
|
|
1 |
from datasets import load_dataset
|
2 |
|
3 |
-
|
4 |
-
|
|
|
5 |
|
6 |
CACHE_DIR = 'cache'
|
7 |
|
8 |
|
9 |
def load_data():
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
columns={'message': 'reference'})
|
15 |
-
|
16 |
-
message_cols = ['reference']
|
17 |
-
|
18 |
-
for model in MODELS:
|
19 |
-
model_dataset = load_dataset("JetBrains-Research/lca-results",
|
20 |
-
model,
|
21 |
-
split="test",
|
22 |
-
cache_dir=CACHE_DIR).to_pandas().set_index(['hash', 'repo'])[["prediction"]]
|
23 |
-
model_dataset = model_dataset[~model_dataset.index.duplicated(keep='first')]
|
24 |
-
|
25 |
-
cur_col_name = f"{model}"
|
26 |
-
dataset = dataset.join(other=model_dataset).rename(columns={'prediction': cur_col_name})
|
27 |
-
message_cols.append(cur_col_name)
|
28 |
-
|
29 |
-
return dataset.reset_index().to_dict("records"), message_cols
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
from datasets import load_dataset
|
4 |
|
5 |
+
HF_TOKEN = os.environ.get('HF_TOKEN')
|
6 |
+
|
7 |
+
MESSAGE_COLS = ['reference', 'prediction', 'enhanced']
|
8 |
|
9 |
CACHE_DIR = 'cache'
|
10 |
|
11 |
|
12 |
def load_data():
|
13 |
+
return load_dataset("JetBrains-Research/commit-labeling-samples",
|
14 |
+
split="train",
|
15 |
+
cache_dir=CACHE_DIR,
|
16 |
+
token=HF_TOKEN).to_pandas().to_dict("records"), MESSAGE_COLS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|