Spaces:
Sleeping
Sleeping
import json | |
import os | |
import random | |
import uuid | |
from datetime import datetime | |
from itertools import chain | |
import gradio as gr | |
from data_loader import load_data | |
# Hugging Face credentials and the target dataset name are read from the
# environment; both are None when the variable is not set.
HF_TOKEN = os.getenv('HF_TOKEN')
HF_DATASET = os.getenv('HF_DATASET')

# Number of input fields in each per-variant feedback form. The UI
# construction code asserts that every form has exactly this many fields.
N_QUESTIONS = 5

# `data` is indexed by sample position and `models` is iterated to pick the
# per-model commit messages out of each record (see update_commit_view).
data, models = load_data()
n_samples, n_models = len(data), len(models)

# Flagging backend that receives one row per submitted feedback form.
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def convert_diff_to_unified(diff):
    """Render a list of per-file modifications as one unified-diff string.

    Args:
        diff: iterable of mappings, each providing the keys ``"old_path"``,
            ``"new_path"`` and ``"diff"``.

    Returns:
        The per-file sections (``---``/``+++`` header followed by the file's
        diff body) joined with a single newline. An empty input yields ``""``.
    """
    sections = []
    for file_change in diff:
        header = (
            f'--- {file_change["old_path"]}\n'
            f'+++ {file_change["new_path"]}\n'
        )
        sections.append(header + f'{file_change["diff"]}')
    return "\n".join(sections)
def get_diff2html_view(raw_diff):
    """Build the HTML scaffold that carries a diff to the page.

    The diff text is stored in a hidden ``#diff-raw`` element next to an empty
    ``#diff-view`` element (presumably filled in by a script loaded through
    ``head.html`` -- confirm against that file).

    The diff is HTML-escaped before being embedded. Diffs of source code
    routinely contain ``<``, ``>`` and ``&``; left unescaped, the browser
    would parse them as markup, corrupting the diff text and allowing the
    diff content to inject arbitrary elements into the page. Escaped text
    round-trips exactly: the element's text content equals ``raw_diff``.

    Args:
        raw_diff: unified diff text.

    Returns:
        An HTML fragment as a string.
    """
    # Function-scope import keeps this fix self-contained; note that the
    # result is deliberately NOT bound to a local called `html`, which would
    # shadow the module.
    from html import escape

    # quote=False: the value is placed in element content, not an attribute,
    # so only &, < and > need escaping.
    escaped_diff = escape(raw_diff, quote=False)
    view_html = f"""
    <div style='width:100%; height:1400px; overflow:auto; position: relative'>
        <div id='diff-raw' hidden>{escaped_diff}</div>
        <div class="d2h-view-wrapper">
            <div id='diff-view'></div>
        </div>
    </div>
    """
    return view_html
def get_github_link_md(repo, hash):
    """Return a Markdown link to a commit on GitHub.

    Args:
        repo: repository path as it appears in the GitHub URL.
        hash: commit identifier appended after ``/commit/``.

    Returns:
        Markdown of the form ``[See the commit on Github](<commit url>)``.
    """
    commit_url = f'https://github.com/{repo}/commit/{hash}'
    return f'[See the commit on Github]({commit_url})'
def update_commit_view(sample_ind):
    """Compute the values that populate the commit view for one sample.

    Args:
        sample_ind: index into the module-level ``data``.

    Returns:
        ``None`` when ``sample_ind`` is not below ``n_samples``; otherwise a
        tuple laid out as: GitHub link markdown, diff HTML, repo, hash,
        ISO timestamp of when the view was built, the forms-submitted counter
        slot, then one commit message per model followed by the model names
        in the same (freshly shuffled) order.
    """
    if sample_ind >= n_samples:
        return None

    sample = data[sample_ind]

    # The 'mods' field holds JSON text; once parsed it is turned into a
    # unified diff and wrapped into the HTML scaffold.
    modified_files = json.loads(sample['mods'])
    diff_html = get_diff2html_view(convert_diff_to_unified(modified_files))

    repo_name = sample['repo']
    commit_hash = sample['hash']
    link_md = get_github_link_md(repo_name, commit_hash)
    loaded_at = datetime.now().isoformat()

    # Shuffle a copy so the module-level `models` list keeps its order while
    # the variants are presented in a random order for this sample.
    models_shuffled = models[:]
    random.shuffle(models_shuffled)
    commit_messages = tuple(sample[model] for model in models_shuffled)

    # The module-level `n_forms_submitted` component object itself is placed
    # in the counter slot (presumably so Gradio re-applies its initial value
    # of 0 -- confirm).
    return (
        link_md,
        diff_html,
        repo_name,
        commit_hash,
        loaded_at,
        n_forms_submitted,
        *commit_messages,
        *models_shuffled,
    )
def reset_answers():
    """Return one ``None`` per question field across all variant forms.

    The tuple has ``N_QUESTIONS * n_models`` entries.
    """
    total_fields = N_QUESTIONS * n_models
    return tuple(None for _ in range(total_fields))
def reset_submit_buttons():
    """Return ``n_models`` fresh, enabled "Submit" buttons as a tuple."""
    buttons = []
    for _ in range(n_models):
        buttons.append(gr.Button(value="Submit", interactive=True))
    return tuple(buttons)
def reset_continue_button():
    """Return the disabled "continue" button in its zero-forms-submitted state."""
    progress_label = f"0/{n_models} forms submitted"
    return gr.Button(value=progress_label, interactive=False)
def next_sample(current_sample_ind, shuffled_idx):
    """Advance the survey to the next sample in the session's shuffled order.

    Args:
        current_sample_ind: position (within ``shuffled_idx``) of the sample
            currently displayed.
        shuffled_idx: the session's permutation of sample indices.

    Returns:
        A tuple laid out as: the new position, the values from
        ``update_commit_view`` for the new sample, blank answers for every
        question, reset submit buttons, and the reset continue button.

    Raises:
        gr.Error: when there is no further sample to show. Raising aborts the
            event, so the components keep their current values and the
            message is shown to the user.
    """
    next_sample_ind = current_sample_ind + 1

    # The bound must be checked on the *incremented* position: checking the
    # current one (as before) let the call made from the last sample index
    # one past the end of `shuffled_idx` and fail with IndexError.
    if next_sample_ind >= n_samples:
        raise gr.Error("There are no more samples to label. Thank you for participating!")

    updated_view = update_commit_view(shuffled_idx[next_sample_ind])
    return (
        (next_sample_ind,)
        + updated_view
        + reset_answers()
        + reset_submit_buttons()
        + (reset_continue_button(),)
    )
# Extra markup for the page <head>; handed to gr.Blocks via its `head` argument.
with open("head.html") as head_fh:
    head_html = head_fh.read()

# JavaScript handed to gr.Blocks via its `js` argument: unless the `__theme`
# query parameter is already 'light', set it and navigate to the updated URL.
force_light_theme_js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""
# NOTE(review): the nesting of the layout below was reconstructed from a copy
# of this file whose indentation had been stripped -- confirm the placement of
# components inside rows/columns/tabs against the deployed app.
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css",
               js=force_light_theme_js_func) as application:
    # Hidden holders for the current sample's identity and for the session's
    # shuffled sample order.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    shuffled_idx_val = gr.JSON(visible=False)

    # Survey instructions, loaded from a Markdown file.
    with gr.Row():
        with gr.Accordion("Help"):
            with open("survey_guide.md") as guide_file:
                gr.Markdown(guide_file.read())

    # Read-only progress slider and the "skip" control.
    with gr.Row():
        current_sample_sld = gr.Slider(
            minimum=0,
            maximum=n_samples,
            step=1,
            value=0,
            interactive=False,
            label='sample_ind',
            info=f"Samples labeled/skipped (out of {n_samples})",
            show_label=False,
            container=False,
            scale=5,
        )
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        # Left: link to the commit and the rendered diff.
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.HTML()

        # Right: one tab per commit-message variant, each with its own form.
        with gr.Column(scale=1):
            commit_msgs = []
            questions = []
            model_names = []
            submit_buttons = []

            SCALE = [1, 2, 3, 4, 5]

            rating_instructions = (
                "## Please, rate your level of agreement with each statement\n"
                "\n"
                "*1 - strongly disagree, 2 - disagree, 3 - not sure, 4 - agree, 5 - strongly agree*"
            )

            # (label, statement shown to the user) for every 1-5 rating field,
            # in display order.
            rating_statements = (
                ('is_correct',
                 'The information provided in the commit message is consistent with the code changes.'),
                ('is_not_verbose',
                 'The commit message cannot be substantially shortened without loss of important '
                 'information.'),
                ('easy_to_read',
                 'The commit message is easy to read and to understand.'),
                ('overall_rating',
                 'Please, describe your overall impression of the commit message (1 - very bad, 5 - very '
                 'good)'),
            )

            for model_ind in range(n_models):
                with gr.Tab(f"Variant #{model_ind + 1}"):
                    commit_msgs.append(
                        gr.TextArea(label="Commit message (can be scrollable)", interactive=False)
                    )

                    gr.Markdown(rating_instructions)

                    model_questions = [
                        gr.Radio(
                            info=statement,
                            label=field_label,
                            show_label=False,
                            choices=SCALE,
                            interactive=True,
                        )
                        for field_label, statement in rating_statements
                    ]
                    model_questions.append(
                        gr.Textbox(
                            info='Additional comments on the commit message',
                            label='comments',
                            show_label=False,
                            interactive=True,
                        )
                    )

                    # Keep the form in step with N_QUESTIONS, which
                    # reset_answers() relies on when blanking the fields.
                    assert len(model_questions) == N_QUESTIONS

                    questions.append(model_questions)
                    model_names.append(gr.Textbox(interactive=False, label='model', visible=False))
                    submit_buttons.append(gr.Button(value="Submit"))

            # Hidden counter of forms submitted for the current sample.
            n_forms_submitted = gr.Number(visible=False, value=0, precision=0)
            continue_btn = reset_continue_button()

            session_val = gr.Textbox(info='Session', interactive=False, container=True,
                                     show_label=False, label='session')

            # Hidden timestamps stored with each submission. The "current
            # time" box has a callable value combined with every=1.0.
            with gr.Row(visible=False):
                sample_loaded_timestamp = gr.Textbox(
                    info="Sample loaded",
                    label='loaded_ts',
                    interactive=False,
                    container=True,
                    show_label=False,
                )
                sample_submitted_timestamp = gr.Textbox(
                    info="Current time",
                    interactive=False,
                    container=True,
                    show_label=False,
                    value=lambda: datetime.now().isoformat(),
                    every=1.0,
                    label='submitted_ts',
                )

    # Components written by update_commit_view(); the order must match the
    # tuple that function returns.
    commit_view = [
        github_link,
        diff_view,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        n_forms_submitted,
        *commit_msgs,
        *model_names,
    ]

    # Components whose values accompany every submitted form.
    feedback_metadata = [
        session_val,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        sample_submitted_timestamp,
    ]

    # The saver is set up once using the first variant's form as the template;
    # every variant's form passes values in this same order when flagging.
    saver.setup([current_sample_sld, *feedback_metadata, *questions[0], model_names[0]], "feedback")

    questions_list = list(chain.from_iterable(questions))

    # "Skip" and "continue" both move on to the next sample and reset all forms.
    for advance_btn in (skip_btn, continue_btn):
        advance_btn.click(
            next_sample,
            inputs=[current_sample_sld, shuffled_idx_val],
            outputs=[current_sample_sld, *commit_view, *questions_list, *submit_buttons, continue_btn],
        )

    def submit_for_model(current_sample, n_forms_submitted_val, *args):
        """Store one variant's answers and update the submission progress.

        Args:
            current_sample: value of the progress slider.
            n_forms_submitted_val: forms submitted so far for this sample.
            *args: feedback metadata, the form's answers and the model name,
                in the order given to ``saver.setup``.

        Returns:
            A tuple of: the disabled "Submitted" button replacing the clicked
            one, the incremented counter, and the continue button (enabled as
            "Next sample" once the counter equals ``n_models``).
        """
        saver.flag((current_sample,) + args)
        n_forms_submitted_val += 1

        all_forms_submitted = n_forms_submitted_val == n_models
        if all_forms_submitted:
            continue_label = "Next sample"
        else:
            continue_label = f"{n_forms_submitted_val}/{n_models} forms submitted"

        return (
            gr.Button(value="Submitted", interactive=False),
            n_forms_submitted_val,
            gr.Button(continue_label, interactive=all_forms_submitted),
        )

    # Wire each variant's submit button to its own questions and model name.
    for variant_btn, variant_questions, variant_model in zip(submit_buttons, questions, model_names):
        variant_btn.click(
            submit_for_model,
            inputs=[current_sample_sld, n_forms_submitted, *feedback_metadata,
                    *variant_questions, variant_model],
            outputs=[variant_btn, n_forms_submitted, continue_btn],
        )

    def init_session(current_sample):
        """Start a session: new id, new sample order, first sample's view.

        Args:
            current_sample: value of the progress slider at page load.

        Returns:
            A tuple of the session id, the shuffled list of sample indices,
            and the values from ``update_commit_view`` for the sample at
            position ``current_sample`` of that order.
        """
        session_id = str(uuid.uuid4())
        sample_order = list(range(n_samples))
        random.shuffle(sample_order)
        return (session_id, sample_order, *update_commit_view(sample_order[current_sample]))

    application.load(
        init_session,
        inputs=[current_sample_sld],
        outputs=[session_val, shuffled_idx_val, *commit_view],
    )

application.launch()