"""Gradio survey app for rating model-generated commit messages.

For every sample (a commit diff) the page shows one tab per model with that
model's commit message and a short questionnaire. Each submitted form is
flagged to a Hugging Face dataset through ``gr.HuggingFaceDatasetSaver``.
"""
import json
import os
import random
import uuid
from datetime import datetime
from itertools import chain

import gradio as gr

from data_loader import load_data

HF_TOKEN = os.environ.get('HF_TOKEN')
HF_DATASET = os.environ.get('HF_DATASET')

# Number of answer widgets created per variant tab (checked by an assert in
# the layout code below).
N_QUESTIONS = 5

data, models = load_data()

n_samples = len(data)
n_models = len(models)

saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)


def convert_diff_to_unified(diff):
    """Join per-file modifications into one unified-diff style string.

    Args:
        diff: iterable of mappings with ``old_path``, ``new_path`` and
            ``diff`` keys.

    Returns:
        The per-file sections (``---``/``+++`` header followed by the file's
        diff text) joined with newlines.
    """
    return "\n".join(
        f'--- {modified_file["old_path"]}\n'
        f'+++ {modified_file["new_path"]}\n'
        f'{modified_file["diff"]}'
        for modified_file in diff
    )


def get_diff2html_view(raw_diff):
    """Wrap a raw diff in the HTML scaffold used by the diff viewer.

    The diff text is placed in a hidden ``#diff-raw`` element next to an
    empty ``#diff-view`` element.
    """
    # NOTE(review): raw_diff is interpolated without HTML escaping, so diffs
    # containing '<' or '&' may be mangled by the browser. Whether escaping is
    # safe depends on how the script in head.html reads #diff-raw - confirm
    # before changing.
    html = f"""
    <div style='width:100%; height:1400px; overflow:auto; position: relative'>
        <div id='diff-raw' hidden>{raw_diff}</div>
        <div class="d2h-view-wrapper">
            <div id='diff-view'></div>
        </div>
    </div>
    """
    return html


def get_github_link_md(repo, hash):
    """Return a Markdown link to commit ``hash`` of GitHub repository ``repo``."""
    return f'[See the commit on Github](https://github.com/{repo}/commit/{hash})'


def update_commit_view(sample_ind):
    """Build the output values that display sample ``sample_ind``.

    Returns:
        ``None`` when ``sample_ind`` is out of range; otherwise a tuple laid
        out like the ``commit_view`` component list: GitHub link markdown,
        diff HTML, repo, hash, load timestamp, the submitted-forms counter,
        then one commit message per model followed by the model names, both
        in the same freshly shuffled order.
    """
    if sample_ind >= n_samples:
        return None

    record = data[sample_ind]
    diff_html = get_diff2html_view(convert_diff_to_unified(json.loads(record['mods'])))
    repo = record['repo']
    commit_hash = record['hash']
    link_md = get_github_link_md(repo, commit_hash)
    loaded_at = datetime.now().isoformat()

    # Shuffle a copy so the tab order does not reveal which model is which.
    models_shuffled = models[:]
    random.shuffle(models_shuffled)
    commit_messages = tuple(record[model] for model in models_shuffled)

    # n_forms_submitted is the module-level gr.Number component created in the
    # layout below; it is returned as the value for its own output slot.
    # NOTE(review): presumably this resets the counter to its initial value
    # (0) - confirm against Gradio's handling of returned component instances.
    return (
        (link_md, diff_html, repo, commit_hash, loaded_at, n_forms_submitted)
        + commit_messages
        + tuple(models_shuffled)
    )


def reset_answers():
    """Return ``None`` for every question widget of every variant."""
    return (None,) * (N_QUESTIONS * n_models)


def reset_submit_buttons():
    """Return one re-enabled "Submit" button update per variant."""
    return tuple(gr.Button(value="Submit", interactive=True) for _ in range(n_models))


def reset_continue_button():
    """Return the continue button in its initial, disabled state."""
    return gr.Button(value=f"0/{n_models} forms submitted", interactive=False)


def _survey_finished_outputs():
    """Return the outputs for ``next_sample`` once no samples remain.

    The slider moves to its maximum, the last displayed sample and its
    answers are left untouched, and the submit/continue buttons are disabled.
    Relies on ``commit_view`` and ``questions_list`` from the layout below,
    which exist by the time any event handler runs.
    """
    n_untouched = len(commit_view) + len(questions_list)
    return (
        (n_samples,)
        + tuple(gr.update() for _ in range(n_untouched))
        + tuple(gr.update(interactive=False) for _ in range(n_models))
        + (gr.update(value="All samples completed", interactive=False),)
    )


def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample of the session's shuffled order.

    Args:
        current_sample_ind: position of the sample currently shown.
        shuffled_idx: the session's permutation of sample indices.

    Returns:
        A tuple matching the outputs wired to the skip/continue buttons: new
        position, commit view values, cleared answers, reset submit buttons
        and the reset continue button.
    """
    next_ind = current_sample_ind + 1
    # The position is incremented before the lookup, so the bound must be
    # checked on the incremented value: shuffled_idx has n_samples entries.
    # Returning a full tuple (rather than None) keeps the output count valid.
    if next_ind >= n_samples:
        return _survey_finished_outputs()

    updated_view = update_commit_view(shuffled_idx[next_ind])
    return (
        (next_ind,)
        + updated_view
        + reset_answers()
        + reset_submit_buttons()
        + (reset_continue_button(),)
    )


with open("head.html") as head_file:
    head_html = head_file.read()

# Reloads the page with ?__theme=light unless it is already set.
force_light_theme_js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""

# NOTE(review): the nesting of the widgets below was reconstructed from a
# whitespace-collapsed source - confirm the layout matches the intended one.
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css",
               js=force_light_theme_js_func) as application:
    # Hidden per-sample / per-session state.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        with gr.Accordion("Help"):
            with open("survey_guide.md") as content_file:
                gr.Markdown(content_file.read())

    with gr.Row():
        # Read-only progress indicator; also holds the current position.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.HTML()
        with gr.Column(scale=1):
            commit_msgs = []
            questions = []
            model_names = []
            submit_buttons = []

            SCALE = list(range(1, 6))

            for model_ind in range(n_models):
                with gr.Tab(f"Variant #{model_ind + 1}"):
                    commit_msgs.append(gr.TextArea(label="Commit message (can be scrollable)",
                                                   interactive=False,
                                                   ))
                    gr.Markdown("## Please, rate your level of agreement with each statement\n"
                                "\n"
                                "*1 - strongly disagree, 2 - disagree, 3 - not sure, 4 - agree, 5 - strongly agree*")

                    model_questions = []
                    model_questions.append(gr.Radio(
                        info='The information provided in the commit message is consistent with the code changes.',
                        label='is_correct',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))

                    # Disabled question:
                    # model_questions.append(gr.Radio(
                    #     info='The commit message answers the question of WHAT changes have been made.',
                    #     label=f'has_what',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))
                    #
                    # Disabled question:
                    # model_questions.append(gr.Radio(
                    #     info='The commit message answers the question of WHY these changes have been made.',
                    #     label=f'has_why',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))

                    model_questions.append(gr.Radio(
                        info='The commit message cannot be substantially shortened without loss of important '
                             'information.',
                        label='is_not_verbose',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))

                    # Disabled question:
                    # model_questions.append(gr.Radio(
                    #     info='The commit message includes a short headline that provides a good overview of the '
                    #          'changes.',
                    #     label=f'has_headline',
                    #     show_label=False,
                    #     choices=SCALE,
                    #     interactive=True))

                    model_questions.append(gr.Radio(
                        info='The commit message is easy to read and to understand.',
                        label='easy_to_read',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))

                    model_questions.append(gr.Radio(
                        info='Please, describe your overall impression of the commit message (1 - very bad, 5 - very '
                             'good)',
                        label='overall_rating',
                        show_label=False,
                        choices=SCALE,
                        interactive=True))

                    model_questions.append(gr.Textbox(
                        info='Additional comments on the commit message',
                        label='comments',
                        show_label=False,
                        interactive=True))

                    # reset_answers() depends on this count being exact.
                    assert len(model_questions) == N_QUESTIONS

                    questions.append(model_questions)

                    model_names.append(gr.Textbox(interactive=False, label='model', visible=False))

                    submit_buttons.append(gr.Button(value="Submit"))

            n_forms_submitted = gr.Number(visible=False, value=0, precision=0)
            continue_btn = reset_continue_button()

            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')

            with gr.Row(visible=False):
                sample_loaded_timestamp = gr.Textbox(info="Sample loaded", label='loaded_ts',
                                                     interactive=False, container=True, show_label=False)
                # Re-evaluated every second so a submission carries the
                # current time.
                sample_submitted_timestamp = gr.Textbox(info="Current time", interactive=False, container=True,
                                                        show_label=False,
                                                        value=lambda: datetime.now().isoformat(), every=1.0,
                                                        label='submitted_ts')

    # Components refreshed whenever a sample is loaded; the order must match
    # the tuple returned by update_commit_view().
    commit_view = [
        github_link,
        diff_view,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        n_forms_submitted,
        *commit_msgs,
        *model_names,
    ]

    feedback_metadata = [
        session_val,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        sample_submitted_timestamp
    ]

    # The first variant's widgets define the columns of the flagged dataset;
    # every variant submits values in the same order.
    saver.setup([current_sample_sld] + feedback_metadata + questions[0] + [model_names[0], ], "feedback")

    questions_list = list(chain.from_iterable(questions))

    # Skip and continue do the same thing: load the next sample and reset the
    # forms. The order must match the tuple returned by next_sample().
    next_sample_outputs = ([current_sample_sld] + commit_view + questions_list
                           + submit_buttons + [continue_btn])

    skip_btn.click(next_sample,
                   inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=next_sample_outputs)

    continue_btn.click(next_sample,
                       inputs=[current_sample_sld, shuffled_idx_val],
                       outputs=next_sample_outputs)


    def submit_for_model(current_sample, n_forms_submitted_val, *args):
        """Flag one variant's answers and update the submission progress.

        Args:
            current_sample: current slider position.
            n_forms_submitted_val: forms already submitted for this sample.
            *args: feedback metadata, the variant's answers and its model
                name, in the order passed to ``saver.setup``.

        Returns:
            Updates for the clicked submit button (disabled), the submitted
            counter, and the continue button (enabled once every variant has
            been submitted).
        """
        saver.flag((current_sample,) + args)
        n_forms_submitted_val += 1
        all_forms_submitted = n_forms_submitted_val == n_models
        if all_forms_submitted:
            continue_label = "Next sample"
        else:
            continue_label = f"{n_forms_submitted_val}/{n_models} forms submitted"

        return (gr.Button(value="Submitted", interactive=False),
                n_forms_submitted_val,
                gr.Button(continue_label, interactive=all_forms_submitted))


    for submit_btn, variant_questions, variant_model in zip(submit_buttons, questions, model_names):
        submit_btn.click(
            submit_for_model,
            inputs=[current_sample_sld, n_forms_submitted] + feedback_metadata + variant_questions + [
                variant_model, ],
            outputs=[submit_btn, n_forms_submitted, continue_btn]
        )


    def init_session(current_sample):
        """Start a session: new id, new sample order, first sample's view.

        Returns:
            ``(session id, shuffled sample indices)`` followed by the commit
            view values for the sample at position ``current_sample``.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)

        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])


    application.load(init_session,
                     inputs=[current_sample_sld],
                     outputs=[session_val, shuffled_idx_val] + commit_view, )

application.launch()