|
|
import json |
|
|
from functools import lru_cache |
|
|
import re |
|
|
import traceback |
|
|
from typing import Optional |
|
|
|
|
|
import gradio as gr |
|
|
from huggingface_hub import HfApi, hf_hub_download, hf_hub_url |
|
|
from huggingface_hub.utils import HfHubHTTPError |
|
|
|
|
|
|
|
|
# Dataset repository used as the initial value of the repo ID textbox.
DEFAULT_REPO_ID = "mlfoundations-cua-dev/human_eval"

# Lower-case filename suffixes treated as images. Kept as a tuple so it can be
# passed directly to ``str.endswith``.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tif", ".tiff")

# Lower-case filename prefixes identifying the initial screenshot. The
# misspelled "intial_screenshot" variant is accepted alongside the correct one.
INIT_SCREENSHOT_NAMES = {"initial_screenshot", "intial_screenshot"}

# Matches "step_<digits>" with an optional single extension; group 1 captures
# the step number.
STEP_FILENAME_PATTERN = re.compile(r"^step_(\d+)(?:\.[^.]+)?$", flags=re.IGNORECASE)

# Name of the per-folder trajectory annotation file.
TRAJECTORY_FILENAME = "traj.jsonl"
|
|
|
|
|
|
|
|
# Module-wide Hugging Face Hub client, created once at import time.
api = HfApi()
|
|
|
|
|
|
|
|
@lru_cache(maxsize=16)
def _list_repo_files(repo_id: str) -> list[str]:
    """List every file path stored in the dataset repository ``repo_id``.

    The result is memoized per ``repo_id`` (at most 16 entries), so repeated
    calls for the same repository reuse the first successful listing.
    """
    listing = api.list_repo_files(repo_type="dataset", repo_id=repo_id)
    return listing
|
|
|
|
|
|
|
|
def _extract_top_level(repo_id: str) -> tuple[list[str], list[str]]:
    """Partition the repository root into folder names and loose file names.

    Returns:
        ``(folders, files)``, both sorted. ``folders`` holds the distinct first
        segment of every path containing a ``/``; ``files`` holds the paths
        that contain no ``/`` at all.
    """
    folder_names: set[str] = set()
    root_files: list[str] = []
    for entry in _list_repo_files(repo_id):
        head, slash, _ = entry.partition("/")
        if slash:
            folder_names.add(head)
        else:
            root_files.append(entry)
    return sorted(folder_names), sorted(root_files)
|
|
|
|
|
|
|
|
def _get_subdirectories(repo_id: str, directory: str) -> list[str]:
    """List the names of the folders located directly inside ``directory``.

    An empty ``directory`` yields an empty list. Otherwise every repository
    path under ``directory/`` is inspected and the first segment of its
    remainder is collected whenever that remainder still contains a ``/``.
    The names are returned sorted and without duplicates.
    """
    if not directory:
        return []

    marker = f"{directory}/"
    child_names: set[str] = set()
    for entry in _list_repo_files(repo_id):
        if not entry.startswith(marker):
            continue
        remainder = entry[len(directory) + 1 :]
        child, slash, _ = remainder.partition("/")
        if slash:
            child_names.add(child)
    return sorted(child_names)
|
|
|
|
|
|
|
|
def _build_path(*parts) -> str: |
|
|
"""Join path parts while skipping empty values.""" |
|
|
return "/".join(part for part in parts if part) |
|
|
|
|
|
|
|
|
def _image_sort_key(path: str):
    """Build the sort key that orders screenshots within a folder.

    Only the lower-cased final path segment is examined:

    * names starting with an initial-screenshot prefix sort first as ``(0, 0)``;
    * ``step_<n>`` names follow, ordered numerically as ``(1, n)``;
    * everything else comes last, ordered by name as ``(2, name)``.
    """
    basename = path.rsplit("/", 1)[-1].lower()

    for marker in INIT_SCREENSHOT_NAMES:
        if basename.startswith(marker):
            return (0, 0)

    step = STEP_FILENAME_PATTERN.match(basename)
    if step is None:
        return (2, basename)
    return (1, int(step.group(1)))
|
|
|
|
|
|
|
|
def _load_traj_entries(repo_id: str, directory: str) -> list:
    """Read the annotations stored in ``directory``'s trajectory file.

    Returns an empty list when ``directory`` is empty or when the repository
    listing has no trajectory file at that location. Otherwise the file is
    downloaded and parsed one JSON document per non-blank line: a line holding
    a JSON list contributes each of its elements, any other value contributes
    itself.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not directory:
        return []

    target = _build_path(directory, TRAJECTORY_FILENAME)
    if target not in _list_repo_files(repo_id):
        return []

    cached_copy = hf_hub_download(repo_id=repo_id, filename=target, repo_type="dataset")

    records: list = []
    with open(cached_copy, "r", encoding="utf-8") as handle:
        for line in handle:
            text = line.strip()
            if not text:
                continue
            value = json.loads(text)
            # Flatten list-valued lines; wrap scalars/objects so both extend.
            records += value if isinstance(value, list) else [value]
    return records
|
|
|
|
|
|
|
|
def _load_instruction_file(repo_id: str, filepath: str) -> Optional[str]:
    """Fetch a JSON metadata file and return its ``instruction`` value.

    For a JSON object the ``"instruction"`` key is read; for a JSON list the
    first dict element carrying an ``"instruction"`` key is used. String values
    are returned stripped, any other non-``None`` value is converted with
    ``str``. ``None`` is returned when no instruction is present.

    Raises:
        json.JSONDecodeError: if the file is not valid JSON.
    """
    cached_copy = hf_hub_download(repo_id=repo_id, filename=filepath, repo_type="dataset")
    with open(cached_copy, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if isinstance(payload, dict):
        found = payload.get("instruction")
    elif isinstance(payload, list):
        found = next(
            (
                item["instruction"]
                for item in payload
                if isinstance(item, dict) and "instruction" in item
            ),
            None,
        )
    else:
        found = None

    if found is None:
        return None
    return found.strip() if isinstance(found, str) else str(found)
|
|
|
|
|
|
|
|
def _format_annotation(index: int, annotation) -> str: |
|
|
prefix = f"Step {index + 1}" |
|
|
if isinstance(annotation, str): |
|
|
content = annotation.strip() |
|
|
else: |
|
|
try: |
|
|
content = json.dumps(annotation, ensure_ascii=False) |
|
|
except TypeError: |
|
|
content = str(annotation) |
|
|
return f"{prefix}: {content}" if content else prefix |
|
|
|
|
|
|
|
|
def _prepare_gallery_items(
    repo_id: str, directory: Optional[str]
) -> tuple[list[tuple[str, str]], list[str], str]:
    """Assemble everything the UI shows for one screenshot folder.

    Args:
        repo_id: Dataset repository to read from.
        directory: Repository path of the selected folder; a falsy value
            produces placeholder output asking for a fifth-level folder.

    Returns:
        ``(gallery_items, status_lines, instruction_markdown)`` where
        ``gallery_items`` pairs each image URL with its caption,
        ``status_lines`` are human-readable counts and warnings, and
        ``instruction_markdown`` is the instruction text or a warning about it.

    Errors while loading the trajectory or metadata file are not raised; they
    are reported through ``status_lines`` / ``instruction_markdown``.
    """
    if not directory:
        placeholder_status = ["Select a fifth-level folder to view screenshots."]
        placeholder_instruction = (
            "ℹ️ Instruction will appear once a fifth-level folder is selected."
        )
        return [], placeholder_status, placeholder_instruction

    prefix = f"{directory}/"
    # Files sitting directly in the folder (nothing from nested sub-folders).
    direct_children = [
        candidate
        for candidate in _list_repo_files(repo_id)
        if candidate.startswith(prefix) and "/" not in candidate[len(prefix) :]
    ]

    ordered_images = sorted(
        (item for item in direct_children if item.lower().endswith(IMAGE_EXTENSIONS)),
        key=_image_sort_key,
    )
    image_urls = [
        hf_hub_url(repo_id=repo_id, filename=item, repo_type="dataset")
        for item in ordered_images
    ]

    status_lines: list[str] = [f"🖼️ Images: {len(image_urls)}"]

    # A failed trajectory load is downgraded to a status warning.
    annotations: list = []
    try:
        annotations = _load_traj_entries(repo_id, directory)
    except json.JSONDecodeError as error:
        status_lines.append(f"⚠️ Failed to parse `{TRAJECTORY_FILENAME}`: {error}")
    except Exception as error:
        status_lines.append(f"⚠️ Error loading `{TRAJECTORY_FILENAME}`: {error}")

    status_lines.append(f"📝 Annotations: {len(annotations)}")

    if not image_urls:
        status_lines.append("⚠️ No images found in this folder.")
    elif not annotations:
        status_lines.append(
            f"⚠️ `{TRAJECTORY_FILENAME}` missing or empty; no annotations to display."
        )

    if annotations and len(annotations) != len(image_urls):
        status_lines.append(
            "⚠️ Mismatch between images and annotations; displaying available pairs."
        )

    folder_name = directory.rsplit("/", 1)[-1]
    metadata_candidates = [
        item
        for item in direct_children
        if item.lower().endswith(".json") and not item.lower().endswith(".jsonl")
    ]

    def _preference(candidate: str):
        # A file named after the folder wins; ties break on lower-cased name.
        basename = candidate.rsplit("/", 1)[-1]
        stem = basename.rsplit(".", 1)[0]
        return (0 if stem == folder_name else 1, basename.lower())

    instruction_markdown = ""
    if not metadata_candidates:
        instruction_markdown = "⚠️ Instruction file not found in this folder."
        status_lines.append("⚠️ Instruction: file not found.")
    else:
        metadata_path = min(metadata_candidates, key=_preference)
        metadata_filename = metadata_path.rsplit("/", 1)[-1]
        try:
            instruction_value = _load_instruction_file(repo_id, metadata_path)
        except json.JSONDecodeError as error:
            status_lines.append(f"⚠️ Failed to parse `{metadata_filename}`: {error}")
            instruction_markdown = (
                f"⚠️ Unable to parse instruction from `{metadata_filename}`."
            )
        except Exception as error:
            status_lines.append(f"⚠️ Error loading `{metadata_filename}`: {error}")
            instruction_markdown = (
                f"⚠️ Unable to load instruction from `{metadata_filename}`."
            )
        else:
            if instruction_value:
                instruction_markdown = f"**Instruction:** {instruction_value}"
                status_lines.append("📋 Instruction: loaded")
            else:
                instruction_markdown = f"⚠️ Instruction missing in `{metadata_filename}`."
                status_lines.append("⚠️ Instruction: missing in metadata.")

    # Pair images with annotations by position; surplus images get a stub caption.
    gallery_items: list[tuple[str, str]] = []
    for position, url in enumerate(image_urls):
        if position < len(annotations):
            caption = _format_annotation(position, annotations[position])
        else:
            caption = "No annotation available"
        gallery_items.append((url, caption))

    return gallery_items, status_lines, instruction_markdown
|
|
|
|
|
|
|
|
def _dropdown_update(
    *,
    choices: list[str],
    value: Optional[str],
    label: str,
    filled_info: str,
    empty_info: str,
):
    """Build the ``gr.update`` payload for one folder dropdown.

    With at least one choice the dropdown is interactive, shows ``value`` and
    uses ``filled_info`` as its hint. With no choices it is non-interactive,
    its value is cleared and ``empty_info`` is shown instead.
    """
    if choices:
        selected, hint, enabled = value, filled_info, True
    else:
        selected, hint, enabled = None, empty_info, False

    return gr.update(
        choices=choices,
        value=selected,
        interactive=enabled,
        label=label,
        info=hint,
    )
|
|
|
|
|
|
|
|
def refresh_repo(repo_id: str):
    """Load ``repo_id`` and populate every widget from its first folder chain.

    The first entry of each folder level is pre-selected down to the fifth
    level, and the gallery is prepared for the resulting folder (if any).

    Returns:
        A 9-tuple of ``gr.update`` payloads: the five folder dropdowns, the
        instruction text, the gallery, the annotation status and the overall
        status. If listing the repository fails, the error is printed with its
        traceback and the tuple disables all dropdowns, clears the panes and
        carries the error message in the status slot.
    """

    def failure(message: str):
        # Five disabled dropdowns, three cleared panes, then the error text.
        disabled = tuple(
            gr.update(choices=[], value=None, interactive=False) for _ in range(5)
        )
        return (
            *disabled,
            gr.update(value=""),
            gr.update(value=[]),
            gr.update(value=""),
            gr.update(value=message),
        )

    def first_or_none(names):
        return names[0] if names else None

    def children_of(*parts):
        # Only descend when the deepest selected level actually has a value.
        return _get_subdirectories(repo_id, _build_path(*parts)) if parts[-1] else []

    try:
        top_dirs, top_files = _extract_top_level(repo_id)
    except HfHubHTTPError as error:
        print(f"[refresh_repo] Hub HTTP error for {repo_id}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return failure(f"❌ Unable to load repo `{repo_id}`: {error}")
    except Exception as error:
        print(f"[refresh_repo] Unexpected error for {repo_id}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return failure(f"❌ Unexpected error loading `{repo_id}`: {error}")

    status_lines = [
        f"✅ Loaded `{repo_id}`",
        f"• Top-level folders: {len(top_dirs)}",
    ]
    if top_files:
        status_lines.append(f"• Loose files at root: {len(top_files)}")
    if not top_dirs:
        status_lines.append("• No sub-folders found at root.")

    # Walk down the tree, always taking the first folder at each level.
    top_value = first_or_none(top_dirs)
    second_dirs = _get_subdirectories(repo_id, top_value) if top_value else []
    second_value = first_or_none(second_dirs)
    third_dirs = children_of(top_value, second_value)
    third_value = first_or_none(third_dirs)
    fourth_dirs = children_of(top_value, second_value, third_value)
    fourth_value = first_or_none(fourth_dirs)
    fifth_dirs = children_of(top_value, second_value, third_value, fourth_value)
    fifth_value = first_or_none(fifth_dirs)

    if fifth_value:
        target_directory = _build_path(
            top_value, second_value, third_value, fourth_value, fifth_value
        )
    else:
        target_directory = None

    gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
        repo_id, target_directory
    )

    # (choices, value, label, filled_info, empty_info) per dropdown, top to bottom.
    dropdown_specs = (
        (
            top_dirs,
            top_value,
            "Top-level folders",
            "Choose a folder to explore",
            "No folders found at the repository root",
        ),
        (
            second_dirs,
            second_value,
            "Second-level folders",
            "Choose a second-level folder",
            "No subdirectories under the selected folder",
        ),
        (
            third_dirs,
            third_value,
            "Third-level folders",
            "Choose a third-level folder",
            "No third-level folders under the selection",
        ),
        (
            fourth_dirs,
            fourth_value,
            "Fourth-level folders",
            "Choose a fourth-level folder",
            "No fourth-level folders under the selection",
        ),
        (
            fifth_dirs,
            fifth_value,
            "Fifth-level folders",
            "Choose a fifth-level folder",
            "No fifth-level folders under the selection",
        ),
    )
    dropdown_updates = tuple(
        _dropdown_update(
            choices=dirs,
            value=selected,
            label=label,
            filled_info=filled,
            empty_info=empty,
        )
        for dirs, selected, label, filled, empty in dropdown_specs
    )

    return (
        *dropdown_updates,
        gr.update(value=instruction_markdown),
        gr.update(value=gallery_items),
        gr.update(value="\n".join(gallery_status)),
        gr.update(value="\n".join(status_lines)),
    )
|
|
|
|
|
|
|
|
def update_second_dropdown(repo_id: str, top_level_dir: str):
    """Rebuild levels two to five and the gallery after a top-level change.

    Returns:
        A 7-tuple of ``gr.update`` payloads: second, third, fourth and fifth
        dropdowns, instruction text, gallery, annotation status. Without a
        top-level selection every dropdown is emptied with a prompt. Any
        exception is printed with its traceback and answered with emptied
        dropdowns and "Unable to load" messages.
    """
    # (label, filled_info, empty_info used when a level simply has no folders)
    level_specs = (
        (
            "Second-level folders",
            "Choose a second-level folder",
            "No subdirectories under the selected folder",
        ),
        (
            "Third-level folders",
            "Choose a third-level folder",
            "No third-level folders under the selection",
        ),
        (
            "Fourth-level folders",
            "Choose a fourth-level folder",
            "No fourth-level folders under the selection",
        ),
        (
            "Fifth-level folders",
            "Choose a fifth-level folder",
            "No fifth-level folders under the selection",
        ),
    )

    def blank_dropdowns(*reasons):
        # One emptied dropdown per level, each explaining why it is empty.
        return tuple(
            _dropdown_update(
                choices=[],
                value=None,
                label=label,
                filled_info=filled,
                empty_info=reason,
            )
            for (label, filled, _), reason in zip(level_specs, reasons)
        )

    def first_or_none(names):
        return names[0] if names else None

    def children_of(*parts):
        # Only descend when the deepest selected level actually has a value.
        return _get_subdirectories(repo_id, _build_path(*parts)) if parts[-1] else []

    try:
        if not top_level_dir:
            return (
                *blank_dropdowns(
                    "Select a top-level folder first",
                    "Select a higher-level folder first",
                    "Select a higher-level folder first",
                    "Select a higher-level folder first",
                ),
                gr.update(value="ℹ️ Select a top-level folder to display its instruction."),
                gr.update(value=[]),
                gr.update(
                    value="Select a top-level folder to load screenshots and annotations."
                ),
            )

        # Walk down the tree, always taking the first folder at each level.
        second_dirs = _get_subdirectories(repo_id, top_level_dir)
        second_value = first_or_none(second_dirs)
        third_dirs = children_of(top_level_dir, second_value)
        third_value = first_or_none(third_dirs)
        fourth_dirs = children_of(top_level_dir, second_value, third_value)
        fourth_value = first_or_none(fourth_dirs)
        fifth_dirs = children_of(top_level_dir, second_value, third_value, fourth_value)
        fifth_value = first_or_none(fifth_dirs)

        if fifth_value:
            target_directory = _build_path(
                top_level_dir, second_value, third_value, fourth_value, fifth_value
            )
        else:
            target_directory = None

        gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
            repo_id, target_directory
        )

        selections = (
            (second_dirs, second_value),
            (third_dirs, third_value),
            (fourth_dirs, fourth_value),
            (fifth_dirs, fifth_value),
        )
        return (
            *(
                _dropdown_update(
                    choices=dirs,
                    value=selected,
                    label=label,
                    filled_info=filled,
                    empty_info=empty,
                )
                for (label, filled, empty), (dirs, selected) in zip(level_specs, selections)
            ),
            gr.update(value=instruction_markdown),
            gr.update(value=gallery_items),
            gr.update(value="\n".join(gallery_status)),
        )
    except Exception as error:
        print(
            f"[update_second_dropdown] Error for {repo_id}/{top_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            *blank_dropdowns(
                "Unable to load subdirectories",
                "Unable to load subdirectories",
                "Unable to load subdirectories",
                "Unable to load subdirectories",
            ),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
|
|
|
|
|
|
|
|
def update_third_dropdown(repo_id: str, top_level_dir: str, second_level_dir: str):
    """Rebuild levels three to five and the gallery after a second-level change.

    Returns:
        A 6-tuple of ``gr.update`` payloads: third, fourth and fifth dropdowns,
        instruction text, gallery, annotation status. When either higher-level
        selection is missing every dropdown is emptied with a prompt. Any
        exception is printed with its traceback and answered with emptied
        dropdowns and "Unable to load" messages.
    """
    # (label, filled_info, empty_info used when a level simply has no folders)
    level_specs = (
        (
            "Third-level folders",
            "Choose a third-level folder",
            "No third-level folders under the selection",
        ),
        (
            "Fourth-level folders",
            "Choose a fourth-level folder",
            "No fourth-level folders under the selection",
        ),
        (
            "Fifth-level folders",
            "Choose a fifth-level folder",
            "No fifth-level folders under the selection",
        ),
    )

    def blank_dropdowns(reason):
        # One emptied dropdown per level, all sharing the same explanation.
        return tuple(
            _dropdown_update(
                choices=[],
                value=None,
                label=label,
                filled_info=filled,
                empty_info=reason,
            )
            for label, filled, _ in level_specs
        )

    def first_or_none(names):
        return names[0] if names else None

    def children_of(*parts):
        # Only descend when the deepest selected level actually has a value.
        return _get_subdirectories(repo_id, _build_path(*parts)) if parts[-1] else []

    try:
        if not top_level_dir or not second_level_dir:
            return (
                *blank_dropdowns("Select higher-level folders first"),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        # Walk down the tree, always taking the first folder at each level.
        third_dirs = _get_subdirectories(
            repo_id, _build_path(top_level_dir, second_level_dir)
        )
        third_value = first_or_none(third_dirs)
        fourth_dirs = children_of(top_level_dir, second_level_dir, third_value)
        fourth_value = first_or_none(fourth_dirs)
        fifth_dirs = children_of(
            top_level_dir, second_level_dir, third_value, fourth_value
        )
        fifth_value = first_or_none(fifth_dirs)

        if fifth_value:
            target_directory = _build_path(
                top_level_dir, second_level_dir, third_value, fourth_value, fifth_value
            )
        else:
            target_directory = None

        gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
            repo_id, target_directory
        )

        selections = (
            (third_dirs, third_value),
            (fourth_dirs, fourth_value),
            (fifth_dirs, fifth_value),
        )
        return (
            *(
                _dropdown_update(
                    choices=dirs,
                    value=selected,
                    label=label,
                    filled_info=filled,
                    empty_info=empty,
                )
                for (label, filled, empty), (dirs, selected) in zip(level_specs, selections)
            ),
            gr.update(value=instruction_markdown),
            gr.update(value=gallery_items),
            gr.update(value="\n".join(gallery_status)),
        )
    except Exception as error:
        print(
            f"[update_third_dropdown] Error for {repo_id}/{top_level_dir}/{second_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            *blank_dropdowns("Unable to load subdirectories"),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
|
|
|
|
|
|
|
|
def update_fourth_dropdown(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
):
    """Rebuild levels four and five and the gallery after a third-level change.

    Returns:
        A 5-tuple of ``gr.update`` payloads: fourth and fifth dropdowns,
        instruction text, gallery, annotation status. When any higher-level
        selection is missing both dropdowns are emptied with a prompt. Any
        exception is printed with its traceback and answered with emptied
        dropdowns and "Unable to load" messages.
    """
    # (label, filled_info, empty_info used when a level simply has no folders)
    level_specs = (
        (
            "Fourth-level folders",
            "Choose a fourth-level folder",
            "No fourth-level folders under the selection",
        ),
        (
            "Fifth-level folders",
            "Choose a fifth-level folder",
            "No fifth-level folders under the selection",
        ),
    )

    def blank_dropdowns(reason):
        # One emptied dropdown per level, all sharing the same explanation.
        return tuple(
            _dropdown_update(
                choices=[],
                value=None,
                label=label,
                filled_info=filled,
                empty_info=reason,
            )
            for label, filled, _ in level_specs
        )

    def first_or_none(names):
        return names[0] if names else None

    try:
        if not (top_level_dir and second_level_dir and third_level_dir):
            return (
                *blank_dropdowns("Select higher-level folders first"),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        fourth_dirs = _get_subdirectories(
            repo_id,
            _build_path(top_level_dir, second_level_dir, third_level_dir),
        )
        fourth_value = first_or_none(fourth_dirs)

        # Only look for fifth-level folders when a fourth-level one exists.
        if fourth_value:
            fifth_dirs = _get_subdirectories(
                repo_id,
                _build_path(
                    top_level_dir, second_level_dir, third_level_dir, fourth_value
                ),
            )
        else:
            fifth_dirs = []
        fifth_value = first_or_none(fifth_dirs)

        if fifth_value:
            target_directory = _build_path(
                top_level_dir,
                second_level_dir,
                third_level_dir,
                fourth_value,
                fifth_value,
            )
        else:
            target_directory = None

        gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
            repo_id, target_directory
        )

        selections = ((fourth_dirs, fourth_value), (fifth_dirs, fifth_value))
        return (
            *(
                _dropdown_update(
                    choices=dirs,
                    value=selected,
                    label=label,
                    filled_info=filled,
                    empty_info=empty,
                )
                for (label, filled, empty), (dirs, selected) in zip(level_specs, selections)
            ),
            gr.update(value=instruction_markdown),
            gr.update(value=gallery_items),
            gr.update(value="\n".join(gallery_status)),
        )
    except Exception as error:
        print(
            "[update_fourth_dropdown] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            *blank_dropdowns("Unable to load subdirectories"),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
|
|
|
|
|
|
|
|
def update_fifth_dropdown(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
    fourth_level_dir: str,
):
    """Rebuild the fifth-level dropdown and the gallery after a fourth-level change.

    Returns:
        A 4-tuple of ``gr.update`` payloads: fifth dropdown, instruction text,
        gallery, annotation status. When any higher-level selection is missing
        the dropdown is emptied with a prompt. Any exception is printed with
        its traceback and answered with an emptied dropdown and "Unable to
        load" messages.
    """
    parents = (top_level_dir, second_level_dir, third_level_dir, fourth_level_dir)

    def fifth_dropdown(dirs, selected, empty_info):
        return _dropdown_update(
            choices=dirs,
            value=selected,
            label="Fifth-level folders",
            filled_info="Choose a fifth-level folder",
            empty_info=empty_info,
        )

    try:
        if not all(parents):
            return (
                fifth_dropdown([], None, "Select higher-level folders first"),
                gr.update(
                    value="ℹ️ Select higher-level folders to display the instruction."
                ),
                gr.update(value=[]),
                gr.update(
                    value="Select higher-level folders to load screenshots and annotations."
                ),
            )

        fifth_dirs = _get_subdirectories(repo_id, _build_path(*parents))
        fifth_value = fifth_dirs[0] if fifth_dirs else None

        # The gallery is only populated once a fifth-level folder is available.
        if fifth_value:
            target_directory = _build_path(*parents, fifth_value)
        else:
            target_directory = None

        gallery_items, gallery_status, instruction_markdown = _prepare_gallery_items(
            repo_id, target_directory
        )

        return (
            fifth_dropdown(
                fifth_dirs, fifth_value, "No fifth-level folders under the selection"
            ),
            gr.update(value=instruction_markdown),
            gr.update(value=gallery_items),
            gr.update(value="\n".join(gallery_status)),
        )
    except Exception as error:
        print(
            "[update_fifth_dropdown] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}/{fourth_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            fifth_dropdown([], None, "Unable to load subdirectories"),
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
|
|
|
|
|
|
|
|
def update_gallery(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
    fourth_level_dir: str,
    fifth_level_dir: str,
):
    """Refresh instruction, gallery and annotation status for the chosen folder.

    The folder path is built from all five levels; if any level is unset the
    gallery helper is called with ``None`` and returns its placeholder output.

    Returns:
        A 3-tuple of ``gr.update`` payloads: instruction text, gallery,
        annotation status. Any exception is printed with its traceback and
        answered with "Unable to load" messages and an empty gallery.
    """
    levels = (
        top_level_dir,
        second_level_dir,
        third_level_dir,
        fourth_level_dir,
        fifth_level_dir,
    )
    try:
        directory = _build_path(*levels) if all(levels) else None
        items, status, instruction = _prepare_gallery_items(repo_id, directory)
        return (
            gr.update(value=instruction),
            gr.update(value=items),
            gr.update(value="\n".join(status)),
        )
    except Exception as error:
        print(
            "[update_gallery] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}/{fourth_level_dir}/{fifth_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return (
            gr.update(value="Unable to load instruction."),
            gr.update(value=[]),
            gr.update(value="Unable to load screenshots or annotations."),
        )
|
|
|
|
|
|
|
|
# UI definition: a repo ID box with a load button, five cascading folder
# dropdowns, and instruction / gallery / status panes.
with gr.Blocks(title="HF Dataset Explorer") as demo:
    gr.Markdown(
        "# Hugging Face Dataset Explorer\n"
        "\n"
        "Provide a dataset repository ID (e.g. `org/dataset`) to list its top-level folders."
    )

    with gr.Row():
        repo_id_input = gr.Textbox(
            value=DEFAULT_REPO_ID,
            label="Dataset repo ID",
            placeholder="owner/dataset",
            info="Any public dataset on the Hugging Face Hub",
        )
        reload_button = gr.Button("Load repo", variant="primary")

    status_display = gr.Markdown()
    folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
    second_level_dropdown = gr.Dropdown(label="Second-level folders", interactive=False)
    third_level_dropdown = gr.Dropdown(label="Third-level folders", interactive=False)
    fourth_level_dropdown = gr.Dropdown(label="Fourth-level folders", interactive=False)
    fifth_level_dropdown = gr.Dropdown(label="Fifth-level folders", interactive=False)
    instruction_display = gr.Markdown()
    image_gallery = gr.Gallery(label="Images", columns=4)
    annotation_status = gr.Markdown()

    # Dropdowns in cascade order, plus the panes every handler refreshes.
    cascade_dropdowns = [
        folder_dropdown,
        second_level_dropdown,
        third_level_dropdown,
        fourth_level_dropdown,
        fifth_level_dropdown,
    ]
    gallery_outputs = [instruction_display, image_gallery, annotation_status]
    refresh_outputs = [*cascade_dropdowns, *gallery_outputs, status_display]

    reload_button.click(
        refresh_repo,
        inputs=repo_id_input,
        outputs=list(refresh_outputs),
    )

    # Each dropdown feeds the repo ID plus every level down to itself into its
    # handler, which rewrites all deeper dropdowns and the gallery panes.
    folder_dropdown.change(
        update_second_dropdown,
        inputs=[repo_id_input, *cascade_dropdowns[:1]],
        outputs=[*cascade_dropdowns[1:], *gallery_outputs],
    )
    second_level_dropdown.change(
        update_third_dropdown,
        inputs=[repo_id_input, *cascade_dropdowns[:2]],
        outputs=[*cascade_dropdowns[2:], *gallery_outputs],
    )
    third_level_dropdown.change(
        update_fourth_dropdown,
        inputs=[repo_id_input, *cascade_dropdowns[:3]],
        outputs=[*cascade_dropdowns[3:], *gallery_outputs],
    )
    fourth_level_dropdown.change(
        update_fifth_dropdown,
        inputs=[repo_id_input, *cascade_dropdowns[:4]],
        outputs=[*cascade_dropdowns[4:], *gallery_outputs],
    )
    fifth_level_dropdown.change(
        update_gallery,
        inputs=[repo_id_input, *cascade_dropdowns],
        outputs=list(gallery_outputs),
    )

    # Populate the widgets for the default repository when the page loads.
    demo.load(
        refresh_repo,
        inputs=repo_id_input,
        outputs=list(refresh_outputs),
    )


if __name__ == "__main__":
    demo.launch()