mertunsall
committed on
Commit
·
8dc7996
1
Parent(s):
2a35e5e
Fix
Browse files
app.py
CHANGED
|
@@ -26,26 +26,18 @@ def _extract_top_level(repo_id: str) -> tuple[list[str], list[str]]:
|
|
| 26 |
return top_level_dirs, top_level_files
|
| 27 |
|
| 28 |
|
| 29 |
-
def
|
| 30 |
-
"""Return
|
| 31 |
if not directory:
|
| 32 |
-
return
|
| 33 |
|
| 34 |
files = [path for path in _list_repo_files(repo_id) if path.startswith(f"{directory}/")]
|
| 35 |
relative_paths = [path[len(directory) + 1 :] for path in files]
|
| 36 |
-
|
|
|
|
| 37 |
child_dirs = sorted({rel.split("/", 1)[0] for rel in relative_paths if "/" in rel})
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
sample_files = child_files[:10]
|
| 41 |
-
has_more_files = len(child_files) > len(sample_files)
|
| 42 |
-
|
| 43 |
-
return {
|
| 44 |
-
"folder": directory,
|
| 45 |
-
"total_files": len(files),
|
| 46 |
-
"direct_subdirectories": child_dirs,
|
| 47 |
-
"sample_files": sample_files + (["..."] if has_more_files else []),
|
| 48 |
-
}
|
| 49 |
|
| 50 |
|
| 51 |
def refresh_repo(repo_id: str):
|
|
@@ -55,17 +47,17 @@ def refresh_repo(repo_id: str):
|
|
| 55 |
print(f"[refresh_repo] Hub HTTP error for {repo_id}: {error}", flush=True)
|
| 56 |
print(traceback.format_exc(), flush=True)
|
| 57 |
return (
|
|
|
|
| 58 |
gr.update(choices=[], value=None, interactive=False),
|
| 59 |
gr.update(value=f"❌ Unable to load repo `{repo_id}`: {error}"),
|
| 60 |
-
{}
|
| 61 |
)
|
| 62 |
except Exception as error: # pragma: no cover - network and auth edge cases
|
| 63 |
print(f"[refresh_repo] Unexpected error for {repo_id}: {error}", flush=True)
|
| 64 |
print(traceback.format_exc(), flush=True)
|
| 65 |
return (
|
|
|
|
| 66 |
gr.update(choices=[], value=None, interactive=False),
|
| 67 |
gr.update(value=f"❌ Unexpected error loading `{repo_id}`: {error}"),
|
| 68 |
-
{}
|
| 69 |
)
|
| 70 |
|
| 71 |
status_lines = [
|
|
@@ -79,26 +71,50 @@ def refresh_repo(repo_id: str):
|
|
| 79 |
status_lines.append("• No sub-folders found at root.")
|
| 80 |
|
| 81 |
dropdown_value = top_dirs[0] if top_dirs else None
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
choices=top_dirs,
|
| 84 |
value=dropdown_value,
|
| 85 |
interactive=bool(top_dirs),
|
| 86 |
label="Top-level folders",
|
| 87 |
info="Choose a folder to explore"
|
| 88 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
return dropdown_update, gr.update(value="\n".join(status_lines)), folder_summary
|
| 93 |
|
| 94 |
|
| 95 |
-
def
|
|
|
|
| 96 |
try:
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
except Exception as error:
|
| 99 |
-
print(f"[
|
| 100 |
print(traceback.format_exc(), flush=True)
|
| 101 |
-
return
|
| 102 |
|
| 103 |
|
| 104 |
with gr.Blocks(title="HF Dataset Explorer") as demo:
|
|
@@ -119,24 +135,24 @@ Provide a dataset repository ID (e.g. `org/dataset`) to list its top-level folde
|
|
| 119 |
|
| 120 |
status_display = gr.Markdown()
|
| 121 |
folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
|
| 122 |
-
|
| 123 |
|
| 124 |
reload_button.click(
|
| 125 |
refresh_repo,
|
| 126 |
inputs=repo_id_input,
|
| 127 |
-
outputs=[folder_dropdown,
|
| 128 |
)
|
| 129 |
|
| 130 |
folder_dropdown.change(
|
| 131 |
-
|
| 132 |
inputs=[repo_id_input, folder_dropdown],
|
| 133 |
-
outputs=
|
| 134 |
)
|
| 135 |
|
| 136 |
demo.load(
|
| 137 |
refresh_repo,
|
| 138 |
inputs=repo_id_input,
|
| 139 |
-
outputs=[folder_dropdown,
|
| 140 |
)
|
| 141 |
|
| 142 |
if __name__ == "__main__":
|
|
|
|
| 26 |
return top_level_dirs, top_level_files
|
| 27 |
|
| 28 |
|
| 29 |
+
def _get_subdirectories(repo_id: str, directory: str) -> list[str]:
    """List the immediate child folders of ``directory`` inside a repo.

    Every path returned by ``_list_repo_files(repo_id)`` that starts with
    ``directory/`` is examined. The first path segment below ``directory``
    counts as a subdirectory whenever at least one more segment follows it.

    Returns the distinct subdirectory names in sorted order, or an empty
    list when ``directory`` is falsy.
    """
    if not directory:
        return []

    prefix = f"{directory}/"
    found: set[str] = set()
    for repo_path in _list_repo_files(repo_id):
        if not repo_path.startswith(prefix):
            continue
        remainder = repo_path[len(prefix):]
        # A "/" in the remainder means the file sits in a nested folder;
        # files directly inside ``directory`` contribute nothing.
        head, sep, _ = remainder.partition("/")
        if sep:
            found.add(head)
    return sorted(found)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def refresh_repo(repo_id: str):
|
|
|
|
| 47 |
print(f"[refresh_repo] Hub HTTP error for {repo_id}: {error}", flush=True)
|
| 48 |
print(traceback.format_exc(), flush=True)
|
| 49 |
return (
|
| 50 |
+
gr.update(choices=[], value=None, interactive=False),
|
| 51 |
gr.update(choices=[], value=None, interactive=False),
|
| 52 |
gr.update(value=f"❌ Unable to load repo `{repo_id}`: {error}"),
|
|
|
|
| 53 |
)
|
| 54 |
except Exception as error: # pragma: no cover - network and auth edge cases
|
| 55 |
print(f"[refresh_repo] Unexpected error for {repo_id}: {error}", flush=True)
|
| 56 |
print(traceback.format_exc(), flush=True)
|
| 57 |
return (
|
| 58 |
+
gr.update(choices=[], value=None, interactive=False),
|
| 59 |
gr.update(choices=[], value=None, interactive=False),
|
| 60 |
gr.update(value=f"❌ Unexpected error loading `{repo_id}`: {error}"),
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
status_lines = [
|
|
|
|
| 71 |
status_lines.append("• No sub-folders found at root.")
|
| 72 |
|
| 73 |
dropdown_value = top_dirs[0] if top_dirs else None
|
| 74 |
+
|
| 75 |
+
# Get subdirectories for the first top-level folder
|
| 76 |
+
subdirs = _get_subdirectories(repo_id, dropdown_value) if dropdown_value else []
|
| 77 |
+
subdir_value = subdirs[0] if subdirs else None
|
| 78 |
+
|
| 79 |
+
first_dropdown_update = gr.update(
|
| 80 |
choices=top_dirs,
|
| 81 |
value=dropdown_value,
|
| 82 |
interactive=bool(top_dirs),
|
| 83 |
label="Top-level folders",
|
| 84 |
info="Choose a folder to explore"
|
| 85 |
)
|
| 86 |
+
|
| 87 |
+
second_dropdown_update = gr.update(
|
| 88 |
+
choices=subdirs,
|
| 89 |
+
value=subdir_value,
|
| 90 |
+
interactive=bool(subdirs),
|
| 91 |
+
label="Subdirectories",
|
| 92 |
+
info="Choose a subdirectory"
|
| 93 |
+
)
|
| 94 |
|
| 95 |
+
return first_dropdown_update, second_dropdown_update, gr.update(value="\n".join(status_lines))
|
|
|
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
+
def update_second_dropdown(repo_id: str, top_level_dir: str):
    """Refresh the subdirectory dropdown after the top-level selection changes.

    Returns a ``gr.update`` for the second dropdown. It is populated with the
    subdirectories of ``top_level_dir`` (first entry pre-selected), or emptied
    and disabled when no folder is selected or the lookup raises.
    """
    try:
        if top_level_dir:
            children = _get_subdirectories(repo_id, top_level_dir)
            return gr.update(
                choices=children,
                value=children[0] if children else None,
                interactive=bool(children),
                label="Subdirectories",
                info="Choose a subdirectory",
            )
        return gr.update(choices=[], value=None, interactive=False)
    except Exception as error:
        # UI callback: log the failure and fall back to a disabled dropdown
        # instead of propagating the exception.
        print(f"[update_second_dropdown] Error for {repo_id}/{top_level_dir}: {error}", flush=True)
        print(traceback.format_exc(), flush=True)
        return gr.update(choices=[], value=None, interactive=False)
|
| 118 |
|
| 119 |
|
| 120 |
with gr.Blocks(title="HF Dataset Explorer") as demo:
|
|
|
|
| 135 |
|
| 136 |
status_display = gr.Markdown()
|
| 137 |
folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
|
| 138 |
+
subfolder_dropdown = gr.Dropdown(label="Subdirectories", interactive=False)
|
| 139 |
|
| 140 |
reload_button.click(
|
| 141 |
refresh_repo,
|
| 142 |
inputs=repo_id_input,
|
| 143 |
+
outputs=[folder_dropdown, subfolder_dropdown, status_display],
|
| 144 |
)
|
| 145 |
|
| 146 |
folder_dropdown.change(
|
| 147 |
+
update_second_dropdown,
|
| 148 |
inputs=[repo_id_input, folder_dropdown],
|
| 149 |
+
outputs=subfolder_dropdown,
|
| 150 |
)
|
| 151 |
|
| 152 |
demo.load(
|
| 153 |
refresh_repo,
|
| 154 |
inputs=repo_id_input,
|
| 155 |
+
outputs=[folder_dropdown, subfolder_dropdown, status_display],
|
| 156 |
)
|
| 157 |
|
| 158 |
if __name__ == "__main__":
|