update app
app.py
CHANGED
@@ -1,42 +1,69 @@
+from typing import Optional
+
 import gradio as gr
+import pandas as pd
 import pyarrow.parquet as pq
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from huggingface_hub import HfFileSystem

-    )
-    revision_textbox = gr.Textbox("main")
-    parquet_file_dropdown = gr.Dropdown()
-    with gr.Column():
-        output_dataframe = gr.DataFrame()
-
-    @dataset_search.change(inputs=[dataset_search], outputs=[
-    def
-
-    @
-    def
-
-    @revision_textbox.change(inputs=[dataset_search, revision_textbox, parquet_file_dropdown], outputs=[revision_textbox, parquet_file_dropdown, output_dataframe])
-    def show_input_from_parquet_file(dataset, revision, parquet_file):
-        yield from _show_input_preview(dataset, revision=revision, parquet_file=parquet_file)
+css = """
+.settings {
+    background: transparent;
+}
+.settings button span {
+    color: var(--body-text-color-subdued);
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# π Parquet Viewer π")
+    gr.Markdown("View the content of Parquet files inside a dataset repository or pull request.")
+    with gr.Row():
+        with gr.Column(scale=10):
+            dataset_search = HuggingfaceHubSearch(
+                label="Hub Dataset ID",
+                placeholder="Search for dataset id on Huggingface",
+                search_type="dataset",
+            )
+            with gr.Row():
+                revision_dropdown = gr.Dropdown("main", label="Revision", allow_custom_value=True)
+                parquet_file_dropdown = gr.Dropdown(label="Parquet file", allow_custom_value=True)
+            gr.Markdown("Parquet content:")
+            output_dataframe = gr.DataFrame()
+        with gr.Column(scale=4, min_width="200px"):
+            with gr.Accordion("Settings", open=False, elem_classes="settings"):
+                gr.Markdown("Access private/gated repos")
+                gr.LoginButton()

+    @dataset_search.change(inputs=[dataset_search], outputs=[revision_dropdown, parquet_file_dropdown, output_dataframe])
+    def dataset_update(dataset, oauth_token: Optional[gr.OAuthToken] = None):
+        fs = HfFileSystem(token=oauth_token)
+        if "/" not in dataset:
+            return {revision_dropdown: gr.Dropdown(choices=[], value="", info="")}
+        try:
+            prs = [f"{dataset}@refs/pr/{pr.num}" for pr in fs._api.get_repo_discussions(dataset, repo_type="dataset", discussion_type="pull_request")]
+            revision = f"{dataset}@main"
+            return {revision_dropdown: gr.Dropdown(choices=[revision] + prs, value=revision, info=f"{len(prs)} pull request{'s' if len(prs) > 1 else ''} available" if prs else None)}
+        except Exception:
+            return {revision_dropdown: gr.Dropdown(choices=[], value="", info="no revisions available")}

+    @revision_dropdown.change(inputs=[revision_dropdown], outputs=[parquet_file_dropdown, output_dataframe])
+    def revision_update(dataset_and_revision, oauth_token: Optional[gr.OAuthToken] = None):
+        fs = HfFileSystem(token=oauth_token)
+        try:
+            parquet_files = ["hf://" + path for path in fs.glob(f"datasets/{dataset_and_revision}/**/*.parquet")]
+            parquet_file = parquet_files[0] if parquet_files else None
+            return {parquet_file_dropdown: gr.Dropdown(choices=parquet_files, value=parquet_file, info=f"{len(parquet_files)} parquet file{'s' if len(parquet_files) > 1 else ''} available")}
+        except Exception:
+            return {parquet_file_dropdown: gr.Dropdown(choices=[], value="", info="")}
+
+    @parquet_file_dropdown.change(inputs=[parquet_file_dropdown], outputs=[output_dataframe])
+    def parquet_file_update(parquet_file, oauth_token: Optional[gr.OAuthToken] = None):
+        fs = HfFileSystem(token=oauth_token)
+        try:
+            return {output_dataframe: pd.DataFrame([{k: str(v)[:1000] for k, v in x.items()} for x in pq.ParquetFile(parquet_file, filesystem=fs).read_row_group(0).to_pylist()] if parquet_file else [])}
+        except Exception:
+            return {output_dataframe: []}

 demo.launch()
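The commit chains three dropdown callbacks: choosing a dataset fills the revision dropdown with main plus any open pull requests, choosing a revision globs the repository for Parquet files, and choosing a file loads its first row group into the DataFrame. A minimal sketch of that file-reading path outside Gradio, using the same HfFileSystem glob and pyarrow calls as the app (the dataset id "username/my-dataset" and revision "main" are placeholders):

import pyarrow.parquet as pq
from huggingface_hub import HfFileSystem

fs = HfFileSystem()  # pass token=... to read private or gated repos

# Placeholder dataset id and revision; pull requests use "<id>@refs/pr/<num>".
dataset_and_revision = "username/my-dataset@main"

# Same glob pattern the app uses to discover Parquet files in the revision.
parquet_files = ["hf://" + path for path in fs.glob(f"datasets/{dataset_and_revision}/**/*.parquet")]

if parquet_files:
    # Read only the first row group, as the app does for its preview.
    table = pq.ParquetFile(parquet_files[0], filesystem=fs).read_row_group(0)
    print(table.to_pylist()[:5])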