Spaces:

Wauplin
/

kaggle_importer

Sleeping

App Files Files Community

Wauplin HF Staff committed on Apr 10, 2024

Commit

2fc8bbc

verified ·

1 Parent(s): 1b89115

First draft

Browse files

Files changed (2) hide show

app.py +130 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import os
+import shutil
+from pathlib import Path
+from typing import Iterable, List
+import gradio as gr
+import kagglehub
+from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
+from huggingface_hub import HfApi
+KAGGLE_JSON = os.environ.get("KAGGLE_JSON")
+KAGGLE_JSON_PATH = Path("~/.kaggle/kaggle.json").expanduser().resolve()
+if KAGGLE_JSON_PATH.exists():
+    print(f"Found existing kaggle.json file at {KAGGLE_JSON_PATH}")
+elif KAGGLE_JSON is not None:
+    print(
+        "KAGGLE_JSON is set as secret. Will be able to be authenticated when downloading files from Kaggle."
+    )
+    KAGGLE_JSON_PATH.mkdir(parents=True, exist_ok=True)
+    KAGGLE_JSON_PATH.write_text(KAGGLE_JSON)
+else:
+    print(
+        f"No kaggle.json file found at {KAGGLE_JSON_PATH}. You will not be able to download private/gated files from Kaggle."
+    )
+MARKDOWN_DESCRIPTION = """
+# Keggla-importer GUI
+The fastest way to import a model from KaggleHub to the Hugging Face Hub 🔥
+Specify a Kaggle handle and a Hugging Face Write Token to import a model from KaggleHub to the Hugging Face Hub.
+To find the Kaggle handle from a web UI, click on the "download dropdown" and copy the handle from the code snippet.
+Example: `"keras/gemma/keras/gemma_instruct_2b_en"`.
+"""
+if KAGGLE_JSON_PATH.exists():
+    MARKDOWN_DESCRIPTION += """
+**Note**: a `kaggle.json` file exists in the home directory. This means the Space will be able to download **SOME** private/gated files from Kaggle.
+To access other models, please duplicate this Space to a private Space and set the `KAGGLE_JSON` environment variable with the content of the `kaggle.json`
+you've downloaded from your Kaggle user account.
+"""
+def import_model(kaggle_model: str, repo_name: str, token: str) -> Iterable[List[Log]]:
+    if not kaggle_model:
+        return "Kaggle model is required."
+    if not repo_name:
+        repo_name = kaggle_model.split("/")[-1]
+    if not token:
+        return "HF Write Token is required."
+    api = HfApi(token=token)
+    runner = LogsViewRunner()
+    yield runner.log(f"Creating HF repo {repo_name}")
+    repo_url = api.create_repo(repo_name, exist_ok=True)
+    yield runner.log(f"Created HF repo: {repo_url}")
+    repo_id = repo_url.repo_id
+    model_id = api.model_info(repo_id)
+    if len(model_id.siblings) > 1:
+        yield runner.log(
+            f"Model repo {repo_id} is not empty. Please delete it or set a different repo name.",
+            level="ERROR",
+        )
+        return
+    yield runner.log(f"Downloading model {kaggle_model} from Kaggle.")
+    yield from runner.run_python(kagglehub.model_download, handle=kaggle_model)
+    if runner.exit_code != 0:
+        yield runner.log("Failed to download model from Kaggle.", level="ERROR")
+        api.delete_repo(repo_id=repo_id)
+        return
+    cache_path = kagglehub.model_download(kaggle_model)  # should be instant
+    yield runner.log(f"Model successfully downloaded from Kaggle to {cache_path}.")
+    yield runner.log(f"Uploading model to HF repo {repo_id}.")
+    yield from runner.run_python(
+        api.upload_folder, repo_id=repo_id, folder_path=cache_path
+    )
+    if runner.exit_code != 0:
+        yield runner.log("Failed to upload model to HF repo.", level="ERROR")
+        api.delete_repo(repo_id=repo_id)
+        return
+    yield runner.log(f"Model successfully uploaded to HF: {repo_url}.")
+    yield runner.log(f"Deleting local cache from {cache_path}.")
+    shutil.rmtree(cache_path)
+    yield runner.log("Done!")
+with gr.Blocks() as demo:
+    gr.Markdown(MARKDOWN_DESCRIPTION)
+    with gr.Row():
+        with gr.Column():
+            kaggle_model = gr.Textbox(
+                lines=1,
+                label="Kaggle Model*",
+                placeholder="keras/codegemma/keras/code_gemma_7b_en",
+            )
+            repo_name = gr.Textbox(
+                lines=1,
+                label="Repo name",
+                placeholder="Optional. Will infer from Kaggle Model if empty.",
+            )
+        with gr.Column():
+            token = gr.Textbox(
+                lines=1,
+                label="HF Write Token*",
+                info="https://hf.co/settings/token",
+                type="password",
+                placeholder="hf_***",
+            )
+    button = gr.Button("Import", variant="primary")
+    logs = LogsView(label="Terminal output")
+    button.click(
+        fn=import_model, inputs=[kaggle_model, repo_name, token], outputs=[logs]
+    )
+demo.queue(default_concurrency_limit=1).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+kaggle
+# app.py imports `kagglehub`, which is a separate distribution from `kaggle`
+kagglehub
+huggingface_hub
+# see https://huggingface.co/spaces/Wauplin/gradio_logsview
+gradio_logsview@https://huggingface.co/spaces/Wauplin/gradio_logsview/resolve/main/gradio_logsview-0.0.5-py3-none-any.whl