File size: 2,777 Bytes
7bf4b88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import shutil
from huggingface_hub import hf_hub_download, list_repo_files


def download_hf_file(repo, file, repo_type="dataset", save_as_file=None):
    """
    Downloads a file from a Hugging Face repository and optionally copies it to a local path.

    Args:
        repo (str): The repository name.
        file (str): The file path within the repository to download.
        repo_type (str): The type of the repository (e.g., 'dataset').
        save_as_file (str, optional): The local file path to copy the downloaded file to.
                                      If not provided, no copy is made and the path
                                      returned by `hf_hub_download` is used as-is.

    Returns:
        str: `save_as_file` when it is provided, otherwise the path returned by
             `hf_hub_download`.
    """
    # Download the file from the repository
    file_path = hf_hub_download(repo, file, repo_type=repo_type)

    # Without an explicit destination, hand back the downloaded path directly
    if save_as_file is None:
        return file_path

    # Create necessary directories. A bare file name (e.g. "data.csv") has an
    # empty dirname and os.makedirs("") raises FileNotFoundError, so skip it.
    target_dir = os.path.dirname(save_as_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Copy the downloaded file to the desired location; a destination that
    # already exists is left untouched.
    if not os.path.exists(save_as_file) and file_path != save_as_file:
        shutil.copy2(file_path, save_as_file)

    print(f"Downloaded <file:{file}> from <repo:{repo}> to <path:{save_as_file}>!")
    return save_as_file

def download_hf_folder(repo, folder, repo_type="dataset", save_as_folder=None):
    """
    Downloads a folder from a Hugging Face repository and optionally copies it to a local directory.

    Args:
        repo (str): The repository name.
        folder (str): The folder path within the repository to download.
                      A trailing '/' is ignored.
        repo_type (str): The type of the repository (e.g., 'dataset').
        save_as_folder (str, optional): The local directory to copy the downloaded files into,
                                        keeping their paths relative to `folder`.
                                        If not provided, nothing is copied.

    Returns:
        str: `save_as_folder` when it is provided. Otherwise the directory two levels
             above the first downloaded file, as returned by `hf_hub_download`.

    Raises:
        FileNotFoundError: If the repository contains no files under `folder`.
    """
    # Tolerate a trailing slash ("data/") so the prefix match below still works
    folder = folder.rstrip('/')

    # List all files in the repository
    files = list_repo_files(repo, repo_type=repo_type)

    # Filter files that belong to the specified folder
    folder_files = [f for f in files if f.startswith(folder + '/')]

    # Fail with a clear message instead of hitting an unbound `file_path` below
    if not folder_files:
        raise FileNotFoundError(f"No files found under <folder:{folder}> in <repo:{repo}>!")

    # Kept separate from `save_as_folder`: reassigning the parameter would make
    # every later iteration copy files into the download cache.
    cache_folder = None

    # Download and save each file in the folder
    for file in folder_files:
        file_path = hf_hub_download(repo, file, repo_type=repo_type)
        if save_as_folder is not None:
            new_file_path = os.path.join(save_as_folder, os.path.relpath(file, folder))
            os.makedirs(os.path.dirname(new_file_path), exist_ok=True)
            # A destination that already exists is left untouched
            if not os.path.exists(new_file_path) and file_path != new_file_path:
                shutil.copy2(file_path, new_file_path)
        elif cache_folder is None:
            # NOTE(review): two levels above the first file; for a file directly
            # inside `folder` this is presumably the parent of `folder` in the
            # local cache -- confirm this is what callers expect.
            cache_folder = os.path.dirname(os.path.dirname(file_path))
    print(f"Use file from {file_path}.")
    return save_as_folder if save_as_folder is not None else cache_folder