File size: 1,427 Bytes
b5cf002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from src.dataset.GoodDataset import *

import argparse

def main(config: argparse.Namespace) -> None:
    """Build the dataset, fetch positive samples, and save it to disk.

    Args:
        config: Parsed command-line arguments. The attributes read here are
            ``size``, ``random``, ``seed`` and ``full`` (forwarded to
            ``fetch_positive_samples_parallel``) and ``output`` (forwarded
            to ``save``).
    """
    dataset = AugmentedDataset()

    # The return value is intentionally not kept: only the dataset object is
    # saved afterwards.
    # NOTE(review): presumably the fetch stores the samples on `dataset`
    # itself -- confirm against AugmentedDataset.
    dataset.fetch_positive_samples_parallel(
        num_samples=config.size,
        random=config.random,
        seed=config.seed,
        full=config.full,
    )

    dataset.save(config.output)
    

def parse_bool_flag(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` calls its ``type`` converter on the raw string, and
    ``bool("False")`` is ``True`` because any non-empty string is truthy.
    This converter interprets the text instead.

    Args:
        value: The raw argument text (or an actual ``bool``, which is
            returned unchanged).

    Returns:
        ``True`` for true/t/yes/y/1/on, ``False`` for false/f/no/n/0/off or
        the empty string (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value

    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1", "on"}:
        return True
    # The empty string stays falsy: with the previous `type=bool` it was the
    # only value that produced False, so existing invocations keep working.
    if normalized in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


if __name__ == "__main__":
    # Imported explicitly so the default output path below does not depend on
    # `os` happening to be re-exported by a wildcard import.
    import os

    from src.utils.io_utils import PROJECT_ROOT

    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Generate and save a dataset based on the given configuration.")

    parser.add_argument("-s", "--size", type=int, default=10, help="Number of samples to generate.")
    # `type=bool` would turn every non-empty value (including "False") into
    # True, so the text is parsed explicitly.
    parser.add_argument("-r", "--random", type=parse_bool_flag, default=True, help="Whether to sample randomly (true/false).")
    parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility.")
    parser.add_argument("--full", action="store_true", help="Boolean flag to indicate full dataset mode.")
    parser.add_argument("-o", "--output", type=str, default=os.path.join(PROJECT_ROOT, "data/dataset.pkl"), help="Output file path to save the dataset.")

    # Parse the arguments and pass to the main function
    config = parser.parse_args()
    main(config)