import argparse
import os
import warnings

import crepe


###############################################################################
# Entry point
###############################################################################


# Decoder names accepted by --decoder; each is looked up as an attribute of
# crepe.decode when pitch prediction is requested.
_DECODERS = ('argmax', 'weighted_argmax', 'viterbi')


def parse_args():
    """Parse command-line arguments

    Returns:
        The argparse.Namespace produced by parsing sys.argv.
    """
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument(
        '--audio_files',
        nargs='+',
        required=True,
        help='The audio file to process')
    parser.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='The file to save pitch or embedding')

    # Not marked required; defaults to None when omitted
    parser.add_argument(
        '--hop_length',
        type=int,
        help='The hop length of the analysis window')

    # Optionally save harmonicity [DEPRECATED]
    parser.add_argument(
        '--output_harmonicity_files',
        nargs='+',
        help='The file to save harmonicity')

    # Optionally save periodicity
    parser.add_argument(
        '--output_periodicity_files',
        nargs='+',
        help='The files to save periodicity')

    # Optionally create embedding instead of pitch contour
    parser.add_argument(
        '--embed',
        action='store_true',
        help='Performs embedding instead of pitch prediction')

    # Optional arguments
    parser.add_argument(
        '--fmin',
        default=50.,
        type=float,
        help='The minimum frequency allowed')
    parser.add_argument(
        '--fmax',
        default=crepe.MAX_FMAX,
        type=float,
        help='The maximum frequency allowed')
    parser.add_argument(
        '--model',
        default='full',
        help='The model capacity. One of "tiny" or "full"')
    parser.add_argument(
        '--decoder',
        default='viterbi',
        help='The decoder to use. One of "argmax", "viterbi", or ' +
             '"weighted_argmax"')
    parser.add_argument(
        '--batch_size',
        type=int,
        help='The number of frames per batch')
    parser.add_argument(
        '--gpu',
        type=int,
        help='The gpu to perform inference on')

    # NOTE: this flag is negated by main() before being forwarded, so
    # passing --no_pad turns the final argument of the crepe call to False.
    parser.add_argument(
        '--no_pad',
        action='store_true',
        help='Whether to pad the audio')

    return parser.parse_args()


def make_parent_directory(file):
    """Create parent directory for file if it does not already exist"""
    parent = os.path.dirname(os.path.abspath(file))
    os.makedirs(parent, exist_ok=True)


def _resolve_decoder(name):
    """Return the crepe.decode function selected by the --decoder value

    Raises:
        ValueError: if name is not one of the supported decoder names.
    """
    if name not in _DECODERS:
        # Without this check an unknown name would only surface later as an
        # unbound-variable error with no hint about the valid choices.
        raise ValueError(
            f'Unknown decoder {name!r}. Expected one of: '
            + ', '.join(_DECODERS))
    return getattr(crepe.decode, name)


def main():
    """Run pitch prediction or embedding from the command line

    Parses the arguments, creates the parent directories of every output
    file, selects the inference device, and forwards everything to
    crepe.embed_from_files_to_files (with --embed) or
    crepe.predict_from_files_to_files (otherwise).

    Raises:
        ValueError: if pitch prediction is requested with an unsupported
            --decoder value.
    """
    # Parse command-line arguments
    args = parse_args()

    # Deprecate output_harmonicity_files
    if args.output_harmonicity_files is not None:
        message = (
            'The crepe output_harmonicity_files argument is deprecated and '
            'will be removed in a future release. Please use '
            'output_periodicity_files. Rationale: if network confidence measured '
            'harmonic content, the value would be low for non-harmonic, periodic '
            'sounds (e.g., sine waves). But this is not observed.')
        warnings.warn(message, DeprecationWarning)

        # The deprecated argument takes over the periodicity outputs
        args.output_periodicity_files = args.output_harmonicity_files

    # Ensure output directories exist
    for file in args.output_files:
        make_parent_directory(file)
    if args.output_periodicity_files is not None:
        for file in args.output_periodicity_files:
            make_parent_directory(file)

    # Get inference device
    device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'

    # Infer pitch or embedding and save to disk
    if args.embed:
        crepe.embed_from_files_to_files(args.audio_files,
                                        args.output_files,
                                        args.hop_length,
                                        args.model,
                                        args.batch_size,
                                        device,
                                        not args.no_pad)
    else:
        # Only prediction uses a decoder, so only this path validates it
        decoder = _resolve_decoder(args.decoder)

        # The third positional argument is always None here; presumably it
        # is the deprecated harmonicity output slot - confirm against the
        # crepe.predict_from_files_to_files signature.
        crepe.predict_from_files_to_files(args.audio_files,
                                          args.output_files,
                                          None,
                                          args.output_periodicity_files,
                                          args.hop_length,
                                          args.fmin,
                                          args.fmax,
                                          args.model,
                                          decoder,
                                          args.batch_size,
                                          device,
                                          not args.no_pad)


# Run module entry point only when executed as a script (or via python -m),
# so that importing this module does not start inference
if __name__ == '__main__':
    main()