File size: 5,255 Bytes
fe62bf5 3bbedf7 fe62bf5 3bbedf7 fe62bf5 3bbedf7 23f5054 8436088 fe62bf5 8436088 fe62bf5 3bbedf7 fe62bf5 23f5054 fe62bf5 23f5054 fe62bf5 23f5054 fe62bf5 3bbedf7 8436088 fe62bf5 3bbedf7 8436088 3bbedf7 8436088 23f5054 fe62bf5 23f5054 fe62bf5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import os
import argparse
import shutil
import sys
from dotenv import load_dotenv, find_dotenv
from concurrent.futures import ThreadPoolExecutor
# Importing modules from the utils package
from utils.resize_images import main as resize_images_main
from utils.removebg import iterate_over_directory as removebg_iterate
from utils.photoroom import iterate_over_directory as photoroom_iterate
from utils.bria_rmbg20 import iterate_over_directory as bria_iterate
from utils.clipdrop import iterate_over_directory as clipdrop_iterate
from utils.upload_to_dataset import upload_to_dataset
from utils.resize_processed_images import process_images as downsize_processed_images
from utils.add_checkered_background import process_directory as add_checkered_background_process
from utils.birefnet import process_directory as birefnet_iterate
def check_env_variables():
    """Verify the .env file exists and every required API key is set.

    Terminates the process via ``sys.exit`` with an error message when the
    .env file cannot be found or when any required environment variable is
    missing or empty.
    """
    if not find_dotenv():
        sys.exit("Error: .env file not found.")
    load_dotenv()
    required_keys = (
        'REMOVEBG_API_KEY',
        'PHOTOROOM_API_KEY',
        'BRIA_API_TOKEN',
        'CLIPDROP_API_KEY',
        'FAL_KEY',
    )
    missing_keys = [name for name in required_keys if not os.getenv(name)]
    if missing_keys:
        sys.exit(f"Error: Missing environment variables: {', '.join(missing_keys)}")
def copy_images(source_dir, dest_dir):
    """Flatten images from ``source_dir`` into ``dest_dir``.

    Recursively walks ``source_dir`` and copies every image file
    (.png/.jpg/.jpeg/.webp, case-insensitive) into ``dest_dir`` under the
    name ``<parent-folder>_<original-filename>`` so files from different
    category folders do not collide. A destination file that already
    exists is skipped, making repeated runs idempotent.

    Args:
        source_dir: Root directory to scan for images.
        dest_dir: Flat output directory; created if it does not exist.
    """
    os.makedirs(dest_dir, exist_ok=True)
    valid_extensions = ('.png', '.jpg', '.jpeg', '.webp')
    # Walk through the source directory
    for root, _, files in os.walk(source_dir):
        for filename in files:
            if filename.lower().endswith(valid_extensions):
                source_file = os.path.join(root, filename)
                # Prefix with the containing folder name so names stay
                # unique once everything lands in one flat directory.
                # (Bug fix: the name previously dropped the original
                # filename, collapsing every image in a folder onto the
                # same destination path.)
                folder_name = os.path.basename(root)
                new_filename = f"{folder_name}_{filename}"
                dest_file = os.path.join(dest_dir, new_filename)
                # Copy only real files, and never overwrite an existing copy.
                # copy2 preserves metadata (timestamps, permissions).
                if os.path.isfile(source_file) and not os.path.exists(dest_file):
                    shutil.copy2(source_file, dest_file)
                    print(f"Copied: {new_filename}")
                else:
                    print(f"Skipped: {filename} (already exists or not a file)")
def main():
    """Run the full image-processing pipeline.

    Stages: merge per-category input folders into one directory, resize,
    remove backgrounds with five services in parallel, composite the
    cutouts onto checkered backgrounds, and optionally upload the result
    as a Hugging Face dataset.
    """
    check_env_variables()

    parser = argparse.ArgumentParser(description="Image Processing Pipeline")
    parser.add_argument("--input-dir", type=str, default="original-images",
                        help="Input directory for images")
    parser.add_argument("--work-dir", type=str, default="workdir",
                        help="Working directory for intermediate images")
    parser.add_argument("--dataset-name", type=str,
                        help="Name of the dataset to upload to Hugging Face Hub")
    parser.add_argument("--push-dataset", action="store_true",
                        help="Push the dataset to the Hugging Face Hub")
    args = parser.parse_args()

    # Intermediate directories, all under the working directory.
    resized_dir = os.path.join(args.work_dir, "resized")
    removed_dir = os.path.join(args.work_dir, "background-removed")
    checkered_dir = os.path.join(args.work_dir, "checkered-background")
    merged_dir = os.path.join(args.work_dir, "merged-categories")
    for path in (resized_dir, removed_dir, checkered_dir):
        os.makedirs(path, exist_ok=True)

    # Stage 1: flatten the per-category input folders into one directory.
    print("Moving images to final output directory...")
    copy_images(args.input_dir, merged_dir)

    # Stage 2: normalize image sizes.
    print("Resizing images...")
    resize_images_main(input_directory=merged_dir, output_directory=resized_dir)

    # Stage 3: background removal. Each provider call is API/network bound,
    # so the five providers run concurrently in a thread pool.
    print("Removing backgrounds...")
    providers = {
        "removebg": removebg_iterate,
        "photoroom": photoroom_iterate,
        "bria": bria_iterate,
        "clipdrop": clipdrop_iterate,
        "birefnet": birefnet_iterate,
    }
    provider_dirs = {name: os.path.join(removed_dir, name) for name in providers}
    for path in provider_dirs.values():
        os.makedirs(path, exist_ok=True)
    with ThreadPoolExecutor(max_workers=5) as executor:
        for name, worker in providers.items():
            executor.submit(worker, resized_dir, provider_dirs[name])

    # Stage 4: composite every cutout onto a checkered background.
    print("Adding checkered background...")
    add_checkered_background_process(removed_dir, checkered_dir)

    # Stage 5 (optional): pair originals with results and upload as a dataset.
    if args.dataset_name:
        upload_to_dataset(resized_dir, checkered_dir, args.dataset_name,
                          dry_run=not args.push_dataset)
    else:
        print("Please provide a dataset name using --dataset-name")
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()