import os

from PIL import Image

# Define the data directories
base_dir = 'data/chest_xray'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

# File extensions treated as images. Names are lower-cased before the
# comparison, so '.JPG' or '.PNG' files are matched as well.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _is_image_file(filename):
    """Return True if *filename* ends with an image extension (any case)."""
    return filename.lower().endswith(IMAGE_EXTENSIONS)


# Function to count images in a specific category (e.g., NORMAL, PNEUMONIA)
def count_images(directory: str, category: str) -> int:
    """Count the image files below ``directory/category``, recursively.

    Args:
        directory: Dataset split directory (e.g. the train or val directory).
        category: Name of the class sub-directory (e.g. 'NORMAL').

    Returns:
        The number of files whose name ends with one of IMAGE_EXTENSIONS
        (case-insensitive). A missing directory gives 0, because os.walk
        ignores directory-listing errors by default.
    """
    category_dir = os.path.join(directory, category)
    return sum(
        1
        for _root, _dirs, files in os.walk(category_dir)
        for name in files
        if _is_image_file(name)
    )


# Function to check for corrupted images in a specific category
def check_corrupted_images(directory: str, category: str) -> list:
    """Return the paths of image files below ``directory/category`` that fail verification.

    Args:
        directory: Dataset split directory (e.g. the train or val directory).
        category: Name of the class sub-directory (e.g. 'NORMAL').

    Returns:
        A list of file paths (strings) for which opening or verifying the
        image raised OSError or SyntaxError. Empty if every image passed or
        if no image files were found.
    """
    category_dir = os.path.join(directory, category)
    corrupted_files = []
    for root, _dirs, files in os.walk(category_dir):
        for name in files:
            if not _is_image_file(name):
                continue
            path = os.path.join(root, name)
            try:
                # The context manager closes the underlying file handle, so
                # scanning a large dataset does not exhaust file descriptors.
                with Image.open(path) as img:
                    # Check if the image can be opened and is not corrupted
                    img.verify()
            except (OSError, SyntaxError):
                corrupted_files.append(path)
    return corrupted_files


# Count images in the train and validation sets
train_normal_count = count_images(train_dir, 'NORMAL')
train_pneumonia_count = count_images(train_dir, 'PNEUMONIA')
val_normal_count = count_images(val_dir, 'NORMAL')
val_pneumonia_count = count_images(val_dir, 'PNEUMONIA')

# Check for corrupted images in the train and validation sets
train_normal_corrupted = check_corrupted_images(train_dir, 'NORMAL')
train_pneumonia_corrupted = check_corrupted_images(train_dir, 'PNEUMONIA')
val_normal_corrupted = check_corrupted_images(val_dir, 'NORMAL')
val_pneumonia_corrupted = check_corrupted_images(val_dir, 'PNEUMONIA')

# Print the results
print(f"Training NORMAL images: {train_normal_count}")
print(f"Training PNEUMONIA images: {train_pneumonia_count}")
print(f"Validation NORMAL images: {val_normal_count}")
print(f"Validation PNEUMONIA images: {val_pneumonia_count}")
print(f"Corrupted images in training NORMAL: {train_normal_corrupted}")
print(f"Corrupted images in training PNEUMONIA: {train_pneumonia_corrupted}")
print(f"Corrupted images in validation NORMAL: {val_normal_corrupted}")
print(f"Corrupted images in validation PNEUMONIA: {val_pneumonia_corrupted}")