# File size: 2,333 Bytes
# a3d82f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from PIL import Image

# Dataset root, plus the training and validation split folders beneath it
base_dir = 'data/chest_xray'
train_dir, val_dir = (
    os.path.join(base_dir, split) for split in ('train', 'val')
)


# Function to count images in a specific category (e.g., NORMAL, PNEUMONIA)
def count_images(directory, category, extensions=('.jpg', '.jpeg', '.png')):
    """Count the image files stored under ``directory/category``.

    The category folder is walked recursively. A file is counted when its
    name ends with one of ``extensions``; the comparison ignores case, so
    ``scan.JPG`` is counted exactly like ``scan.jpg``.

    Args:
        directory: Path of the split folder (e.g. the train directory).
        category: Name of the sub-folder to scan (e.g. ``'NORMAL'``).
        extensions: File-name suffix, or iterable of suffixes, that mark a
            file as an image.

    Returns:
        The number of matching files. ``0`` when the category folder is
        missing, because ``os.walk`` yields nothing for a path it cannot
        list.
    """
    category_dir = os.path.join(directory, category)
    # A lone string would otherwise be split into single characters below.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)
    # Each bool adds 0 or 1, so the sum is the number of matching files.
    return sum(
        name.lower().endswith(suffixes)
        for _root, _dirs, files in os.walk(category_dir)
        for name in files
    )


# Function to check for corrupted images in a specific category
def check_corrupted_images(directory, category, extensions=('.jpg', '.jpeg', '.png')):
    """List the image files under ``directory/category`` that fail verification.

    The category folder is walked recursively. Every file whose name ends
    with one of ``extensions`` (case-insensitive) is opened with Pillow and
    passed through ``Image.verify()``. Files for which opening or verifying
    raises ``OSError`` or ``SyntaxError`` are reported as corrupted.

    Args:
        directory: Path of the split folder (e.g. the train directory).
        category: Name of the sub-folder to scan (e.g. ``'NORMAL'``).
        extensions: File-name suffix, or iterable of suffixes, that mark a
            file as an image.

    Returns:
        A list with the full path of each file that failed the check, in
        the order ``os.walk`` visited them. Empty when nothing failed or
        the category folder is missing.
    """
    category_dir = os.path.join(directory, category)
    # A lone string would otherwise be split into single characters below.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    corrupted_files = []
    for root, _dirs, files in os.walk(category_dir):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue
            path = os.path.join(root, file)
            try:
                # The with-block releases the file handle even when verify()
                # raises; otherwise one descriptor leaks per scanned image.
                with Image.open(path) as img:
                    img.verify()
            except (OSError, SyntaxError):
                corrupted_files.append(path)
    return corrupted_files


# Tally the images of each class, first for the training split, then validation
train_normal_count, train_pneumonia_count = (
    count_images(train_dir, label) for label in ('NORMAL', 'PNEUMONIA')
)
val_normal_count, val_pneumonia_count = (
    count_images(val_dir, label) for label in ('NORMAL', 'PNEUMONIA')
)

# Collect the paths of unreadable images, in the same split/class order
train_normal_corrupted, train_pneumonia_corrupted = (
    check_corrupted_images(train_dir, label) for label in ('NORMAL', 'PNEUMONIA')
)
val_normal_corrupted, val_pneumonia_corrupted = (
    check_corrupted_images(val_dir, label) for label in ('NORMAL', 'PNEUMONIA')
)

# Report the counts, one line per split/class pair
print(
    f"Training NORMAL images: {train_normal_count}",
    f"Training PNEUMONIA images: {train_pneumonia_count}",
    f"Validation NORMAL images: {val_normal_count}",
    f"Validation PNEUMONIA images: {val_pneumonia_count}",
    sep="\n",
)

# Report the corrupted-file lists, one line per split/class pair
print(
    f"Corrupted images in training NORMAL: {train_normal_corrupted}",
    f"Corrupted images in training PNEUMONIA: {train_pneumonia_corrupted}",
    f"Corrupted images in validation NORMAL: {val_normal_corrupted}",
    f"Corrupted images in validation PNEUMONIA: {val_pneumonia_corrupted}",
    sep="\n",
)