File size: 6,026 Bytes

# -*- coding: utf-8 -*-
"""
Created on Mon Apr 29 17:46:18 2024

@author: beni
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import os
from pylab import *
import re
from PIL import Image, ImageChops, ImageEnhance
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
from tensorflow.keras.utils import to_categorical 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers.legacy import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from scipy.ndimage import gaussian_filter





def convert_to_ela_image(path, quality, output_dir, resize=(256, 256)):
    """Build an Error Level Analysis (ELA) image for the picture at ``path``.

    The picture is converted to RGB, resized, re-saved as a JPEG at the
    requested quality, and compared pixel by pixel with that re-saved copy.
    The difference image is then brightened so that its largest channel
    difference is scaled towards 255.

    Args:
        path: Path of the source image.
        quality: JPEG quality used for the re-saved copy.
        output_dir: Directory that receives ``<stem>.resaved.jpg`` and
            ``<stem>.ela.png``; it is created when it does not exist.
        resize: ``(width, height)`` the image is resized to before the
            comparison.

    Returns:
        The brightened difference image (a PIL image), which is also written
        to ``<stem>.ela.png`` inside ``output_dir``.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    resaved_filename = os.path.join(output_dir, stem + '.resaved.jpg')
    ELA_filename = os.path.join(output_dir, stem + '.ela.png')

    # Saving into a missing directory raises, so create it up front.
    # An empty string means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # convert()/resize() return new in-memory images, so the source file
    # handle can be released as soon as the with-block ends.
    with Image.open(path) as source_im:
        im_resized = source_im.convert('RGB').resize(resize)

    # Re-compress at the requested quality; the compression error is the signal.
    im_resized.save(resaved_filename, 'JPEG', quality=quality)

    # difference() loads the pixel data, so the re-saved file can be closed
    # right after the comparison instead of leaking one handle per image.
    with Image.open(resaved_filename) as resaved_im:
        ela_im = ImageChops.difference(im_resized, resaved_im)

    # getextrema() yields one (min, max) pair per band for an RGB image.
    max_diff = max(band_extrema[1] for band_extrema in ela_im.getextrema())
    if max_diff == 0:
        # Identical images: avoid dividing by zero below.
        max_diff = 1
    scale = 255.0 / max_diff

    ela_im = ImageEnhance.Brightness(ela_im).enhance(scale)
    ela_im.save(ELA_filename)

    return ela_im


def shuffle_and_split_data(dataframe, test_size=0.2, random_state=59):
    """Shuffle ``dataframe`` and split it into training and validation parts.

    Args:
        dataframe: The pandas DataFrame to split.
        test_size: Passed to ``train_test_split`` as the validation share.
        random_state: Seed used for both the shuffle and the split.

    Returns:
        A ``(train_df, val_df)`` tuple.
    """
    # Randomly reorder every row, then renumber the index from zero.
    reordered = dataframe.sample(frac=1, random_state=random_state)
    reordered = reordered.reset_index(drop=True)

    # Carve the validation part out of the reordered frame.
    train_part, val_part = train_test_split(
        reordered,
        test_size=test_size,
        random_state=random_state,
    )
    return train_part, val_part

def labeling(path_real, path_fake):
    """Collect image paths from two folders and label them.

    Files found in ``path_real`` get label 0 and files found in
    ``path_fake`` get label 1. Entries are visited in sorted name order so
    the resulting row order does not depend on the filesystem, and anything
    that is not a regular file (for example a sub-directory) is skipped.

    Args:
        path_real: Directory holding the real images.
        path_fake: Directory holding the fake images.

    Returns:
        A pandas DataFrame with the columns ``image_path`` and ``label``,
        real rows first, then fake rows.
    """
    image_paths = []
    labels = []

    for folder, label in ((path_real, 0), (path_fake, 1)):
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # dataset (and any seeded split built on it) reproducible.
        for filename in sorted(os.listdir(folder)):
            full_path = os.path.join(folder, filename)
            if not os.path.isfile(full_path):
                continue
            image_paths.append(full_path)
            labels.append(label)

    return pd.DataFrame({'image_path': image_paths, 'label': labels})


if __name__ == "__main__":
    ##############################################################
    # Dataset handling: seed the random generators, then collect and label
    # the training images.
    np.random.seed(22)
    tf.random.set_seed(9)

    traning_fake_folder = 'datasets/training_set/fake/'
    traning_real_folder = 'datasets/training_set/real/'

    traning_ela_output = 'datasets/training_set/ela_output/'
    # The ELA step writes its intermediate files here; make sure the folder
    # exists before the first image is processed.
    os.makedirs(traning_ela_output, exist_ok=True)
    traning_set = labeling(traning_real_folder, traning_fake_folder)

    #################################################################
    # Preprocess the images with the ELA method and store the output.
    # Columns are accessed by name: positional indexing (row[0]) on a
    # string-labelled Series is deprecated in pandas.
    X = []
    for image_path in traning_set['image_path']:
        ela_image = convert_to_ela_image(image_path, 90, traning_ela_output)
        # Downscale to the network input size and scale pixels to [0, 1].
        X.append(np.array(ela_image.resize((128, 128))).flatten() / 255.0)
    Y = traning_set['label'].tolist()

    X = np.array(X)
    Y = to_categorical(Y, 2)
    X = X.reshape(-1, 128, 128, 3)
    X_train, X_val, Y_train, Y_val = train_test_split(
        X, Y, test_size=0.2, random_state=1, shuffle=True)

    ################################################################################
    # CNN network creation
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='valid',
                     activation='relu', input_shape=(128, 128, 3)))
    print("Input: ", model.input_shape)
    print("Output: ", model.output_shape)

    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='valid',
                     activation='relu'))
    print("Input: ", model.input_shape)
    print("Output: ", model.output_shape)

    model.add(MaxPool2D(pool_size=(2, 2)))

    model.add(Dropout(0.25))
    print("Input: ", model.input_shape)
    print("Output: ", model.output_shape)

    model.add(Flatten())
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation="softmax"))

    model.summary()

    # Define the optimizer. `learning_rate` replaces the deprecated `lr` alias.
    optimizer = RMSprop(learning_rate=0.0005, rho=0.9, epsilon=1e-08, decay=0.0)
    # Configure the model: loss function, metrics and optimizer.
    model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    # Early stopping to shorten training. The model is compiled with the
    # "accuracy" metric, so the validation metric is logged as
    # 'val_accuracy'; monitoring 'val_acc' would never trigger.
    early_stopping = EarlyStopping(monitor='val_accuracy',
                                   min_delta=0,
                                   patience=2,
                                   verbose=0, mode='auto')

    epochs = 22
    batch_size = 100
    #####################################################
    # Run the training, evaluating on the validation set after each epoch.
    history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs,
                        validation_data=(X_val, Y_val), verbose=2,
                        callbacks=[early_stopping])

    #####################################################
    # Plots and metrics
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

    # Plot training & validation loss values
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

    # Every training run can give different results; once you have the best
    # model, comment out the next line while running the test set so the
    # saved file is not overwritten.
    model.save('ELA_CNN_ART_V2.h5')