|
|
|
|
|
|
|
|
""" |
|
|
Corrected and upgraded by Martial Terran, from:
|
|
https://github.com/spiderPan/Google-Machine-Learning-Crash-Course/blob/master/multi-class_classfication_of_handwritten_digits.py |
|
|
|
|
|
The architecture of this model is not a CNN (Convolutional Neural Network). |
|
|
It is a Deep Neural Network (DNN) built entirely from fully-connected (Dense) layers, an architecture also commonly known as a Multilayer Perceptron (MLP).
|
|
Let's break down why and look at the specific architecture. |
|
|
Why It's a DNN and Not a CNN
|
|
The defining characteristic of a CNN is its use of convolutional layers (Conv2D). These layers are specifically designed to work with grid-like data, such as images. They use filters (or kernels) to slide across the input image, detecting spatial patterns like edges, textures, and shapes. |
|
|
This model does not use any convolutional layers. Instead, its core components are Dense layers (tf.keras.layers.Dense). |
|
|
DNN Approach: The 28x28 pixel image is flattened into a single vector of 784 numbers. The Dense layers treat these numbers as a simple list, with no inherent understanding that pixel #29 is directly below pixel #1. It learns patterns from the pixel values themselves, but loses all the spatial relationships between them. |
|
|
CNN Approach: A CNN would take the input as a 2D grid (e.g., shape=(28, 28, 1)) and use Conv2D layers to analyze neighboring pixels, preserving the spatial structure of the image. |
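
For contrast, a minimal CNN front-end sketch (illustrative only; the filter
count and kernel size are arbitrary choices, and nothing like this appears in
the script below):

    cnn = tf.keras.models.Sequential([
        tf.keras.Input(shape=(28, 28, 1)),  # keeps the 2D pixel grid
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])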
|
|
The Specific Architecture of This Model
|
|
You can see the exact architecture from the code or by printing the model's summary (model.summary()). |
|
|
Based on the code with hidden_units = [100, 100], the architecture is as follows: |
|
|
Layer   Type      Description                                          Output Shape
-----   -------   --------------------------------------------------   ------------
1       Input     A flat vector of 784 pixel values (28x28).           (None, 784)
2       Dense     First fully-connected hidden layer; each of its
                  100 neurons connects to all 784 input pixels.        (None, 100)
3       Dense     Second fully-connected hidden layer; each of its
                  100 neurons connects to all 100 neurons in the
                  previous layer.                                      (None, 100)
4       Dropout   Regularization; randomly zeroes 20% of activations
                  during training to prevent overfitting.              (None, 100)
5       Dense     Output layer: 10 neurons, one per digit class
                  (0-9). Its softmax activation converts the raw
                  outputs into a probability distribution.             (None, 10)
|
|
(Note: "None" in the output shape refers to the batch size, which can vary.) |
|
|
In summary: |
|
|
It's a DNN/MLP: It uses stacked Dense (fully-connected) layers. |
|
|
It's not a CNN: It lacks Conv2D and MaxPooling2D layers, and it flattens the image data, discarding the crucial 2D spatial information that CNNs are built to exploit. |
|
|
|
|
|
Model Summary: |
|
|
|
|
Model: "sequential_1" |
|
|
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape            ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense_3 (Dense)                 │ (None, 100)             │        78,500 │
├─────────────────────────────────┼─────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 100)             │        10,100 │
├─────────────────────────────────┼─────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 100)             │             0 │
├─────────────────────────────────┼─────────────────────────┼───────────────┤
│ dense_5 (Dense)                 │ (None, 10)              │         1,010 │
└─────────────────────────────────┴─────────────────────────┴───────────────┘
|
|
Total params: 89,610 (350.04 KB) |
|
|
Trainable params: 89,610 (350.04 KB) |
|
|
Non-trainable params: 0 (0.00 B) |
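
Each Dense layer's parameter count is (input units x output units) + biases:

    dense_3: 784 * 100 + 100 = 78,500
    dense_4: 100 * 100 + 100 = 10,100
    dense_5: 100 *  10 +  10 =  1,010
    ---------------------------------
    total                      89,610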
|
|
|
|
|
Final accuracy (on validation data): 0.96 |
|
|
|
|
|
Evaluating on test data... |
|
|
Accuracy on test data: 0.96 |
|
|
|
|
|
""" |
|
|
|
|
|
import glob |
|
|
import math |
|
|
import os |
|
|
|
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
import tensorflow as tf |
|
|
from IPython.display import display |
|
|
from matplotlib import pyplot as plt |
|
|
from sklearn import metrics |
|
|
|
|
|
|
|
|
pd.options.display.max_rows = 10 |
|
|
pd.options.display.float_format = '{:.1f}'.format |
|
|
|
|
|
|
|
|
def parse_labels_and_features(dataset): |
|
|
"""Parses a dataset into features and labels. |
|
|
|
|
|
Args: |
|
|
dataset: A Pandas DataFrame with the first column being the label |
|
|
and the remaining columns as pixel data. |
|
|
Returns: |
|
|
      A tuple of (labels, features): a Pandas Series of labels and a
      DataFrame of pixel features scaled to [0, 1].
|
|
""" |
|
|
    # Column 0 holds the digit label.
    labels = dataset[0]

    # Columns 1 through 784 hold the raw pixel values.
    features = dataset.loc[:, 1:784]

    # Scale pixel values from [0, 255] into [0, 1].
    features = features / 255
|
|
return labels, features |
|
|
|
|
|
|
|
|
def create_and_train_nn_model( |
|
|
learning_rate, |
|
|
epochs, |
|
|
batch_size, |
|
|
hidden_units, |
|
|
training_examples, |
|
|
training_targets, |
|
|
validation_examples, |
|
|
validation_targets): |
|
|
""" |
|
|
Creates, trains, and evaluates a Deep Neural Network model using tf.keras. |
|
|
|
|
|
Args: |
|
|
learning_rate: The learning rate for the optimizer. |
|
|
epochs: The number of times to iterate through the training data. |
|
|
batch_size: The number of examples to use in each training step. |
|
|
hidden_units: A list of integers, where each integer is the number of nodes |
|
|
in a hidden layer. |
|
|
training_examples: DataFrame of training features. |
|
|
training_targets: Series of training labels. |
|
|
validation_examples: DataFrame of validation features. |
|
|
validation_targets: Series of validation labels. |
|
|
|
|
|
Returns: |
|
|
The trained tf.keras.Model object and the training history. |
|
|
""" |
|
|
|
|
|
    model = tf.keras.models.Sequential()

    # Input: one flat vector of 784 pixel values per example.
    model.add(tf.keras.Input(shape=(784,)))
|
|
|
|
|
|
|
|
    # One fully-connected ReLU layer per entry in hidden_units.
    for units in hidden_units:
        model.add(tf.keras.layers.Dense(units, activation='relu'))
|
|
|
|
|
|
|
|
    # Dropout regularization: zero out 20% of activations during training.
    model.add(tf.keras.layers.Dropout(0.2))
|
|
|
|
|
|
|
|
    # Output layer: 10 units with softmax, one probability per digit class.
    model.add(tf.keras.layers.Dense(10, activation='softmax'))
|
|
|
|
|
|
|
|
    # Integer labels (0-9) pair with sparse categorical crossentropy, so the
    # targets do not need one-hot encoding.
    model.compile(
        optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy']
    )
|
|
|
|
|
|
|
|
print("Model Summary:") |
|
|
model.summary() |
|
|
print("\nTraining Model...") |
|
|
|
|
|
|
|
|
    history = model.fit(
        x=training_examples.values,
        y=training_targets.values,
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        validation_data=(validation_examples.values, validation_targets.values),
        verbose=2
    )
|
|
print("Model training finished.") |
|
|
|
|
|
|
|
|
training_loss = history.history["loss"] |
|
|
validation_loss = history.history["val_loss"] |
|
|
epochs_range = range(1, epochs + 1) |
|
|
|
|
|
plt.figure(figsize=(10, 5)) |
|
|
plt.ylabel("Loss (Sparse Categorical Crossentropy)") |
|
|
plt.xlabel("Epochs") |
|
|
plt.title("Loss vs. Epochs") |
|
|
plt.plot(epochs_range, training_loss, label="Training") |
|
|
plt.plot(epochs_range, validation_loss, label="Validation") |
|
|
plt.legend() |
|
|
plt.show() |
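    # Next, a confusion matrix on the validation set: rows are true digits,
    # columns are predicted digits, and each row is normalized to show
    # per-class recall.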
|
|
|
|
|
|
|
|
|
|
|
validation_probabilities = model.predict(validation_examples.values) |
|
|
validation_predictions = np.argmax(validation_probabilities, axis=1) |
|
|
|
|
|
cm = metrics.confusion_matrix(validation_targets, validation_predictions) |
|
|
cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] |
|
|
|
|
|
plt.figure(figsize=(8, 8)) |
|
|
ax = sns.heatmap(cm_normalized, cmap="bone_r", annot=True, fmt=".2f") |
|
|
ax.set_aspect(1) |
|
|
plt.title("Confusion Matrix") |
|
|
plt.ylabel("True Label") |
|
|
plt.xlabel("Predicted Label") |
|
|
plt.show() |
|
|
|
|
|
|
|
|
final_validation_accuracy = history.history["val_accuracy"][-1] |
|
|
print(f"Final accuracy (on validation data): {final_validation_accuracy:.2f}") |
|
|
|
|
|
return model, history |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the MNIST CSVs bundled in Colab's sample_data directory:
# column 0 is the digit label, columns 1-784 are pixel values.
mnist_dataframe = pd.read_csv('sample_data/mnist_train_small.csv', sep=",", header=None)
mnist_test_dataframe = pd.read_csv('sample_data/mnist_test.csv', sep=',', header=None)
|
|
|
|
|
|
|
|
# Keep the first 10,000 examples, then shuffle them so the train/validation
# split below is random.
mnist_dataframe = mnist_dataframe.head(10000)
mnist_dataframe = mnist_dataframe.reindex(np.random.permutation(mnist_dataframe.index))
display(mnist_dataframe.head())
|
|
|
|
|
|
|
|
# Split into 7,500 training and 2,500 validation examples; the test set
# comes from the separate test file.
training_targets, training_examples = parse_labels_and_features(mnist_dataframe[:7500])
validation_targets, validation_examples = parse_labels_and_features(mnist_dataframe[7500:10000])
testing_targets, testing_examples = parse_labels_and_features(mnist_test_dataframe)
|
|
|
|
|
display(training_examples.describe()) |
|
|
display(validation_examples.describe()) |
|
|
|
|
|
|
|
|
# Sanity check: display one random training example together with its label.
rand_example_idx = np.random.choice(training_examples.index)
_, ax = plt.subplots()
ax.matshow(training_examples.loc[rand_example_idx].values.reshape(28, 28))
|
|
ax.set_title(f"Label: {training_targets.loc[rand_example_idx]}") |
|
|
ax.grid(False) |
|
|
plt.show() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LEARNING_RATE = 0.05 |
|
|
EPOCHS = 25 |
|
|
BATCH_SIZE = 30 |
|
|
HIDDEN_UNITS = [100, 100] |
|
|
|
|
|
|
|
|
trained_model, history = create_and_train_nn_model( |
|
|
learning_rate=LEARNING_RATE, |
|
|
epochs=EPOCHS, |
|
|
batch_size=BATCH_SIZE, |
|
|
hidden_units=HIDDEN_UNITS, |
|
|
training_examples=training_examples, |
|
|
training_targets=training_targets, |
|
|
validation_examples=validation_examples, |
|
|
validation_targets=validation_targets |
|
|
) |
|
|
|
|
|
|
|
|
print("\nEvaluating on test data...") |
|
|
loss, accuracy = trained_model.evaluate(testing_examples.values, testing_targets.values, verbose=0) |
|
|
print(f"Accuracy on test data: {accuracy:.2f}") |
|
|
|
|
|
|
|
|
print("\nVisualizing weights of the first hidden layer...") |
|
|
|
|
|
|
|
|
# The first Dense layer's kernel has shape (784, 100): one 784-element
# weight vector per hidden neuron.
weights0 = trained_model.layers[0].get_weights()[0]
print('Weights 0 shape:', weights0.shape)
|
|
|
|
|
num_nodes = weights0.shape[1] |
|
|
num_rows = int(math.ceil(num_nodes / 10.0)) |
|
|
fig, axes = plt.subplots(num_rows, 10, figsize=(20, 2 * num_rows)) |
|
|
# Reshape each neuron's 784 incoming weights into a 28x28 image to see
# which input pixels that neuron responds to.
for coef, ax in zip(weights0.T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.pink)
|
|
ax.set_xticks(()) |
|
|
ax.set_yticks(()) |
|
|
|
|
|
plt.suptitle("First Hidden Layer Weights", fontsize=20) |
|
|
plt.show() |
|
|
|