CSE572 Statistical Machine Learning - Project 3 - Classification Using Neural Networks and Deep Learning Project
Import Libraries¶
import warnings
warnings.filterwarnings("ignore")
import keras
from tensorflow.keras.utils import to_categorical
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import os
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Problem Solution
# Show Compute Environment
# Report the CPU and GPU resources visible to this session.
# Number of logical CPUs
num_cores = multiprocessing.cpu_count()
print(f"Number of CPU cores available: {num_cores}")
# Number of GPUs
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
# Recorded output:
#   Number of CPU cores available: 16
#   Num GPUs Available: 1
# Define Batch Size
batch_size = 64

# Define Number of Classes
num_classes = 10

# Define Number of Training Epochs
epochs = 12

# Define Image Dimensions
# input image dimensions
img_rows, img_cols = 28, 28

# Define Kernel Size (Updated from (3,3) Baseline to (5,5) Task 2)
kernel_size = (5, 5)

# Define Pooling Size
pool_size = (2, 2)

# Load Data and Train / Test Split
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data('mnist.npz')

# Shapes echoed in the notebook (recorded output shown on the right).
x_train.shape  # (60000, 28, 28)
y_train.shape  # (60000,)
x_test.shape  # (10000, 28, 28)
y_test.shape  # (10000,)

# EDA on Data
# Visualize First Train Image
# Tick positions every 2 pixels across the 28-pixel image, end point included.
ticks = np.arange(0, 28+2, 2)
# Draw the first training image; 'gray' is the colormap name used by the
# other image plots in this file.
plt.imshow(x_train[0], cmap='gray')
plt.xlim(0, 28)
plt.xticks(ticks=ticks)
plt.yticks(ticks=ticks)
plt.show()
# Visualize the First 10 X Train Images
# Display the first 10 images
num_images_to_display = 10
plt.figure(figsize=(10, 5))  # Adjust the figure size
for i in range(num_images_to_display):
    plt.subplot(2, 5, i + 1)  # Create a 2x5 grid for 10 images
    plt.imshow(x_train[i], cmap='gray')  # Display each image
    plt.title(f'Train Image {i + 1}')
    plt.axis('off')  # Hide axes for better visualization
# Layout and display happen once, after every subplot has been drawn.
plt.tight_layout()
plt.show()
# Print the First 10 Y Train Class Labels
y_train[0:10]  # array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)

# Visualize the First 10 X Test Images
# Display the first 10 images
num_images_to_display = 10
plt.figure(figsize=(10, 5))  # Adjust the figure size
for i in range(num_images_to_display):
    plt.subplot(2, 5, i + 1)  # Create a 2x5 grid for 10 images
    plt.imshow(x_test[i], cmap='gray')  # Display each image
    plt.title(f'Test Image {i + 1}')
    plt.axis('off')  # Hide axes for better visualization
# Layout and display happen once, after every subplot has been drawn.
plt.tight_layout()
plt.show()
# Print the First 10 Y Test Class Labels
y_test[0:10]  # array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], dtype=uint8)

# Reformat Data if Necessary
# Add an explicit single-channel axis in the position the Keras backend
# reports through image_data_format(), and record the matching input_shape
# for the first Conv2D layer.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train.shape  # (60000, 28, 28, 1)
x_test.shape  # (10000, 28, 28, 1)

# Cast and Normalize Data
# Convert the images to float32, then rescale them in place by 1/255.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Print Out Description of Data
# Summarise the array shape and the number of samples in each split.
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Recorded output:
#   x_train shape: (60000, 28, 28, 1)
#   60000 train samples
#   10000 test samples
# Convert Class Vectors into Binary Class Matrices
# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

y_train.shape  # (60000, 10)
y_train[0:5]
# array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
#        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
#        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
#        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
#        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)
y_test.shape  # (10000, 10)
y_test[0:5]
# array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
#        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
#        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
#        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
#        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

# Create Task 2 Model
# Define Task 2 Model Type
# Artificial Neural Network
model = Sequential()
model  # <keras.engine.sequential.Sequential at 0x24a43f95730>

# Create Task 2 Model Structure
# First layer is a convolutional layer with 6 filters, each of size
# kernel_size (5x5 for Task 2)
model.add(Conv2D(
    filters=6,
    kernel_size=kernel_size,
    strides=(1, 1),
    activation='relu',
    input_shape=input_shape))
# Second layer is a max pooling layer with a pool size of 2x2
# NOTE(review): strides=(1, 1) makes the pooling windows overlap, so each
# spatial dimension shrinks by only 1 (24x24 -> 23x23 in the recorded model
# summary) -- confirm a stride of 2 was not intended.
model.add(MaxPooling2D(
    pool_size=pool_size,
    strides=(1, 1)))
# Third layer is a convolutional layer with 16 filters, each of size
# kernel_size (5x5 for Task 2)
model.add(Conv2D(
    filters=16,
    kernel_size=kernel_size,
    strides=(1, 1),
    activation='relu'))
# Fourth layer is a max pooling layer with a pool size of 2x2
model.add(MaxPooling2D(
    pool_size=pool_size,
    strides=(1, 1)))
# Fifth layer is a flatten layer
model.add(Flatten())
# Sixth layer is a dense layer with 120 units
model.add(Dense(120, activation='relu'))
# Seventh layer is a dense layer with 84 units
model.add(Dense(84, activation='relu'))
# Eighth layer is a dense layer with 10 units for 10 classes in the output layer
model.add(Dense(num_classes, activation='softmax'))
model  # <keras.engine.sequential.Sequential at 0x24a43f95730>

# Define Task 2 Model Optimizer
# Adadelta optimizer for the Task 2 model.
# `decay` is not passed: 0.0 was the legacy default, and newer Keras
# optimizer classes reject the argument.
optim = tf.keras.optimizers.Adadelta(
    learning_rate=0.1,
    rho=0.95)
optim  # <keras.optimizer_v2.adadelta.Adadelta at 0x24a42d01fd0>

# Compile Task 2 Model
# https://keras.io/optimizers/
# Categorical cross-entropy pairs with the one-hot label matrices built by
# to_categorical earlier in this file.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=optim,
              metrics=['accuracy'])
model  # <keras.engine.sequential.Sequential at 0x24a43f95730>

# Train Task 2 Model
# Fit on the training split. The test split is passed as validation data,
# so the val_* figures in the training log are computed on x_test / y_test.
model_history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(x_test, y_test),
)
Epoch 1/12
938/938 - 10s - loss: 0.3448 - accuracy: 0.8967 - val_loss: 0.1091 - val_accuracy: 0.9694
Epoch 2/12
938/938 - 2s - loss: 0.0940 - accuracy: 0.9723 - val_loss: 0.0704 - val_accuracy: 0.9806
Epoch 3/12
938/938 - 2s - loss: 0.0659 - accuracy: 0.9804 - val_loss: 0.0575 - val_accuracy: 0.9809
Epoch 4/12
938/938 - 2s - loss: 0.0530 - accuracy: 0.9841 - val_loss: 0.0489 - val_accuracy: 0.9841
Epoch 5/12
938/938 - 2s - loss: 0.0451 - accuracy: 0.9865 - val_loss: 0.0571 - val_accuracy: 0.9808
Epoch 6/12
938/938 - 2s - loss: 0.0392 - accuracy: 0.9887 - val_loss: 0.0464 - val_accuracy: 0.9847
Epoch 7/12
938/938 - 2s - loss: 0.0352 - accuracy: 0.9895 - val_loss: 0.0358 - val_accuracy: 0.9885
Epoch 8/12
938/938 - 2s - loss: 0.0318 - accuracy: 0.9908 - val_loss: 0.0366 - val_accuracy: 0.9875
Epoch 9/12
938/938 - 2s - loss: 0.0291 - accuracy: 0.9911 - val_loss: 0.0323 - val_accuracy: 0.9889
Epoch 10/12
938/938 - 2s - loss: 0.0265 - accuracy: 0.9923 - val_loss: 0.0316 - val_accuracy: 0.9892
Epoch 11/12
938/938 - 2s - loss: 0.0240 - accuracy: 0.9930 - val_loss: 0.0335 - val_accuracy: 0.9887
Epoch 12/12
938/938 - 2s - loss: 0.0222 - accuracy: 0.9937 - val_loss: 0.0344 - val_accuracy: 0.9894
# Per-epoch training metrics echoed in the notebook (recorded output below).
model_history.history['loss']
# [0.34479445219039917,
#  0.09400391578674316,
#  0.0658968836069107,
#  0.05295504257082939,
#  0.04514137655496597,
#  0.03916701301932335,
#  0.03520461171865463,
#  0.03181973844766617,
#  0.029129598289728165,
#  0.026451660320162773,
#  0.024046970531344414,
#  0.022170674055814743]
model_history.history['accuracy']
# [0.8967000246047974,
#  0.9722999930381775,
#  0.9804333448410034,
#  0.9841333627700806,
#  0.9865333437919617,
#  0.9887333512306213,
#  0.9895333051681519,
#  0.9907500147819519,
#  0.9910500049591064,
#  0.9922666549682617,
#  0.992983341217041,
#  0.9937000274658203]

# Save the Fitted Task 2 Model
# Save the fitted model.
# NOTE: a ".keras" path is not written as HDF5; Keras only uses HDF5 for
# ".h5" / ".hdf5" paths.
model.save("fitted_task2_model.keras")
# Save the history
import json
with open("fitted_task2_model_history.json", "w") as f:
    # Only the plain metrics dict is stored, not the Keras History object.
    json.dump(model_history.history, f)

# Load the Model if Necessary
#from keras.models import load_model
#model = load_model("fitted_task2_model.keras") # Load the saved model
# Load the history
# NOTE: json.load returns the plain metrics dict that was dumped from
# model_history.history, not a Keras History object.
#import json
#with open("fitted_task2_model_history.json", "r") as f:
#    model_history = json.load(f)

# Get Model Summary
# Print the layer-by-layer structure and parameter counts.
model.summary()
# Recorded output:
# Model: "sequential"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# conv2d (Conv2D) (None, 24, 24, 6) 156
# _________________________________________________________________
# max_pooling2d (MaxPooling2D) (None, 23, 23, 6) 0
# _________________________________________________________________
# conv2d_1 (Conv2D) (None, 19, 19, 16) 2416
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 18, 18, 16) 0
# _________________________________________________________________
# flatten (Flatten) (None, 5184) 0
# _________________________________________________________________
# dense (Dense) (None, 120) 622200
# _________________________________________________________________
# dense_1 (Dense) (None, 84) 10164
# _________________________________________________________________
# dense_2 (Dense) (None, 10) 850
# =================================================================
# Total params: 635,786
# Trainable params: 635,786
# Non-trainable params: 0
# _________________________________________________________________
# Evaluate Task 2 Model
# Training Accuracy
# evaluate() returns the loss followed by the compiled 'accuracy' metric.
loss_train, accuracy_train = model.evaluate(x_train, y_train)
# 1875/1875 [==============================] - 3s 1ms/step - loss: 0.0209 - accuracy: 0.9940
print('Print the training loss and the accuracy of the model on the dataset')
print('Train Loss: {0:0.4f} Train Accuracy: {1:0.4f}'.format(loss_train, accuracy_train))
# Recorded output:
#   Print the training loss and the accuracy of the model on the dataset
#   Train Loss: 0.0209 Train Accuracy: 0.9940
# Testing Accuracy
# evaluate() returns the loss followed by the compiled 'accuracy' metric.
loss_test, accuracy_test = model.evaluate(x_test, y_test)
# 313/313 [==============================] - 1s 2ms/step - loss: 0.0344 - accuracy: 0.9894
print('Print the testing loss and the accuracy of the model on the dataset')
print('Test Loss: {0:0.4f} Test Accuracy: {1:0.4f}'.format(loss_test, accuracy_test))
# Recorded output:
#   Print the testing loss and the accuracy of the model on the dataset
#   Test Loss: 0.0344 Test Accuracy: 0.9894
#score = model.evaluate(x_test, y_test, verbose=0)
#print('Test loss:', score[0])
#print('Test accuracy:', score[1])

# Use Model to Make Predictions
########################################
# Making predictions and evaluating the model
#######################################
# Predicting the Test set results
y_prob = model.predict(x_test)  # per-class outputs of the softmax layer
# One-hot encode the most probable class of every sample.
# A fixed 0.9 threshold must not be used here: a sample whose top
# probability is <= 0.9 becomes an all-zero row, and np.argmax of an
# all-zero row is 0, so every low-confidence sample is counted as digit 0.
y_pred = np.zeros_like(y_prob)
y_pred[np.arange(y_prob.shape[0]), np.argmax(y_prob, axis=1)] = 1

y_pred.shape  # (10000, 10)
y_pred[0]  # array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

# summarize the middle 5 cases in a table
# For test samples 15..19, print the one-hot target row and the one-hot
# prediction row underneath it, followed by a blank separator line.
for i in range(15,20):
    print('actual ', end=' ')
    for j in range(10):
        print('{:.1f}'.format(y_test[i][j]), end=' ')
    print()
    print('predicted', end=' ')
    for j in range(10):
        print('{:.1f}'.format(y_pred[i][j]), end=' ')
    print()
    print()
# Recorded output:
#   actual 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
#   predicted 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
#   actual 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0
#   predicted 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0
#   actual 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
#   predicted 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
#   actual 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
#   predicted 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
#   actual 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
#   predicted 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
# Making the Confusion Matrix
y_pred[0]  # array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)
y_test[0]  # array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)
y_pred.shape  # (10000, 10)
y_test.shape  # (10000, 10)

# Convert one-hot encoded arrays to label indices
# NOTE(review): np.argmax returns 0 for an all-zero row, so this assumes
# every y_pred row contains exactly one 1. In the recorded matrix below,
# column 0 collects 8-56 samples from every other digit -- verify how
# y_pred is built upstream.
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

y_pred_labels[0:15]  # array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1], dtype=int64)
y_test_labels[0:15]  # array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1], dtype=int64)

# Making the Confusion Matrix
cm = confusion_matrix(y_test_labels, y_pred_labels)
print('Print the Confusion Matrix:')
print(cm)
# Recorded output:
# Print the Confusion Matrix:
# [[ 977 0 0 0 0 0 1 1 1 0]
# [ 8 1127 0 0 0 0 0 0 0 0]
# [ 30 2 998 0 1 0 0 1 0 0]
# [ 25 0 0 985 0 0 0 0 0 0]
# [ 30 0 0 0 951 0 0 0 0 1]
# [ 23 0 0 1 0 867 1 0 0 0]
# [ 34 1 0 0 1 3 919 0 0 0]
# [ 31 0 1 0 0 0 0 996 0 0]
# [ 56 0 2 1 0 0 0 2 911 2]
# [ 41 2 0 0 1 0 0 0 0 965]]
# Per-class precision / recall / F1 computed from the label indices.
print(classification_report(y_test_labels, y_pred_labels))
# Recorded output:
# precision recall f1-score support
# 0 0.78 1.00 0.87 980
# 1 1.00 0.99 0.99 1135
# 2 1.00 0.97 0.98 1032
# 3 1.00 0.98 0.99 1010
# 4 1.00 0.97 0.98 982
# 5 1.00 0.97 0.98 892
# 6 1.00 0.96 0.98 958
# 7 1.00 0.97 0.98 1028
# 8 1.00 0.94 0.97 974
# 9 1.00 0.96 0.98 1009
# accuracy 0.97 10000
# macro avg 0.98 0.97 0.97 10000
# weighted avg 0.98 0.97 0.97 10000
# Render the confusion matrix with digit labels 0-9 and save it to disk.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.arange(10))
disp.plot()
# Named like the other Task 2 artefacts (task2_model_accuracy.png,
# task2_model_loss.png) so this figure does not overwrite a baseline one.
plt.savefig('task2_model_confusion_matrix.png')
plt.show()
# Plot The Accuracy
print('Plot the accuracy')
# Keras 2.2.4 recognizes 'acc' and 2.3.1 recognizes 'accuracy'
# use the command python -c 'import keras; print(keras.__version__)' on MAC or Linux to check Keras' version
plt.plot(model_history.history['accuracy'], label='Training Accuracy')
#plt.plot(model_history.history['val_accuracy'], label='Validation Accuracy')
plt.title('5x5 Kernel Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.yticks(np.arange(0.9, 1.2, step=0.01))
plt.xticks(np.arange(0, 14, step=1))
# The y-axis is clipped to [0.9, 1]; the first recorded training accuracy
# (0.8967) falls just below the visible range.
plt.ylim(0.9, 1)
plt.xlim(0, 12)
plt.grid()
plt.legend(loc='lower right')
plt.savefig('task2_model_accuracy.png')
plt.show()
# Recorded output:
#   Plot the accuracy

# Plot the Loss
print('Plot the loss')
plt.plot(model_history.history['loss'], label='Training Loss')
#plt.plot(model_history.history['val_loss'], label='Validation Loss')
plt.title('5x5 Kernel Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.yticks(np.arange(0, 0.40, step=0.025))
plt.xticks(np.arange(0, 14, step=1))
plt.ylim(0, 0.35)
plt.xlim(0, 12)
plt.grid()
plt.legend(loc='upper right')
plt.savefig('task2_model_loss.png')
plt.show()
# Recorded output:
#   Plot the loss
