# Mount Google Drive at /content/drive so that files stored under that path can be reached from this runtime
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

# Importing the required libraries
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

# Fetch Fashion-MNIST through Keras: image arrays go to X_train / X_test, label arrays to trainY / testY
(X_train, trainY), (X_test, testY) = tf.keras.datasets.fashion_mnist.load_data()

# Display the shape of every array as a quick sanity check
tuple(split.shape for split in (X_train, trainY, X_test, testY))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

# Preview the first 24 training images in a 4x6 grid, each captioned with its class name
import matplotlib.pyplot as plt
class_names_list = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

plt.figure(figsize=(8, 8))
for slot, (image, label) in enumerate(zip(X_train[:24], trainY[:24]), start=1):
    plt.subplot(4, 6, slot)
    # Hide the grid and the axis ticks so that only the picture and its caption remain
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(image, cmap=plt.cm.binary)
    # The label is the integer class id, used here as an index into the class names
    plt.xlabel(class_names_list[label])
plt.show()

# Normalize the pixel values first. The raw images hold values in the 0-255 range,
# so dividing by 255 rescales them to the 0-1 range.
X_train = X_train.astype('float32')/255
X_test = X_test.astype('float32')/255

# Reshape the *normalized* images to (samples, 28, 28, 1), because the CNN input requires
# an explicit channel axis. This must come after the scaling: x_train / x_test are the
# arrays that are actually fed to the models, and reshaping the raw arrays beforehand
# would leave them holding unscaled 0-255 values.
# -1 lets NumPy infer the number of samples instead of hard-coding 60000 / 10000.
x_train = X_train.reshape(-1, 28, 28, 1)
x_test = X_test.reshape(-1, 28, 28, 1)

# One-hot encode the integer class labels, as required by the categorical_crossentropy loss
y_train = tf.keras.utils.to_categorical(trainY)
y_test = tf.keras.utils.to_categorical(testY)

# Importing the libraries as required. Note that we are using tensorflow.keras here.
import tensorflow as tf

# Keras Sequential Model
from tensorflow.keras.models import Sequential

# Importing all the different layers and optimizers
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Initiating the sequential model object
model1 = Sequential()

# Declare the input shape (28x28 pixels, 1 channel) with an explicit Input object.
# Keras warns against passing `input_shape` to a layer of a Sequential model and
# recommends an Input object as the first entry instead.
model1.add(tf.keras.Input(shape=(28, 28, 1)))

# One convolution layer followed by a pooling layer
model1.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same', data_format='channels_last'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

# Flattening the feature maps and making room for the fully connected component
model1.add(Flatten())
model1.add(Dense(64, activation='relu'))      # Hidden fully connected layer
model1.add(Dense(10, activation='softmax'))   # Output layer: 10 softmax units, one per class

/usr/local/lib/python3.10/dist-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)

# Print the layer-by-layer structure of model1 (layer types, output shapes and parameter counts)
model1.summary()

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                      │ (None, 28, 28, 16)          │             160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d (MaxPooling2D)         │ (None, 14, 14, 16)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten (Flatten)                    │ (None, 3136)                │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 64)                  │         200,768 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 10)                  │             650 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

 Total params: 201,578 (787.41 KB)

 Trainable params: 201,578 (787.41 KB)

 Non-trainable params: 0 (0.00 B)

# Adam optimizer with a learning rate of 0.01
optimizer = Adam(learning_rate=0.01)

# Configure training: categorical cross-entropy as the loss, accuracy as the reported metric
model1.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Train for 10 epochs in mini-batches of 128; the test split is evaluated after every epoch
model1.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=128,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 25s 51ms/step - accuracy: 0.6842 - loss: 20.4855 - val_accuracy: 0.8491 - val_loss: 0.4337
Epoch 2/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 23s 49ms/step - accuracy: 0.8604 - loss: 0.3811 - val_accuracy: 0.8487 - val_loss: 0.4228
Epoch 3/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 42s 51ms/step - accuracy: 0.8779 - loss: 0.3330 - val_accuracy: 0.8525 - val_loss: 0.4463
Epoch 4/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 40s 49ms/step - accuracy: 0.8831 - loss: 0.3160 - val_accuracy: 0.8616 - val_loss: 0.4099
Epoch 5/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 41s 48ms/step - accuracy: 0.8894 - loss: 0.2934 - val_accuracy: 0.8630 - val_loss: 0.4143
Epoch 6/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 21s 44ms/step - accuracy: 0.8915 - loss: 0.2857 - val_accuracy: 0.8688 - val_loss: 0.3913
Epoch 7/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 23s 48ms/step - accuracy: 0.8960 - loss: 0.2740 - val_accuracy: 0.8532 - val_loss: 0.4421
Epoch 8/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 41s 48ms/step - accuracy: 0.8997 - loss: 0.2729 - val_accuracy: 0.8628 - val_loss: 0.4610
Epoch 9/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 22s 47ms/step - accuracy: 0.8995 - loss: 0.2642 - val_accuracy: 0.8673 - val_loss: 0.4338
Epoch 10/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 21s 45ms/step - accuracy: 0.8994 - loss: 0.2643 - val_accuracy: 0.8550 - val_loss: 0.4741

<keras.src.callbacks.history.History at 0x797cb08da320>

def metrics_score(actual, predicted):
    """Print a classification report and plot the confusion matrix as a heatmap.

    Args:
        actual: True class labels.
        predicted: Predicted class labels, aligned with ``actual``.

    The heatmap axes are labelled with the module-level ``class_names_list``.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    # Per-class precision / recall / f1 as plain text
    print(classification_report(actual, predicted))

    # Confusion matrix: rows are the actual classes, columns the predicted ones
    matrix = confusion_matrix(actual, predicted)
    plt.figure(figsize=(8, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='.0f',
        xticklabels=class_names_list,
        yticklabels=class_names_list,
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

# Predicted class for every test image: the index of the largest value along the last axis of model1's output
test_pred1 = model1.predict(x_test).argmax(axis=-1)

# Compare the predictions against the true test labels
metrics_score(actual=testY, predicted=test_pred1)

313/313 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step
              precision    recall  f1-score   support

           0       0.80      0.83      0.82      1000
           1       0.95      0.98      0.96      1000
           2       0.71      0.77      0.74      1000
           3       0.90      0.82      0.86      1000
           4       0.71      0.77      0.74      1000
           5       0.98      0.92      0.95      1000
           6       0.68      0.57      0.62      1000
           7       0.91      0.96      0.94      1000
           8       0.97      0.95      0.96      1000
           9       0.95      0.95      0.95      1000

    accuracy                           0.85     10000
   macro avg       0.86      0.86      0.85     10000
weighted avg       0.86      0.85      0.85     10000

# Initiating the sequential model object.
# (An earlier draft of model2 used to be built here and was immediately overwritten by this
# definition; that dead code has been removed so the model is defined exactly once.)
model2 = Sequential()

# Declare the input shape (28x28 pixels, 1 channel) with an explicit Input object.
# Keras warns against passing `input_shape` to a layer of a Sequential model and
# recommends an Input object as the first entry instead.
model2.add(tf.keras.Input(shape=(28, 28, 1)))

# Two stacked convolution layers (16, then 32 filters) followed by a single pooling layer
model2.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same', data_format='channels_last'))
model2.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'))
model2.add(MaxPooling2D(pool_size=(2, 2)))

# Flattening the feature maps and making room for the fully connected component:
# two hidden dense layers, each followed by batch normalization
model2.add(Flatten())
model2.add(Dense(64, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(16, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(10, activation='softmax'))   # Output layer: 10 softmax units, one per class

/usr/local/lib/python3.10/dist-packages/keras/src/layers/convolutional/base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)

# Print the layer-by-layer structure of model2 (layer types, output shapes and parameter counts)
model2.summary()

Model: "sequential_1"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d_1 (Conv2D)                    │ (None, 28, 28, 16)          │             160 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_2 (Conv2D)                    │ (None, 28, 28, 32)          │           4,640 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_1 (MaxPooling2D)       │ (None, 14, 14, 32)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten_1 (Flatten)                  │ (None, 6272)                │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 64)                  │         401,472 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ batch_normalization                  │ (None, 64)                  │             256 │
│ (BatchNormalization)                 │                             │                 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_3 (Dense)                      │ (None, 16)                  │           1,040 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ batch_normalization_1                │ (None, 16)                  │              64 │
│ (BatchNormalization)                 │                             │                 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_4 (Dense)                      │ (None, 10)                  │             170 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

 Total params: 407,802 (1.56 MB)

 Trainable params: 407,642 (1.56 MB)

 Non-trainable params: 160 (640.00 B)

# A fresh Adam optimizer for model2, again with a learning rate of 0.01
optimizer = Adam(learning_rate=0.01)

# Configure training: categorical cross-entropy as the loss, accuracy as the reported metric
model2.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Train for 10 epochs in mini-batches of 128; the test split is evaluated after every epoch
model2.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=128,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 97s 202ms/step - accuracy: 0.8091 - loss: 0.5709 - val_accuracy: 0.8808 - val_loss: 0.3278
Epoch 2/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 89s 190ms/step - accuracy: 0.9032 - loss: 0.2642 - val_accuracy: 0.8904 - val_loss: 0.3259
Epoch 3/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 88s 188ms/step - accuracy: 0.9204 - loss: 0.2156 - val_accuracy: 0.8994 - val_loss: 0.2905
Epoch 4/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 142s 187ms/step - accuracy: 0.9318 - loss: 0.1875 - val_accuracy: 0.8992 - val_loss: 0.3084
Epoch 5/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 141s 186ms/step - accuracy: 0.9390 - loss: 0.1636 - val_accuracy: 0.9078 - val_loss: 0.2762
Epoch 6/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 87s 186ms/step - accuracy: 0.9482 - loss: 0.1398 - val_accuracy: 0.9098 - val_loss: 0.2636
Epoch 7/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 142s 186ms/step - accuracy: 0.9538 - loss: 0.1275 - val_accuracy: 0.9141 - val_loss: 0.2648
Epoch 8/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 141s 185ms/step - accuracy: 0.9587 - loss: 0.1112 - val_accuracy: 0.9076 - val_loss: 0.3163
Epoch 9/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 142s 186ms/step - accuracy: 0.9673 - loss: 0.0915 - val_accuracy: 0.9117 - val_loss: 0.3001
Epoch 10/10
469/469 ━━━━━━━━━━━━━━━━━━━━ 150s 203ms/step - accuracy: 0.9672 - loss: 0.0871 - val_accuracy: 0.9089 - val_loss: 0.3293

<keras.src.callbacks.history.History at 0x797ca2cdc280>

# Predicted class for every test image: the index of the largest value along the last axis of model2's output
test_pred2 = model2.predict(x_test).argmax(axis=-1)

# Compare the predictions against the true test labels
metrics_score(actual=testY, predicted=test_pred2)

313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
              precision    recall  f1-score   support

           0       0.80      0.92      0.85      1000
           1       0.99      0.98      0.98      1000
           2       0.81      0.90      0.85      1000
           3       0.94      0.88      0.91      1000
           4       0.87      0.85      0.86      1000
           5       0.99      0.97      0.98      1000
           6       0.81      0.67      0.73      1000
           7       0.94      0.98      0.96      1000
           8       0.98      0.98      0.98      1000
           9       0.97      0.97      0.97      1000

    accuracy                           0.91     10000
   macro avg       0.91      0.91      0.91     10000
weighted avg       0.91      0.91      0.91     10000

# Show a rows x cols gallery of randomly chosen test images, each titled with its
# actual label, model2's predicted label and the probability of that prediction.
rows = 4
cols = 6

# Run inference once, outside the loop. The result does not depend on the image being
# drawn, so calling predict() for every subplot would only repeat the same full pass
# over the test set rows * cols times.
y_pred_test_max_probas = np.max(model2.predict(x_test), axis=-1)

fig = plt.figure(figsize=(15, 15))
for position in range(1, rows * cols + 1):   # subplot positions are 1-based
    random_index = np.random.randint(0, len(testY))
    ax = fig.add_subplot(rows, cols, position)
    ax.imshow(X_test[random_index, :])
    pred_label = class_names_list[test_pred2[random_index]]
    true_label = class_names_list[testY[random_index]]
    pred_proba = y_pred_test_max_probas[random_index]
    ax.set_title("actual: {}\npredicted: {}\nprobability: {:.3}\n".format(
           true_label, pred_label, pred_proba
    ))
plt.show()

313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 16ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 18ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 6s 19ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step
313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 16ms/step

# Convert the Fashion_MNIST_using_CNN.ipynb notebook stored on the mounted Drive to HTML.
# The leading "!" runs the line as a shell command from the notebook.
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Six_-_Deep_Learning/Hand_On_Quiz_CNN/Fashion_MNIST_using_CNN.ipynb"

Fashion MNIST¶

Loading the libraries and the dataset¶

Data Preprocessing¶

Model Building¶

Model-1¶

Question 2: So how many parameters does the Max pooling layer take? (check model summary)¶

Compiling, fitting and evaluating the test set prediction¶

Evaluate the model on the test set¶

Model-2¶

Question 3: In model1 you have only one convolution layer. To increase the performance, build model2 with 2 convolution layers. One layer has already been implemented for you; just add another layer with a number of filters, kernel_size, activation, and padding the same as in the previous layers.¶

Observations¶

Comments¶

Recommendations¶