%load_ext autoreload
%autoreload 2

from numpy_nn import *
import numpy as np
import matplotlib.pyplot as plt
import time
import copy
import numpy.testing as testing


# Number of sample points used by the visualisation cells.
n_data = 1001

# Dimensions of the linear layer under test.
input_dim, output_dim = 3, 2

# Build the layer, then overwrite its parameters with fixed values so that
# the reference outputs checked below are reproducible.
linear_layer = Linear(input_dim, output_dim)
linear_layer.W = np.array([
    [1.0, 0.6],
    [0.2, -0.2],
    [-0.6, -1.0],
])
linear_layer.b = np.array([[-0.5, 0.5]])

# Small test batch: batch_size rows, input_dim columns.
batch_size = 2
x_linear = np.array([
    [-2.0, -1.2, -0.4],
    [0.4, 1.2, 2.0],
])

# Run the forward pass and compare it with the reference values.
forward_output = linear_layer.forward(x_linear)
testing.assert_array_almost_equal(
    [[-2.5, -0.06], [-1.06, -1.5]],
    forward_output,
    decimal=6,
    err_msg='The values returned from the forward pass are incorrect.',
)

# Evenly spaced values in [-1, 1], arranged as n_data samples with input_dim
# features each, used only for plotting the layer response.
x_visualization = np.linspace(-1, 1, n_data * input_dim).reshape(n_data, input_dim)

# forward pass of linear layer
forward_output = linear_layer.forward(x_visualization)

# Plot the first output component against the first input component.
# The ylabel is a raw string: it contains the LaTeX command `\mathbf`, and
# `\m` is not a valid Python escape sequence in a normal string literal.
visualize_xy([(x_visualization[:, 0], forward_output[:, 0])],
             ['forward'],
             title='Linear layer',
             xlabel='$x_1$ (input)',
             ylabel=r'$z = x\mathbf{W} + b$',
             axis='equal',
             grid=True,
             linestyle='b',
             save_filepath='layer_linear_forward.png')

# Repeat the forward pass on the small test batch so that whatever the layer
# stores during forward() corresponds to x_linear.
_ = linear_layer.forward(x_linear)

# Upstream gradient: derivative of the loss with respect to the layer output.
dL_wrt_output = np.array([
    [1.0, 1.33333333],
    [1.66666667, 2.0],
])

# The backward pass returns the gradient of the loss w.r.t. the layer input.
dL_wrt_x = linear_layer.backward(dL_wrt_output)

# Reference gradient w.r.t. the input.
testing.assert_array_almost_equal(
    [[1.8, -0.06666667, -1.93333333],
     [2.86666667, -0.06666667, -3.0]],
    dL_wrt_x,
    decimal=6,
    err_msg='The gradient values returned from the backward pass are incorrect.',
)

# Reference gradient w.r.t. W: one (input_dim x output_dim) matrix per sample.
testing.assert_array_almost_equal(
    [
        [[-2.0, -2.66666667], [-1.2, -1.6], [-0.4, -0.53333333]],
        [[0.66666667, 0.8], [2.0, 2.4], [3.33333333, 4.0]],
    ],
    linear_layer.dL_wrt_W,
    decimal=6,
    err_msg='The W parameter gradient values populated by the backward pass are incorrect.',
)

# Reference gradient w.r.t. b: one (1 x output_dim) row per sample.
testing.assert_array_almost_equal(
    [
        [[1.0, 1.33333333]],
        [[1.66666667, 2.0]],
    ],
    linear_layer.dL_wrt_b,
    decimal=6,
    err_msg='The b parameter gradient values populated by the backward pass are incorrect.',
)

# ReLU layer under test.
relu_layer = ReLU()

# Test batch: the first row is all negative, the second row all positive.
x_relu = np.array([
    [-2.0, -1.2, -0.4],
    [0.4, 1.2, 2.0],
])

# Forward pass: the reference zeroes the negative row and keeps the positive one.
forward_output = relu_layer.forward(x_relu)
testing.assert_array_almost_equal(
    [[0, 0, 0], [0.4, 1.2, 2.0]],
    forward_output,
    decimal=6,
    err_msg='The values returned from the forward pass are incorrect.',
)

# Evenly spaced inputs in [-1, 1] for plotting the ReLU response.
x_visualization = np.linspace(-1, 1, n_data)

# compute the forward pass
forward_output = relu_layer.forward(x_visualization)

# visualize results
# `grid` is the keyword used by every other visualize_xy call in this file;
# the previous spelling `Tgrid` was a typo.
visualize_xy([(x_visualization, forward_output)],
             ['forward'],
             title='ReLU',
             axis='equal',
             xlabel='$x$',
             ylabel='$z=$ReLU$(x)$',
             linestyle='b',
             grid=True,
             save_filepath='layer_relu_forward.png')

# Repeat the forward pass on the test batch so that whatever the layer stores
# during forward() corresponds to x_relu.
_ = relu_layer.forward(x_relu)

# Upstream gradient: derivative of the loss with respect to the layer output.
dL_wrt_output_relu = np.array([
    [-3.0, 1.2, -0.5],
    [-0.4, 2.2, 3.0],
])

# Backward pass: gradient of the loss with respect to the layer input.
dL_wrt_x_relu = relu_layer.backward(dL_wrt_output_relu)

# The reference blocks the gradient for the negative inputs (first row) and
# passes it through unchanged for the positive inputs (second row).
testing.assert_array_almost_equal(
    [[0.0, 0.0, 0.0],
     [-0.4, 2.2, 3.0]],
    dL_wrt_x_relu,
    decimal=6,
    err_msg='The gradient values returned from the backward pass are incorrect.',
)

# Sigmoid layer under test.
sigmoid_layer = Sigmoid()

# Test batch with inputs placed symmetrically around zero.
x_sigmoid = np.array([
    [-2.0, -1.2, -0.4],
    [0.4, 1.2, 2.0],
])

# Forward pass compared against reference values given to six decimals.
forward_output = sigmoid_layer.forward(x_sigmoid)
testing.assert_array_almost_equal(
    [[0.119203, 0.231475, 0.401312],
     [0.598688, 0.768525, 0.880797]],
    forward_output,
    decimal=6,
    err_msg='The values returned from the forward pass are incorrect.',
)

# Evenly spaced inputs in [-10, 10] for plotting the sigmoid response.
x_visualization = np.linspace(-10, 10, n_data)

# Evaluate the layer on the plotting grid.
forward_output = sigmoid_layer.forward(x_visualization)

# Plot the response and store the figure on disk.
visualize_xy(
    [(x_visualization, forward_output)],
    ['forward'],
    title='Sigmoid',
    xlabel='$x$',
    ylabel='$z$',
    grid=True,
    linestyle='b',
    save_filepath='layer_sigmoid_forward.png',
)

# Repeat the forward pass on the test batch so that whatever the layer stores
# during forward() corresponds to x_sigmoid.
_ = sigmoid_layer.forward(x_sigmoid)

# Upstream gradient: derivative of the loss with respect to the layer output.
dL_wrt_output_sigmoid = np.array([
    [-3.0, 1.2, -0.5],
    [-0.4, 2.2, 3.0],
])

# Backward pass: gradient of the loss with respect to the layer input.
dL_wrt_x_sigmoid = sigmoid_layer.backward(dL_wrt_output_sigmoid)

# Compare with reference values given to six decimals.
testing.assert_array_almost_equal(
    [[-0.314981, 0.213473, -0.12013],
     [-0.096104, 0.391368, 0.314981]],
    dL_wrt_x_sigmoid,
    decimal=6,
    err_msg='The gradient values returned from the backward pass are incorrect.',
)

# Squared Error loss layer under test.
se_layer = SE()

# Predictions sweep [-2, 2]; the ground truth is zero everywhere, so the
# plotted loss depends on the prediction only.
y = np.linspace(-2, 2, 1001)
y_gt = 0 * y

# Loss values for the whole sweep.
loss_se = se_layer.forward(y, y_gt)

# Plot the loss against the prediction and store the figure on disk.
visualize_xy(
    [(y, loss_se)],
    ['forward'],
    title='Squared Error Loss',
    axis='equal',
    xlabel='$y$',
    ylabel='$z$',
    linestyle='b',
    grid=True,
    save_filepath='layer_se_forward.png',
)

# Number of samples used by the backward-pass check.
n_data_bw = 9

# Sample upstream gradient. It is not passed to se_layer.backward() below,
# which is called without arguments.
dL_wrt_output_se = np.linspace(-10, 10, n_data_bw)

# Predictions and ground truth for the check; the forward pass is run so the
# loss layer sees this particular (y, y_gt) pair before backward() is called.
y = np.linspace(-5, 5, n_data_bw)
y_gt = 0.5 * y
_ = se_layer.forward(y, y_gt)

# Backward pass of the Squared Error loss: gradient w.r.t. the prediction y.
dL_wrt_y = se_layer.backward()

# Compare with the reference gradient.
testing.assert_array_almost_equal(
    [-5.0, -3.75, -2.5, -1.25, 0.0, 1.25, 2.5, 3.75, 5.0],
    dL_wrt_y,
    decimal=6,
    err_msg='The gradient values returned from the backward pass are incorrect.',
)

# The two class identifiers handed to load_data for the binary task.
class_a, class_b = 4, 9

# Load the train and test splits.
(X_trn, y_trn), (X_tst, y_tst) = load_data('full-mnist.npz', class_a, class_b)

# Sanity checks: one flattened 28x28 image per row, one label per row.
N_trn, D = X_trn.shape
assert D == 28 * 28
assert y_trn.shape == (N_trn, 1)

# Shuffle samples and labels with one shared random permutation so that
# rows of X_trn and y_trn stay aligned.
trn_indices = np.arange(N_trn)
np.random.shuffle(trn_indices)
X_trn, y_trn = X_trn[trn_indices], y_trn[trn_indices]

# Template placeholder: this raise stops execution at this point until it is
# removed, so the model definition below does not run as the file stands.
raise NotImplementedError("Experiment with the model architecture.")
# The network is a plain list of layers; the training loop feeds each layer's
# output into the next one in list order. D is the input dimensionality.
model = [Linear(D, 5), ReLU(), Linear(5, 1), Sigmoid()]
# Loss layer applied to the output of the last layer of `model`.
trn_head = SE()

# Template placeholder: this raise stops execution at this point until it is
# removed; the None values below must be replaced before training.
raise NotImplementedError("You have to set hyperparameters yourself.")
learning_rate = None  # step size multiplying the gradient in the update
batch_size = None  # minibatch size; None breaks the batch_count computation
N_epochs = 90  # number of passes of the outer training loop
validation_set_fraction = 0.5  # share of the shuffled training data held out

print_each = 5 # losses are printed on the first epoch and every print_each-th epoch

# Hold out the first `validation_set_fraction` of the (already shuffled)
# training samples for validation; the remainder stays as the training set.
idx_split = int(np.round(N_trn * validation_set_fraction))

X_val, X_trn = X_trn[:idx_split], X_trn[idx_split:]
y_val, y_trn = y_trn[:idx_split], y_trn[idx_split:]

# Sample counts after the split.
N_trn, N_val = X_trn.shape[0], X_val.shape[0]

# The training loop
#
# Bookkeeping:
#   model_best_params   -- deep copies of every layer's params(), taken at the
#                          epoch with the lowest validation loss seen so far
#   trn_losses          -- per-epoch mean of the minibatch training losses
#   val_losses          -- per-epoch validation loss
#   best_val_loss_epoch -- index into val_losses of the best epoch (0-based)
model_best_params = []
trn_losses = []
val_losses = []
best_val_loss_epoch = 0

# Number of minibatches per epoch; the last one may be shorter than batch_size.
batch_count = int(np.ceil(N_trn / batch_size))
for epoch in range(N_epochs):
    try:
        cumulative_epoch_trn_loss = 0 # just for reporting progress
        time_start = time.time()
        for batch_i in range(batch_count):
            # load the minibatch:
            batch_idx = range(batch_i * batch_size,
                              min(N_trn, (batch_i + 1) * batch_size))

            activation = X_trn[batch_idx]

            # forward pass: each layer consumes the previous layer's output
            for layer in model:
                activation = layer.forward(activation)

            loss = trn_head.forward(activation, y_trn[batch_idx])
            trn_loss = loss.mean()

            cumulative_epoch_trn_loss += trn_loss

            # backward pass: propagate the gradient from the loss back
            # through the layers in reverse order
            grad_output = trn_head.backward()
            for layer in reversed(model):
                grad_output = layer.backward(grad_output)

            # Update the weights with gradient descent. The update is done
            # in place (`-=`) so it modifies the arrays owned by the layer.
            for layer in model:
                for param_name, param_value in layer.params().items():
                    param_value -= learning_rate * layer.grads()[param_name].mean(axis=0) # mean across the minibatch

        # validation
        activation = X_val.copy()
        for layer in model:
            activation = layer.forward(activation)
        val_losses.append(trn_head.forward(activation, y_val).mean())

        # Remember the best model so far. The first completed epoch is always
        # snapshotted (no snapshot exists yet); otherwise, when epoch 0 turns
        # out to be the best one, model_best_params would stay empty and the
        # best model could never be restored.
        if not model_best_params or val_losses[-1] < val_losses[best_val_loss_epoch]:
            best_val_loss_epoch = epoch
            model_best_params = [copy.deepcopy(layer.params()) for layer in model]

        trn_losses.append(cumulative_epoch_trn_loss / batch_count)
        if (epoch+1) % print_each == 0 or epoch == 0:
            print("[{:04d}/{:04d}][TRN] MSE loss {:2f} ({:.1f}s)".format(epoch+1, N_epochs, trn_losses[-1], time.time() - time_start))
            print("[{:04d}/{:04d}][VAL] MSE loss {:2f}".format(epoch+1, N_epochs, val_losses[-1]))
    except KeyboardInterrupt:
        # Allow the user to stop training manually and keep the results so far.
        print('Early exit')
        break

[0001/0030][TRN] MSE loss 0.035343 (2.1s)
[0001/0030][VAL] MSE loss 0.017097
[0005/0030][TRN] MSE loss 0.006365 (2.1s)
[0005/0030][VAL] MSE loss 0.008158
[0010/0030][TRN] MSE loss 0.002527 (2.1s)
[0010/0030][VAL] MSE loss 0.007173
[0015/0030][TRN] MSE loss 0.001384 (2.1s)
[0015/0030][VAL] MSE loss 0.007669
[0020/0030][TRN] MSE loss 0.000783 (2.0s)
[0020/0030][VAL] MSE loss 0.006015
[0025/0030][TRN] MSE loss 0.000771 (2.0s)
[0025/0030][VAL] MSE loss 0.005881
[0030/0030][TRN] MSE loss 0.000676 (2.0s)
[0030/0030][VAL] MSE loss 0.005825

# Plot the validation and training loss curves and store the figure on disk.
visualize_data([val_losses, trn_losses], legend=['validation', 'training'], xlabel='epoch', ylabel='MSE', save_filepath='numpy_nn_training.png')

# TST load best model
print('Best VAL model loss {:.4f} at epoch #{:d}.'.format(val_losses[best_val_loss_epoch], best_val_loss_epoch))
# Copy the snapshot values into the live parameter arrays in place. Rebinding
# an entry of the dict returned by params() is not guaranteed to reach the
# arrays the layer reads in forward(); the in-place copy mirrors how the
# training loop applies its gradient step (`param_value -= ...`).
for layer, best_params in zip(model, model_best_params):
    live_params = layer.params()
    for key, value in best_params.items():
        live_params[key][...] = value

# TST forward pass
activation = X_tst
for layer in model:
    activation = layer.forward(activation)
# Threshold the network output at 0.5 to obtain hard 0/1 predictions.
y_hat = (activation > 0.5).astype(int)

loss = trn_head.forward(activation, y_tst)
print("[TST] MSE loss {:.4f}".format(loss.mean()))

# Fraction of test samples whose predicted label differs from the ground truth.
test_error = np.mean(y_hat != y_tst)
print("[TST] error {:.4f}".format(test_error))

# Show the classified test images and store the figure on disk.
plt.figure(figsize=(15, 10))
plt.title('NN classification: test error {:.4f}'.format(test_error))
show_classification(X_tst.transpose(1, 0).reshape(28, 28, -1), y_hat.squeeze(), '{}{}'.format(class_a,
                                                                                              class_b))
plt.savefig('numpy_nn_classification.png')
plt.show()

Best VAL model loss 0.0058 at epoch #29.
[TST] MSE loss 0.0079
[TST] error 0.0110

%load_ext autoreload
%autoreload 2

import torch
from pytorch_cnn import *
from torchvision import datasets, transforms
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Hyperparameters for the fully-connected network experiment.
learning_rate = 0.001
epochs = 20
batch_size = 16

# FashionMNIST training set; it is downloaded into ./data when missing and
# every image is converted to a tensor.
dataset = datasets.FashionMNIST(
    'data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Use 9% of the data for training and 1% for validation; the rest forms an
# "additional" split.
trn_size = int(0.09 * len(dataset))
val_size = int(0.01 * len(dataset))
add_size = len(dataset) - trn_size - val_size  # you don't need ADDitional dataset to pass

trn_dataset, val_dataset, add_dataset = torch.utils.data.random_split(
    dataset,
    [trn_size, val_size, add_size],
)

# The training batches are reshuffled every epoch; validation order is fixed.
trn_loader = torch.utils.data.DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)


# Train on the CPU.
device = torch.device("cpu")
model = FCNet().to(device)

# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Validation accuracy (in percent) after each epoch.
validation_accuracies = []
for epoch in range(1, epochs + 1):
    # ---- training phase ----
    model.train()
    for i_batch, (x, y) in enumerate(trn_loader):
        x = x.to(device)
        y = y.to(device)

        # Clear old gradients, evaluate the loss, backpropagate, take a step.
        optimizer.zero_grad()
        net_output = model(x)
        loss = F.nll_loss(net_output, y)
        loss.backward()
        optimizer.step()

        # Report progress only on every 100th minibatch.
        if i_batch % 100 != 0:
            continue
        print('[TRN] Train epoch: {}, batch: {}\tLoss: {:.4f}'.format(
            epoch, i_batch, loss.item()))

    # ---- validation phase (no gradient tracking) ----
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)

            prediction = classify(model, x)
            correct += prediction.eq(y).sum().item()

    val_accuracy = correct / len(val_loader.dataset)
    validation_accuracies.append(100. * val_accuracy)
    print('[VAL] Validation accuracy: {:.2f}%'.format(100 * val_accuracy))


# Plot the validation accuracy per epoch and store the figure on disk.
visualize_data(
    [validation_accuracies],
    legend=['validation_accuracy'],
    xlabel='epoch',
    ylabel='%',
    save_filepath='pytorch_fcnet_training.png',
)

# Smoke test: MyNet must accept a BxCxHxW batch and return a BxK tensor.

batch_size = 64

# Random input of size Bx1x28x28 that collects gradients for backpropagation.
some_input = torch.rand(batch_size, 1, 28, 28, requires_grad=True)
model = MyNet()  # freshly initialised network
network_output = model(some_input)

# The output must be a tensor that still takes part in backpropagation...
assert torch.is_tensor(network_output), 'The network has to output a PyTorch tensor'
assert network_output.requires_grad, 'The output has to collect gradients for backpropagation too'
# ...and it must hold one row per sample with K = 10 class scores.
testing.assert_array_equal(
    network_output.shape,
    [batch_size, 10],
    'The network should output a tensor of dimensions BxK, where K = 10 classes.',
)

# Hyperparameters for the CNN experiment. batch_size is not set here, so the
# loaders below use whatever value is currently bound to that name.
learning_rate = 1.0
epochs = 20

# FashionMNIST training set; it is downloaded into ./data when missing and
# every image is converted to a tensor.
dataset = datasets.FashionMNIST(
    'data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Use 9% of the data for training and 1% for validation; the rest forms an
# "additional" split.
trn_size = int(0.09 * len(dataset))
val_size = int(0.01 * len(dataset))
add_size = len(dataset) - trn_size - val_size  # you don't need ADDitional dataset to pass

trn_dataset, val_dataset, add_dataset = torch.utils.data.random_split(
    dataset,
    [trn_size, val_size, add_size],
)

# The training batches are reshuffled every epoch; validation order is fixed.
trn_loader = torch.utils.data.DataLoader(
    trn_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Train on the CPU.
device = torch.device("cpu")
model = MyNet().to(device)


# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)


# Validation accuracy (in percent) after each epoch.
validation_accuracies = []
for epoch in range(1, epochs + 1):
    # ---- training phase ----
    model.train()
    for i_batch, (x, y) in enumerate(trn_loader):
        x = x.to(device)
        y = y.to(device)

        # Clear old gradients, evaluate the loss, backpropagate, take a step.
        optimizer.zero_grad()
        net_output = model(x)
        loss = F.nll_loss(net_output, y)
        loss.backward()
        optimizer.step()

        # Report progress only on every 100th minibatch.
        if i_batch % 100 != 0:
            continue
        print('[TRN] Train epoch: {}, batch: {}\tLoss: {:.4f}'.format(
            epoch, i_batch, loss.item()))

    # ---- validation phase (no gradient tracking) ----
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)

            prediction = classify(model, x)
            correct += prediction.eq(y).sum().item()

    val_accuracy = correct / len(val_loader.dataset)
    validation_accuracies.append(100. * val_accuracy)
    print('[VAL] Validation accuracy: {:.2f}%'.format(100 * val_accuracy))


# Report the accuracy of the last epoch and persist the trained weights.
print('Training completed, final accuracy: {:.2f}%'.format(100 * val_accuracy))
torch.save(model.state_dict(), "model.pt")

# Plot the validation accuracy per epoch together with the 75% threshold line.
visualize_data(
    [validation_accuracies],
    legend=['validation_accuracy'],
    xlabel='epoch',
    ylabel='%',
    save_filepath='pytorch_cnn_training.png',
    hline=75.0,
    hlinelabel='Accuracy threshold (on test set)',
)

RPZ Assignment: Convolutional Neural Networks

Introduction

Part 1: Implementing a simple neural network by hand

Fully-connected (Linear) layer

Rectified Linear Unit (ReLU) layer

Sigmoid layer

Squared Error loss

Training the network

Part 2: Convolutional Neural Network (CNN) in PyTorch

Linear classifier, multinomial logistic regression, stochastic gradient descent

Convolutional Neural Network

The challenge

Submission to the BRUTE Upload System