☑ Convolution and Pooling

Author

Ken Pu

import torch
import torch.nn as nn
import torch.nn.functional as F

1 Convolutional Layer

x = torch.ones(1, 8, 6, dtype=torch.float32)
x
tensor([[[1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.]]])
f = nn.Conv2d(1, 1, kernel_size=3)
f
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
y = f(x)
y
tensor([[[-0.0141, -0.0141, -0.0141, -0.0141],
         [-0.0141, -0.0141, -0.0141, -0.0141],
         [-0.0141, -0.0141, -0.0141, -0.0141],
         [-0.0141, -0.0141, -0.0141, -0.0141],
         [-0.0141, -0.0141, -0.0141, -0.0141],
         [-0.0141, -0.0141, -0.0141, -0.0141]]], grad_fn=<SqueezeBackward1>)
y.shape
torch.Size([1, 6, 4])

2 Examining the kernel weights

f.weight.detach().numpy()
array([[[[ 0.3178624 , -0.31483603, -0.11810105],
         [ 0.30186027, -0.21972227, -0.00794514],
         [ 0.31929708,  0.00528379, -0.16220145]]]], dtype=float32)
f.bias.detach().numpy()
array([-0.13564453], dtype=float32)

3 Controlling output shape

print(x)
print(x.shape)
tensor([[[1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.]]])
torch.Size([1, 8, 6])
#
# Use 5 kernels (5 output channels).
# Each kernel is 3x3.
# Pad the input height and width by 1 on every side so that the
# output keeps the same spatial size as the input.
#

f = nn.Conv2d(1, 5, kernel_size=3, padding=1)
y = f(x)
y.shape
torch.Size([5, 8, 6])

Note:

How much padding \(p\) is needed so that the output keeps the same shape as the input?

\[ \begin{aligned} w' &= (w + 2p) - k + 1 = w \\ \implies\quad 2p - k + 1 &= 0 \\ \implies\quad p &= \frac{k-1}{2} \end{aligned} \]

With \(k=3\), we have \(p = 1\). (This assumes a stride of 1 and an odd kernel size \(k\), so that \(p\) is an integer.)

#
# With padding='same', PyTorch computes the padding needed to keep
# the output the same height and width as the input.
#

f = nn.Conv2d(1, 5, kernel_size=5, padding='same')
y = f(x)
y.shape
torch.Size([5, 8, 6])

4 Downsizing with Pooling

f = nn.MaxPool2d(kernel_size=2, stride=2)
y = f(x)

print("x.shape", x.shape)
print("y = MaxPool2d(x)")
print("y.shape", y.shape)
x.shape torch.Size([1, 8, 6])
y = MaxPool2d(x)
y.shape torch.Size([1, 4, 3])
f = nn.AvgPool2d(kernel_size=2, stride=2)
y = f(x)

print("x.shape", x.shape)
print("y = AvgPool2d(x)")
print("y.shape", y.shape)
x.shape torch.Size([1, 8, 6])
y = AvgPool2d(x)
y.shape torch.Size([1, 4, 3])
#
# The stride defaults to the kernel size, so the pooling
# windows are non-overlapping patches.
#

f = nn.AvgPool2d(2)
f(x).shape
torch.Size([1, 4, 3])

5 End-to-end classification model

class MyClassifier(nn.Module):
    """Small CNN classifier: convolution -> max-pool -> ReLU -> flatten -> linear.

    Args:
        kernels: Number of convolution kernels, i.e. output channels of the
            convolutional layer.
        kernel_size: Size of the convolution kernel. The same value is also
            used as the max-pooling window size.
        in_channels: Number of channels of the input images (keyword-only).
            Defaults to 1.
        num_classes: Number of scores produced per input (keyword-only).
            Defaults to 10.
    """

    def __init__(self, kernels, kernel_size, *, in_channels=1, num_classes=10):
        super().__init__()
        # padding='same' keeps the height and width unchanged by the convolution.
        self.conv2d = nn.Conv2d(in_channels, kernels, kernel_size, padding='same')
        # No explicit stride: MaxPool2d then uses the window size as the stride.
        self.maxpool = nn.MaxPool2d(kernel_size)
        self.flatten = nn.Flatten()
        # LazyLinear infers its input width from the first batch it sees, so
        # the flattened feature size does not have to be computed by hand.
        self.linear = nn.LazyLinear(num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Batched input of shape (batch, in_channels, height, width).
                The batch axis is required because ``nn.Flatten`` keeps
                dimension 0 and flattens the rest.

        Returns:
            Tensor of shape (batch, num_classes); no softmax is applied.
        """
        features = F.relu(self.maxpool(self.conv2d(x)))
        return self.linear(self.flatten(features))
from torch.utils.data import DataLoader

import my

# Dataset comes from the `my` helper module (not shown here; presumably MNIST).
mnist = my.mnist()

# Mini-batches of 128 samples; `shuffle` is left at its default, so batches
# follow the dataset order.
dataloader = DataLoader(dataset=mnist, batch_size=128)

# 5 convolution kernels of size 3; the classifier reuses that size for pooling.
model = MyClassifier(kernels=5, kernel_size=3)
/opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
  warnings.warn('Lazy modules are a new feature under heavy development '
# Adam with its default hyper-parameters over all model parameters, and
# cross-entropy computed on the model's raw output scores.
# NOTE(review): `model.linear` is a LazyLinear that is only initialised by the
# first forward pass; PyTorch's lazy-module docs recommend a dry-run forward
# before constructing the optimizer -- confirm this ordering is intended.
optimizer = torch.optim.Adam(model.parameters())
loss = torch.nn.CrossEntropyLoss()

# Two passes over the data loader; one optimizer step per mini-batch.
for epoch in range(2):
    for i, (x, target) in enumerate(dataloader):
        y = model(x)
        l = loss(y, target)

        # Clear the gradients of the previous step before back-propagating.
        optimizer.zero_grad()
        l.backward()
        optimizer.step()

        # Report the current mini-batch loss only on every 100th step.
        if i % 100 != 0:
            continue
        with torch.no_grad():
            print(epoch, i, 'loss:', l.numpy())
0 0 loss: 0.27289918
0 100 loss: 0.20946638
0 200 loss: 0.21146445
0 300 loss: 0.20298699
0 400 loss: 0.30706117
1 0 loss: 0.23335779
1 100 loss: 0.17157413
1 200 loss: 0.19129881
1 300 loss: 0.17042246
1 400 loss: 0.2851454
#
# Accuracy, measured on the same data the model was trained on
#

# Insert a channel axis right after the sample axis, convert to float and
# divide by 255.
# NOTE(review): assumes the data loader feeds the model pixels on the same
# scale -- confirm against my.mnist().
images = mnist.data.unsqueeze(1).float() / 255
images.shape, images.dtype
(torch.Size([60000, 1, 28, 28]), torch.float32)
# Score every image in a single forward pass. Gradients are not needed for
# evaluation, so autograd tracking is switched off.
with torch.no_grad():
    y = model(images)
# One row of class scores per image.
y.shape
torch.Size([60000, 10])
# Predicted class = index of the highest score in each row.
pred = torch.argmax(y, dim=1)
pred.shape
torch.Size([60000])
# Fraction of predictions that match the dataset's labels.
(pred == mnist.targets).float().mean()
tensor(0.9373)