import torch
import torch.nn as nn
import torch.nn.functional as F
Convolution and Pooling
1 Convolutional Layer
x = torch.ones(1, 8, 6, dtype=torch.float32)
x
tensor([[[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.]]])
f = nn.Conv2d(1, 1, kernel_size=3)
f
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
y = f(x)
y
tensor([[[-0.0141, -0.0141, -0.0141, -0.0141],
[-0.0141, -0.0141, -0.0141, -0.0141],
[-0.0141, -0.0141, -0.0141, -0.0141],
[-0.0141, -0.0141, -0.0141, -0.0141],
[-0.0141, -0.0141, -0.0141, -0.0141],
[-0.0141, -0.0141, -0.0141, -0.0141]]], grad_fn=<SqueezeBackward1>)
y.shape
torch.Size([1, 6, 4])
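With no padding and stride 1, each spatial dimension shrinks by \(k-1\): the \(8 \times 6\) input becomes \((8-3+1) \times (6-3+1) = 6 \times 4\).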
2 Examining the kernel weights
f.weight.detach().numpy()
array([[[[ 0.3178624 , -0.31483603, -0.11810105],
[ 0.30186027, -0.21972227, -0.00794514],
[ 0.31929708, 0.00528379, -0.16220145]]]], dtype=float32)
f.bias.detach().numpy()
array([-0.13564453], dtype=float32)
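Because the input is all ones, every output entry is simply the sum of the nine kernel weights plus the bias. A quick sanity check, as a sketch using the f and x defined above:
#
# With an all-ones input, each output value = sum(kernel weights) + bias.
#
f.weight.sum() + f.bias    # matches the -0.0141 entries in y above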
3 Controlling output shape
print(x)
print(x.shape)
tensor([[[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.]]])
torch.Size([1, 8, 6])
#
# Use 5 kernels
# Each kernel is 3x3
# Pad the input height and width by 1 pixel on each side
#
f = nn.Conv2d(1, 5, kernel_size=3, padding=1)
y = f(x)
y.shape
torch.Size([5, 8, 6])
Note:
What padding amount keeps the output the same shape as the input?
\[ \begin{eqnarray} && w' = (w + 2p) - k + 1 = w\\ &\implies& 2p-k+1 = 0 \\ &\implies& p = (k-1) / 2 \end{eqnarray} \]
With \(k=3\), we have \(p = 1\).
#
# With padding='same', PyTorch computes the padding automatically.
#
f = nn.Conv2d(1, 5, kernel_size=5, padding='same')
y = f(x)
y.shape
torch.Size([5, 8, 6])
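From the formula above, \(k=5\) gives \(p=2\), so padding='same' should match an explicit padding=2. A quick check (f2 is a throwaway name for this sketch):
f2 = nn.Conv2d(1, 5, kernel_size=5, padding=2)
f2(x).shape    # torch.Size([5, 8, 6]) as well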
4 Downsizing with Pooling
f = nn.MaxPool2d(kernel_size=2, stride=2)
y = f(x)
print("x.shape", x.shape)
print("y = MaxPool2d(x)")
print("y.shape", y.shape)
x.shape torch.Size([1, 8, 6])
y = MaxPool2d(x)
y.shape torch.Size([1, 4, 3])
f = nn.AvgPool2d(kernel_size=2, stride=2)
y = f(x)
print("x.shape", x.shape)
print("y = AvgPool2d(x)")
print("y.shape", y.shape)
x.shape torch.Size([1, 8, 6])
y = AvgPool2d(x)
y.shape torch.Size([1, 4, 3])
#
# The default stride equals the kernel size, i.e. non-overlapping patches.
#
f = nn.AvgPool2d(2)
f(x).shape
torch.Size([1, 4, 3])
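Passing a stride smaller than the kernel size makes the windows overlap; the output size is then \(\lfloor (w + 2p - k)/s \rfloor + 1\) per dimension. A small sketch (g is a throwaway name):
#
# Overlapping 2x2 windows with stride 1:
# (8 - 2)/1 + 1 = 7 rows, (6 - 2)/1 + 1 = 5 columns.
#
g = nn.MaxPool2d(kernel_size=2, stride=1)
g(x).shape    # torch.Size([1, 7, 5])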
5 End-to-end classification model
class MyClassifier(nn.Module):
    def __init__(self, kernels, kernel_size):
        super().__init__()
        self.conv2d = nn.Conv2d(1, kernels, kernel_size, padding='same')
        self.maxpool = nn.MaxPool2d(kernel_size)
        self.flatten = nn.Flatten()
        self.linear = nn.LazyLinear(10)

    def forward(self, x):
        x = self.conv2d(x)
        x = self.maxpool(x)
        x = F.relu(x)
        x = self.flatten(x)
        x = self.linear(x)
        return x
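LazyLinear infers its input size on the first forward pass. To see where that size comes from, here is a sketch tracing a dummy MNIST-sized batch through a fresh instance (m and dummy are throwaway names):
#
# Shape trace for kernels=5, kernel_size=3 on 28x28 inputs:
#   conv2d ('same') -> (2, 5, 28, 28)
#   maxpool(3)      -> (2, 5, 9, 9)
#   flatten         -> (2, 405)
#   linear          -> (2, 10)
#
m = MyClassifier(5, 3)
dummy = torch.zeros(2, 1, 28, 28)
m(dummy).shape    # torch.Size([2, 10])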
import my    # local helper module that provides the MNIST dataset
mnist = my.mnist()
from torch.utils.data.dataloader import DataLoader
dataloader = DataLoader(mnist, batch_size=128)
model = MyClassifier(5, 3)
/opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/lazy.py:180: UserWarning: Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.
warnings.warn('Lazy modules are a new feature under heavy development '
optimizer = torch.optim.Adam(model.parameters())
loss = torch.nn.CrossEntropyLoss()
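Note that nn.CrossEntropyLoss applies log-softmax internally, which is why forward returns raw logits without a final softmax.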
for epoch in range(2):
    for (i, (x, target)) in enumerate(dataloader):
        y = model(x)
        l = loss(y, target)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        if i % 100 == 0:
            with torch.no_grad():
                print(epoch, i, 'loss:', l.detach().numpy())
0 0 loss: 0.27289918
0 100 loss: 0.20946638
0 200 loss: 0.21146445
0 300 loss: 0.20298699
0 400 loss: 0.30706117
1 0 loss: 0.23335779
1 100 loss: 0.17157413
1 200 loss: 0.19129881
1 300 loss: 0.17042246
1 400 loss: 0.2851454
#
# Accuracy
#
images = mnist.data[:, None, :, :].float() / 255    # uint8 pixels -> float in [0, 1], add channel dim
images.shape, images.dtype
(torch.Size([60000, 1, 28, 28]), torch.float32)
with torch.no_grad():
    y = model(images)
y.shape
torch.Size([60000, 10])
pred = y.argmax(axis=1)
pred.shape
torch.Size([60000])
(pred == mnist.targets).sum() / pred.shape[0]
tensor(0.9373)
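Note that 0.9373 is accuracy on the training images themselves. For an unbiased estimate you would score the held-out test split; a sketch, assuming torchvision is installed and that my.mnist() wraps the standard MNIST training set:
from torchvision import datasets

test = datasets.MNIST(root='data', train=False, download=True)
test_images = test.data[:, None, :, :].float() / 255
with torch.no_grad():
    test_pred = model(test_images).argmax(axis=1)
(test_pred == test.targets).float().mean()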