2023-11-28 59th Class
Convolutional Neural Network - VGGNet
#️⃣ VGG (Pytorch Sequential Block ver.)
code
import torch
import torch.nn as nn
from collections import OrderedDict
from torchsummary import summary
class VGGNet(nn.Module):
    """VGG-style CNN with 8 convolutions and 3 linear layers (VGG11 layout).

    The layers are grouped into five named ``nn.Sequential`` stages
    (``conv1`` .. ``conv5``), each ending in a 2x2 max-pool, followed by a
    ``classifier`` that maps the flattened 512*7*7 features to 1000 logits.
    The 512*7*7 classifier input means 224x224 images are expected.
    """

    def __init__(self):
        super().__init__()

        def conv3x3(c_in, c_out):
            # 3x3 kernel with padding 1 keeps height and width unchanged.
            return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                             kernel_size=3, padding=1)

        def pool2x2():
            # Kernel 2 / stride 2 halves height and width.
            return nn.MaxPool2d(kernel_size=2, stride=2)

        # Input: 224 x 224 RGB image.
        self.conv1 = nn.Sequential(OrderedDict([
            ('conv3-64', conv3x3(3, 64)),
            ('conv3-64-act', nn.ReLU()),
            ('maxpool1', pool2x2()),
        ]))

        self.conv2 = nn.Sequential(OrderedDict([
            ('conv3-128', conv3x3(64, 128)),
            ('conv3-128-act', nn.ReLU()),
            ('maxpool2', pool2x2()),
        ]))

        self.conv3 = nn.Sequential(OrderedDict([
            ('conv3-256-1', conv3x3(128, 256)),
            ('conv3-256-1-act', nn.ReLU()),
            ('conv3-256-2', conv3x3(256, 256)),
            ('conv3-256-2-act', nn.ReLU()),
            ('maxpool3', pool2x2()),
        ]))

        self.conv4 = nn.Sequential(OrderedDict([
            ('conv3-512-1', conv3x3(256, 512)),
            ('conv3-512-1-act', nn.ReLU()),
            ('conv3-512-2', conv3x3(512, 512)),
            ('conv3-512-2-act', nn.ReLU()),
            ('maxpool4', pool2x2()),
        ]))

        self.conv5 = nn.Sequential(OrderedDict([
            ('conv3-512-3', conv3x3(512, 512)),
            ('conv3-512-3-act', nn.ReLU()),
            ('conv3-512-4', conv3x3(512, 512)),
            ('conv3-512-4-act', nn.ReLU()),
            ('maxpool5', pool2x2()),
        ]))

        # Flattened (batch, 512*7*7) features -> 1000 class scores.
        self.classifier = nn.Sequential(OrderedDict([
            ('fc-4096-1', nn.Linear(in_features=512 * 7 * 7, out_features=4096)),
            ('fc-4096-1-act', nn.ReLU()),
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),
            ('fc-4096-2-act', nn.ReLU()),
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000)),
        ]))

    def forward(self, x):
        """Run the five conv stages and the classifier; returns (batch, 1000)."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        # (batch, C, H, W) -> (batch, C*H*W), e.g. (64, 512, 7, 7) -> (64, 25088)
        flat = x.view(x.size(0), -1)
        return self.classifier(flat)
def run_vggnet():
    """Print a torchsummary report for VGGNet and the output shape of a dummy batch.

    The summary runs on CUDA when it is available and on CPU otherwise; the
    final forward pass always runs on CPU with a random (10, 3, 224, 224)
    batch.
    """
    test_data = torch.randn((10, 3, 224, 224))
    model = VGGNet()

    # Hard-coding 'cuda' raises on machines without a GPU, so choose the
    # device at runtime and tell torchsummary which one was picked.
    summary_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    summary(model.to(summary_device), input_size=(3, 224, 224), device=summary_device)

    model = model.to('cpu')
    # Call the module (not .forward()) so registered hooks run; gradients are
    # not needed just to inspect the output shape.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    run_vggnet()
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
MaxPool2d-3 [-1, 64, 112, 112] 0
Conv2d-4 [-1, 128, 112, 112] 73,856
ReLU-5 [-1, 128, 112, 112] 0
MaxPool2d-6 [-1, 128, 56, 56] 0
Conv2d-7 [-1, 256, 56, 56] 295,168
ReLU-8 [-1, 256, 56, 56] 0
Conv2d-9 [-1, 256, 56, 56] 590,080
ReLU-10 [-1, 256, 56, 56] 0
MaxPool2d-11 [-1, 256, 28, 28] 0
Conv2d-12 [-1, 512, 28, 28] 1,180,160
ReLU-13 [-1, 512, 28, 28] 0
Conv2d-14 [-1, 512, 28, 28] 2,359,808
ReLU-15 [-1, 512, 28, 28] 0
MaxPool2d-16 [-1, 512, 14, 14] 0
Conv2d-17 [-1, 512, 14, 14] 2,359,808
ReLU-18 [-1, 512, 14, 14] 0
Conv2d-19 [-1, 512, 14, 14] 2,359,808
ReLU-20 [-1, 512, 14, 14] 0
MaxPool2d-21 [-1, 512, 7, 7] 0
Linear-22 [-1, 4096] 102,764,544
ReLU-23 [-1, 4096] 0
Linear-24 [-1, 4096] 16,781,312
ReLU-25 [-1, 4096] 0
Linear-26 [-1, 1000] 4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 125.12
Params size (MB): 506.83
Estimated Total Size (MB): 632.53
----------------------------------------------------------------
torch.Size([10, 1000])
- torchsummary 사용하여 model parameters 확인 가능
- VGG 모델의 경우 Pytorch Sequential을 활용해서 convolutional layer 블록 단위로 구분해 forward 할 수 있음
#️⃣ VGG13 (Pytorch Sequential Block ver.)
- VGG13은 VGG11모델에서 첫 번째 블록과 두 번째 블록에서 convolutional layer가 1개 추가된 모델
architecture
input(224x224 RGB image)
conv1 | conv2 | conv3 | conv4 | conv5 | classifier | |||||
---|---|---|---|---|---|---|---|---|---|---|
conv3-64 | maxpool | conv3-128 | maxpool | conv3-256 | maxpool | conv3-512 | maxpool | conv3-512 | maxpool | FC-4096 |
conv3-64 | conv3-128 | conv3-256 | conv3-512 | conv3-512 | FC-4096 | |||||
FC-1000 | ||||||||||
soft-max |
full code
import torch
from torch import nn
from collections import OrderedDict
from torchsummary import summary
class VGG13(nn.Module):
    """VGG13: 10 convolutions in five named stages plus a 3-layer classifier.

    Every stage holds two 3x3 conv + ReLU pairs followed by a 2x2 max-pool.
    The classifier consumes 512*7*7 features (224x224 inputs) and produces
    1000 logits.
    """

    def __init__(self):
        super().__init__()

        def conv_relu(tag, c_in, c_out):
            # One named 3x3 convolution (padding 1) and its ReLU.
            conv = nn.Conv2d(in_channels=c_in, out_channels=c_out,
                             kernel_size=3, padding=1)
            return [(tag, conv), (f'{tag}-act', nn.ReLU())]

        def pool(tag):
            # Named 2x2 / stride-2 max-pool that closes a stage.
            return (tag, nn.MaxPool2d(kernel_size=2, stride=2))

        self.conv1 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-64-1', 3, 64),
            *conv_relu('conv3-64-2', 64, 64),
            pool('maxpool1'),
        ]))

        self.conv2 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-128-1', 64, 128),
            *conv_relu('conv3-128-2', 128, 128),
            pool('maxpool2'),
        ]))

        self.conv3 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-256-1', 128, 256),
            *conv_relu('conv3-256-2', 256, 256),
            pool('maxpool3'),
        ]))

        self.conv4 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-512-1', 256, 512),
            *conv_relu('conv3-512-2', 512, 512),
            pool('maxpool4'),
        ]))

        self.conv5 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-512-3', 512, 512),
            *conv_relu('conv3-512-4', 512, 512),
            pool('maxpool5'),
        ]))

        self.classifier = nn.Sequential(OrderedDict([
            ('fc-4096-1', nn.Linear(in_features=512 * 7 * 7, out_features=4096)),
            ('fc-4096-1-act', nn.ReLU()),
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),
            ('fc-4096-2-act', nn.ReLU()),
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000)),
        ]))

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flat = x.view(x.size(0), -1)  # (batch, 512, 7, 7) -> (batch, 25088)
        return self.classifier(flat)
def run_vgg13():
    """Print a CPU torchsummary report for VGG13 and the output shape of a random batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG13()
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    run_vgg13()
    # run_vgg19()
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [16, 64, 224, 224] 1,792
ReLU-2 [16, 64, 224, 224] 0
Conv2d-3 [16, 64, 224, 224] 36,928
ReLU-4 [16, 64, 224, 224] 0
MaxPool2d-5 [16, 64, 112, 112] 0
Conv2d-6 [16, 128, 112, 112] 73,856
ReLU-7 [16, 128, 112, 112] 0
Conv2d-8 [16, 128, 112, 112] 147,584
ReLU-9 [16, 128, 112, 112] 0
MaxPool2d-10 [16, 128, 56, 56] 0
Conv2d-11 [16, 256, 56, 56] 295,168
ReLU-12 [16, 256, 56, 56] 0
Conv2d-13 [16, 256, 56, 56] 590,080
ReLU-14 [16, 256, 56, 56] 0
MaxPool2d-15 [16, 256, 28, 28] 0
Conv2d-16 [16, 512, 28, 28] 1,180,160
ReLU-17 [16, 512, 28, 28] 0
Conv2d-18 [16, 512, 28, 28] 2,359,808
ReLU-19 [16, 512, 28, 28] 0
MaxPool2d-20 [16, 512, 14, 14] 0
Conv2d-21 [16, 512, 14, 14] 2,359,808
ReLU-22 [16, 512, 14, 14] 0
Conv2d-23 [16, 512, 14, 14] 2,359,808
ReLU-24 [16, 512, 14, 14] 0
MaxPool2d-25 [16, 512, 7, 7] 0
Linear-26 [16, 4096] 102,764,544
ReLU-27 [16, 4096] 0
Linear-28 [16, 4096] 16,781,312
ReLU-29 [16, 4096] 0
Linear-30 [16, 1000] 4,097,000
================================================================
Total params: 133,047,848
Trainable params: 133,047,848
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3177.93
Params size (MB): 507.54
Estimated Total Size (MB): 3694.66
----------------------------------------------------------------
torch.Size([8, 1000])
#️⃣ VGG19 (Pytorch Sequential Block ver.)
- VGG19는 VGG16 모델의 3~5번째 블록에 convolutional layer가 각각 1개씩 추가된 모델
architecture
input(224x224 RGB image)
conv1 | conv2 | conv3 | conv4 | conv5 | classifier | |||||
---|---|---|---|---|---|---|---|---|---|---|
conv3-64 | maxpool | conv3-128 | maxpool | conv3-256 | maxpool | conv3-512 | maxpool | conv3-512 | maxpool | FC-4096 |
conv3-64 | conv3-128 | conv3-256 | conv3-512 | conv3-512 | FC-4096 | |||||
conv3-256 | conv3-512 | conv3-512 | FC-1000 | |||||||
conv3-256 | conv3-512 | conv3-512 | soft-max |
full code
import torch
from torch import nn
from collections import OrderedDict
from torchsummary import summary
class VGG19(nn.Module):
    """VGG19: 16 convolutions in five named stages plus a 3-layer classifier.

    Stages 1-2 hold two 3x3 conv + ReLU pairs, stages 3-5 hold four; every
    stage ends in a 2x2 max-pool. The classifier consumes 512*7*7 features
    (224x224 inputs) and produces 1000 logits.
    """

    def __init__(self):
        super().__init__()

        def conv_relu(tag, c_in, c_out):
            # One named 3x3 convolution (padding 1) and its ReLU.
            conv = nn.Conv2d(in_channels=c_in, out_channels=c_out,
                             kernel_size=3, padding=1)
            return [(tag, conv), (f'{tag}-act', nn.ReLU())]

        def pool(tag):
            # Named 2x2 / stride-2 max-pool that closes a stage.
            return (tag, nn.MaxPool2d(kernel_size=2, stride=2))

        self.conv1 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-64-1', 3, 64),
            *conv_relu('conv3-64-2', 64, 64),
            pool('maxpool1'),
        ]))

        self.conv2 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-128-1', 64, 128),
            *conv_relu('conv3-128-2', 128, 128),
            pool('maxpool2'),
        ]))

        self.conv3 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-256-1', 128, 256),
            *conv_relu('conv3-256-2', 256, 256),
            *conv_relu('conv3-256-3', 256, 256),
            *conv_relu('conv3-256-4', 256, 256),
            pool('maxpool3'),
        ]))

        self.conv4 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-512-1', 256, 512),
            *conv_relu('conv3-512-2', 512, 512),
            *conv_relu('conv3-512-3', 512, 512),
            *conv_relu('conv3-512-4', 512, 512),
            pool('maxpool4'),
        ]))

        # Layer names in this stage start at 3; they are kept as-is because
        # they are part of the state_dict keys.
        self.conv5 = nn.Sequential(OrderedDict([
            *conv_relu('conv3-512-3', 512, 512),
            *conv_relu('conv3-512-4', 512, 512),
            *conv_relu('conv3-512-5', 512, 512),
            *conv_relu('conv3-512-6', 512, 512),
            pool('maxpool5'),
        ]))

        self.classifier = nn.Sequential(OrderedDict([
            ('fc-4096-1', nn.Linear(in_features=512 * 7 * 7, out_features=4096)),
            ('fc-4096-1-act', nn.ReLU()),
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),
            ('fc-4096-2-act', nn.ReLU()),
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000)),
        ]))

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flat = x.view(x.size(0), -1)  # (batch, 512, 7, 7) -> (batch, 25088)
        return self.classifier(flat)
def run_vgg13():
    """Print a CPU torchsummary report for VGG13 and the output shape of a random batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG13()
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)
def run_vgg19():
    """Print a CPU torchsummary report for VGG19 and the output shape of a random batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG19()
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    # run_vgg13()
    run_vgg19()
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [16, 64, 224, 224] 1,792
ReLU-2 [16, 64, 224, 224] 0
Conv2d-3 [16, 64, 224, 224] 36,928
ReLU-4 [16, 64, 224, 224] 0
MaxPool2d-5 [16, 64, 112, 112] 0
Conv2d-6 [16, 128, 112, 112] 73,856
ReLU-7 [16, 128, 112, 112] 0
Conv2d-8 [16, 128, 112, 112] 147,584
ReLU-9 [16, 128, 112, 112] 0
MaxPool2d-10 [16, 128, 56, 56] 0
Conv2d-11 [16, 256, 56, 56] 295,168
ReLU-12 [16, 256, 56, 56] 0
Conv2d-13 [16, 256, 56, 56] 590,080
ReLU-14 [16, 256, 56, 56] 0
Conv2d-15 [16, 256, 56, 56] 590,080
ReLU-16 [16, 256, 56, 56] 0
Conv2d-17 [16, 256, 56, 56] 590,080
ReLU-18 [16, 256, 56, 56] 0
MaxPool2d-19 [16, 256, 28, 28] 0
Conv2d-20 [16, 512, 28, 28] 1,180,160
ReLU-21 [16, 512, 28, 28] 0
Conv2d-22 [16, 512, 28, 28] 2,359,808
ReLU-23 [16, 512, 28, 28] 0
Conv2d-24 [16, 512, 28, 28] 2,359,808
ReLU-25 [16, 512, 28, 28] 0
Conv2d-26 [16, 512, 28, 28] 2,359,808
ReLU-27 [16, 512, 28, 28] 0
MaxPool2d-28 [16, 512, 14, 14] 0
Conv2d-29 [16, 512, 14, 14] 2,359,808
ReLU-30 [16, 512, 14, 14] 0
Conv2d-31 [16, 512, 14, 14] 2,359,808
ReLU-32 [16, 512, 14, 14] 0
Conv2d-33 [16, 512, 14, 14] 2,359,808
ReLU-34 [16, 512, 14, 14] 0
Conv2d-35 [16, 512, 14, 14] 2,359,808
ReLU-36 [16, 512, 14, 14] 0
MaxPool2d-37 [16, 512, 7, 7] 0
Linear-38 [16, 4096] 102,764,544
ReLU-39 [16, 4096] 0
Linear-40 [16, 4096] 16,781,312
ReLU-41 [16, 4096] 0
Linear-42 [16, 1000] 4,097,000
================================================================
Total params: 143,667,240
Trainable params: 143,667,240
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3814.93
Params size (MB): 548.05
Estimated Total Size (MB): 4372.17
----------------------------------------------------------------
torch.Size([8, 1000])
#️⃣ VGG의 특징
- Conv - ReLU - Pool 또는 Conv- ReLU - Conv - ReLU - Pool 의 형식으로 반복되는 구조
- Conv에서 kernel size는 3, padding은 1로 고정되어 있음
- Conv - ReLU가 여러 번 반복될 때, 두 번째 Conv - ReLU 부터는 이전 채널을 유지
- Conv - ReLU가 반복된 후에는 max pooling
#️⃣ VGG Block 만들기
base code
class ConvBlockBase(nn.Module):
    """One VGG stage: repeated (3x3 conv + ReLU) followed by a 2x2 max-pool.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by every convolution in the stage.
        n_layers: Number of conv + ReLU pairs. The first pair is always
            created, so values below 1 still yield one convolution.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super().__init__()

        # The first pair changes the channel count ...
        stack = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]

        # ... the remaining n_layers - 1 pairs keep it.
        remaining = n_layers - 1
        while remaining > 0:
            stack += [
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
            ]
            remaining -= 1

        # Close the stage with max pooling.
        stack.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list so each layer becomes a positional argument.
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the whole stage to ``x``."""
        return self.layers(x)
- self.layers = nn.Sequential(*self.layers)에서 list, tuple 등을 asterisk(별표)로 설정해 인자로 넣게 되면 함수/클래스 내부에서 unpacking되어 적용됨
베이스 코드를 리팩토링해서 1개의 for문에서 layer를 모두 추가하게 바꾸기
new code
class ConvBlock(nn.Module):
    """One VGG stage built in a single loop: n x (3x3 conv + ReLU), then 2x2 max-pool.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by every convolution in the stage.
        n_layers: Number of conv + ReLU pairs to stack.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super(ConvBlock, self).__init__()

        # Collect layers in a local list; only the final nn.Sequential is
        # stored on the module.
        layers = []
        for _ in range(n_layers):
            layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            # From the second convolution on, the input channel count equals
            # the previous convolution's output channel count.
            in_channels = out_channels

        # Close the stage with max pooling.
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list so each layer becomes a positional argument.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the whole stage to ``x``."""
        x = self.layers(x)
        return x
- for문의 첫 번째 반복(0번째 layer)에서는 인자로 받은 in_channels가 그대로 Conv2d의 입력 채널로 사용되지만
- 그 다음 층부터는 직전 Conv2d의 out_channels가 입력 채널이 되기 때문에
- 매 반복의 끝에서 in_channels = out_channels로 갱신
#️⃣ VGG Block 으로 VGG 구현
[1] VGG11
architecture
input(224x224 RGB image)
conv1 | conv2 | conv3 | conv4 | conv5 | classifier | |||||
---|---|---|---|---|---|---|---|---|---|---|
conv3-64 | maxpool | conv3-128 | maxpool | conv3-256 | maxpool | conv3-512 | maxpool | conv3-512 | maxpool | FC-4096 |
conv3-256 | conv3-512 | conv3-512 | FC-4096 | |||||||
FC-1000 | ||||||||||
soft-max |
code
class VGG11Block(nn.Module):
    """VGG11 assembled from ConvBlock stages (1-1-2-2-2 convolutions).

    The classifier consumes 512*7*7 features, so 224x224 inputs are expected;
    the output is (batch, 1000) logits.
    """

    def __init__(self):
        super(VGG11Block, self).__init__()

        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=1)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=1)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=2)
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=2)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=2)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000)
        )

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        # Call each stage as a module (not via .forward) so that any hooks
        # registered on the stages are executed.
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
def run_vgg11_block():
    """Print the output shape of VGG11Block for a random (8, 3, 224, 224) batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG11Block()
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    # run_vgg13()
    # run_vgg19()
    # run_conv_block()
    run_vgg11_block()

# Output:
# torch.Size([8, 1000])
- ConvBlock()은 위의 new code 와 동일함
[2] VGG13
architecture
input(224x224 RGB image)
conv1 | conv2 | conv3 | conv4 | conv5 | classifier | |||||
---|---|---|---|---|---|---|---|---|---|---|
conv3-64 | maxpool | conv3-128 | maxpool | conv3-256 | maxpool | conv3-512 | maxpool | conv3-512 | maxpool | FC-4096 |
conv3-64 | conv3-128 | conv3-256 | conv3-512 | conv3-512 | FC-4096 | |||||
FC-1000 | ||||||||||
soft-max |
code
class VGG13Block(nn.Module):
    """VGG13 assembled from ConvBlock stages (2-2-2-2-2 convolutions).

    The classifier consumes 512*7*7 features, so 224x224 inputs are expected;
    the output is (batch, 1000) logits.
    """

    def __init__(self):
        super(VGG13Block, self).__init__()

        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=2)
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=2)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=2)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000)
        )

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        # Call each stage as a module (not via .forward) so that any hooks
        # registered on the stages are executed.
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
def run_vgg13_block():
    """Print the output shape of VGG13Block for a random (8, 3, 224, 224) batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG13Block()
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    # run_vgg13()
    # run_vgg19()
    # run_conv_block()
    # run_vgg11_block()
    run_vgg13_block()

# Output:
# torch.Size([8, 1000])
[3] VGG19
architecture
input(224x224 RGB image)
conv1 | conv2 | conv3 | conv4 | conv5 | classifier | |||||
---|---|---|---|---|---|---|---|---|---|---|
conv3-64 | maxpool | conv3-128 | maxpool | conv3-256 | maxpool | conv3-512 | maxpool | conv3-512 | maxpool | FC-4096 |
conv3-64 | conv3-128 | conv3-256 | conv3-512 | conv3-512 | FC-4096 | |||||
conv3-256 | conv3-512 | conv3-512 | FC-1000 | |||||||
conv3-256 | conv3-512 | conv3-512 | soft-max |
code
class VGG19Block(nn.Module):
    """VGG19 assembled from ConvBlock stages (2-2-4-4-4 convolutions).

    The classifier consumes 512*7*7 features, so 224x224 inputs are expected;
    the output is (batch, 1000) logits.
    """

    def __init__(self):
        super(VGG19Block, self).__init__()

        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=4)
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=4)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=4)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000)
        )

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        # Call each stage as a module (not via .forward) so that any hooks
        # registered on the stages are executed.
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
def run_vgg19_block():
    """Print a CPU torchsummary report for VGG19Block and the output shape of a random batch."""
    test_data = torch.randn((8, 3, 224, 224))
    model = VGG19Block()
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')

    # Call the module itself instead of .forward() so registered hooks run;
    # no gradients are needed for a shape check.
    with torch.no_grad():
        pred = model(test_data)
    print(pred.shape)


if __name__ == '__main__':
    # run_vgg13()
    # run_vgg19()
    # run_conv_block()
    # run_vgg11_block()
    # run_vgg13_block()
    run_vgg19_block()

# Output:
# torch.Size([8, 1000])
VGGNet Parameter Numbers
#️⃣ 파라미터 개수 계산하기
- 커널 하나는 3x3=9개의 weight를 가지고 있음
- 커널의 채널은 input 채널과 같은 3을 가지고있음
- 따라서 9 x 3 = 27개의 weight를 가지고 있음
[1] block = ConvBlock(in_channels=3, out_channels=64, n_layers=1)
summary(block, input_size=(3, 100, 100)) 일때,
Layer(type) | output shape | params# |
---|---|---|
Conv2d-1 | (-1, 64, 100, 100) | 1792 |
ReLU-2 | (-1, 64, 100, 100) | |
MaxPool2d-3 | (-1, 64, 50, 50) |
Params # = (kernel × kernel × input channel + 1) × output channel
(필터 3x3 * 채널 3 + 바이어스 1) * 아웃채널 64 = 1792개
[2] block = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
summary(block, input_size=(3, 100, 100)) 일때,
Layer(type) | output shape | params# |
---|---|---|
Conv2d-1 | (-1, 64, 100, 100) | 1792 |
ReLU-2 | (-1, 64, 100, 100) | |
Conv2d-3 | (-1, 64, 100, 100) | 36928 |
ReLU-4 | (-1, 64, 100, 100) | |
MaxPool2d-5 | (-1, 64, 50, 50) |
(필터 3x3 * 채널3 + 바이어스1) * 아웃채널64 = 1792
(필터 3x3 * 채널64 + 바이어스1) * 아웃채널64 = 36928
[3] block = ConvBlock(in_channels=3, out_channels=64, n_layers=3)
summary(block, input_size=(3, 100, 100)) 일때,
Layer(type) | output shape | params# |
---|---|---|
Conv2d-1 | (-1, 64, 100, 100) | 1792 |
ReLU-2 | (-1, 64, 100, 100) | |
Conv2d-3 | (-1, 64, 100, 100) | 36928 |
ReLU-4 | (-1, 64, 100, 100) | |
Conv2d-5 | (-1, 64, 100, 100) | 36928 |
ReLU-6 | (-1, 64, 100, 100) | |
MaxPool2d-7 | (-1, 64, 50, 50) |
(필터 3x3 * 채널3 + 바이어스1) * 아웃채널64 = 1792
(필터 3x3 * 채널64 + 바이어스1) * 아웃채널64 = 36928
(필터 3x3 * 채널64 + 바이어스1) * 아웃채널64 = 36928
Train CIFAR10 with VGGNet19 and LeNet5
#️⃣ VGGNet19로 CIFAR10 이미지 데이터셋 학습
import pickle
import torch
from torch import nn
from collections import OrderedDict
from dataclasses import dataclass
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from tqdm import tqdm
import matplotlib.pyplot as plt
@dataclass
class Constants:
    """Settings bundle passed to the training entry point."""

    # Dataset / loader
    N_SAMPLES: int        # number of training samples
    BATCH_SIZE: int       # DataLoader batch size

    # Optimisation
    EPOCHS: int           # number of passes over the dataset
    LR: float             # optimizer learning rate

    # Runtime
    DEVICE: torch.device  # device the model and batches are moved to

    # Outputs
    PATH: str             # file path of the saved model
    METRIC_PATH: str      # file path of the pickled loss/accuracy history

    SEED: int             # random seed value
def get_device():
    """Return the CUDA device when available, otherwise the CPU device.

    The chosen device is also printed.
    """
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    print(f"curr device = {device}")
    return device
class ConvBlock(nn.Module):
    """One VGG stage: n x (3x3 conv + ReLU) followed by a 2x2 max-pool.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by every convolution in the stage.
        n_layers: Number of conv + ReLU pairs to stack.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super().__init__()

        # Channel width before each convolution and after the last one:
        # only the first convolution sees `in_channels`.
        widths = [in_channels] + [out_channels] * n_layers

        modules = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            modules.append(nn.ReLU())

        # Close the stage with max pooling.
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list so each layer becomes a positional argument.
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the whole stage to ``x``."""
        return self.layers(x)
class VGG19Block(nn.Module):
    """VGG19 (2-2-4-4-4 ConvBlock stages) with a classifier sized for CIFAR10.

    The classifier consumes 512*1*1 features and emits 10 logits. Five 2x2
    poolings reduce a 32x32 input to 1x1, which is why the original
    512*7*7 input size (for 224x224 images) is replaced here.
    """

    def __init__(self):
        super(VGG19Block, self).__init__()

        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=4)
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=4)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=4)

        # Original VGG uses in_features=512*7*7.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 1 * 1, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=10)
        )

    def forward(self, x):
        """Apply conv1..conv5, flatten per sample, and classify."""
        # Call each stage as a module (not via .forward) so that any hooks
        # registered on the stages are executed.
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
def classify_cifar10(c):
    """Train VGG19Block on the CIFAR10 training split and save model + metrics.

    Per-epoch mean loss and training accuracy are printed, collected, and
    finally pickled to ``c.METRIC_PATH``; the model is saved to ``c.PATH``
    (plus ``_ep{e}`` snapshots at epochs 99/199/299/399).

    Args:
        c: Constants instance. BATCH_SIZE, DEVICE, LR, EPOCHS, PATH and
            METRIC_PATH are read here.
    """
    import os  # local import: only needed to create the output directories

    # CIFAR10 training images: (50000, 32, 32, 3)
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)

    model = VGG19Block()
    model = model.to(c.DEVICE)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=c.LR)

    # Create the output directories up front so a missing folder cannot make
    # the save fail after the whole training run.
    for out_path in (c.PATH, c.METRIC_PATH):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): c.SEED is not used anywhere in this function - confirm
    # whether seeding was intended.
    losses, accs = list(), list()
    for e in range(c.EPOCHS):
        epoch_loss, n_corrects = 0., 0
        for X_, y_ in tqdm(dataloader):
            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)

            pred = model(X_)
            loss = loss_fn(pred, y_)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate a plain float so no tensors are kept across batches.
            epoch_loss += loss.item()
            pred_cls = torch.argmax(pred, dim=1)
            n_corrects += (pred_cls == y_).sum().item()

        epoch_loss /= len(dataloader)
        # Divide by the real dataset size rather than a hand-entered constant.
        epoch_accr = n_corrects / len(dataset)

        print(f"\n epoch {e} : loss={epoch_loss:.4f}, accr={epoch_accr}")

        losses.append(epoch_loss)
        accs.append(epoch_accr)

        # Intermediate snapshots at selected epochs.
        if e in [99, 199, 299, 399]:
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")
            torch.save(model, rep)

    print("==============")
    # print(f"{losses:.4f}, \n {accs=}")

    # Save the per-epoch metrics and the final model.
    with open(c.METRIC_PATH, 'wb') as f:
        result = {
            'losses': losses,
            'accs': accs
        }
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    # NOTE: this pickles the whole module object, not just its state_dict.
    torch.save(model, c.PATH)
def visualize(losses, accs):
    """Plot loss (top) and accuracy (bottom) per epoch and show the figure.

    Args:
        losses: Sequence of per-epoch loss values.
        accs: Sequence of per-epoch accuracy values.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))

    # Top panel: loss curve.
    loss_ax.plot(losses)
    loss_ax.set_ylabel("Loss", fontsize=15)
    loss_ax.tick_params(labelsize=10)

    # Bottom panel: accuracy curve; only this panel carries the x label.
    acc_ax.plot(accs)
    acc_ax.set_xlabel("Epoch", fontsize=15)
    acc_ax.set_ylabel("Accuracy", fontsize=15)
    acc_ax.tick_params(labelsize=10)

    fig.suptitle("VGG19 Metrics by Epoch", fontsize=16)
    plt.show()
if __name__ == '__main__':
    # Settings for the VGG19 / CIFAR10 run.
    constants = Constants(
        N_SAMPLES=50000,
        BATCH_SIZE=1024,
        EPOCHS=500,
        LR=0.0001,
        DEVICE=get_device(),
        PATH="model/vgg19_cifar10.pt",
        METRIC_PATH="model/vgg_cifar10_metrics.pkl",
        SEED=80,
    )

    # Train; this writes the model and the metrics pickle.
    classify_cifar10(constants)

    # Read back the metrics file that training just produced and plot it.
    with open(constants.METRIC_PATH, 'rb') as f:
        metric_dict = pickle.load(f)

    visualize(metric_dict['losses'], metric_dict['accs'])
#️⃣ Lenet5로 CIFAR10 이미지 데이터셋 학습
초기 설정
# Initial LeNet5 settings (the log below shows accuracy stuck near 0.10).
constants = Constants(
    N_SAMPLES=50000,
    BATCH_SIZE=128,
    EPOCHS=300,
    LR=0.01,
    DEVICE=get_device(),
    PATH="model/lenet5_cifar10.pt",
    METRIC_PATH="model/lenet5_metrics.pkl",
    SEED=80,
)
epoch 32 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 73.06it/s]
0%| | 0/391 [00:00<?, ?it/s]epoch 33 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 71.95it/s]
0%| | 0/391 [00:00<?, ?it/s]epoch 34 : loss=0.0183, accr=0.10248
- 학습이 안됨
설정을 바꿔서 다시 시도
from dataclasses import dataclass
import pickle
import csv
from torchvision.datasets import CIFAR10
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from tqdm import tqdm
import matplotlib.pyplot as plt
@dataclass
class Constants:
    """Settings bundle passed to the LeNet training entry point."""

    N_SAMPLES: int        # number of training samples
    BATCH_SIZE: int       # DataLoader batch size
    EPOCHS: int           # number of passes over the dataset
    LR: float             # optimizer learning rate
    DEVICE: torch.device  # device the model and batches are moved to
    PATH: str             # file path of the saved model
    METRIC_PATH: str      # file path of the pickled loss/accuracy history
    SEED: int             # random seed value
def get_device():
    """Pick CUDA if it is available, else CPU; print and return the device."""
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f"curr device = {device}")
    return device
class LeNet(nn.Module):
    """LeNet-5 style CNN: three 5x5 convolutions with tanh and average pooling,
    followed by two linear layers.

    ``fc1`` takes 120 features, so the third convolution must produce a
    120x1x1 map; with unpadded 5x5 convolutions that holds for 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5 -> 1).

    Args:
        init_channel: Number of channels of the input image.
        out_features: Number of output logits.
    """

    def __init__(self, init_channel, out_features):
        super().__init__()

        # First convolution without padding (a padding=2 variant was tried
        # and left disabled).
        self.cnn1 = nn.Conv2d(init_channel, 6, kernel_size=5, padding=0)
        self.cnn1_act = nn.Tanh()
        self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)

        self.cnn2 = nn.Conv2d(6, 16, kernel_size=5)
        self.cnn2_act = nn.Tanh()
        self.avgpool2 = nn.AvgPool2d(kernel_size=2, stride=2)

        self.cnn3 = nn.Conv2d(16, 120, kernel_size=5)
        self.cnn3_act = nn.Tanh()

        # 120 * 1 * 1 flattened features (the disabled variant used 120*2*2).
        self.fc1 = nn.Linear(120, 84)
        self.fc1_act = nn.Tanh()

        self.fc2 = nn.Linear(84, out_features)

    def forward(self, x):
        """Return the (batch, out_features) logits for an image batch."""
        feature_layers = (
            self.cnn1, self.cnn1_act, self.avgpool1,
            self.cnn2, self.cnn2_act, self.avgpool2,
            self.cnn3, self.cnn3_act,
        )
        for layer in feature_layers:
            x = layer(x)

        # (batch, C, H, W) -> (batch, C*H*W)
        flat = x.view(x.size(0), -1)

        hidden = self.fc1_act(self.fc1(flat))
        return self.fc2(hidden)
def train_cifar10_w_lenet(c):
    """Train LeNet on the CIFAR10 training split, then save and plot the metrics.

    Per-epoch mean loss and training accuracy are printed and collected. After
    training they are pickled to ``c.METRIC_PATH``, written as two CSV rows to
    ``model/lenet5_metrics_2.csv`` and plotted; the model is saved to
    ``c.PATH`` (plus ``_ep{e}`` snapshots at epochs 199/399/599/799).

    Args:
        c: Constants instance. BATCH_SIZE, DEVICE, LR, EPOCHS, PATH and
            METRIC_PATH are read here.
    """
    import os  # local import: only needed to create the output directories

    csv_path = "model/lenet5_metrics_2.csv"

    # CIFAR10 config
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)

    model = LeNet(init_channel=3, out_features=10).to(c.DEVICE)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=c.LR)

    # Create the output directories up front so a missing folder cannot make
    # the save fail after the whole training run.
    for out_path in (c.PATH, c.METRIC_PATH, csv_path):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    losses, accs = list(), list()

    for e in range(c.EPOCHS):
        epoch_loss, n_corrects = 0., 0

        for X_, y_ in tqdm(dataloader):
            optimizer.zero_grad()

            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)
            pred = model(X_)
            loss = loss_fn(pred, y_)

            loss.backward()
            optimizer.step()

            # Accumulate a plain float so no tensors are kept across batches.
            epoch_loss += loss.item()
            pred_cls = torch.argmax(pred, dim=1)
            n_corrects += (pred_cls == y_).sum().item()

        epoch_loss /= len(dataloader)
        # Divide by the real dataset size rather than a hand-entered constant.
        epoch_accr = n_corrects / len(dataset)

        print(f"\n epoch {e} : loss={epoch_loss:.4f}, accr={epoch_accr}")

        losses.append(epoch_loss)
        accs.append(epoch_accr)

        # Intermediate snapshots at selected epochs.
        if e in [199, 399, 599, 799]:
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")
            torch.save(model, rep)

    # Save the per-epoch metrics and the final model.
    with open(c.METRIC_PATH, 'wb') as f:
        result = {
            'losses': losses,
            'accs': accs
        }
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    # NOTE: this pickles the whole module object, not just its state_dict.
    torch.save(model, c.PATH)

    # newline='' lets the csv module control line endings itself (otherwise
    # blank rows can appear on platforms that translate newlines).
    with open(csv_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(losses)
        writer.writerow(accs)

    visualize(losses, accs)
def visualize(losses, accs):
    """Plot loss (top) and accuracy (bottom) per epoch and show the figure.

    Args:
        losses: Sequence of per-epoch loss values.
        accs: Sequence of per-epoch accuracy values.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))

    # Top panel: loss curve.
    loss_ax.plot(losses)
    loss_ax.set_ylabel("Loss", fontsize=15)
    loss_ax.tick_params(labelsize=10)

    # Bottom panel: accuracy curve; only this panel carries the x label.
    acc_ax.plot(accs)
    acc_ax.set_xlabel("Epoch", fontsize=15)
    acc_ax.set_ylabel("Accuracy", fontsize=15)
    acc_ax.tick_params(labelsize=10)

    fig.suptitle("Lenet5 Metrics by Epoch", fontsize=16)
    plt.show()
if __name__ == '__main__':
    # Second attempt: lower learning rate, larger batches, more epochs.
    constants = Constants(
        N_SAMPLES=50000,
        BATCH_SIZE=1024,
        EPOCHS=1000,
        LR=0.0001,
        DEVICE=get_device(),
        PATH="model/lenet5_cifar10_2.pt",
        METRIC_PATH="model/lenet5_metrics_2.pkl",
        SEED=80,
    )

    train_cifar10_w_lenet(constants)
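- learning rate를 0.01 → 0.0001로 대폭 낮추고, batch size를 128 → 1024로, epoch 수를 300 → 1000으로 늘려 accuracy 0.89까지 학습시킬 수 있었음 (학습 데이터(train=True) 기준 accuracy이며, 별도의 검증/테스트 셋으로 평가한 값은 아님)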
#️⃣ VGG19가 CIFAR10 이미지 데이터셋을 학습하기 어려운 이유
VGG19는 ILSVRC(ImageNet)의 224x224 이미지를 기준으로 설계한 모델임
- VGG19는 VGG 모델중에서 가장 depth가 깊음
- pooling을 5번이나 함
이 모델을 CIFAR10에 적용하게 되면
마지막 풀링 때 image size가 1x1되니까 해상도가 낮아지면서 학습이 잘 안되는 문제 발생
결론: 데이터에 맞는 모델의 경량화 필요
'Education > 새싹 TIL' 카테고리의 다른 글
새싹 AI데이터엔지니어 핀테커스 13주차 (목) - ResNet (0) | 2023.11.30 |
---|---|
새싹 AI데이터엔지니어 핀테커스 13주차 (수) - GoogLeNet (0) | 2023.11.29 |
새싹 AI데이터엔지니어 핀테커스 13주차 (월) - LeNet5 & VGGNet (1) | 2023.11.27 |
새싹 AI데이터엔지니어 핀테커스 12주차 (금) - Sobel Filtering 3 & Convolutional Neural Network (0) | 2023.11.24 |
새싹 AI데이터엔지니어 핀테커스 12주차 (목) - Sobel Filtering 2 (1) | 2023.11.23 |