
2023-11-28 59th Class

Convolutional Neural Network - VGGNet

#️⃣ VGG11 (Pytorch Sequential Block ver.)

code

import torch  
import torch.nn as nn  
from collections import OrderedDict  
from torchsummary import summary  
  
class VGGNet(nn.Module):  
    def __init__(self):  
        super(VGGNet, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            # 1. input (224 x 224 RGB image)  
            ('conv3-64', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        # flatten: (Batch, C, H, W) -> (Batch, C*H*W)  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000)),  
        ]))  
  
        # conv5 output (B, 512, 7, 7) is flattened to (B, 512*7*7) before the classifier  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vggnet():  
    test_data = torch.randn((10, 3, 224, 224))  
    model = VGGNet()  
    summary(model.to('cuda'), input_size=(3, 224, 224))  
  
    model = model.to('cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    run_vggnet()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
              ReLU-5        [-1, 128, 112, 112]               0
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
              ReLU-8          [-1, 256, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         590,080
             ReLU-10          [-1, 256, 56, 56]               0
        MaxPool2d-11          [-1, 256, 28, 28]               0
           Conv2d-12          [-1, 512, 28, 28]       1,180,160
             ReLU-13          [-1, 512, 28, 28]               0
           Conv2d-14          [-1, 512, 28, 28]       2,359,808
             ReLU-15          [-1, 512, 28, 28]               0
        MaxPool2d-16          [-1, 512, 14, 14]               0
           Conv2d-17          [-1, 512, 14, 14]       2,359,808
             ReLU-18          [-1, 512, 14, 14]               0
           Conv2d-19          [-1, 512, 14, 14]       2,359,808
             ReLU-20          [-1, 512, 14, 14]               0
        MaxPool2d-21            [-1, 512, 7, 7]               0
           Linear-22                 [-1, 4096]     102,764,544
             ReLU-23                 [-1, 4096]               0
           Linear-24                 [-1, 4096]      16,781,312
             ReLU-25                 [-1, 4096]               0
           Linear-26                 [-1, 1000]       4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 125.12
Params size (MB): 506.83
Estimated Total Size (MB): 632.53
----------------------------------------------------------------
torch.Size([10, 1000])
  • torchsummary can be used to inspect the model's parameter counts
  • For VGG, PyTorch Sequential lets you group the convolutional layers into blocks and forward through them block by block; the OrderedDict keys become the submodule names, as sketched below
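
A minimal sketch (assuming the VGGNet class above): the names passed via OrderedDict can be used to list or look up individual layers.

model = VGGNet()
for name, module in model.conv1.named_children():
    print(name, '->', module.__class__.__name__)
# conv3-64 -> Conv2d
# conv3-64-act -> ReLU
# maxpool1 -> MaxPool2d
print(model.conv1.get_submodule('conv3-64'))  # Conv2d(3, 64, kernel_size=(3, 3), ...)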

#️⃣ VGG13 (Pytorch Sequential Block ver.)

  • VGG13 is the VGG11 model with one convolutional layer added to each of the first and second blocks

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

full code

import torch  
from torch import nn  
from collections import OrderedDict  
from torchsummary import summary  
  
  
class VGG13(nn.Module):  
    def __init__(self):  
        super(VGG13, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            ('conv3-64-1', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-1-act', nn.ReLU()),  
            ('conv3-64-2', nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-2-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128-1', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-1-act', nn.ReLU()),  
            ('conv3-128-2', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-2-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000))  
        ]))  
  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
  
def run_vgg13():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG13()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  

  
if __name__ == '__main__':  
    run_vgg13()  
    # run_vgg19()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [16, 64, 224, 224]           1,792
              ReLU-2         [16, 64, 224, 224]               0
            Conv2d-3         [16, 64, 224, 224]          36,928
              ReLU-4         [16, 64, 224, 224]               0
         MaxPool2d-5         [16, 64, 112, 112]               0
            Conv2d-6        [16, 128, 112, 112]          73,856
              ReLU-7        [16, 128, 112, 112]               0
            Conv2d-8        [16, 128, 112, 112]         147,584
              ReLU-9        [16, 128, 112, 112]               0
        MaxPool2d-10          [16, 128, 56, 56]               0
           Conv2d-11          [16, 256, 56, 56]         295,168
             ReLU-12          [16, 256, 56, 56]               0
           Conv2d-13          [16, 256, 56, 56]         590,080
             ReLU-14          [16, 256, 56, 56]               0
        MaxPool2d-15          [16, 256, 28, 28]               0
           Conv2d-16          [16, 512, 28, 28]       1,180,160
             ReLU-17          [16, 512, 28, 28]               0
           Conv2d-18          [16, 512, 28, 28]       2,359,808
             ReLU-19          [16, 512, 28, 28]               0
        MaxPool2d-20          [16, 512, 14, 14]               0
           Conv2d-21          [16, 512, 14, 14]       2,359,808
             ReLU-22          [16, 512, 14, 14]               0
           Conv2d-23          [16, 512, 14, 14]       2,359,808
             ReLU-24          [16, 512, 14, 14]               0
        MaxPool2d-25            [16, 512, 7, 7]               0
           Linear-26                 [16, 4096]     102,764,544
             ReLU-27                 [16, 4096]               0
           Linear-28                 [16, 4096]      16,781,312
             ReLU-29                 [16, 4096]               0
           Linear-30                 [16, 1000]       4,097,000
================================================================
Total params: 133,047,848
Trainable params: 133,047,848
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3177.93
Params size (MB): 507.54
Estimated Total Size (MB): 3694.66
----------------------------------------------------------------
torch.Size([8, 1000])

#️⃣ VGG19 (Pytorch Sequential Block ver.)

  • VGG19 is the VGG16 model with one convolutional layer added to each of blocks 3 through 5 (four convs per block instead of three)

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-1000
                          conv3-256    conv3-512    conv3-512    soft-max
                          maxpool      maxpool      maxpool

full code

import torch  
from torch import nn  
from collections import OrderedDict  
from torchsummary import summary  

  
class VGG19(nn.Module):  
    def __init__(self):  
        super(VGG19, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            ('conv3-64-1', nn.Conv2d(in_channels=3, out_channels=64,   
                                     kernel_size=3, padding=1)),  
            ('conv3-64-1-act', nn.ReLU()),  
            ('conv3-64-2', nn.Conv2d(in_channels=64, out_channels=64,   
                                     kernel_size=3, padding=1)),  
            ('conv3-64-2-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128-1', nn.Conv2d(in_channels=64, out_channels=128,   
                                      kernel_size=3, padding=1)),  
            ('conv3-128-1-act', nn.ReLU()),  
            ('conv3-128-2', nn.Conv2d(in_channels=128, out_channels=128,   
                                      kernel_size=3, padding=1)),  
            ('conv3-128-2-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('conv3-256-3', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-3-act', nn.ReLU()),  
            ('conv3-256-4', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-4-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('conv3-512-5', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-5-act', nn.ReLU()),  
            ('conv3-512-6', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-6-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000))  
        ]))  
  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
  
def run_vgg19():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG19()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    run_vgg19()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [16, 64, 224, 224]           1,792
              ReLU-2         [16, 64, 224, 224]               0
            Conv2d-3         [16, 64, 224, 224]          36,928
              ReLU-4         [16, 64, 224, 224]               0
         MaxPool2d-5         [16, 64, 112, 112]               0
            Conv2d-6        [16, 128, 112, 112]          73,856
              ReLU-7        [16, 128, 112, 112]               0
            Conv2d-8        [16, 128, 112, 112]         147,584
              ReLU-9        [16, 128, 112, 112]               0
        MaxPool2d-10          [16, 128, 56, 56]               0
           Conv2d-11          [16, 256, 56, 56]         295,168
             ReLU-12          [16, 256, 56, 56]               0
           Conv2d-13          [16, 256, 56, 56]         590,080
             ReLU-14          [16, 256, 56, 56]               0
           Conv2d-15          [16, 256, 56, 56]         590,080
             ReLU-16          [16, 256, 56, 56]               0
           Conv2d-17          [16, 256, 56, 56]         590,080
             ReLU-18          [16, 256, 56, 56]               0
        MaxPool2d-19          [16, 256, 28, 28]               0
           Conv2d-20          [16, 512, 28, 28]       1,180,160
             ReLU-21          [16, 512, 28, 28]               0
           Conv2d-22          [16, 512, 28, 28]       2,359,808
             ReLU-23          [16, 512, 28, 28]               0
           Conv2d-24          [16, 512, 28, 28]       2,359,808
             ReLU-25          [16, 512, 28, 28]               0
           Conv2d-26          [16, 512, 28, 28]       2,359,808
             ReLU-27          [16, 512, 28, 28]               0
        MaxPool2d-28          [16, 512, 14, 14]               0
           Conv2d-29          [16, 512, 14, 14]       2,359,808
             ReLU-30          [16, 512, 14, 14]               0
           Conv2d-31          [16, 512, 14, 14]       2,359,808
             ReLU-32          [16, 512, 14, 14]               0
           Conv2d-33          [16, 512, 14, 14]       2,359,808
             ReLU-34          [16, 512, 14, 14]               0
           Conv2d-35          [16, 512, 14, 14]       2,359,808
             ReLU-36          [16, 512, 14, 14]               0
        MaxPool2d-37            [16, 512, 7, 7]               0
           Linear-38                 [16, 4096]     102,764,544
             ReLU-39                 [16, 4096]               0
           Linear-40                 [16, 4096]      16,781,312
             ReLU-41                 [16, 4096]               0
           Linear-42                 [16, 1000]       4,097,000
================================================================
Total params: 143,667,240
Trainable params: 143,667,240
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3814.93
Params size (MB): 548.05
Estimated Total Size (MB): 4372.17
----------------------------------------------------------------
torch.Size([8, 1000])

#️⃣ Characteristics of VGG

  1. A repeating structure of Conv - ReLU - Pool or Conv - ReLU - Conv - ReLU - Pool
  2. Every Conv uses a fixed kernel size of 3 and padding of 1
  3. When Conv - ReLU repeats, the second and later Conv - ReLU keep the previous channel count
  4. After the Conv - ReLU repetitions comes max pooling (see the check below)
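
A quick check of points 2 and 4 (a sketch): with kernel_size=3 and padding=1 the convolution preserves spatial size, since (H + 2*1 - 3)/1 + 1 = H, while each 2x2 max pool halves it.

import torch
from torch import nn

x = torch.randn(1, 3, 224, 224)
conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)

print(conv(x).shape)        # torch.Size([1, 64, 224, 224]) -- size preserved
print(pool(conv(x)).shape)  # torch.Size([1, 64, 112, 112]) -- halved by pooling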

#️⃣ Building the VGG Block

base code

class ConvBlockBase(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlockBase, self).__init__()  
  
        self.layers = [  
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                      kernel_size=3, padding=1),  
            nn.ReLU()  
        ]  
  
        for _ in range(n_layers - 1):  
            self.layers.append(nn.Conv2d(in_channels=out_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x

  • In self.layers = nn.Sequential(*self.layers), passing a list or tuple with an asterisk unpacks it into individual positional arguments inside the function/class, as illustrated below
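
A tiny illustration (a sketch, independent of the VGG code): the asterisk expands the list into separate arguments.

from torch import nn

layers = [nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.ReLU()]
block = nn.Sequential(*layers)   # same as nn.Sequential(layers[0], layers[1])
print(block)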

Refactoring the base code so that a single for loop adds all the layers:

new code

class ConvBlock(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlock, self).__init__()  
  
        self.layers = list()  
  
        for i in range(n_layers):    
            self.layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
            in_channels = out_channels
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x
  • Inside the for loop, the 0th layer's input is the given in_channels,
  • but every later layer takes the previous out_channels as its input,
  • so in_channels = out_channels is assigned at the end of each iteration (checked below)
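
A quick check of the channel bookkeeping (assuming the ConvBlock above):

block = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
print(block.layers)
# Sequential(
#   (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (1): ReLU()
#   (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (3): ReLU()
#   (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# )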

#️⃣ Implementing VGG with the VGG Block

[1] VGG11

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-4096
                          maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

code

class VGG11Block(nn.Module):  
    def __init__(self):  
        super(VGG11Block, self).__init__()  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=1)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=1)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=2)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=2)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=2)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vgg11_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG11Block()  
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    run_vgg11_block()


'''
torch.Size([8, 1000])
'''
  • ConvBlock() is identical to the new code above

[2] VGG13

architecture

input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

code

class VGG13Block(nn.Module):  
    def __init__(self):  
        super(VGG13Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=2)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=2)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=2)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*7*7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vgg13_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG13Block()  
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    # run_vgg11_block()    
    run_vgg13_block()

'''
torch.Size([8, 1000])
'''

[3] VGG19

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-1000
                          conv3-256    conv3-512    conv3-512    soft-max
                          maxpool      maxpool      maxpool

code

class VGG19Block(nn.Module):  
    def __init__(self):  
        super(VGG19Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=4)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=4)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=4)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*7*7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
 
  
def run_vgg19_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG19Block()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    # run_vgg11_block()    
    # run_vgg13_block()    
    run_vgg19_block()

'''
torch.Size([8, 1000])
'''

VGGNet Parameter Numbers

#️⃣ Calculating the Number of Parameters

(figure: Drawing 2023-11-28 15.00.25.excalidraw.png)

  • A single kernel has 3x3 = 9 weights
  • The kernel's channel count equals the input's channel count of 3
  • So one filter has 9 x 3 = 27 weights (see the check below)
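
These counts can be read off the weight tensor directly (a sketch):

from torch import nn

conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
print(conv.weight.shape)  # torch.Size([64, 3, 3, 3]) -> 3x3x3 = 27 weights per filter
print(conv.bias.shape)    # torch.Size([64]) -> one bias per output channel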

[1] block = ConvBlock(in_channels=3, out_channels=64, n_layers=1)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
MaxPool2d-3     (-1, 64, 50, 50)      0

Param # = (kernel × kernel × input channels + 1) × output channels
(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792

[2] block = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
Conv2d-3        (-1, 64, 100, 100)    36,928
ReLU-4          (-1, 64, 100, 100)    0
MaxPool2d-5     (-1, 64, 50, 50)      0

(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928
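
Both counts can be verified in code (a sketch):

from torch import nn

def n_params(m):
    return sum(p.numel() for p in m.parameters())

print(n_params(nn.Conv2d(3, 64, kernel_size=3, padding=1)))   # (3*3*3 + 1) * 64 = 1792
print(n_params(nn.Conv2d(64, 64, kernel_size=3, padding=1)))  # (3*3*64 + 1) * 64 = 36928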

[3] block = ConvBlock(in_channels=3, out_channels=64, n_layers=3)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
Conv2d-3        (-1, 64, 100, 100)    36,928
ReLU-4          (-1, 64, 100, 100)    0
Conv2d-5        (-1, 64, 100, 100)    36,928
ReLU-6          (-1, 64, 100, 100)    0
MaxPool2d-7     (-1, 64, 50, 50)      0

(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928


Train CIFAR10 with VGG19 and LeNet-5

#️⃣ Training on the CIFAR10 Image Dataset with VGG19

import pickle  
  
import torch  
from torch import nn  
from collections import OrderedDict  
from dataclasses import dataclass  
import torch.optim as optim  
  
from torch.utils.data import DataLoader  
from torchsummary import summary  
from torchvision.datasets import CIFAR10  
from torchvision.transforms import ToTensor  
from tqdm import tqdm  
  
import matplotlib.pyplot as plt  
  
@dataclass  
class Constants:  
    N_SAMPLES: int  
    BATCH_SIZE: int  
    EPOCHS: int  
    LR: float  
    DEVICE: torch.device  
    PATH: str  
    METRIC_PATH: str  
    SEED: int  
  
  
def get_device():  
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
    print(f"curr device = {DEVICE}")  
    return DEVICE


class ConvBlock(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlock, self).__init__()  
  
        self.layers = list()  
  
        for i in range(n_layers):  
            self.layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
            in_channels = out_channels  
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x


class VGG19Block(nn.Module):  
    def __init__(self):  
        super(VGG19Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=4)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=4)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=4)  
  
        # the original VGG classifier takes 512*7*7 (224x224 input);  
        # with CIFAR10's 32x32 input, conv5 output is 512*1*1 after the five poolings  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*1*1, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=10)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x

def classify_cifar10(c):  
    # (50000, 32, 32, 3)  
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)  
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)  
  
    model = VGG19Block()  
  
    model = model.to(c.DEVICE)  
  
    loss_fn = nn.CrossEntropyLoss()  
    optimizer = optim.Adam(model.parameters(), lr=c.LR)  
  
    losses, accs = list(), list()  
  
    for e in range(c.EPOCHS):  
        epoch_loss, n_corrects = 0., 0  
        for X_, y_ in tqdm(dataloader):  
            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)  
  
            pred = model.forward(X_)  
            loss = loss_fn(pred, y_)  
  
            optimizer.zero_grad()  
            loss.backward()  
            optimizer.step()  
  
            epoch_loss += loss.detach()  # detach so each batch's autograd graph isn't retained  
            pred_cls = torch.argmax(pred, dim=1)  
            n_corrects += (pred_cls == y_).sum().item()  
  
        epoch_loss /= len(dataloader)  
        epoch_accr = n_corrects / c.N_SAMPLES  
  
        print(f"\n epoch {e} : loss={epoch_loss.item():.4f}, accr={epoch_accr}")  
  
        losses.append(epoch_loss.item())  
        accs.append(epoch_accr)  
  
        if e in [99, 199, 299, 399]:  
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")  
            torch.save(model, rep)  
  
    print("==============")  
    # print(f"{losses:.4f}, \n {accs=}")  
  
    # Save Model and Metrics by Epoch    
    with open(c.METRIC_PATH, 'wb') as f:  
        result = {  
            'losses': losses,  
            'accs': accs  
        }  
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)  
  
    torch.save(model, c.PATH)


def visualize(losses, accs):  
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))  
    axes[0].plot(losses)  
    axes[1].plot(accs)  
  
    axes[1].set_xlabel("Epoch", fontsize=15)  
    axes[0].set_ylabel("Loss", fontsize=15)  
    axes[1].set_ylabel("Accuracy", fontsize=15)  
    axes[0].tick_params(labelsize=10)  
    axes[1].tick_params(labelsize=10)  
    fig.suptitle("VGG19 Metrics by Epoch", fontsize=16)  
    plt.show()  
  
  
if __name__ == '__main__':   
    constants = Constants(  
        N_SAMPLES=50000,  
        BATCH_SIZE=1024,  
        EPOCHS=500,  
        LR=0.0001,  
        DEVICE=get_device(),  
        PATH="model/vgg19_cifar10.pt",  
        METRIC_PATH="model/vgg_cifar10_metrics.pkl",  
        SEED=80  
    )  
    classify_cifar10(constants)  
    with open(constants.METRIC_PATH, 'rb') as f:  
        metric_dict = pickle.load(f)  
  
    # metric_dict['losses'] = [x.item() for x in metric_dict['losses']]  
    visualize(metric_dict['losses'], metric_dict['accs'])
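
One note on persistence: torch.save(model, path) pickles the whole module object, tying the checkpoint to this exact class definition. A sketch of the more portable state_dict convention, using the same names as inside classify_cifar10:

# save only the weights, then restore into a freshly constructed model
torch.save(model.state_dict(), c.PATH)

restored = VGG19Block()
restored.load_state_dict(torch.load(c.PATH))
restored.eval()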

(figure: vgg19_lr0001.png — VGG19 loss/accuracy by epoch)

#️⃣ Training on the CIFAR10 Image Dataset with LeNet-5

initial settings

constants = Constants(  
    N_SAMPLES=50000,  
    BATCH_SIZE=128,  
    EPOCHS=300,  
    LR=0.01,  
    DEVICE=get_device(),  
    PATH="model/lenet5_cifar10.pt",  
    METRIC_PATH="model/lenet5_metrics.pkl",  
    SEED=80  
)

epoch 32 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 73.06it/s]
  0%|          | 0/391 [00:00<?, ?it/s]epoch 33 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 71.95it/s]
  0%|          | 0/391 [00:00<?, ?it/s]epoch 34 : loss=0.0183, accr=0.10248
  • Training is not progressing: accuracy is stuck at 0.10248, i.e. random guessing across 10 classes

Trying again with the settings changed

from dataclasses import dataclass  
import pickle  
import csv  
from torchvision.datasets import CIFAR10  
import torch  
import torch.nn as nn  
from torch.optim import Adam  
from torch.utils.data import DataLoader  
  
from torchvision.transforms import ToTensor  
from tqdm import tqdm  
import matplotlib.pyplot as plt  
  
@dataclass  
class Constants:  
    N_SAMPLES: int  
    BATCH_SIZE: int  
    EPOCHS: int  
    LR: float  
    DEVICE: torch.device  
    PATH: str  
    METRIC_PATH: str  
    SEED: int  
  
  
def get_device():  
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
    print(f"curr device = {DEVICE}")  
    return DEVICE  
  
  
class LeNet(nn.Module):  
    def __init__(self, init_channel, out_features):  
        super(LeNet, self).__init__()  
        # self.cnn1 = nn.Conv2d(in_channels=init_channel, out_channels=6, kernel_size=5, padding=2)  
        self.cnn1 = nn.Conv2d(in_channels=init_channel, out_channels=6, kernel_size=5, padding=0)  # 32x32 -> 28x28  
        self.cnn1_act = nn.Tanh()  
        self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)  
  
        self.cnn2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)  
        self.cnn2_act = nn.Tanh()  
        self.avgpool2 = nn.AvgPool2d(kernel_size=2, stride=2)  
  
        self.cnn3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)  
        self.cnn3_act = nn.Tanh()  
  
        # self.fc1 = nn.Linear(in_features=120*2*2, out_features=84)  
        self.fc1 = nn.Linear(in_features=120, out_features=84)  # cnn3 output is 120x1x1 for a 32x32 input  
        self.fc1_act = nn.Tanh()  
  
        self.fc2 = nn.Linear(in_features=84, out_features=out_features)  
  
    def forward(self, x):  
        x = self.cnn1(x)  
        x = self.cnn1_act(x)  
        x = self.avgpool1(x)  
        x = self.cnn2(x)  
        x = self.cnn2_act(x)  
        x = self.avgpool2(x)  
        x = self.cnn3(x)  
        x = self.cnn3_act(x)  
  
        x = x.view(x.size(0), -1)  
        x = self.fc1(x)  
        x = self.fc1_act(x)  
        x = self.fc2(x)  
        return x  
  
  
def train_cifar10_w_lenet(c):  
    # CIFAR10 config  
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)  
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)  
  
    model = LeNet(init_channel=3, out_features=10).to(c.DEVICE)  
    loss_fn = nn.CrossEntropyLoss()  
    optimizer = Adam(model.parameters(), lr=c.LR)  
  
    losses, accs = list(), list()  
  
    for e in range(c.EPOCHS):  
        epoch_loss, n_corrects = 0., 0  
  
        for X_, y_ in tqdm(dataloader):  
            optimizer.zero_grad()  
  
            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)  
            pred = model(X_)  
            loss = loss_fn(pred, y_)  
  
            loss.backward()  
            optimizer.step()  
  
            epoch_loss += loss.detach()  # detach so each batch's autograd graph isn't retained  
            pred_cls = torch.argmax(pred, dim=1)  
            n_corrects += (pred_cls == y_).sum().item()  
  
        epoch_loss /= len(dataloader)  
        # epoch_loss /= c.N_SAMPLES  
        epoch_accr = n_corrects / c.N_SAMPLES  
  
        print(f"\n epoch {e} : loss={epoch_loss.item():.4f}, accr={epoch_accr}")  
  
        losses.append(epoch_loss.item())  
        accs.append(epoch_accr)  
  
        if e in [199, 399, 599, 799]:  
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")  
            torch.save(model, rep)  
  
    # print(losses)  
    # print(accs)  
    # Save Model and Metrics by Epoch    
    with open(c.METRIC_PATH, 'wb') as f:  
        result = {  
            'losses': losses,  
            'accs': accs  
        }  
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)  
  
    torch.save(model, c.PATH)  
  
    with open("model/lenet5_metrics_2.csv", 'w') as file:  
        writer = csv.writer(file)  
        writer.writerow(losses)  
        writer.writerow(accs)  
  
    visualize(losses, accs)  
  
  
def visualize(losses, accs):  
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))  
    axes[0].plot(losses)  
    axes[1].plot(accs)  
  
    axes[1].set_xlabel("Epoch", fontsize=15)  
    axes[0].set_ylabel("Loss", fontsize=15)  
    axes[1].set_ylabel("Accuracy", fontsize=15)  
    axes[0].tick_params(labelsize=10)  
    axes[1].tick_params(labelsize=10)  
    fig.suptitle("Lenet5 Metrics by Epoch", fontsize=16)  
    plt.show()  
  
  
if __name__ == '__main__':  
    constants = Constants(  
        N_SAMPLES=50000,  
        BATCH_SIZE=1024,  
        EPOCHS=1000,  
        LR=0.0001,  
        DEVICE=get_device(),  
        PATH="model/lenet5_cifar10_2.pt",  
        METRIC_PATH="model/lenet5_metrics_2.pkl",  
        SEED=80  
    )  
    train_cifar10_w_lenet(constants)

(figure: lenet5_cifar10_2.png — LeNet-5 loss/accuracy by epoch)

  • By lowering the learning rate substantially and increasing the number of epochs, the model could be trained up to 0.89 accuracy

#️⃣ Why VGG19 Struggles to Learn the CIFAR10 Dataset

VGG19 was designed around the ILSVRC 224x224 images.

  • VGG19 is the deepest of the VGG models
  • It applies pooling five times

When this model is applied to CIFAR10, the feature map is already 1x1 at the last pooling, so the remaining resolution is too low and training suffers.

Conclusion: the model needs to be slimmed down to fit the data.
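
The shrinkage is easy to trace (a sketch, assuming the CIFAR10 VGG19Block above): every block halves the spatial size of the 32x32 input.

import torch

x = torch.randn(1, 3, 32, 32)
model = VGG19Block()
for name in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']:
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# conv1 (1, 64, 16, 16)
# conv2 (1, 128, 8, 8)
# conv3 (1, 256, 4, 4)
# conv4 (1, 512, 2, 2)
# conv5 (1, 512, 1, 1)  -- one pixel per channel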
