
2023-11-28 59th Class

Convolutional Neural Network - VGGNet

#️⃣ VGG11 (Pytorch Sequential Block ver.)

code

import torch  
import torch.nn as nn  
from collections import OrderedDict  
from torchsummary import summary  
  
class VGGNet(nn.Module):  
    def __init__(self):  
        super(VGGNet, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            # 1. input (224 x 224 RGB image)  
            ('conv3-64', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2)),  
            ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        # flatten: (Batch, C, H, W) -> (Batch, C*H*W)  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000)),  
        ]))  
  
        # conv5 output (B, 512, 7, 7) is flattened to (B, 512*7*7) before the classifier  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vggnet():  
    test_data = torch.randn((10, 3, 224, 224))  
    model = VGGNet()  
    summary(model.to('cuda'), input_size=(3, 224, 224))  
  
    model = model.to('cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    run_vggnet()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
              ReLU-5        [-1, 128, 112, 112]               0
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
              ReLU-8          [-1, 256, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         590,080
             ReLU-10          [-1, 256, 56, 56]               0
        MaxPool2d-11          [-1, 256, 28, 28]               0
           Conv2d-12          [-1, 512, 28, 28]       1,180,160
             ReLU-13          [-1, 512, 28, 28]               0
           Conv2d-14          [-1, 512, 28, 28]       2,359,808
             ReLU-15          [-1, 512, 28, 28]               0
        MaxPool2d-16          [-1, 512, 14, 14]               0
           Conv2d-17          [-1, 512, 14, 14]       2,359,808
             ReLU-18          [-1, 512, 14, 14]               0
           Conv2d-19          [-1, 512, 14, 14]       2,359,808
             ReLU-20          [-1, 512, 14, 14]               0
        MaxPool2d-21            [-1, 512, 7, 7]               0
           Linear-22                 [-1, 4096]     102,764,544
             ReLU-23                 [-1, 4096]               0
           Linear-24                 [-1, 4096]      16,781,312
             ReLU-25                 [-1, 4096]               0
           Linear-26                 [-1, 1000]       4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 125.12
Params size (MB): 506.83
Estimated Total Size (MB): 632.53
----------------------------------------------------------------
torch.Size([10, 1000])
  • torchsummary can be used to inspect the model's parameter counts
  • For VGG, PyTorch Sequential lets you group the convolutional layers into blocks and forward through them block by block; the OrderedDict keys become the submodule names, as sketched below
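
A minimal sketch (assuming the VGGNet class above): the names passed via OrderedDict can be used to list or look up individual layers.

model = VGGNet()
for name, module in model.conv1.named_children():
    print(name, '->', module.__class__.__name__)
# conv3-64 -> Conv2d
# conv3-64-act -> ReLU
# maxpool1 -> MaxPool2d
print(model.conv1.get_submodule('conv3-64'))  # Conv2d(3, 64, kernel_size=(3, 3), ...)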

#️⃣ VGG13 (Pytorch Sequential Block ver.)

  • VGG13 is the VGG11 model with one convolutional layer added to each of the first and second blocks

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

full code

import torch  
from torch import nn  
from collections import OrderedDict  
from torchsummary import summary  
  
  
class VGG13(nn.Module):  
    def __init__(self):  
        super(VGG13, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            ('conv3-64-1', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-1-act', nn.ReLU()),  
            ('conv3-64-2', nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)),  
            ('conv3-64-2-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128-1', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-1-act', nn.ReLU()),  
            ('conv3-128-2', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)),  
            ('conv3-128-2-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000))  
        ]))  
  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
  
def run_vgg13():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG13()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  

  
if __name__ == '__main__':  
    run_vgg13()  
    # run_vgg19()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [16, 64, 224, 224]           1,792
              ReLU-2         [16, 64, 224, 224]               0
            Conv2d-3         [16, 64, 224, 224]          36,928
              ReLU-4         [16, 64, 224, 224]               0
         MaxPool2d-5         [16, 64, 112, 112]               0
            Conv2d-6        [16, 128, 112, 112]          73,856
              ReLU-7        [16, 128, 112, 112]               0
            Conv2d-8        [16, 128, 112, 112]         147,584
              ReLU-9        [16, 128, 112, 112]               0
        MaxPool2d-10          [16, 128, 56, 56]               0
           Conv2d-11          [16, 256, 56, 56]         295,168
             ReLU-12          [16, 256, 56, 56]               0
           Conv2d-13          [16, 256, 56, 56]         590,080
             ReLU-14          [16, 256, 56, 56]               0
        MaxPool2d-15          [16, 256, 28, 28]               0
           Conv2d-16          [16, 512, 28, 28]       1,180,160
             ReLU-17          [16, 512, 28, 28]               0
           Conv2d-18          [16, 512, 28, 28]       2,359,808
             ReLU-19          [16, 512, 28, 28]               0
        MaxPool2d-20          [16, 512, 14, 14]               0
           Conv2d-21          [16, 512, 14, 14]       2,359,808
             ReLU-22          [16, 512, 14, 14]               0
           Conv2d-23          [16, 512, 14, 14]       2,359,808
             ReLU-24          [16, 512, 14, 14]               0
        MaxPool2d-25            [16, 512, 7, 7]               0
           Linear-26                 [16, 4096]     102,764,544
             ReLU-27                 [16, 4096]               0
           Linear-28                 [16, 4096]      16,781,312
             ReLU-29                 [16, 4096]               0
           Linear-30                 [16, 1000]       4,097,000
================================================================
Total params: 133,047,848
Trainable params: 133,047,848
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3177.93
Params size (MB): 507.54
Estimated Total Size (MB): 3694.66
----------------------------------------------------------------
torch.Size([8, 1000])

#️⃣ VGG19 (Pytorch Sequential Block ver.)

  • VGG19 is the VGG16 model with one convolutional layer added to each of blocks 3 through 5 (four convs per block instead of three)

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-1000
                          conv3-256    conv3-512    conv3-512    soft-max
                          maxpool      maxpool      maxpool

full code

import torch  
from torch import nn  
from collections import OrderedDict  
from torchsummary import summary  

  
class VGG19(nn.Module):  
    def __init__(self):  
        super(VGG19, self).__init__()  
        self.conv1 = nn.Sequential(OrderedDict([  
            ('conv3-64-1', nn.Conv2d(in_channels=3, out_channels=64,   
                                     kernel_size=3, padding=1)),  
            ('conv3-64-1-act', nn.ReLU()),  
            ('conv3-64-2', nn.Conv2d(in_channels=64, out_channels=64,   
                                     kernel_size=3, padding=1)),  
            ('conv3-64-2-act', nn.ReLU()),  
            ('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv2 = nn.Sequential(OrderedDict([  
            ('conv3-128-1', nn.Conv2d(in_channels=64, out_channels=128,   
                                      kernel_size=3, padding=1)),  
            ('conv3-128-1-act', nn.ReLU()),  
            ('conv3-128-2', nn.Conv2d(in_channels=128, out_channels=128,   
                                      kernel_size=3, padding=1)),  
            ('conv3-128-2-act', nn.ReLU()),  
            ('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv3 = nn.Sequential(OrderedDict([  
            ('conv3-256-1', nn.Conv2d(in_channels=128, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-1-act', nn.ReLU()),  
            ('conv3-256-2', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-2-act', nn.ReLU()),  
            ('conv3-256-3', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-3-act', nn.ReLU()),  
            ('conv3-256-4', nn.Conv2d(in_channels=256, out_channels=256,   
                                      kernel_size=3, padding=1)),  
            ('conv3-256-4-act', nn.ReLU()),  
            ('maxpool3', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv4 = nn.Sequential(OrderedDict([  
            ('conv3-512-1', nn.Conv2d(in_channels=256, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-1-act', nn.ReLU()),  
            ('conv3-512-2', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-2-act', nn.ReLU()),  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('maxpool4', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.conv5 = nn.Sequential(OrderedDict([  
            ('conv3-512-3', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-3-act', nn.ReLU()),  
            ('conv3-512-4', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-4-act', nn.ReLU()),  
            ('conv3-512-5', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-5-act', nn.ReLU()),  
            ('conv3-512-6', nn.Conv2d(in_channels=512, out_channels=512,   
                                      kernel_size=3, padding=1)),  
            ('conv3-512-6-act', nn.ReLU()),  
            ('maxpool5', nn.MaxPool2d(kernel_size=2, stride=2))  
        ]))  
  
        self.classifier = nn.Sequential(OrderedDict([  
            ('fc-4096-1', nn.Linear(in_features=512*7*7, out_features=4096)),  
            ('fc-4096-1-act', nn.ReLU()),  
            ('fc-4096-2', nn.Linear(in_features=4096, out_features=4096)),  
            ('fc-4096-2-act', nn.ReLU()),  
            ('fc-1000', nn.Linear(in_features=4096, out_features=1000))  
        ]))  
  
    def forward(self, x):  
        x = self.conv1(x)  
        x = self.conv2(x)  
        x = self.conv3(x)  
        x = self.conv4(x)  
        x = self.conv5(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
  
def run_vgg19():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG19()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    run_vgg19()
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [16, 64, 224, 224]           1,792
              ReLU-2         [16, 64, 224, 224]               0
            Conv2d-3         [16, 64, 224, 224]          36,928
              ReLU-4         [16, 64, 224, 224]               0
         MaxPool2d-5         [16, 64, 112, 112]               0
            Conv2d-6        [16, 128, 112, 112]          73,856
              ReLU-7        [16, 128, 112, 112]               0
            Conv2d-8        [16, 128, 112, 112]         147,584
              ReLU-9        [16, 128, 112, 112]               0
        MaxPool2d-10          [16, 128, 56, 56]               0
           Conv2d-11          [16, 256, 56, 56]         295,168
             ReLU-12          [16, 256, 56, 56]               0
           Conv2d-13          [16, 256, 56, 56]         590,080
             ReLU-14          [16, 256, 56, 56]               0
           Conv2d-15          [16, 256, 56, 56]         590,080
             ReLU-16          [16, 256, 56, 56]               0
           Conv2d-17          [16, 256, 56, 56]         590,080
             ReLU-18          [16, 256, 56, 56]               0
        MaxPool2d-19          [16, 256, 28, 28]               0
           Conv2d-20          [16, 512, 28, 28]       1,180,160
             ReLU-21          [16, 512, 28, 28]               0
           Conv2d-22          [16, 512, 28, 28]       2,359,808
             ReLU-23          [16, 512, 28, 28]               0
           Conv2d-24          [16, 512, 28, 28]       2,359,808
             ReLU-25          [16, 512, 28, 28]               0
           Conv2d-26          [16, 512, 28, 28]       2,359,808
             ReLU-27          [16, 512, 28, 28]               0
        MaxPool2d-28          [16, 512, 14, 14]               0
           Conv2d-29          [16, 512, 14, 14]       2,359,808
             ReLU-30          [16, 512, 14, 14]               0
           Conv2d-31          [16, 512, 14, 14]       2,359,808
             ReLU-32          [16, 512, 14, 14]               0
           Conv2d-33          [16, 512, 14, 14]       2,359,808
             ReLU-34          [16, 512, 14, 14]               0
           Conv2d-35          [16, 512, 14, 14]       2,359,808
             ReLU-36          [16, 512, 14, 14]               0
        MaxPool2d-37            [16, 512, 7, 7]               0
           Linear-38                 [16, 4096]     102,764,544
             ReLU-39                 [16, 4096]               0
           Linear-40                 [16, 4096]      16,781,312
             ReLU-41                 [16, 4096]               0
           Linear-42                 [16, 1000]       4,097,000
================================================================
Total params: 143,667,240
Trainable params: 143,667,240
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 3814.93
Params size (MB): 548.05
Estimated Total Size (MB): 4372.17
----------------------------------------------------------------
torch.Size([8, 1000])

#️⃣ Characteristics of VGG

  1. A repeating structure of Conv - ReLU - Pool or Conv - ReLU - Conv - ReLU - Pool
  2. Every Conv uses a fixed kernel size of 3 and padding of 1
  3. When Conv - ReLU repeats, the second and later Conv - ReLU keep the previous channel count
  4. After the Conv - ReLU repetitions comes max pooling (see the check below)
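
A quick check of points 2 and 4 (a sketch): with kernel_size=3 and padding=1 the convolution preserves spatial size, since (H + 2*1 - 3)/1 + 1 = H, while each 2x2 max pool halves it.

import torch
from torch import nn

x = torch.randn(1, 3, 224, 224)
conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)

print(conv(x).shape)        # torch.Size([1, 64, 224, 224]) -- size preserved
print(pool(conv(x)).shape)  # torch.Size([1, 64, 112, 112]) -- halved by pooling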

#️⃣ Building the VGG Block

base code

class ConvBlockBase(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlockBase, self).__init__()  
  
        self.layers = [  
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                      kernel_size=3, padding=1),  
            nn.ReLU()  
        ]  
  
        for _ in range(n_layers - 1):  
            self.layers.append(nn.Conv2d(in_channels=out_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x

  • In self.layers = nn.Sequential(*self.layers), passing a list or tuple with an asterisk unpacks it into individual positional arguments inside the function/class, as illustrated below
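
A tiny illustration (a sketch, independent of the VGG code): the asterisk expands the list into separate arguments.

from torch import nn

layers = [nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.ReLU()]
block = nn.Sequential(*layers)   # same as nn.Sequential(layers[0], layers[1])
print(block)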

Refactoring the base code so that a single for loop adds all the layers:

new code

class ConvBlock(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlock, self).__init__()  
  
        self.layers = list()  
  
        for i in range(n_layers):    
            self.layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
            in_channels = out_channels
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x
  • Inside the for loop, the 0th layer's input is the given in_channels,
  • but every later layer takes the previous out_channels as its input,
  • so in_channels = out_channels is assigned at the end of each iteration (checked below)
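
A quick check of the channel bookkeeping (assuming the ConvBlock above):

block = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
print(block.layers)
# Sequential(
#   (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (1): ReLU()
#   (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (3): ReLU()
#   (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# )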

#️⃣ Implementing VGG with the VGG Block

[1] VGG11

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-4096
                          maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

code

class VGG11Block(nn.Module):  
    def __init__(self):  
        super(VGG11Block, self).__init__()  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=1)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=1)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=2)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=2)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=2)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vgg11_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG11Block()  
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    run_vgg11_block()


'''
torch.Size([8, 1000])
'''
  • ConvBlock() is identical to the new code above

[2] VGG13

architecture

input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      maxpool      maxpool      maxpool      FC-1000
                                                                 soft-max

code

class VGG13Block(nn.Module):  
    def __init__(self):  
        super(VGG13Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=2)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=2)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=2)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*7*7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
  
  
def run_vgg13_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG13Block()  
    # summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    # run_vgg11_block()    
    run_vgg13_block()

'''
torch.Size([8, 1000])
'''

[3] VGG19

architecture
input(224x224 RGB image)

conv1        conv2        conv3        conv4        conv5        classifier
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
conv3-64     conv3-128    conv3-256    conv3-512    conv3-512    FC-4096
maxpool      maxpool      conv3-256    conv3-512    conv3-512    FC-1000
                          conv3-256    conv3-512    conv3-512    soft-max
                          maxpool      maxpool      maxpool

code

class VGG19Block(nn.Module):  
    def __init__(self):  
        super(VGG19Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=4)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=4)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=4)  
  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*7*7, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=1000)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x  
 
  
def run_vgg19_block():  
    test_data = torch.randn((8, 3, 224, 224))  
    model = VGG19Block()  
    summary(model, input_size=(3, 224, 224), batch_size=16, device='cpu')  
    pred = model.forward(test_data)  
    print(pred.shape)  
  
if __name__ == '__main__':  
    # run_vgg13()  
    # run_vgg19()    
    # run_conv_block()    
    # run_vgg11_block()    
    # run_vgg13_block()    
    run_vgg19_block()

'''
torch.Size([8, 1000])
'''

VGGNet Parameter Numbers

#️⃣ Calculating the Number of Parameters

(figure: Drawing 2023-11-28 15.00.25.excalidraw.png)

  • A single kernel has 3x3 = 9 weights
  • The kernel's channel count equals the input's channel count of 3
  • So one filter has 9 x 3 = 27 weights (see the check below)
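
These counts can be read off the weight tensor directly (a sketch):

from torch import nn

conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
print(conv.weight.shape)  # torch.Size([64, 3, 3, 3]) -> 3x3x3 = 27 weights per filter
print(conv.bias.shape)    # torch.Size([64]) -> one bias per output channel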

[1] block = ConvBlock(in_channels=3, out_channels=64, n_layers=1)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
MaxPool2d-3     (-1, 64, 50, 50)      0

Param # = (kernel × kernel × input channels + 1) × output channels
(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792

[2] block = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
Conv2d-3        (-1, 64, 100, 100)    36,928
ReLU-4          (-1, 64, 100, 100)    0
MaxPool2d-5     (-1, 64, 50, 50)      0

(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928
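
Both counts can be verified in code (a sketch):

from torch import nn

def n_params(m):
    return sum(p.numel() for p in m.parameters())

print(n_params(nn.Conv2d(3, 64, kernel_size=3, padding=1)))   # (3*3*3 + 1) * 64 = 1792
print(n_params(nn.Conv2d(64, 64, kernel_size=3, padding=1)))  # (3*3*64 + 1) * 64 = 36928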

[3] block = ConvBlock(in_channels=3, out_channels=64, n_layers=3)
summary(block, input_size=(3, 100, 100)) gives:

Layer (type)    Output Shape          Param #
Conv2d-1        (-1, 64, 100, 100)    1,792
ReLU-2          (-1, 64, 100, 100)    0
Conv2d-3        (-1, 64, 100, 100)    36,928
ReLU-4          (-1, 64, 100, 100)    0
Conv2d-5        (-1, 64, 100, 100)    36,928
ReLU-6          (-1, 64, 100, 100)    0
MaxPool2d-7     (-1, 64, 50, 50)      0

(filter 3x3 × 3 input channels + 1 bias) × 64 output channels = 1,792
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928
(filter 3x3 × 64 input channels + 1 bias) × 64 output channels = 36,928


Train CIFAR10 with VGG19 and LeNet-5

#️⃣ Training on the CIFAR10 Image Dataset with VGG19

import pickle  
  
import torch  
from torch import nn  
from collections import OrderedDict  
from dataclasses import dataclass  
import torch.optim as optim  
  
from torch.utils.data import DataLoader  
from torchsummary import summary  
from torchvision.datasets import CIFAR10  
from torchvision.transforms import ToTensor  
from tqdm import tqdm  
  
import matplotlib.pyplot as plt  
  
@dataclass  
class Constants:  
    N_SAMPLES: int  
    BATCH_SIZE: int  
    EPOCHS: int  
    LR: float  
    DEVICE: torch.device  
    PATH: str  
    METRIC_PATH: str  
    SEED: int  
  
  
def get_device():  
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
    print(f"curr device = {DEVICE}")  
    return DEVICE


class ConvBlock(nn.Module):  
    def __init__(self, in_channels, out_channels, n_layers):  
        super(ConvBlock, self).__init__()  
  
        self.layers = list()  
  
        for i in range(n_layers):  
            self.layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,  
                                         kernel_size=3, padding=1))  
            self.layers.append(nn.ReLU())  
            in_channels = out_channels  
  
        # add max pooling at the end  
        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))  
  
        # unpack the layer list into nn.Sequential  
        self.layers = nn.Sequential(*self.layers)  
  
    def forward(self, x):  
        x = self.layers(x)  
        return x


class VGG19Block(nn.Module):  
    def __init__(self):  
        super(VGG19Block, self).__init__()  
  
        self.conv1 = ConvBlock(in_channels=3, out_channels=64,  
                               n_layers=2)  
        self.conv2 = ConvBlock(in_channels=64, out_channels=128,  
                               n_layers=2)  
        self.conv3 = ConvBlock(in_channels=128, out_channels=256,  
                               n_layers=4)  
        self.conv4 = ConvBlock(in_channels=256, out_channels=512,  
                               n_layers=4)  
        self.conv5 = ConvBlock(in_channels=512, out_channels=512,  
                               n_layers=4)  
  
        # the original VGG classifier takes 512*7*7 (224x224 input);  
        # with CIFAR10's 32x32 input, conv5 output is 512*1*1 after the five poolings  
        self.classifier = nn.Sequential(  
            nn.Linear(in_features=512*1*1, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=4096),  
            nn.ReLU(),  
            nn.Linear(in_features=4096, out_features=10)  
        )  
  
    def forward(self, x):  
        x = self.conv1.forward(x)  
        x = self.conv2.forward(x)  
        x = self.conv3.forward(x)  
        x = self.conv4.forward(x)  
        x = self.conv5.forward(x)  
        x = x.view(x.size(0), -1)  
        x = self.classifier(x)  
        return x

def classify_cifar10(c):  
    # (50000, 32, 32, 3)  
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)  
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)  
  
    model = VGG19Block()  
  
    model = model.to(c.DEVICE)  
  
    loss_fn = nn.CrossEntropyLoss()  
    optimizer = optim.Adam(model.parameters(), lr=c.LR)  
  
    losses, accs = list(), list()  
  
    for e in range(c.EPOCHS):  
        epoch_loss, n_corrects = 0., 0  
        for X_, y_ in tqdm(dataloader):  
            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)  
  
            pred = model.forward(X_)  
            loss = loss_fn(pred, y_)  
  
            optimizer.zero_grad()  
            loss.backward()  
            optimizer.step()  
  
            epoch_loss += loss.detach()  # detach so each batch's autograd graph isn't retained  
            pred_cls = torch.argmax(pred, dim=1)  
            n_corrects += (pred_cls == y_).sum().item()  
  
        epoch_loss /= len(dataloader)  
        epoch_accr = n_corrects / c.N_SAMPLES  
  
        print(f"\n epoch {e} : loss={epoch_loss.item():.4f}, accr={epoch_accr}")  
  
        losses.append(epoch_loss.item())  
        accs.append(epoch_accr)  
  
        if e in [99, 199, 299, 399]:  
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")  
            torch.save(model, rep)  
  
    print("==============")  
    # print(f"{losses:.4f}, \n {accs=}")  
  
    # Save Model and Metrics by Epoch    
    with open(c.METRIC_PATH, 'wb') as f:  
        result = {  
            'losses': losses,  
            'accs': accs  
        }  
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)  
  
    torch.save(model, c.PATH)


def visualize(losses, accs):  
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))  
    axes[0].plot(losses)  
    axes[1].plot(accs)  
  
    axes[1].set_xlabel("Epoch", fontsize=15)  
    axes[0].set_ylabel("Loss", fontsize=15)  
    axes[1].set_ylabel("Accuracy", fontsize=15)  
    axes[0].tick_params(labelsize=10)  
    axes[1].tick_params(labelsize=10)  
    fig.suptitle("VGG19 Metrics by Epoch", fontsize=16)  
    plt.show()  
  
  
if __name__ == '__main__':   
    constants = Constants(  
        N_SAMPLES=50000,  
        BATCH_SIZE=1024,  
        EPOCHS=500,  
        LR=0.0001,  
        DEVICE=get_device(),  
        PATH="model/vgg19_cifar10.pt",  
        METRIC_PATH="model/vgg_cifar10_metrics.pkl",  
        SEED=80  
    )  
    classify_cifar10(constants)  
    with open(constants.METRIC_PATH, 'rb') as f:  
        metric_dict = pickle.load(f)  
  
    # metric_dict['losses'] = [x.item() for x in metric_dict['losses']]  
    visualize(metric_dict['losses'], metric_dict['accs'])
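
One note on persistence: torch.save(model, path) pickles the whole module object, tying the checkpoint to this exact class definition. A sketch of the more portable state_dict convention, using the same names as inside classify_cifar10:

# save only the weights, then restore into a freshly constructed model
torch.save(model.state_dict(), c.PATH)

restored = VGG19Block()
restored.load_state_dict(torch.load(c.PATH))
restored.eval()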

(figure: vgg19_lr0001.png — VGG19 loss/accuracy by epoch)

#️⃣ Training on the CIFAR10 Image Dataset with LeNet-5

initial settings

constants = Constants(  
    N_SAMPLES=50000,  
    BATCH_SIZE=128,  
    EPOCHS=300,  
    LR=0.01,  
    DEVICE=get_device(),  
    PATH="model/lenet5_cifar10.pt",  
    METRIC_PATH="model/lenet5_metrics.pkl",  
    SEED=80  
)

epoch 32 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 73.06it/s]
  0%|          | 0/391 [00:00<?, ?it/s]epoch 33 : loss=0.0183, accr=0.10248
100%|██████████| 391/391 [00:05<00:00, 71.95it/s]
  0%|          | 0/391 [00:00<?, ?it/s]epoch 34 : loss=0.0183, accr=0.10248
  • Training is not progressing: accuracy is stuck at 0.10248, i.e. random guessing across 10 classes

Trying again with the settings changed

from dataclasses import dataclass  
import pickle  
import csv  
from torchvision.datasets import CIFAR10  
import torch  
import torch.nn as nn  
from torch.optim import Adam  
from torch.utils.data import DataLoader  
  
from torchvision.transforms import ToTensor  
from tqdm import tqdm  
import matplotlib.pyplot as plt  
  
@dataclass  
class Constants:  
    N_SAMPLES: int  
    BATCH_SIZE: int  
    EPOCHS: int  
    LR: float  
    DEVICE: torch.device  
    PATH: str  
    METRIC_PATH: str  
    SEED: int  
  
  
def get_device():  
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
    print(f"curr device = {DEVICE}")  
    return DEVICE  
  
  
class LeNet(nn.Module):  
    def __init__(self, init_channel, out_features):  
        super(LeNet, self).__init__()  
        # self.cnn1 = nn.Conv2d(in_channels=init_channel, out_channels=6, kernel_size=5, padding=2)  
        self.cnn1 = nn.Conv2d(in_channels=init_channel, out_channels=6, kernel_size=5, padding=0)  # 32x32 -> 28x28  
        self.cnn1_act = nn.Tanh()  
        self.avgpool1 = nn.AvgPool2d(kernel_size=2, stride=2)  
  
        self.cnn2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)  
        self.cnn2_act = nn.Tanh()  
        self.avgpool2 = nn.AvgPool2d(kernel_size=2, stride=2)  
  
        self.cnn3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)  
        self.cnn3_act = nn.Tanh()  
  
        # self.fc1 = nn.Linear(in_features=120*2*2, out_features=84)  
        self.fc1 = nn.Linear(in_features=120, out_features=84)  # cnn3 output is 120x1x1 for a 32x32 input  
        self.fc1_act = nn.Tanh()  
  
        self.fc2 = nn.Linear(in_features=84, out_features=out_features)  
  
    def forward(self, x):  
        x = self.cnn1(x)  
        x = self.cnn1_act(x)  
        x = self.avgpool1(x)  
        x = self.cnn2(x)  
        x = self.cnn2_act(x)  
        x = self.avgpool2(x)  
        x = self.cnn3(x)  
        x = self.cnn3_act(x)  
  
        x = x.view(x.size(0), -1)  
        x = self.fc1(x)  
        x = self.fc1_act(x)  
        x = self.fc2(x)  
        return x  
  
  
def train_cifar10_w_lenet(c):  
    # CIFAR10 config  
    dataset = CIFAR10(root='data', train=True, transform=ToTensor(), download=True)  
    dataloader = DataLoader(dataset, batch_size=c.BATCH_SIZE, shuffle=True)  
  
    model = LeNet(init_channel=3, out_features=10).to(c.DEVICE)  
    loss_fn = nn.CrossEntropyLoss()  
    optimizer = Adam(model.parameters(), lr=c.LR)  
  
    losses, accs = list(), list()  
  
    for e in range(c.EPOCHS):  
        epoch_loss, n_corrects = 0., 0  
  
        for X_, y_ in tqdm(dataloader):  
            optimizer.zero_grad()  
  
            X_, y_ = X_.to(c.DEVICE), y_.to(c.DEVICE)  
            pred = model(X_)  
            loss = loss_fn(pred, y_)  
  
            loss.backward()  
            optimizer.step()  
  
            epoch_loss += loss.detach()  # detach so each batch's autograd graph isn't retained  
            pred_cls = torch.argmax(pred, dim=1)  
            n_corrects += (pred_cls == y_).sum().item()  
  
        epoch_loss /= len(dataloader)  
        # epoch_loss /= c.N_SAMPLES  
        epoch_accr = n_corrects / c.N_SAMPLES  
  
        print(f"\n epoch {e} : loss={epoch_loss.item():.4f}, accr={epoch_accr}")  
  
        losses.append(epoch_loss.item())  
        accs.append(epoch_accr)  
  
        if e in [199, 399, 599, 799]:  
            rep = c.PATH.replace(".pt", f"_ep{e}.pt")  
            torch.save(model, rep)  
  
    # print(losses)  
    # print(accs)  
    # Save Model and Metrics by Epoch    
    with open(c.METRIC_PATH, 'wb') as f:  
        result = {  
            'losses': losses,  
            'accs': accs  
        }  
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)  
  
    torch.save(model, c.PATH)  
  
    with open("model/lenet5_metrics_2.csv", 'w') as file:  
        writer = csv.writer(file)  
        writer.writerow(losses)  
        writer.writerow(accs)  
  
    visualize(losses, accs)  
  
  
def visualize(losses, accs):  
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))  
    axes[0].plot(losses)  
    axes[1].plot(accs)  
  
    axes[1].set_xlabel("Epoch", fontsize=15)  
    axes[0].set_ylabel("Loss", fontsize=15)  
    axes[1].set_ylabel("Accuracy", fontsize=15)  
    axes[0].tick_params(labelsize=10)  
    axes[1].tick_params(labelsize=10)  
    fig.suptitle("Lenet5 Metrics by Epoch", fontsize=16)  
    plt.show()  
  
  
if __name__ == '__main__':  
    constants = Constants(  
        N_SAMPLES=50000,  
        BATCH_SIZE=1024,  
        EPOCHS=1000,  
        LR=0.0001,  
        DEVICE=get_device(),  
        PATH="model/lenet5_cifar10_2.pt",  
        METRIC_PATH="model/lenet5_metrics_2.pkl",  
        SEED=80  
    )  
    train_cifar10_w_lenet(constants)

(figure: lenet5_cifar10_2.png — LeNet-5 loss/accuracy by epoch)

  • By lowering the learning rate substantially and increasing the number of epochs, the model could be trained up to 0.89 accuracy

#️⃣ Why VGG19 Struggles to Learn the CIFAR10 Dataset

VGG19 was designed around the ILSVRC 224x224 images.

  • VGG19 is the deepest of the VGG models
  • It applies pooling five times

When this model is applied to CIFAR10, the feature map is already 1x1 at the last pooling, so the remaining resolution is too low and training suffers.

Conclusion: the model needs to be slimmed down to fit the data.
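
The shrinkage is easy to trace (a sketch, assuming the CIFAR10 VGG19Block above): every block halves the spatial size of the 32x32 input.

import torch

x = torch.randn(1, 3, 32, 32)
model = VGG19Block()
for name in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']:
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# conv1 (1, 64, 16, 16)
# conv2 (1, 128, 8, 8)
# conv3 (1, 256, 4, 4)
# conv4 (1, 512, 2, 2)
# conv5 (1, 512, 1, 1)  -- one pixel per channel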
