3주차_13 필기 (LeNet, Alexnet)

2023. 10. 15. 23:35

728x90

LeNet-5

class ImageTransform:
    """Phase-dependent preprocessing pipelines for image classification.

    Two torchvision pipelines are built once and stored in
    ``self.data_transform`` under the keys ``'train'`` and ``'val'``.

    Args:
        resize: Crop size passed to ``RandomResizedCrop`` (train) and
            ``CenterCrop`` (val).
        mean: Per-channel mean handed to ``transforms.Normalize``.
        std: Per-channel standard deviation handed to ``transforms.Normalize``.
    """

    def __init__(self, resize, mean, std):
        # Training: random crop covering 50-100% of the area, then a random
        # horizontal flip, tensor conversion and normalisation.
        train_steps = [
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        # Validation: deterministic resize to 256 followed by a centre crop.
        val_steps = [
            transforms.Resize(256),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        self.data_transform = {
            'train': transforms.Compose(train_steps),
            'val': transforms.Compose(val_steps),
        }

    def __call__(self, img, phase):
        """Apply the pipeline registered for ``phase`` to ``img``."""
        pipeline = self.data_transform[phase]
        return pipeline(img)

ⓐ transforms.Compose: 이미지를 변형할 수 있는 방식들의 묶음
ⓑ transforms.RandomResizedCrop: 입력 이미지를 주어진 크기(resize: 224×224)로 조정
또한, scale은 원래 이미지를 임의의 크기(0.5~1.0(50~100%))만큼 면적을 무작위로 자르겠다는 의미
ⓒ transforms.RandomHorizontalFlip: 주어진 확률로 이미지를 수평 반전시킴

이때 확률 값을 지정하지 않았으므로 기본값인 0.5의 확률로 이미지들이 수평 반전
즉, 훈련 이미지 중 절반 정도는 좌우로 뒤집힌 상태로 사용하고, 나머지 절반은 그대로 사용 (수평 반전은 위아래가 아니라 좌우 반전)
ⓓ transforms.ToTensor: ImageFolder 메서드를 비롯해서 torchvision 메서드는 이미지를 읽을 때 파이썬 이미지 라이브러리인 PIL을 사용
PIL을 사용해서 이미지를 읽으면 생성되는 이미지는 범위가 [0, 255]이며, 배열의 차원이 (높이 H×너비 W×채널 수 C)로 표현
이후 효율적인 연산을 위해 torch.FloatTensor 배열로 바꾸어야 하는데, 이때 픽셀 값의
범위는 [0.0, 1.0] 사이가 되고 차원의 순서도 (채널 수 C×높이 H×너비 W)로 바뀜
이러한 작업을 수행해 주는 메서드가 ToTensor()
ⓔ transforms.Normalize: 전이 학습에서 사용하는 사전 훈련된 모델들은 대개 ImageNet 데이터셋에서 훈련
사전 훈련된 모델을 사용하기 위해서는 ImageNet 데이터의 각 채널별 평균과 표준편차에 맞는 정규화(normalize)를 해 주어야 함
즉, Normalize 메서드 안에 사용된 (mean: 0.485, 0.456, 0.406), (std: 0.229, 0.224, 0.225)는 ImageNet에서 이미지들의 RGB 채널마다 평균과 표준편차를 의미
참고로 OpenCV를 사용해서 이미지를 읽어 온다면 RGB 이미지가 아닌 BGR 이미지이므로 채널 순서에 주의해야 함

# Source folders, one per class.
cat_directory = 'catanddog/train/Cat/'
dog_directory = 'catanddog/train/Dog/'

# Sorted full paths of every file in each class folder.
cat_images_filepaths = sorted(
    os.path.join(cat_directory, name) for name in os.listdir(cat_directory)
)
dog_images_filepaths = sorted(
    os.path.join(dog_directory, name) for name in os.listdir(dog_directory)
)
images_filepaths = cat_images_filepaths + dog_images_filepaths

# Keep only the files OpenCV can decode (cv2.imread returns None otherwise).
correct_images_filepaths = list(
    filter(lambda path: cv2.imread(path) is not None, images_filepaths)
)

# Fixed seed so the shuffled order, and therefore the split, is reproducible.
random.seed(42)
random.shuffle(correct_images_filepaths)

# Small split: first 300 for training, last 10 for testing, the rest for
# validation. For better accuracy use a larger split, e.g. [:20000] for
# training and [20000:-10] for validation (grow both together).
train_images_filepaths = correct_images_filepaths[:300]
val_images_filepaths = correct_images_filepaths[300:-10]
test_images_filepaths = correct_images_filepaths[-10:]
print(
    len(train_images_filepaths),
    len(val_images_filepaths),
    len(test_images_filepaths),
)

cat_images_filepaths: 불러와서 정렬
images_filepaths = [*cat_images_filepaths, *dog_images_filepaths] : asterisk(*)는 리스트를 언패킹하여 두 리스트의 모든 요소를 하나의 리스트로 합침
correct_images_filepaths: cv에서 image를 read해서 올바른 것들만 list로 추림
train_images_filepaths: 섞은 목록의 앞 300개는 훈련용, 마지막 10개는 테스트용, 그 사이의 나머지는 검증용으로 분리
300 75 10

def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Plot the given image files in a grid, one titled subplot per image.

    Each title shows the predicted label when ``predicted_labels`` is given,
    otherwise the true label, which is the name of the directory that contains
    the file. The title is green when the shown label equals the true label
    and red otherwise.

    Args:
        images_filepaths: Paths of the images to show.
        predicted_labels: Optional sequence of labels aligned with
            ``images_filepaths``.
        cols: Number of grid columns.

    Returns:
        None. Nothing is drawn when ``images_filepaths`` is empty.
    """
    if len(images_filepaths) == 0:
        return

    # Ceiling division: a final, partially filled row still needs axes
    # (floor division left too few subplots and raised IndexError).
    rows = -(-len(images_filepaths) // cols)
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 grid, so ravel()
    # is always available.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()
    # Hide every axis up front so unused trailing cells stay blank.
    for axis in axes:
        axis.set_axis_off()

    has_predictions = len(predicted_labels) > 0
    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # The parent directory name is the class label.
        true_label = os.path.normpath(image_filepath).split(os.sep)[-2]
        predicted_label = predicted_labels[i] if has_predictions else true_label
        color = "green" if true_label == predicted_label else "red"
        axes[i].imshow(image)
        axes[i].set_title(predicted_label, color=color)
    plt.tight_layout()
    plt.show()

# Preview the held-out test images, titled with their true labels.
display_image_grid(test_images_filepaths)

rows = len(images_filepaths) // cols 행 개수 계산
figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) : convert color BGR에서 RGB로 바꿈
true_label = os.path.normpath(image_filepath).split(os.sep)[-2] : 경로를 구분자로 나눈 뒤 뒤에서 두 번째 요소(상위 폴더명)를 가져옴
catanddog/train/Dog/999.jpg 에서 -1은 999.jpg, -2는 Dog <- 라벨값을 붙이려고 가져옴
predicted_label = predicted_labels[i] if predicted_labels else true_label
ax.ravel()[i].imshow(image) ax 기준으로 idx번째에 그림 채우기
plt.tight_layout() 이미지 여백 조정

김광석 - 혼자 남은 밤
https://www.youtube.com/watch?v=S5dcSZXdc7c
을 갑자기 추천해주심… 🎶

class DogvsCatDataset(Dataset):
    """Dataset of cat/dog image files labelled from their file names.

    The class is the text before the first ``.`` of the file name:
    ``cat`` maps to 0 and ``dog`` maps to 1 (e.g. ``dog.12.jpg`` -> 1).

    Args:
        file_list: Sequence of image file paths.
        transform: Callable invoked as ``transform(img, phase)``; when
            ``None`` the RGB PIL image is returned unchanged.
        phase: Phase name forwarded to ``transform`` (``'train'`` by default).
    """

    # File-name prefix -> integer class index.
    _LABELS = {'cat': 0, 'dog': 1}

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of image files."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``.

        Raises:
            ValueError: If the file name does not start with ``cat.`` or
                ``dog.``; a string label would otherwise only fail later,
                inside the training loop.
        """
        img_path = self.file_list[idx]

        # Resolve the label first so a badly named file fails before any I/O.
        class_name = os.path.basename(img_path).split('.')[0]
        try:
            label = self._LABELS[class_name]
        except KeyError:
            raise ValueError(
                "cannot derive a cat/dog label from file name: {!r}".format(img_path)
            ) from None

        # Close the file handle promptly and force three channels so the
        # 3-channel Normalize step also works for grayscale/RGBA files.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img, self.phase)
        return img, label
    
# Input resolution, ImageNet channel statistics and mini-batch size.
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

train_dataset = DogvsCatDataset(
    train_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase='train',
)
val_dataset = DogvsCatDataset(
    val_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase='val',
)

# Sanity check: tensor size and label of the first training sample.
index = 0
print(train_dataset[index][0].size())
print(train_dataset[index][1])

# Only the training loader shuffles; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
dataloader_dict = dict(train=train_dataloader, val=val_dataloader)

# Pull a single batch to inspect the batched size and labels.
batch_iterator = iter(train_dataloader)
first_batch = next(batch_iterator)
inputs, label = first_batch
print(inputs.size())
print(label)

LeNet

class LeNet(nn.Module):
    """LeNet-style CNN for two-class classification of 3x224x224 images.

    Two conv -> ReLU -> max-pool stages are followed by two fully connected
    layers. ``fc1`` expects 32*53*53 features, which is what a 224x224 input
    produces (224 -> 220 -> 110 -> 106 -> 53).

    ``forward`` returns raw logits. ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so feeding it softmax outputs squashes the loss
    signal. ``self.output`` (softmax over dim 1) is kept as an attribute for
    callers that want probabilities: ``model.output(model(x))``.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32*53*53, 512)
        self.relu5 = nn.ReLU()
        self.fc2 = nn.Linear(512, 2)
        # Not applied in forward(); see the class docstring.
        self.output = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for input ``x``."""
        out = self.maxpool1(self.relu1(self.cnn1(x)))
        out = self.maxpool2(self.relu2(self.cnn2(out)))
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        # relu5 was defined but never applied, which left fc1 -> fc2 as a
        # purely linear mapping.
        out = self.relu5(self.fc1(out))
        return self.fc2(out)
  
# Instantiate the network, move it to `device`, and print its layer structure.
model = LeNet().to(device)  
print(model)

LeNet(  
(cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))  
(relu1): ReLU()  
(maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)  
(cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))  
(relu2): ReLU()  
(maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)  
(fc1): Linear(in_features=89888, out_features=512, bias=True)  
(relu5): ReLU()  
(fc2): Linear(in_features=512, out_features=2, bias=True)  
(output): Softmax(dim=1)  
)

summary

# Requires the torchsummary package.
# !pip install torchsummary
  
from torchsummary import summary  
# Print a per-layer summary of the model for a (3, 224, 224) input.
summary(model, input_size=(3, 224, 224))  
  
# Walks model.parameters() to report only the total parameter count.
def count_parameters(model):
    """Return the number of scalar parameters in ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
  
# Report the number of trainable parameters.
print(f'The model has {count_parameters(model):,} trainable parameters')

# SGD with momentum as the optimizer.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Cross-entropy loss, moved to the compute device together with the model.
criterion = nn.CrossEntropyLoss().to(device)
model = model.to(device)

----------------------------------------------------------------  
Layer (type) Output Shape Param #  
================================================================  
Conv2d-1 [-1, 16, 220, 220] 1,216  
ReLU-2 [-1, 16, 220, 220] 0  
MaxPool2d-3 [-1, 16, 110, 110] 0  
Conv2d-4 [-1, 32, 106, 106] 12,832  
ReLU-5 [-1, 32, 106, 106] 0  
MaxPool2d-6 [-1, 32, 53, 53] 0  
Linear-7 [-1, 512] 46,023,168  
Linear-8 [-1, 2] 1,026  
Softmax-9 [-1, 2] 0  
================================================================  
Total params: 46,038,242  
Trainable params: 46,038,242  
Non-trainable params: 0  
----------------------------------------------------------------  
Input size (MB): 0.57  
Forward/backward pass size (MB): 19.47  
Params size (MB): 175.62  
Estimated Total Size (MB): 195.67  
----------------------------------------------------------------  
The model has 46,038,242 trainable parameters

train

def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a ``'train'`` phase (gradients enabled, optimizer
    stepped) followed by a ``'val'`` phase (evaluation mode, no gradients).
    Loss and accuracy of each phase are printed, averaged over
    ``len(dataloader_dict[phase].dataset)``.

    Args:
        model: Network to optimise; batches are moved to the global ``device``.
        dataloader_dict: Mapping with ``'train'`` and ``'val'`` data loaders
            that yield ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epoch: Number of epochs to run.

    Returns:
        The same ``model`` object, holding the weights of the epoch with the
        highest validation accuracy (the initial weights if no epoch scored
        above 0).
    """
    import copy  # local import: the file's import header is outside this block

    since = time.time()
    best_acc = 0.0
    # state_dict() returns live references to the parameters, so the best
    # weights must be deep-copied or later epochs silently overwrite them.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-'*20)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_corrects = 0

            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                # Build the autograd graph only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # loss is a batch mean; weight it by the batch size so the
                    # epoch average is per sample.
                    epoch_loss += loss.item() * inputs.size(0)
                    # Accumulate as a Python int so an empty loader does not
                    # fail on int.double() below.
                    epoch_corrects += torch.sum(preds == labels).item()

            dataset_size = len(dataloader_dict[phase].dataset)
            epoch_loss = epoch_loss / dataset_size
            epoch_acc = epoch_corrects / dataset_size

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:4f}' set a field width of 4 rather than 4 decimals; '{:.4f}' matches
    # the per-epoch accuracy format.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore the best checkpoint instead of returning last-epoch weights.
    model.load_state_dict(best_model_wts)
    return model
  
# Train for 10 epochs; train_model returns the model, which is rebound to `model`.
num_epoch = 10  
model = train_model(model, dataloader_dict, criterion, optimizer, num_epoch)

evaluation

import pandas as pd

# Score every test image and collect one (id, probability of class 1) row each.
id_list = []
pred_list = []
_id = 0

# Loop-invariant setup hoisted out of the per-image loop.
transform = ImageTransform(size, mean, std)
model.eval()

with torch.no_grad():
    for test_path in tqdm(test_images_filepaths):
        # Second dot-separated field of the file name.
        # NOTE(review): presumably names look like '<class>.<number>.<ext>' — confirm.
        _id = test_path.split('/')[-1].split('.')[1]

        # The context manager closes the file handle once the image has been
        # converted to a tensor.
        with Image.open(test_path) as img_file:
            img = transform(img_file, phase='val')
        # Add the batch dimension and move to the compute device.
        img = img.unsqueeze(0)
        img = img.to(device)

        outputs = model(img)
        preds = F.softmax(outputs, dim=1)[:, 1].tolist()
        id_list.append(_id)
        pred_list.append(preds[0])

res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
})

# The ids are strings, so this sort is lexicographic ('10' sorts before '2').
res.sort_values(by='id', inplace=True)
res.reset_index(drop=True, inplace=True)

# NOTE(review): 'LesNet.csv' looks like a typo for 'LeNet.csv'; the name is
# kept so existing consumers of the file keep working.
res.to_csv('LesNet.csv', index=False)

예측결과 시각화

# Class index -> class name used for the subplot titles.
class_ = classes = {0:'cat', 1:'dog'}
def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Plot images in a grid, each titled with its predicted class name.

    The prediction for an image is looked up in the global ``res`` frame by
    the id taken from the file name (second dot-separated field, the same
    rule that filled ``res['id']``). A score above 0.5 is shown as ``'dog'``,
    anything else as ``'cat'``; images without a row in ``res`` are titled
    ``'?'``.

    Args:
        images_filepaths: Paths of the images to show.
        predicted_labels: Unused; kept so the signature stays compatible.
        cols: Number of grid columns.

    Returns:
        None. Nothing is drawn when ``images_filepaths`` is empty.
    """
    if len(images_filepaths) == 0:
        return

    # Ceiling division so a partially filled last row still gets axes.
    rows = -(-len(images_filepaths) // cols)
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 grid.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()
    # Hide every axis up front so unused trailing cells stay blank.
    for axis in axes:
        axis.set_axis_off()

    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Look up the score of THIS image. The previous code titled each image
        # with the prediction of a randomly chosen row of `res`.
        # NOTE(review): ids are not guaranteed unique across classes; the
        # first matching row is used — confirm against the file naming.
        image_id = image_filepath.split('/')[-1].split('.')[1]
        scores = res.loc[res['id'] == image_id, 'label'].values
        if len(scores) > 0:
            title = class_[1 if scores[0] > 0.5 else 0]
        else:
            title = '?'
        axes[i].imshow(image)
        axes[i].set_title(title)
    plt.tight_layout()
    plt.show()
  
# Show the test images titled with class names derived from `res`.
display_image_grid(test_images_filepaths)

ALEXNET

모델 정의

class AlexNet(nn.Module):
    """AlexNet-style CNN ending in a two-class linear head.

    ``features`` holds five convolutions with ReLU and three max-pools,
    ``avgpool`` adaptively pools the feature maps to 6x6, and ``classifier``
    maps the flattened 256*6*6 features to 2 outputs through two hidden
    layers with dropout.
    """

    def __init__(self) -> None:
        super().__init__()

        # Convolutional feature extractor.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fixed 6x6 spatial output regardless of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected head.
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 2),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the ``(batch, 2)`` classifier outputs for image batch ``x``."""
        pooled = self.avgpool(self.features(x))
        # Flatten everything except the batch dimension.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

# Instantiate AlexNet and move its parameters to `device`.
model = AlexNet()  
model.to(device)

AlexNet(  
(features): Sequential(  
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))  
(1): ReLU(inplace=True)  
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)  
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))  
(4): ReLU(inplace=True)  
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)  
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  
(7): ReLU(inplace=True)  
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  
(9): ReLU(inplace=True)  
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  
(11): ReLU(inplace=True)  
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)  
)  
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))  
(classifier): Sequential(  
(0): Dropout(p=0.5, inplace=False)  
(1): Linear(in_features=9216, out_features=4096, bias=True)  
(2): ReLU(inplace=True)  
(3): Dropout(p=0.5, inplace=False)  
(4): Linear(in_features=4096, out_features=512, bias=True)  
(5): ReLU(inplace=True)  
(6): Linear(in_features=512, out_features=2, bias=True)  
)  
)

optimizer

# SGD with momentum and cross-entropy loss for the AlexNet model.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  
criterion = nn.CrossEntropyLoss()  
  
from torchsummary import summary  
# NOTE(review): the summary uses a 256x256 input although `size` is 224
# elsewhere in this file — confirm which resolution is intended.
summary(model, input_size=(3, 256, 256))

----------------------------------------------------------------  
Layer (type) Output Shape Param #  
================================================================  
Conv2d-1 [-1, 64, 63, 63] 23,296  
ReLU-2 [-1, 64, 63, 63] 0  
MaxPool2d-3 [-1, 64, 31, 31] 0  
Conv2d-4 [-1, 192, 31, 31] 307,392  
ReLU-5 [-1, 192, 31, 31] 0  
MaxPool2d-6 [-1, 192, 15, 15] 0  
Conv2d-7 [-1, 384, 15, 15] 663,936  
ReLU-8 [-1, 384, 15, 15] 0  
Conv2d-9 [-1, 256, 15, 15] 884,992  
ReLU-10 [-1, 256, 15, 15] 0  
Conv2d-11 [-1, 256, 15, 15] 590,080  
ReLU-12 [-1, 256, 15, 15] 0  
MaxPool2d-13 [-1, 256, 7, 7] 0  
AdaptiveAvgPool2d-14 [-1, 256, 6, 6] 0  
Dropout-15 [-1, 9216] 0  
Linear-16 [-1, 4096] 37,752,832  
ReLU-17 [-1, 4096] 0  
Dropout-18 [-1, 4096] 0  
Linear-19 [-1, 512] 2,097,664  
ReLU-20 [-1, 512] 0  
Linear-21 [-1, 2] 1,026  
================================================================  
Total params: 42,321,218  
Trainable params: 42,321,218  
Non-trainable params: 0  
----------------------------------------------------------------  
Input size (MB): 0.75  
Forward/backward pass size (MB): 10.90  
Params size (MB): 161.44  
Estimated Total Size (MB): 173.10  
----------------------------------------------------------------

training

def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):
    """Train ``model`` for ``num_epoch`` epochs and return it.

    Every epoch runs a ``'train'`` phase (gradients enabled, optimizer
    stepped) followed by a ``'val'`` phase (evaluation mode, no gradients).
    Loss and accuracy of each phase are printed, averaged over
    ``len(dataloader_dict[phase].dataset)``.

    Args:
        model: Network to optimise; batches are moved to the global ``device``.
        dataloader_dict: Mapping with ``'train'`` and ``'val'`` data loaders
            that yield ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epoch: Number of epochs to run.

    Returns:
        The same ``model`` object with the weights of the last epoch.
    """
    since = time.time()

    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-'*20)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_corrects = 0

            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                # Build the autograd graph only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # loss is a batch mean; weight it by the batch size so the
                    # epoch average is per sample.
                    epoch_loss += loss.item() * inputs.size(0)
                    # Accumulate as a Python int so an empty loader does not
                    # fail on int.double() below.
                    epoch_corrects += torch.sum(preds == labels).item()

            dataset_size = len(dataloader_dict[phase].dataset)
            epoch_loss = epoch_loss / dataset_size
            epoch_acc = epoch_corrects / dataset_size

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model


# Train AlexNet for 10 epochs; the returned model is rebound to `model`.
num_epoch = 10  
model = train_model(model, dataloader_dict, criterion, optimizer, num_epoch)

predict

import pandas as pd

# Score every test image and collect one (id, probability of class 1) row each.
id_list = []
pred_list = []
_id = 0

# Loop-invariant setup hoisted out of the per-image loop.
transform = ImageTransform(size, mean, std)
model.eval()

with torch.no_grad():
    for test_path in tqdm(test_images_filepaths):
        # Second dot-separated field of the file name.
        # NOTE(review): presumably names look like '<class>.<number>.<ext>' — confirm.
        _id = test_path.split('/')[-1].split('.')[1]

        # The context manager closes the file handle once the image has been
        # converted to a tensor.
        with Image.open(test_path) as img_file:
            img = transform(img_file, phase='val')
        # Add the batch dimension and move to the compute device.
        img = img.unsqueeze(0)
        img = img.to(device)

        outputs = model(img)
        # The model returns logits; softmax turns them into probabilities.
        preds = F.softmax(outputs, dim=1)[:, 1].tolist()

        id_list.append(_id)
        pred_list.append(preds[0])

# Rows stay in the order of test_images_filepaths (no sorting here).
res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
})
res.to_csv('alexnet.csv', index=False)

확인

# Class index -> class name used for the subplot titles.
class_ = classes = {0:'cat', 1:'dog'}
def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Plot images in a grid, each titled with its predicted class name.

    The prediction for an image is looked up in the global ``res`` frame by
    the id taken from the file name (second dot-separated field, the same
    rule that filled ``res['id']``). A score above 0.5 is shown as ``'dog'``,
    anything else as ``'cat'``; images without a row in ``res`` are titled
    ``'?'``.

    Args:
        images_filepaths: Paths of the images to show.
        predicted_labels: Unused; kept so the signature stays compatible.
        cols: Number of grid columns.

    Returns:
        None. Nothing is drawn when ``images_filepaths`` is empty.
    """
    if len(images_filepaths) == 0:
        return

    # Ceiling division so a partially filled last row still gets axes.
    rows = -(-len(images_filepaths) // cols)
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 grid.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()
    # Hide every axis up front so unused trailing cells stay blank.
    for axis in axes:
        axis.set_axis_off()

    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Look up the score of THIS image. The previous code titled each image
        # with the prediction of a randomly chosen row of `res`.
        # NOTE(review): ids are not guaranteed unique across classes; the
        # first matching row is used — confirm against the file naming.
        image_id = image_filepath.split('/')[-1].split('.')[1]
        scores = res.loc[res['id'] == image_id, 'label'].values
        if len(scores) > 0:
            title = class_[1 if scores[0] > 0.5 else 0]
        else:
            title = '?'
        axes[i].imshow(image)
        axes[i].set_title(title)
    plt.tight_layout()
    plt.show()
# Show the test images titled with class names derived from `res`.
display_image_grid(test_images_filepaths)

캐글 고양이 강아지 데이터셋 (bigger volume)

https://www.kaggle.com/c/dogs-vs-cats/data

'Education > ICT AI 중급' 카테고리의 다른 글

4주차_15 필기 (ResNet) (1)	2023.10.15
4주차_14 필기 (VGGNet) (0)	2023.10.15
3주차_12 필기 (전이학습) (1)	2023.10.15
3주차_11 필기 (CNN) (1)	2023.10.15
3주차_10 필기 (파이토치 배열) (1)	2023.10.15

Shijuan's AI Diary