728x90
LeNet-5
class ImageTransform:
    """Phase-dependent preprocessing for input images.

    Two torchvision pipelines are built once and stored in
    ``data_transform`` under the keys ``'train'`` and ``'val'``:

    * ``'train'``: ``RandomResizedCrop(resize, scale=(0.5, 1.0))``,
      ``RandomHorizontalFlip()``, ``ToTensor()``, ``Normalize(mean, std)``.
    * ``'val'``: ``Resize(256)``, ``CenterCrop(resize)``, ``ToTensor()``,
      ``Normalize(mean, std)``.

    Args:
        resize: Output size given to ``RandomResizedCrop`` / ``CenterCrop``.
        mean: Mean passed to ``Normalize``.
        std: Standard deviation passed to ``Normalize``.
    """

    def __init__(self, resize, mean, std):
        # Training pipeline: random crop + flip for augmentation.
        train_pipeline = transforms.Compose([
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        # Validation pipeline: deterministic resize + center crop.
        val_pipeline = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        self.data_transform = dict(train=train_pipeline, val=val_pipeline)

    def __call__(self, img, phase):
        """Apply the pipeline registered under ``phase`` to ``img``."""
        pipeline = self.data_transform[phase]
        return pipeline(img)
ⓐ transforms.Compose: 이미지를 변형할 수 있는 방식들의 묶음
ⓑ transforms.RandomResizedCrop: 입력 이미지의 일부 영역을 무작위로 잘라 낸 뒤, 주어진 크기(resize: 224×224)로 조정
또한, scale=(0.5, 1.0)은 원래 이미지 면적의 50~100% 범위에서 잘라 낼 영역의 크기를 무작위로 정하겠다는 의미
ⓒ transforms.RandomHorizontalFlip: 주어진 확률로 이미지를 수평 반전시킴
- 이때 확률 값을 지정하지 않았으므로 기본값인 0.5의 확률로 이미지들이 수평 반전
- 즉, 훈련 이미지 중 약 절반은 좌우로 뒤집힌 상태로 사용하고, 나머지 절반은 그대로 사용
ⓓ transforms.ToTensor: ImageFolder 메서드를 비롯해서 torchvision 메서드는 이미지를 읽을 때 파이썬 이미지 라이브러리인 PIL을 사용 - PIL을 사용해서 이미지를 읽으면 생성되는 이미지는 범위가 [0, 255]이며, 배열의 차원이 (높이 H×너비 W×채널 수 C)로 표현
- 이후 효율적인 연산을 위해 torch.FloatTensor 배열로 바꾸어야 하는데, 이때 픽셀 값의
- 범위는 [0.0, 1.0] 사이가 되고 차원의 순서도 (채널 수 C×높이 H×너비 W)로 바뀜
- 이러한 작업을 수행해 주는 메서드가 ToTensor()
ⓔ transforms.Normalize: 전이 학습에서 사용하는 사전 훈련된 모델들은 대개 ImageNet 데이터셋에서 훈련 - 사전 훈련된 모델을 사용하기 위해서는 ImageNet 데이터의 각 채널별 평균과 표준편차에 맞는 정규화(normalize)를 해 주어야 함
- 즉, Normalize 메서드 안에 사용된 (mean: 0.485, 0.456, 0.406), (std: 0.229, 0.224, 0.225)는 ImageNet에서 이미지들의 RGB 채널마다 평균과 표준편차를 의미
- 참고로 OpenCV를 사용해서 이미지를 읽어 온다면 RGB 이미지가 아닌 BGR 이미지이므로 채널 순서에 주의해야 함
# Source folders, one per class.
cat_directory = 'catanddog/train/Cat/'
dog_directory = 'catanddog/train/Dog/'

# Sorted paths of every entry in each class folder, cats first.
cat_images_filepaths = sorted(os.path.join(cat_directory, name) for name in os.listdir(cat_directory))
dog_images_filepaths = sorted(os.path.join(dog_directory, name) for name in os.listdir(dog_directory))
images_filepaths = cat_images_filepaths + dog_images_filepaths

# Keep only the files for which cv2.imread does not return None.
correct_images_filepaths = list(filter(lambda path: cv2.imread(path) is not None, images_filepaths))

# Fixed seed so the shuffle, and therefore the split below, is repeatable.
random.seed(42)
random.shuffle(correct_images_filepaths)

# To improve accuracy, try a larger training set (grow validation with it):
# train_images_filepaths = correct_images_filepaths[:20000]
# val_images_filepaths = correct_images_filepaths[20000:-10]
train_images_filepaths = correct_images_filepaths[:300]      # first 300 -> training
val_images_filepaths = correct_images_filepaths[300:-10]     # middle -> validation
test_images_filepaths = correct_images_filepaths[-10:]       # last 10 -> test
print(len(train_images_filepaths), len(val_images_filepaths), len(test_images_filepaths))
- cat_images_filepaths: 불러와서 정렬
- images_filepaths = [*cat_images_filepaths, *dog_images_filepaths] : asterisk(*)는 리스트를 언패킹한다는 의미로, 두 리스트의 모든 원소를 하나의 리스트로 합침
- correct_images_filepaths: cv에서 image를 read해서 올바른 것들만 list로 추림
- train_images_filepaths: 섞은 목록에서 앞의 300개를 훈련용으로 사용하고, 마지막 10개는 테스트용, 그 사이의 나머지는 검증용으로 분리
- 300 75 10
def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Show images in a grid, titling each one with its label.

    The true label of an image is the name of its parent directory. When
    ``predicted_labels`` is empty the true label is used as the title;
    otherwise ``predicted_labels[i]`` is used. The title is green when it
    equals the true label and red otherwise.

    Args:
        images_filepaths: Paths of the images to display.
        predicted_labels: Optional labels aligned with ``images_filepaths``.
        cols: Number of grid columns.
    """
    if not images_filepaths:
        return  # nothing to draw; a 0-row grid cannot be created

    # Ceiling division so a trailing partial row still gets its own axes.
    rows = (len(images_filepaths) + cols - 1) // cols
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 or single-row grid.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()

    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        true_label = os.path.normpath(image_filepath).split(os.sep)[-2]
        predicted_label = predicted_labels[i] if predicted_labels else true_label
        color = "green" if true_label == predicted_label else "red"
        axes[i].imshow(image)
        axes[i].set_title(predicted_label, color=color)

    # Hide the frame on every cell, including unused ones in the last row.
    for axis in axes:
        axis.set_axis_off()
    plt.tight_layout()
    plt.show()


display_image_grid(test_images_filepaths)
- rows = len(images_filepaths) // cols 행 개수 계산
- figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6))
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) : convert color BGR에서 RGB로 바꿈
- true_label = os.path.normpath(image_filepath).split(os.sep)[-2] :
- catanddog/train/Dog/999.jpg 에서 [-1]은 999.jpg, [-2]는 Dog <- 라벨 값으로 쓰려고 폴더 이름을 가져옴
- predicted_label = predicted_labels[i] if predicted_labels else true_label
- ax.ravel()[i].imshow(image) ax 기준으로 idx번째에 그림 채우기
- plt.tight_layout() 이미지 여백 조정
김광석 - 혼자 남은 밤
https://www.youtube.com/watch?v=S5dcSZXdc7c
을 갑자기 추천해주심… 🎶
class DogvsCatDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for dog-vs-cat classification.

    The label comes from the file name: the text before the first ``.`` must
    be ``dog`` (label 1) or ``cat`` (label 0), e.g. ``dog.12.jpg``.

    Args:
        file_list: Sequence of image file paths (``/``-separated).
        transform: Callable invoked as ``transform(img, phase)``. When
            ``None`` the RGB PIL image is returned unchanged.
        phase: Phase name forwarded to ``transform``.
    """

    # File-name prefix -> integer class index.
    _LABELS = {'cat': 0, 'dog': 1}

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load image ``idx``, transform it, and return ``(image, label)``.

        Raises:
            ValueError: If the file name does not start with ``dog.`` or
                ``cat.``.
        """
        img_path = self.file_list[idx]
        # Resolve the label first so a bad file name fails before any I/O.
        label = self._label_from_path(img_path)

        # Force three channels: grayscale/RGBA/palette files would otherwise
        # not match a 3-channel mean/std. convert() also loads the pixel
        # data, so the file handle can be closed right away.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img, self.phase)
        return img, label

    @classmethod
    def _label_from_path(cls, img_path):
        """Map ``.../dog.N.ext`` to 1 and ``.../cat.N.ext`` to 0."""
        prefix = img_path.split('/')[-1].split('.')[0]
        try:
            return cls._LABELS[prefix]
        except KeyError:
            raise ValueError(
                "cannot derive a label from {!r}: file name must start with "
                "'dog.' or 'cat.'".format(img_path)
            ) from None
# Crop size, per-channel normalization statistics, and batch size.
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

# One dataset per phase, each with its own transform object.
train_dataset = DogvsCatDataset(
    train_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase='train',
)
val_dataset = DogvsCatDataset(
    val_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase='val',
)

# Sanity check: size of the first transformed image, then its label.
index = 0
print(train_dataset[index][0].size())
print(train_dataset[index][1])

# Only the training loader shuffles its samples.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
dataloader_dict = dict(train=train_dataloader, val=val_dataloader)

# Pull a single batch to inspect its tensor size and labels.
batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)
LeNet
class LeNet(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages and two linear layers.

    ``forward`` returns raw class scores (logits). No softmax is applied
    inside the model because ``nn.CrossEntropyLoss`` expects unnormalized
    scores; apply ``softmax`` to the output when probabilities are needed.

    Args:
        num_classes: Number of output classes.
        input_size: Height/width of the (square) input images. It is only
            used to size the first fully connected layer; the default of
            224 gives the original ``32*53*53`` input features.
    """

    def __init__(self, num_classes: int = 2, input_size: int = 224) -> None:
        super().__init__()
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(self._flattened_features(input_size), 512)
        self.relu5 = nn.ReLU()
        self.fc2 = nn.Linear(512, num_classes)

    @staticmethod
    def _flattened_features(input_size: int) -> int:
        """Return the number of features entering ``fc1`` for a square input."""
        side = input_size
        for _ in range(2):
            # Unpadded 5x5 convolution shrinks each side by 4; the 2x2
            # max-pool then halves it (rounding down).
            side = (side - 4) // 2
        if side < 1:
            raise ValueError(
                "input_size={} is too small for two 5x5 conv + 2x2 pool stages".format(input_size)
            )
        return 32 * side * side

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the ``(batch, num_classes)`` logits for a batch of images."""
        out = self.maxpool1(self.relu1(self.cnn1(x)))
        out = self.maxpool2(self.relu2(self.cnn2(out)))
        out = out.view(out.size(0), -1)
        # relu5 was defined but never applied, which left fc1 -> fc2 as a
        # purely linear map; apply it between the two linear layers.
        out = self.relu5(self.fc1(out))
        return self.fc2(out)
# Instantiate the network, move it to the target device, and print its layers.
model = LeNet()
model = model.to(device)
print(model)
LeNet(
(cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
(relu1): ReLU()
(maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
(relu2): ReLU()
(maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=89888, out_features=512, bias=True)
(relu5): ReLU()
(fc2): Linear(in_features=512, out_features=2, bias=True)
(output): Softmax(dim=1)
)
summary
# Install the torchsummary library before running this cell:
#!pip install torchsummary
from torchsummary import summary
# Print a per-layer table of output shapes and parameter counts for a 3x224x224 input.
summary(model, input_size=(3, 224, 224))
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Iterates ``model.parameters()`` and counts only the parameters whose
    ``requires_grad`` flag is set.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
# Report how many parameters will be trained.
print(f'The model has {count_parameters(model):,} trainable parameters')

# SGD with momentum over every model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

# Put the network and the cross-entropy loss on the target device.
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 220, 220] 1,216
ReLU-2 [-1, 16, 220, 220] 0
MaxPool2d-3 [-1, 16, 110, 110] 0
Conv2d-4 [-1, 32, 106, 106] 12,832
ReLU-5 [-1, 32, 106, 106] 0
MaxPool2d-6 [-1, 32, 53, 53] 0
Linear-7 [-1, 512] 46,023,168
Linear-8 [-1, 2] 1,026
Softmax-9 [-1, 2] 0
================================================================
Total params: 46,038,242
Trainable params: 46,038,242
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 19.47
Params size (MB): 175.62
Estimated Total Size (MB): 195.67
----------------------------------------------------------------
The model has 46,038,242 trainable parameters
train
def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a ``'train'`` pass (with gradient updates) followed by a
    ``'val'`` pass (no gradients). Loss and accuracy are averaged over the
    number of samples in each phase's dataset and printed. The weights from
    the epoch with the highest validation accuracy are restored before
    returning.

    Args:
        model: Network to train; called as ``model(inputs)``.
        dataloader_dict: Mapping with ``'train'`` and ``'val'`` data loaders.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        num_epoch: Number of epochs to run.

    Returns:
        ``model``, holding the best-validation-accuracy weights.
    """
    import copy  # local import: only needed to snapshot the best weights

    since = time.time()
    best_acc = 0.0
    # state_dict() returns references to the live tensors, so a deep copy is
    # required to keep a snapshot that later updates do not overwrite.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-'*20)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_corrects = 0

            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by batch size so the epoch
                # figure is a per-sample average.
                epoch_loss += loss.item() * inputs.size(0)
                epoch_corrects += torch.sum(preds == labels).item()

            num_samples = len(dataloader_dict[phase].dataset)
            # Guard against an empty dataset instead of dividing by zero.
            epoch_loss = epoch_loss / num_samples if num_samples else 0.0
            epoch_acc = epoch_corrects / num_samples if num_samples else 0.0
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Hand back the best weights rather than whatever the last epoch left.
    model.load_state_dict(best_model_wts)
    return model


num_epoch = 10
model = train_model(model, dataloader_dict, criterion, optimizer, num_epoch)
evaluation
import pandas as pd
id_list = []
pred_list = []
_id = 0

# Loop-invariant setup: build the transform and switch the model to eval
# mode once instead of on every iteration.
transform = ImageTransform(size, mean, std)
model.eval()

with torch.no_grad():
    for test_path in tqdm(test_images_filepaths):
        # The id is the second dot-separated field of the file name.
        _id = test_path.split('/')[-1].split('.')[1]

        # Force three channels so the image matches the 3-channel mean/std;
        # convert() loads the pixels, so the file can be closed right away.
        with Image.open(test_path) as img:
            img = img.convert('RGB')
        img = transform(img, phase='val')
        img = img.unsqueeze(0)  # add the batch dimension
        img = img.to(device)

        outputs = model(img)
        # Column 1 is the score for class index 1.
        # NOTE(review): F.softmax expects raw scores; if the model already
        # ends in a softmax layer this normalizes twice - confirm.
        preds = F.softmax(outputs, dim=1)[:, 1].tolist()
        id_list.append(_id)
        pred_list.append(preds[0])

res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
})

# NOTE: ids are strings here, so this sort is lexicographic ('10' < '2').
res.sort_values(by='id', inplace=True)
res.reset_index(drop=True, inplace=True)
res.to_csv('LesNet.csv', index=False)
예측결과 시각화
class_ = classes = {0:'cat', 1:'dog'}


def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Show images in a grid, titled with the class predicted for each one.

    Predictions are read from the module-level ``res`` DataFrame (columns
    ``'id'`` and ``'label'``). Each image is matched to its row by id, where
    the id is the second dot-separated field of the file name - the same
    rule the prediction code uses to fill ``res['id']``. A ``'label'`` score
    above 0.5 is shown as ``class_[1]``, otherwise ``class_[0]``.

    If several rows share an id, the first matching row is used.

    Args:
        images_filepaths: Paths of the images to display.
        predicted_labels: Unused; kept so the signature stays compatible.
        cols: Number of grid columns.

    Raises:
        KeyError: If an image has no matching row in ``res``.
    """
    if not images_filepaths:
        return  # nothing to draw; a 0-row grid cannot be created

    # Ceiling division so a trailing partial row still gets its own axes.
    rows = (len(images_filepaths) + cols - 1) // cols
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 or single-row grid.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()

    # Compare ids as strings so the lookup works whatever dtype the column has.
    known_ids = res['id'].astype(str)

    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Look up THIS image's prediction. The previous code picked a random
        # id from `res`, so titles did not correspond to the images shown.
        image_id = image_filepath.split('/')[-1].split('.')[1]
        scores = res.loc[known_ids == image_id, 'label'].values
        if len(scores) == 0:
            raise KeyError("no prediction found in res for {!r}".format(image_filepath))
        label = 1 if scores[0] > 0.5 else 0

        axes[i].imshow(image)
        axes[i].set_title(class_[label])

    # Hide the frame on every cell, including unused ones in the last row.
    for axis in axes:
        axis.set_axis_off()
    plt.tight_layout()
    plt.show()


display_image_grid(test_images_filepaths)
ALEXNET
모델 정의
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier with a two-way output.

    Layout: a five-convolution feature extractor (``features``), adaptive
    average pooling to 6x6 (``avgpool``), and a three-layer fully connected
    head (``classifier``) whose last layer has 2 outputs. ``forward``
    returns the classifier output directly; no softmax is applied.
    """

    def __init__(self) -> None:
        super().__init__()

        # Convolutional feature extractor.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fixes the spatial size at 6x6 before the linear layers.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected head ending in 2 outputs.
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 2),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run features, pooling, flatten (from dim 1), then the classifier."""
        pooled = self.avgpool(self.features(x))
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)
# Instantiate AlexNet; this rebinds `model`.
model = AlexNet()
# Move the model to the target device.
model.to(device)
AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=512, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=512, out_features=2, bias=True)
)
)
optimizer
# SGD with momentum over all model parameters, plus a cross-entropy loss.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
from torchsummary import summary
# Per-layer summary for a single 3x256x256 input.
# NOTE(review): the data pipeline in this file crops to size = 224, while this
# summary uses 256 - confirm which input size is intended.
summary(model, input_size=(3, 256, 256))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 63, 63] 23,296
ReLU-2 [-1, 64, 63, 63] 0
MaxPool2d-3 [-1, 64, 31, 31] 0
Conv2d-4 [-1, 192, 31, 31] 307,392
ReLU-5 [-1, 192, 31, 31] 0
MaxPool2d-6 [-1, 192, 15, 15] 0
Conv2d-7 [-1, 384, 15, 15] 663,936
ReLU-8 [-1, 384, 15, 15] 0
Conv2d-9 [-1, 256, 15, 15] 884,992
ReLU-10 [-1, 256, 15, 15] 0
Conv2d-11 [-1, 256, 15, 15] 590,080
ReLU-12 [-1, 256, 15, 15] 0
MaxPool2d-13 [-1, 256, 7, 7] 0
AdaptiveAvgPool2d-14 [-1, 256, 6, 6] 0
Dropout-15 [-1, 9216] 0
Linear-16 [-1, 4096] 37,752,832
ReLU-17 [-1, 4096] 0
Dropout-18 [-1, 4096] 0
Linear-19 [-1, 512] 2,097,664
ReLU-20 [-1, 512] 0
Linear-21 [-1, 2] 1,026
================================================================
Total params: 42,321,218
Trainable params: 42,321,218
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.75
Forward/backward pass size (MB): 10.90
Params size (MB): 161.44
Estimated Total Size (MB): 173.10
----------------------------------------------------------------
training
def train_model(model, dataloader_dict, criterion, optimizer, num_epoch):
    """Run ``num_epoch`` epochs of training and validation; return ``model``.

    Every epoch performs a ``'train'`` pass with gradient updates, then a
    ``'val'`` pass with gradients disabled. The per-sample average loss and
    the accuracy of each phase are printed, followed by the total elapsed
    time once all epochs are done.

    Args:
        model: Network to train; called as ``model(inputs)``.
        dataloader_dict: Mapping with ``'train'`` and ``'val'`` data loaders.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        num_epoch: Number of epochs to run.
    """
    started_at = time.time()

    for epoch_number in range(1, num_epoch + 1):
        print('Epoch {}/{}'.format(epoch_number, num_epoch))
        print('-'*20)

        for phase in ('train', 'val'):
            training = phase == 'train'
            # train(True) is train mode, train(False) is eval mode.
            model.train(training)

            loader = dataloader_dict[phase]
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(loader):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(training):
                    outputs = model(inputs)
                    preds = torch.max(outputs, 1).indices
                    loss = criterion(outputs, labels)
                    if training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels.data).sum()

            sample_count = len(loader.dataset)
            epoch_loss = running_loss / sample_count
            epoch_acc = running_corrects.double() / sample_count
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

    time_elapsed = time.time() - started_at
    minutes, seconds = time_elapsed // 60, time_elapsed % 60
    print('Training complete in {:.0f}m {:.0f}s'.format(minutes, seconds))
    return model


num_epoch = 10
model = train_model(model, dataloader_dict, criterion, optimizer, num_epoch)
predict
import pandas as pd
id_list = []
pred_list = []
_id = 0

# Loop-invariant setup: build the transform and switch the model to eval
# mode once instead of on every iteration.
transform = ImageTransform(size, mean, std)
model.eval()

with torch.no_grad():
    for test_path in tqdm(test_images_filepaths):
        # The id is the second dot-separated field of the file name.
        _id = test_path.split('/')[-1].split('.')[1]

        # Force three channels so the image matches the 3-channel mean/std;
        # convert() loads the pixels, so the file can be closed right away.
        with Image.open(test_path) as img:
            img = img.convert('RGB')
        img = transform(img, phase='val')
        img = img.unsqueeze(0)  # add the batch dimension
        img = img.to(device)

        outputs = model(img)
        # Softmax over the class dimension; column 1 is class index 1.
        preds = F.softmax(outputs, dim=1)[:, 1].tolist()
        id_list.append(_id)
        pred_list.append(preds[0])

res = pd.DataFrame({
    'id': id_list,
    'label': pred_list
})
res.to_csv('alexnet.csv', index=False)
확인
class_ = classes = {0:'cat', 1:'dog'}


def display_image_grid(images_filepaths, predicted_labels=(), cols=5):
    """Show images in a grid, titled with the class predicted for each one.

    Predictions are read from the module-level ``res`` DataFrame (columns
    ``'id'`` and ``'label'``). Each image is matched to its row by id, where
    the id is the second dot-separated field of the file name - the same
    rule the prediction code uses to fill ``res['id']``. A ``'label'`` score
    above 0.5 is shown as ``class_[1]``, otherwise ``class_[0]``.

    If several rows share an id, the first matching row is used.

    Args:
        images_filepaths: Paths of the images to display.
        predicted_labels: Unused; kept so the signature stays compatible.
        cols: Number of grid columns.

    Raises:
        KeyError: If an image has no matching row in ``res``.
    """
    if not images_filepaths:
        return  # nothing to draw; a 0-row grid cannot be created

    # Ceiling division so a trailing partial row still gets its own axes.
    rows = (len(images_filepaths) + cols - 1) // cols
    # squeeze=False keeps `ax` a 2-D array even for a 1x1 or single-row grid.
    figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 6), squeeze=False)
    axes = ax.ravel()

    # Compare ids as strings so the lookup works whatever dtype the column has.
    known_ids = res['id'].astype(str)

    for i, image_filepath in enumerate(images_filepaths):
        image = cv2.imread(image_filepath)
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Look up THIS image's prediction. The previous code picked a random
        # id from `res`, so titles did not correspond to the images shown.
        image_id = image_filepath.split('/')[-1].split('.')[1]
        scores = res.loc[known_ids == image_id, 'label'].values
        if len(scores) == 0:
            raise KeyError("no prediction found in res for {!r}".format(image_filepath))
        label = 1 if scores[0] > 0.5 else 0

        axes[i].imshow(image)
        axes[i].set_title(class_[label])

    # Hide the frame on every cell, including unused ones in the last row.
    for axis in axes:
        axis.set_axis_off()
    plt.tight_layout()
    plt.show()


display_image_grid(test_images_filepaths)
캐글 고양이 강아지 데이터셋 (bigger volume)
반응형
'Education > ICT AI 중급' 카테고리의 다른 글
4주차_15 필기 (ResNet) (0) | 2023.10.15 |
---|---|
4주차_14 필기 (VGGNet) (0) | 2023.10.15 |
3주차_12 필기 (전이학습) (1) | 2023.10.15 |
3주차_11 필기 (CNN) (1) | 2023.10.15 |
3주차_10 필기 (파이토치 배열) (1) | 2023.10.15 |