Notice
Recent Posts
Recent Comments
Link
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | |||||
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |
Tags
- hackerrank
- 입문
- 3줄 논문
- Recsys-KR
- 큐
- 협업필터링
- Machine Learning Advanced
- eda
- Python
- 코딩테스트
- DilatedNet
- Segmentation
- 나는리뷰어다
- pytorch
- DFS
- Image Segmentation
- 추천시스템
- 파이썬
- Object Detection
- MySQL
- 나는 리뷰어다
- 알고리즘
- Semantic Segmentation
- 스택
- TEAM-EDA
- 한빛미디어
- 엘리스
- TEAM EDA
- 튜토리얼
- 프로그래머스
Archives
- Today
- Total
TEAM EDA
Fully Convolutional Networks (FCN) Code 본문
이전글 FCN에 이어서 코드를 한번 살펴보도록 하겠습니다. 기본적인 FCN32s의 코드는 다음과 같습니다.
import torch
import torch.nn as nn
class FCN32s(nn.Module):
    """FCN-32s semantic segmentation network (VGG16-style backbone).

    Class scores are produced at 1/32 resolution and upsampled back in a
    single 32x transposed convolution. The first convolution pads by 100 px
    so that repeated pooling plus the 7x7 "fc6" convolution cannot shrink
    small inputs to nothing; the surplus border is removed by the center
    crop at the end of ``forward``.

    Args:
        num_classes: number of segmentation classes, i.e. channels of the
            score maps (default 21 — the PASCAL VOC setting).
    """

    def __init__(self, num_classes: int = 21) -> None:
        super(FCN32s, self).__init__()
        # NOTE(review): appears unused — forward() only uses the dedicated
        # per-layer ReLU modules registered below.
        self.relu = nn.ReLU(inplace=True)
        # conv1 — padding=100 guards against the spatial size collapsing later
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv5 (original comment said "conv3" — mislabel)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # fc1 — fully-convolutional stand-in for VGG's fc6 (7x7 conv)
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()
        # fc2 — 1x1 conv stand-in for fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()
        # fc3 — per-pixel class scores, then one 32x upsampling step
        self.score_fr = nn.Conv2d(4096, num_classes, kernel_size = 1)
        self.upscore32 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 64, stride = 32)
        self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Xavier-initialize every Conv2d weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_uniform_(m.weight)
                # xavier_uniform_ cannot be applied to the 1-D bias; it raises:
                # ValueError: Fan in and fan out can not be computed for tensor with fewer than 2 dimensions
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)
        # NOTE(review): ConvTranspose2d layers keep PyTorch's default init;
        # the FCN paper initializes them as bilinear upsampling — confirm
        # whether that was intended here.

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid score maps of shape (N, num_classes, H, W).

        H and W match the input thanks to the final center crop.
        """
        # remember the input's spatial size for the final center crop
        o_h, o_w = x.size()[2], x.size()[3]
        h = self.relu1_1(self.conv1_1(x))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)
        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)
        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)
        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)
        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)
        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)
        h = self.score_fr(h)
        # single 32x upsampling back toward the input resolution
        upscore32 = self.upscore32(h)
        # center-crop away the border introduced by padding=100 and the
        # transposed convolution's kernel overlap
        u_h, u_w = upscore32.size()[2], upscore32.size()[3]
        dh, dw = (u_h - o_h)//2, (u_w - o_w)//2
        # NOTE(review): sigmoid over num_classes channels implies independent
        # per-class scores; for mutually exclusive classes a softmax (or raw
        # logits + CrossEntropyLoss) would be usual — confirm intended loss.
        return torch.sigmoid(upscore32[:, :, dh:(dh + o_h), dw:(dw + o_w)])
먼저 하나의 Convolution block은 Conv2d와 ReLU로 구성되어 있고, 첫 번째 Convolution의 Padding이 100인 것은 사이즈가 지나치게 줄어드는 것을 막아주기 위함입니다. 그리고, 마지막에 센터 부분만 Crop하는 코드를 통해 Padding에 의해 늘어난 부분을 제거한다고 보면 될 것 같습니다.
dh, dw = (u_h - o_h)//2, (u_w - o_w)//2
upscore32[:, :, dh:(dh + o_h), dw:(dw + o_w)]
16s와 8s는 위와 전체적인 구조가 동일합니다. 하지만, 내부에 Skip Connection이 들어간다는 점에서 차이가 있습니다.
class FCN16s(nn.Module):
    """FCN-16s: FCN-32s plus one skip connection from pool4.

    The 1/32-resolution class scores are upsampled 2x, fused (by addition)
    with the pool4 features projected to class channels, and the sum is
    upsampled 16x back to the input resolution. Center crops compensate for
    the size mismatches caused by padding=100 and the 7x7 fc6 convolution.

    Args:
        num_classes: number of segmentation classes, i.e. channels of the
            score maps (default 21 — the PASCAL VOC setting).
    """

    def __init__(self, num_classes: int = 21) -> None:
        super(FCN16s, self).__init__()
        # NOTE(review): appears unused — forward() only uses the dedicated
        # per-layer ReLU modules registered below.
        self.relu = nn.ReLU(inplace=True)
        # conv1 — padding=100 guards against the spatial size collapsing later
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # 1x1 projection of pool4 features to class channels (skip branch)
        self.score_pool4 = nn.Conv2d(512, num_classes, 1)
        # fc1 — fully-convolutional stand-in for VGG's fc6 (7x7 conv)
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()
        # fc2 — 1x1 conv stand-in for fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()
        # fc3 — class scores, 2x upsampling, then 16x upsampling after fusion
        self.score_fr = nn.Conv2d(4096, num_classes, kernel_size = 1)
        self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 4, stride = 2)
        self.upscore16 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 32, stride = 16)
        self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Xavier-initialize every Conv2d weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_uniform_(m.weight)
                # xavier_uniform_ cannot be applied to the 1-D bias; it raises:
                # ValueError: Fan in and fan out can not be computed for tensor with fewer than 2 dimensions
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)
        # NOTE(review): ConvTranspose2d layers keep PyTorch's default init;
        # the FCN paper initializes them as bilinear upsampling — confirm
        # whether that was intended here.

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid score maps of shape (N, num_classes, H, W)."""
        h = self.relu1_1(self.conv1_1(x))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)
        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)
        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)
        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        # keep the pool4 output for the skip connection below
        pool4 = h = self.pool4(h)
        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)
        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)
        h = self.score_fr(h)
        # 2x upsampling of the coarse class scores
        upscore2 = self.upscore2(h)
        # project pool4 to class channels, center-crop it to match upscore2
        # (sizes differ because of padding=100 and the 7x7 fc6 conv), add
        pool4 = self.score_pool4(pool4)
        dh, dw = (pool4.size()[2] - upscore2.size()[2])//2, (pool4.size()[3] - upscore2.size()[3])//2
        upscore16 = self.upscore16(upscore2 + pool4[:, :, dh:(dh + upscore2.size()[2]), dw:(dw + upscore2.size()[3])])
        # final center crop back to the input's spatial size
        dh, dw = (upscore16.size()[2] - x.size()[2])//2, (upscore16.size()[3] - x.size()[3])//2
        # NOTE(review): sigmoid over num_classes channels implies independent
        # per-class scores; confirm intended loss (softmax is usual for
        # mutually exclusive classes).
        return torch.sigmoid(upscore16[:, :, dh:(dh + x.size()[2]), dw:(dw + x.size()[3])])
16s의 코드를 보면 다음이 핵심입니다.
h = self.drop7(h)
h = self.score_fr(h)
upscore2 = self.upscore2(h) # nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 4, stride = 2)
pool4 = self.score_pool4(pool4)
dh, dw = (pool4.size()[2] - upscore2.size()[2])//2, (pool4.size()[3] - upscore2.size()[3])//2
upscore16 = self.upscore16(upscore2 + pool4[:, :, dh:(dh + upscore2.size()[2]), dw:(dw + upscore2.size()[3])])
fc6과 7을 모두 통과한 결과에 채널을 class 수만큼 맞춰주고 Transposed Convolution을 적용하는데, 이때 Transposed Convolution은 2배만큼 크기를 키워줍니다. 그리고 Skip Connection을 pool4번째와 진행하는데, 7x7 Conv과 100의 padding에 의해서 크기가 맞지 않으므로 이를 맞춰주기 위해서 pool4[:, :, dh:(dh + upscore2.size()[2]), dw:(dw + upscore2.size()[3])]
를 이용해서 크기를 조정해줍니다. 그리고, 더하기를 진행한 후 16배만큼 키우는 upscore16을 진행하는 것으로 보면 될 것 같습니다.
마지막으로 FCN8s 또한 동일한 식으로 코드를 작성할 수 있습니다.
class FCN8s(nn.Module):
    """FCN-8s: FCN-16s plus a second skip connection from pool3.

    Coarse class scores are upsampled 2x and fused with pool4 scores, the
    sum is upsampled 2x again and fused with pool3 scores, and the result
    is upsampled 8x back to the input resolution. Center crops compensate
    for the size mismatches caused by padding=100 and the 7x7 fc6 conv.

    Args:
        num_classes: number of segmentation classes, i.e. channels of the
            score maps (default 21 — the PASCAL VOC setting).
    """

    def __init__(self, num_classes: int = 21) -> None:
        super(FCN8s, self).__init__()
        # NOTE(review): appears unused — forward() only uses the dedicated
        # per-layer ReLU modules registered below.
        self.relu = nn.ReLU(inplace=True)
        # conv1 — padding=100 guards against the spatial size collapsing later
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # conv5 (original comment said "conv3" — mislabel)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        # 1x1 projections of pool3/pool4 features to class channels (skips)
        self.score_pool3 = nn.Conv2d(256, num_classes, 1)
        self.score_pool4 = nn.Conv2d(512, num_classes, 1)
        # fc1 — fully-convolutional stand-in for VGG's fc6 (7x7 conv)
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()
        # fc2 — 1x1 conv stand-in for fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()
        # fc3 — class scores; two 2x upsamplings and a final 8x upsampling
        self.score_fr = nn.Conv2d(4096, num_classes, kernel_size = 1)
        self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 4, stride = 2)
        self.upscore2_pool4 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 4, stride = 2)
        self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size = 16, stride = 8)
        self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Xavier-initialize every Conv2d weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_uniform_(m.weight)
                # xavier_uniform_ cannot be applied to the 1-D bias; it raises:
                # ValueError: Fan in and fan out can not be computed for tensor with fewer than 2 dimensions
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)
        # NOTE(review): ConvTranspose2d layers keep PyTorch's default init;
        # the FCN paper initializes them as bilinear upsampling — confirm
        # whether that was intended here.

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid score maps of shape (N, num_classes, H, W)."""
        h = self.relu1_1(self.conv1_1(x))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)
        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)
        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        # keep pool3 output for the second skip connection
        pool3 = h = self.pool3(h)
        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        # keep pool4 output for the first skip connection
        pool4 = h = self.pool4(h)
        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)
        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        h = self.relu7(self.fc7(h))
        h = self.drop7(h)
        # project skip features to class channels
        pool3 = self.score_pool3(pool3)
        pool4 = self.score_pool4(pool4)
        h = self.score_fr(h)
        # 2x upsample coarse scores, center-crop pool4 to match, fuse by sum
        upscore2 = self.upscore2(h)
        dh, dw = (pool4.size()[2] - upscore2.size()[2])//2, (pool4.size()[3] - upscore2.size()[3])//2
        upscore2_pool4 = self.upscore2_pool4(upscore2 + pool4[:, :, dh:(dh + upscore2.size()[2]), dw:(dw + upscore2.size()[3])])
        # 2x upsample again, center-crop pool3 to match, fuse, then 8x upsample
        dh, dw = (pool3.size()[2] - upscore2_pool4.size()[2])//2, (pool3.size()[3] - upscore2_pool4.size()[3])//2
        upscore8 = self.upscore8(upscore2_pool4 + pool3[:, :, dh:(dh + upscore2_pool4.size()[2]), dw:(dw + upscore2_pool4.size()[3])])
        # final center crop back to the input's spatial size
        dh, dw = (upscore8.size()[2] - x.size()[2])//2, (upscore8.size()[3] - x.size()[3])//2
        # NOTE(review): sigmoid over num_classes channels implies independent
        # per-class scores; confirm intended loss (softmax is usual for
        # mutually exclusive classes).
        return torch.sigmoid(upscore8[:, :, dh:(dh + x.size()[2]), dw:(dw + x.size()[3])])
'EDA Study > Image Segmentation' 카테고리의 다른 글
A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation (SegNet) (2) | 2021.09.21 |
---|---|
Deconvolutional Network (DeconvNet) Code (0) | 2021.09.21 |
Deconvolutional Network (DeconvNet) (4) | 2021.09.21 |
Fully Convolutional Networks (FCN) (0) | 2021.09.21 |
Multi-Scale Context Aggregation by Dilated Convolutions (DilatedNet) Review (0) | 2021.02.06 |