参考博文:

1.faster-rcnn-pytorch代码

2.Deep Residual Learning for Image Recognition (thecvf.com)

3.ResNet50结构

4.Pytorch搭建Faster R-CNN目标检测平台

网络结构

该网络是从原论文搬过来的,也就是参考博文2里的内容。有兴趣的可以读读原论文。

image-20220503185916137

代码复现

具体网络结构:

1.Bottleneck

image-20220503193754428

2.ResNet50中“50”的含义:(3+4+6+3)*3+2=50,即卷积层与全连接层的层数之和(4个stage共16个Bottleneck,每个Bottleneck含3个卷积层,再加上最前面的conv1和最后的fc)。

3.ResNet101同理,只要把 model = ResNet(Bottleneck, [3, 4, 6, 3]) 改成 model = ResNet(Bottleneck, [3, 4, 23, 3]) 即可,101=(3+4+23+3)*3+2。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import math

import torch.nn as nn
from torch.hub import load_state_dict_from_url

# In the shape comments below, C0 is the number of input channels (inplanes)
# and C1 is the bottleneck width (planes); the block outputs expansion * C1
# channels.
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus shortcut.

    Args:
        inplanes: Number of channels of the input feature map (C0).
        planes: Number of channels used by the two inner convolutions (C1).
            The block outputs ``planes * expansion`` channels.
        stride: Stride of the first 1x1 convolution; values > 1 shrink the
            spatial size of the main branch.
        downsample: Optional module applied to the block input to build the
            shortcut. It must be supplied whenever the input shape differs
            from the main-branch output shape, otherwise the residual
            addition in ``forward`` fails.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Derive the output width from ``expansion`` instead of a literal 4 so
        # it always agrees with code that sizes layers via ``block.expansion``.
        out_planes = planes * self.expansion

        # (N, C0, H, W) -> (N, C1, H/stride, W/stride)
        # NOTE(review): the stride sits on this 1x1 conv; torchvision's
        # reference ResNet puts it on the 3x3 conv instead - confirm this
        # placement is intended for the pretrained weights being loaded.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        # 3x3 conv with padding=1 keeps both channels and spatial size.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # (N, C1, h, w) -> (N, expansion*C1, h, w)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # A single in-place ReLU module is reused after every stage.
        self.relu = nn.ReLU(inplace=True)
        # None (the default) means the shortcut is the identity.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No ReLU after the last BN: the activation is applied after the add.
        out = self.bn3(self.conv3(out))

        # Identity shortcut unless a projection module was provided.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)

class ResNet(nn.Module):
    """ResNet classifier built from a stem, four residual stages, and an FC head.

    Args:
        block: Residual block class. It is called as
            ``block(inplanes, planes, stride, downsample)`` and must expose an
            integer ``expansion`` attribute (output channels / ``planes``).
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Output size of the final fully connected layer.

    The shape comments use H,W,C order and assume a 600x600 RGB input, the
    size used when layers ``conv1``..``layer3`` act as a detection backbone.
    The complete ``forward`` (including ``fc``) only works when ``avgpool``
    produces a 1x1 map, e.g. for 224x224 inputs where ``layer4`` yields 7x7.
    """

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; _make_layer updates it.
        self.inplanes = 64

        # Stem: 600,600,3 -> 300,300,64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 150,150,64 -> 150,150,256
        # stride=1 keeps the spatial size; channels become 64 * expansion.
        self.layer1 = self._make_layer(block, 64, layers[0])

        # 150,150,256 -> 75,75,512
        # stride=2 halves height/width; channels become 128 * expansion.
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)

        # 75,75,512 -> 38,38,1024 (the shared feature map used by resnet50()).
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)

        # 38,38,1024 -> 19,19,2048 (resnet50() reuses layer4 in its classifier).
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Fixed 7x7 window: 19,19,2048 -> 2,2,2048, while a 7x7 map -> 1,1,2048.
        self.avgpool = nn.AvgPool2d(7)
        # fc expects exactly 512 * expansion features, i.e. a 1x1 pooled map.
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # BatchNorm starts as the identity transform (weight=1, bias=0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2.0 / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage made of ``blocks`` consecutive ``block`` instances.

        For ResNet-50, for example, the first stage repeats the bottleneck
        three times; the later stages use the same block type and differ only
        in width and repeat count.

        Args:
            block: Residual block class (see the class docstring).
            planes: Inner width of each block; the stage outputs
                ``planes * block.expansion`` channels.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; the remaining blocks use the
                block's default stride.

        Returns:
            An ``nn.Sequential`` holding the stage. ``self.inplanes`` is
            updated to the stage's output channel count as a side effect.
        """
        out_planes = planes * block.expansion

        # The first block needs a projection shortcut whenever it changes the
        # spatial size (stride != 1) or the channel count, so that the
        # shortcut matches the main branch: (N, C0, H, W) -> (N, out_planes, H/s, W/s).
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        # Every following block already receives out_planes channels.
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the full classifier and return logits of shape (N, num_classes)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # For a 224x224 input x is (N, 2048, 7, 7) here and pools to
        # (N, 2048, 1, 1). For the 600x600 example it would pool to
        # (N, 2048, 2, 2) and flatten to 8192 features, which fc rejects.
        x = self.avgpool(x)
        # (N, C, h, w) -> (N, C*h*w)
        x = x.view(x.size(0), -1)
        # (N, 512 * expansion) -> (N, num_classes)
        x = self.fc(x)
        return x

def resnet50(pretrained=False):
    """Build ResNet-50 and split it into a feature extractor and a classifier head.

    Args:
        pretrained: If true, download (or reuse from ``./model_data``) the
            checkpoint at the URL below and load it into the model before
            splitting it.

    Returns:
        A ``(features, classifier)`` tuple of ``nn.Sequential`` modules:
        ``features`` runs conv1, bn1, relu, maxpool, layer1, layer2 and layer3;
        ``classifier`` runs layer4 followed by avgpool. The model's ``fc``
        layer is not part of either module.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        state_dict = load_state_dict_from_url(
            "https://download.pytorch.org/models/resnet50-19c8e357.pth",
            model_dir="./model_data",
        )
        model.load_state_dict(state_dict)

    # Feature extractor: conv1 through layer3. For a 600x600 input this
    # yields a 38x38 feature map with 1024 channels.
    features = nn.Sequential(
        model.conv1,
        model.bn1,
        model.relu,
        model.maxpool,
        model.layer1,
        model.layer2,
        model.layer3,
    )
    # Classifier head: layer4 followed by the average pool.
    classifier = nn.Sequential(model.layer4, model.avgpool)
    return features, classifier