Resnet论文笔记

coolboy

目标检测

发布于：2022年5月3日

字数：1.1k字

时长：5分钟

参考博文：

1.faster-rcnn-pytorch代码

2.Deep Residual Learning for Image Recognition (thecvf.com)

3.ResNet50结构

4.Pytorch搭建Faster R-CNN目标检测平台

网络结构

该网络是从原论文搬过来的，也就是参考博文2里的内容。有兴趣的可以读读原论文。

代码复现

具体网络结构：

1.Bottleneck

2.Resnet50中50的含义：(3+4+6+3)*3+2=50（卷积层与全连接层的层数之和：每个Bottleneck含3个卷积层，再加上最开始的conv1和最后的fc）

3.resnet101同理，只要把model = ResNet(Bottleneck, [3, 4, 6, 3])改成model = ResNet(Bottleneck, [3, 4, 23, 3])即可，101=(3+4+23+3)*3+2

import math

import torch.nn as nn
from torch.hub import load_state_dict_from_url

class Bottleneck(nn.Module):
    """Three-convolution residual block: 1x1 -> 3x3 -> 1x1 plus a shortcut.

    The block maps ``inplanes`` input channels to ``planes * expansion``
    output channels (a 4x widening of ``planes`` with the default
    ``expansion``).

    Args:
        inplanes: Number of channels of the input feature map (C0 below).
        planes: Channel width of the two inner convolutions (C1 below).
        stride: Stride of the first 1x1 convolution; values above 1 shrink
            the spatial size of the main branch.
        downsample: Optional module applied to the block input to build the
            shortcut branch. It must produce the same shape as the main
            branch whenever ``stride != 1`` or
            ``inplanes != planes * expansion``. ``None`` adds the input
            unchanged.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Use the class attribute instead of a literal 4 so the main branch
        # always agrees with code that sizes the shortcut and the next stage
        # from ``block.expansion``.
        out_planes = planes * self.expansion

        # (N, C0, H, W) -> (N, C1, H', W'); H', W' shrink when stride > 1.
        # NOTE(review): the stride sits on the first 1x1 convolution here;
        # torchvision's reference Bottleneck puts it on the 3x3 one. Confirm
        # this is intended when torchvision checkpoints are loaded.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        # (N, C1, H', W') -> (N, C1, H', W'); padding=1 keeps the size.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # (N, C1, H', W') -> (N, expansion * C1, H', W')
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # One shared in-place ReLU is reused after bn1, bn2 and the addition.
        self.relu = nn.ReLU(inplace=True)
        # Defaults to None, in which case the shortcut is the identity.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No ReLU after bn3: the activation is applied after the addition.
        out = self.bn3(self.conv3(out))

        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)

class ResNet(nn.Module):
    """ResNet backbone followed by global pooling and a linear classifier.

    Args:
        block: Residual block class. It is instantiated as
            ``block(inplanes, planes, stride, downsample)`` and must expose
            an ``expansion`` class attribute (output channels / ``planes``).
        layers: Sequence of four integers giving the number of blocks in
            ``layer1`` .. ``layer4``.
        num_classes: Output size of the final fully connected layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.inplanes = 64

        # The shape comments below assume a 600x600x3 input image and a
        # block with expansion == 4.

        # 600,600,3 -> 300,300,64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 150,150,64 -> 150,150,256
        # stride=1 keeps the size, but the channels grow to 4*64=256, so the
        # first block still needs a projection shortcut.
        self.layer1 = self._make_layer(block, 64, layers[0])

        # 150,150,256 -> 75,75,512
        # stride=2 halves width and height; channels become 4*128=512.
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)

        # 75,75,512 -> 38,38,1024 (the shared feature map of the detector)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)

        # 38,38,1024 -> 19,19,2048 (layer4 is used by the classifier head)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # 19,19,2048 -> 1,1,2048
        # Global average pooling collapses whatever spatial size layer4
        # produces, so the flattened vector always has 512 * expansion
        # features and matches self.fc. A fixed 7x7 window only achieved
        # that for 7x7 feature maps (224x224 inputs) and made forward()
        # fail on the 600x600 input assumed above.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialisation: conv weights ~ N(0, sqrt(2 / fan_out));
        # batch-norm layers start as the identity transform.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage made of ``blocks`` consecutive residual blocks.

        For example conv2_x of ResNet-50 repeats the bottleneck three times,
        i.e. ``blocks=3``; the later stages use the same block type and only
        differ in the repeat count.

        Args:
            block: Residual block class (see the class docstring).
            planes: Inner channel width of the blocks in this stage.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; the remaining blocks use the
                block's default stride.

        Returns:
            An ``nn.Sequential`` holding the stage. ``self.inplanes`` is
            updated to ``planes * block.expansion`` as a side effect.
        """
        downsample = None
        # The first block needs a projection shortcut whenever it changes
        # the spatial size (stride != 1) or the channel count; otherwise the
        # identity shortcut would not match the main branch.
        if stride != 1 or self.inplanes != planes * block.expansion:
            # (N, C0, H, W) -> (N, expansion * C1, H', W')
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        # The remaining blocks keep both size and channels.
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map an image batch (N, 3, H, W) to class scores (N, num_classes)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # (N, 512 * expansion, h, w) -> (N, 512 * expansion, 1, 1)
        x = self.avgpool(x)
        # (N, 512 * expansion, 1, 1) -> (N, 512 * expansion)
        x = x.view(x.size(0), -1)
        # (N, 512 * expansion) -> (N, num_classes)
        x = self.fc(x)
        return x

def resnet50(pretrained=False):
    """Build a ResNet-50 and split it into a feature extractor and a head.

    Args:
        pretrained: When true, download the ResNet-50 checkpoint (cached
            under ``./model_data``) and load it into the model before
            splitting.

    Returns:
        A ``(features, classifier)`` tuple of ``nn.Sequential`` modules.
        ``features`` runs conv1 through layer3 and ``classifier`` runs
        layer4 followed by the average pooling; the final fully connected
        layer is not part of either.
    """
    backbone = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        weights = load_state_dict_from_url(
            "https://download.pytorch.org/models/resnet50-19c8e357.pth",
            model_dir="./model_data",
        )
        backbone.load_state_dict(weights)

    # Feature extraction part: conv1 up to layer3. For a 600x600 input this
    # ends in a 38,38,1024 feature map.
    features = nn.Sequential(
        backbone.conv1,
        backbone.bn1,
        backbone.relu,
        backbone.maxpool,
        backbone.layer1,
        backbone.layer2,
        backbone.layer3,
    )

    # Classification part: layer4 followed by the average pooling.
    classifier = nn.Sequential(backbone.layer4, backbone.avgpool)

    return features, classifier