AlexNet: A Deep Convolutional Neural Network

Introduction to AlexNet

AlexNet was the winning network of the ILSVRC 2012 (ImageNet Large Scale Visual Recognition Challenge) competition, raising classification accuracy from the 70%+ of traditional methods to 80%+. It was designed by Hinton and his student Alex Krizhevsky. It was also after that year that deep learning began to develop rapidly.


Advantages of AlexNet

  • The paper trains the network on multiple GPUs in parallel, which speeds up the training process.

  • The ReLU activation function is used in place of the traditional Sigmoid and Tanh activation functions, which saturate for large inputs and slow down learning.

  • Local Response Normalization (LRN)

    • LRN is a technique commonly used in convolutional neural networks. By normalizing each neuron's output against its neighbors, it reduces interference between neurons at certain layers and thereby improves the network's generalization ability.

  • Dropout

    • Dropout randomly deactivates a fraction of the neurons during the network's forward pass at training time, which reduces overfitting. A short sketch of all three techniques in PyTorch follows this list.
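
All three techniques map directly onto built-in PyTorch modules. A minimal sketch, assuming a dummy Conv1-sized activation (the variable names are ours; the LRN hyperparameters are the values reported in the paper):

import torch
import torch.nn as nn

x = torch.randn(1, 96, 55, 55)       # dummy activation the size of Conv 1's output
relu = nn.ReLU(inplace=True)         # max(0, x); does not saturate like Sigmoid/Tanh
lrn = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0)  # paper: n=5, k=2
drop = nn.Dropout(p=0.5)             # randomly zeroes half the activations in training
y = drop(lrn(relu(x)))
print(y.shape)                       # torch.Size([1, 96, 55, 55]) -- shapes unchanged

In the paper, Dropout is applied to the first two fully connected layers rather than to convolutional feature maps; it is chained here only to show the modules side by side.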

The AlexNet Convolution Process

The size of the matrix after convolution is given by N = (W − F + 2P) / S + 1, floored when the result is not an integer, where (a small Python helper evaluating the formula follows the list):

  1. W × W — input image size
  2. F × F — filter size
  3. S — stride
  4. P — number of padding pixels
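
As a quick sanity check, the formula can be evaluated in a couple of lines of Python (the helper name conv_output_size is ours; integer division mirrors the floor that frameworks such as PyTorch apply when the result is fractional):

def conv_output_size(w, f, s, p_total):
    # N = (W - F + P_total) / S + 1, floored; p_total = 2P for symmetric padding
    return (w - f + p_total) // s + 1

print(conv_output_size(224, 11, 4, 3))  # Conv 1 below, padding [1, 2] -> 55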


Conv 1:

  • kernels: 48 * 2 = 96
  • kernel_size: 11
  • padding: [1, 2] (one pixel of padding on the left and top edges, two pixels on the right and bottom edges)
  • stride: 4

N = (W − F + 2P) / S + 1 = (224 − 11 + 1 + 2) / 4 + 1 = 55 (with the asymmetric padding, the 2P term becomes 1 + 2 = 3)

input_size = [224,224,3] output_size = [55,55,96]
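
Note that nn.Conv2d only supports symmetric padding, so the [1, 2] padding cannot be expressed through its padding argument alone; a sketch using nn.ZeroPad2d reproduces it exactly (this is an illustration, not the model code shown later):

import torch
import torch.nn as nn

pad = nn.ZeroPad2d((1, 2, 1, 2))                    # (left, right, top, bottom)
conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4)
x = torch.zeros(1, 3, 224, 224)
print(conv1(pad(x)).shape)                          # torch.Size([1, 96, 55, 55])

In practice, padding=2 produces the same 55 × 55 output because the fractional result is floored: (224 − 11 + 4) / 4 + 1 = 55.25 → 55; the model code below takes that shortcut.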


Maxpool 1:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height while leaving the channel count unchanged: N = (55 − 3) / 2 + 1 = 27.

input_size = [55,55,96] output_size = [27,27,96]


Conv 2:

  • kernels: 128 * 2 = 256
  • kernel_size: 5
  • padding: [2, 2]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (27 - 5 + 2 * 2) / 1 + 1 = 27

input_size = [27,27,96] output_size = [27,27,256]


Maxpool 2:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height while leaving the channel count unchanged: N = (27 − 3) / 2 + 1 = 13.

input_size = [27,27,256] output_size = [13,13,256]


Conv 3:

  • kernels: 192 * 2 = 384
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,256] output_size = [13,13,384]

Conv 4:

  • kernels: 192 * 2 = 384
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,384] output_size = [13,13,384]

Conv 5:

  • kernels: 128 * 2 = 256
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,384] output_size = [13,13,256]

Maxpool 3:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height one last time while leaving the channel count unchanged: N = (13 − 3) / 2 + 1 = 6. The resulting 6 × 6 × 256 feature map is flattened before entering the fully connected classifier.

input_size = [13,13,256] output_size = [6,6,256]
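
The whole walkthrough can be verified end to end with a shape-only sketch (ReLU layers are omitted since they do not change shapes; full two-branch channel counts are used, matching the figures above):

import torch
import torch.nn as nn

features = nn.Sequential(
    nn.ZeroPad2d((1, 2, 1, 2)),          # Conv 1's [1, 2] padding
    nn.Conv2d(3, 96, 11, stride=4),      # -> [55, 55, 96]
    nn.MaxPool2d(3, stride=2),           # -> [27, 27, 96]
    nn.Conv2d(96, 256, 5, padding=2),    # -> [27, 27, 256]
    nn.MaxPool2d(3, stride=2),           # -> [13, 13, 256]
    nn.Conv2d(256, 384, 3, padding=1),   # -> [13, 13, 384]
    nn.Conv2d(384, 384, 3, padding=1),   # -> [13, 13, 384]
    nn.Conv2d(384, 256, 3, padding=1),   # -> [13, 13, 256]
    nn.MaxPool2d(3, stride=2),           # -> [6, 6, 256]
)
x = torch.zeros(1, 3, 224, 224)
print(features(x).shape)                 # torch.Size([1, 256, 6, 6])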

Building the AlexNet Model

# !/usr/bin/env python3
# -*- coding: utf-8 -*-
# ********************************************************************************************************************
# Created: 2024/07/30
# Filename: AlexNet_model.py
# Email: 72110902110jq@gmail.com
# Create By: coderfjq
# LastModify: 2024/07/30
# ********************************************************************************************************************
# This code sucks, you know it and I know it.
# Move on and call me an idiot later.
import torch
import torch.nn as nn


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # Feature extractor: a half-width (single-branch) AlexNet, so channel
        # counts are half of the paper's two-GPU totals (48 vs 96, etc.)
        self.features = nn.Sequential(
            # nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2),
            # --------------------------------
            nn.Conv2d(in_channels=3, out_channels=48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # --------------------------------
            nn.Conv2d(in_channels=48, out_channels=128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # --------------------------------
            nn.Conv2d(in_channels=128, out_channels=192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # --------------------------------
            nn.Conv2d(in_channels=192, out_channels=192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # --------------------------------
            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),

            nn.Linear(2048, num_classes)
        )

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        # x = x.view(x.shape[0], -1)
        # x = x.reshape(-1, 6 * 6 * 128)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
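
A quick usage sketch: because this implementation keeps only one of the paper's two GPU branches, the flattened feature size is 128 × 6 × 6 = 4608 rather than 256 × 6 × 6, which is why the classifier starts with nn.Linear(128 * 6 * 6, 2048):

net = AlexNet(num_classes=5, init_weights=True)
x = torch.zeros(1, 3, 224, 224)   # a dummy batch with a single image
print(net(x).shape)               # torch.Size([1, 5])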

Training the AlexNet Model

from 深度学习基础学习.day05.AlexNet_model import AlexNet  # model from the previous section

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

data_transform = {
    "train": transforms.Compose([
        # Data augmentation to enlarge the effective training set:
        # RandomResizedCrop: random crop, resized to 224 x 224
        # RandomHorizontalFlip: random flip along the horizontal axis
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),

    "val": transforms.Compose([
        transforms.Resize((224, 224)),  # must be (224, 224); a single int would keep the aspect ratio
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
}
data_root = os.path.abspath(os.getcwd())  # get data root path
image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
batch_size = 32

train_loader = DataLoader(train_dataset,
                          batch_size=batch_size, shuffle=True,
                          num_workers=0)

validate_loader = DataLoader(validate_dataset,
                             batch_size=4, shuffle=False,
                             num_workers=0)

train_num = len(train_dataset)
val_num = len(validate_dataset)
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

# nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
# print('Using {} dataloader workers every process'.format(nw))

print("using {} images for training, {} images for validation.".format(train_num,
                                                                        val_num))
# test_data_iter = iter(validate_loader)
# test_image, test_label = next(test_data_iter)
#
# def imshow(img):
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))

net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
optimizer = optim.Adam(net.parameters(), lr=0.0002)

epochs = 10
save_path = './AlexNet.pth'
best_acc = 0.0
train_steps = len(train_loader)
for epoch in range(epochs):
    # train
    net.train()  # enable Dropout
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()

        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)
    # validate
    net.eval()  # disable Dropout
    acc = 0.0  # accumulate the number of correct predictions per epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

    val_accurate = acc / val_num
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))

    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)

print('Finished Training')
