AlexNet: A Deep Convolutional Neural Network

Introduction to AlexNet

AlexNet was the winning network of the ILSVRC 2012 (ImageNet Large Scale Visual Recognition Challenge) competition, raising classification accuracy from the 70%+ of traditional methods to 80%+. It was designed by Hinton and his student Alex Krizhevsky. It was also after that year that deep learning began to develop rapidly.


Advantages of AlexNet

  • The paper trains the network on multiple GPUs in parallel, which speeds up the training process.

  • The ReLU activation function is used in place of the traditional Sigmoid and Tanh activation functions, which saturate for large inputs and slow down learning.

  • Local Response Normalization (LRN)

    • LRN is a technique commonly used in convolutional neural networks. By normalizing each neuron's output against its neighbors, it reduces interference between neurons at certain layers and thereby improves the network's generalization ability.

  • Dropout

    • Dropout randomly deactivates a fraction of the neurons during the network's forward pass at training time, which reduces overfitting. A short sketch of all three techniques in PyTorch follows this list.
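
All three techniques map directly onto built-in PyTorch modules. A minimal sketch, assuming a dummy Conv1-sized activation (the variable names are ours; the LRN hyperparameters are the values reported in the paper):

import torch
import torch.nn as nn

x = torch.randn(1, 96, 55, 55)       # dummy activation the size of Conv 1's output
relu = nn.ReLU(inplace=True)         # max(0, x); does not saturate like Sigmoid/Tanh
lrn = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0)  # paper: n=5, k=2
drop = nn.Dropout(p=0.5)             # randomly zeroes half the activations in training
y = drop(lrn(relu(x)))
print(y.shape)                       # torch.Size([1, 96, 55, 55]) -- shapes unchanged

In the paper, Dropout is applied to the first two fully connected layers rather than to convolutional feature maps; it is chained here only to show the modules side by side.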

The AlexNet Convolution Process

The size of the matrix after convolution is given by N = (W − F + 2P) / S + 1, floored when the result is not an integer, where (a small Python helper evaluating the formula follows the list):

  1. W × W — input image size
  2. F × F — filter size
  3. S — stride
  4. P — number of padding pixels
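
As a quick sanity check, the formula can be evaluated in a couple of lines of Python (the helper name conv_output_size is ours; integer division mirrors the floor that frameworks such as PyTorch apply when the result is fractional):

def conv_output_size(w, f, s, p_total):
    # N = (W - F + P_total) / S + 1, floored; p_total = 2P for symmetric padding
    return (w - f + p_total) // s + 1

print(conv_output_size(224, 11, 4, 3))  # Conv 1 below, padding [1, 2] -> 55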


Conv 1:

  • kernels: 48 * 2 = 96
  • kernel_size: 11
  • padding: [1, 2] (one pixel of padding on the left and top edges, two pixels on the right and bottom edges)
  • stride: 4

N = (W − F + 2P) / S + 1 = (224 − 11 + 1 + 2) / 4 + 1 = 55 (with the asymmetric padding, the 2P term becomes 1 + 2 = 3)

input_size = [224,224,3] output_size = [55,55,96]
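
Note that nn.Conv2d only supports symmetric padding, so the [1, 2] padding cannot be expressed through its padding argument alone; a sketch using nn.ZeroPad2d reproduces it exactly (this is an illustration, not the model code shown later):

import torch
import torch.nn as nn

pad = nn.ZeroPad2d((1, 2, 1, 2))                    # (left, right, top, bottom)
conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4)
x = torch.zeros(1, 3, 224, 224)
print(conv1(pad(x)).shape)                          # torch.Size([1, 96, 55, 55])

In practice, padding=2 produces the same 55 × 55 output because the fractional result is floored: (224 − 11 + 4) / 4 + 1 = 55.25 → 55; the model code below takes that shortcut.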


Maxpool 1:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height while leaving the channel count unchanged: N = (55 − 3) / 2 + 1 = 27.

input_size = [55,55,96] output_size = [27,27,96]


Conv 2:

  • kernels: 128 * 2 = 256
  • kernel_size: 5
  • padding: [2, 2]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (27 - 5 + 2 * 2) / 1 + 1 = 27

input_size = [27,27,96] output_size = [27,27,256]


Maxpool 2:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height while leaving the channel count unchanged: N = (27 − 3) / 2 + 1 = 13.

input_size = [27,27,256] output_size = [13,13,256]


Conv 3:

  • kernels: 192 * 2 = 384
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,256] output_size = [13,13,384]

Conv 4:

  • kernels: 192 * 2 = 384
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,384] output_size = [13,13,384]

Conv 5:

  • kernels: 128 * 2 = 256
  • kernel_size: 3
  • padding: [1, 1]
  • stride: 1

N = (W − F + 2P ) / S + 1 = (13 - 3 + 2 * 1) / 1 + 1 = 13

input_size = [13,13,384] output_size = [13,13,256]

Maxpool 3:

  • kernel_size: 3
  • padding: 0
  • stride: 2

This pooling layer approximately halves the width and height one last time while leaving the channel count unchanged: N = (13 − 3) / 2 + 1 = 6. The resulting 6 × 6 × 256 feature map is flattened before entering the fully connected classifier.

input_size = [13,13,256] output_size = [6,6,256]
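
The whole walkthrough can be verified end to end with a shape-only sketch (ReLU layers are omitted since they do not change shapes; full two-branch channel counts are used, matching the figures above):

import torch
import torch.nn as nn

features = nn.Sequential(
    nn.ZeroPad2d((1, 2, 1, 2)),          # Conv 1's [1, 2] padding
    nn.Conv2d(3, 96, 11, stride=4),      # -> [55, 55, 96]
    nn.MaxPool2d(3, stride=2),           # -> [27, 27, 96]
    nn.Conv2d(96, 256, 5, padding=2),    # -> [27, 27, 256]
    nn.MaxPool2d(3, stride=2),           # -> [13, 13, 256]
    nn.Conv2d(256, 384, 3, padding=1),   # -> [13, 13, 384]
    nn.Conv2d(384, 384, 3, padding=1),   # -> [13, 13, 384]
    nn.Conv2d(384, 256, 3, padding=1),   # -> [13, 13, 256]
    nn.MaxPool2d(3, stride=2),           # -> [6, 6, 256]
)
x = torch.zeros(1, 3, 224, 224)
print(features(x).shape)                 # torch.Size([1, 256, 6, 6])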

Building the AlexNet Model

# !/usr/bin/env python3
# -*- coding: utf-8 -*-
# ********************************************************************************************************************
# Created: 2024/07/30
# Filename: AlexNet_model.py
# Email: 72110902110jq@gmail.com
# Create By: coderfjq
# LastModify: 2024/07/30
# ********************************************************************************************************************
# This code sucks, you know it and I know it.
# Move on and call me an idiot later.
import torch
import torch.nn as nn


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # Feature extractor: a half-width (single-branch) AlexNet, so channel
        # counts are half of the paper's two-GPU totals (48 vs 96, etc.)
        self.features = nn.Sequential(
            # nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2),
            # --------------------------------
            nn.Conv2d(in_channels=3, out_channels=48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # --------------------------------
            nn.Conv2d(in_channels=48, out_channels=128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # --------------------------------
            nn.Conv2d(in_channels=128, out_channels=192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # --------------------------------
            nn.Conv2d(in_channels=192, out_channels=192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # --------------------------------
            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),

            nn.Linear(2048, num_classes)
        )

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        # x = x.view(x.shape[0], -1)
        # x = x.reshape(-1, 6 * 6 * 128)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
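
A quick usage sketch: because this implementation keeps only one of the paper's two GPU branches, the flattened feature size is 128 × 6 × 6 = 4608 rather than 256 × 6 × 6, which is why the classifier starts with nn.Linear(128 * 6 * 6, 2048):

net = AlexNet(num_classes=5, init_weights=True)
x = torch.zeros(1, 3, 224, 224)   # a dummy batch with a single image
print(net(x).shape)               # torch.Size([1, 5])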

Training the AlexNet Model

from 深度学习基础学习.day05.AlexNet_model import AlexNet  # model from the previous section

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

data_transform = {
    "train": transforms.Compose([
        # Data augmentation to enlarge the effective training set:
        # RandomResizedCrop: random crop, resized to 224 x 224
        # RandomHorizontalFlip: random flip along the horizontal axis
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),

    "val": transforms.Compose([
        transforms.Resize((224, 224)),  # must be (224, 224); a single int would keep the aspect ratio
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
}
data_root = os.path.abspath(os.getcwd())  # get data root path
image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
batch_size = 32

train_loader = DataLoader(train_dataset,
                          batch_size=batch_size, shuffle=True,
                          num_workers=0)

validate_loader = DataLoader(validate_dataset,
                             batch_size=4, shuffle=False,
                             num_workers=0)

train_num = len(train_dataset)
val_num = len(validate_dataset)
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

# nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
# print('Using {} dataloader workers every process'.format(nw))

print("using {} images for training, {} images for validation.".format(train_num,
                                                                        val_num))
# test_data_iter = iter(validate_loader)
# test_image, test_label = next(test_data_iter)
#
# def imshow(img):
#     img = img / 2 + 0.5  # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))

net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
optimizer = optim.Adam(net.parameters(), lr=0.0002)

epochs = 10
save_path = './AlexNet.pth'
best_acc = 0.0
train_steps = len(train_loader)
for epoch in range(epochs):
    # train
    net.train()  # enable Dropout
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()

        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)
    # validate
    net.eval()  # disable Dropout
    acc = 0.0  # accumulate the number of correct predictions per epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs = net(val_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

    val_accurate = acc / val_num
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))

    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)

print('Finished Training')
