TensorFlow Learning -- AlexNet Implementation & Image Recognition
2017-11-22 18:59
Key Techniques of AlexNet
The main techniques AlexNet uses:
1. ReLU as the CNN's activation function, which avoids the vanishing gradient problem that Sigmoid suffers from in deeper networks.
2. Dropout during training, randomly ignoring a subset of neurons to avoid overfitting.
3. Overlapping max pooling, which avoids the blurring effect of average pooling; making the stride smaller than the pooling window causes neighboring pooling outputs to overlap, enriching the features.
4. The LRN (Local Response Normalization) layer, which creates a competition mechanism among the activities of local neurons: neurons with large responses are amplified and those with small responses are suppressed, improving the model's generalization (see the formula after this list).
5. Data augmentation: randomly crop 224*224 regions from the 256*256 source images and flip them horizontally, multiplying the amount of data by (256−224)^2 × 2 = 2048. At prediction time, take the four corners plus the center (5 positions), flip each of them (10 images in total), predict on all of them, and average the 10 results (a minimal crop-and-flip sketch follows the list).
6. CUDA to accelerate training of the deep convolutional network, exploiting the GPU's powerful parallelism for the large matrix computations involved in training.
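For reference, the LRN of item 4 (AlexNet paper, Sec. 3.3) normalizes the activity $a^{i}_{x,y}$ of kernel map $i$ at position $(x, y)$ over $n$ adjacent kernel maps; $N$ is the total number of kernels, and the paper uses $k = 2$, $n = 5$, $\alpha = 10^{-4}$, $\beta = 0.75$:

$$
b^{i}_{x,y} = a^{i}_{x,y} \Big/ \left( k + \alpha \sum_{j=\max(0,\, i-n/2)}^{\min(N-1,\, i+n/2)} \left( a^{j}_{x,y} \right)^{2} \right)^{\beta}
$$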
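And a minimal TF 1.x sketch of the training-time crop-and-flip augmentation from item 5 (the function name `augment` is illustrative, not from the original code):

```python
import tensorflow as tf

def augment(image):
    # image: a 256x256x3 tensor from the original dataset.
    # Randomly crop a 224x224 region, then randomly flip it horizontally.
    crop = tf.random_crop(image, [224, 224, 3])
    return tf.image.random_flip_left_right(crop)
```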
AlexNet Network Architecture
The AlexNet architecture: 5 convolutional layers + 3 fully connected layers.
The hyperparameters of each layer are shown in the figure.
Training ran on two GPUs: one GPU runs the layer parts drawn at the top of the figure, the other runs the parts at the bottom, and the GPUs communicate only at certain layers.
The input images are 224*224*3 and become 227*227*3 after preprocessing.
The first convolutional layer uses 96 fairly large 11*11 kernels with stride 4 (split across the 2 GPUs, 48 each). Since the source images are 3-channel RGB, each of the 96 filters also has 3 channels. The resulting feature map has size new_feature_size = (img_size − filter_size)/stride + 1 = (227 − 11)/4 + 1 = 55, i.e. 55*55. It is followed by an LRN layer and then a 3*3 max pooling layer with stride 2; a small helper for checking this arithmetic is sketched below.
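The feature-map arithmetic above can be verified with a tiny helper (a sketch, not part of the original post's code; `conv_output_size` is a name chosen here):

```python
def conv_output_size(img_size, filter_size, stride, padding=0):
    # Spatial output size of a convolution/pooling with VALID-style padding.
    return (img_size + 2 * padding - filter_size) // stride + 1

print(conv_output_size(227, 11, 4))  # conv1: (227-11)/4+1 = 55
print(conv_output_size(55, 3, 2))    # pool1: (55-3)/2+1 = 27
```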
AlexNet Timing Benchmark
Benchmark the average forward/backward pass time of AlexNet with random image data:

```python
#!/usr/bin/python
# coding:utf-8
# AlexNet in TensorFlow
from __future__ import print_function
from datetime import datetime
import math
import time
import tensorflow as tf

# Benchmark 100 batches in total
num_batches = 100

def convLayer(x, name, kh, kw, n_out, dh, dw, p):
    # Number of input channels of x
    n_in = x.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        # Initialize the kh*kw*n_in kernels (n_out of them) with a truncated normal
        kernel = tf.Variable(tf.truncated_normal([kh, kw, n_in, n_out],
                                                 dtype=tf.float32, stddev=1e-1),
                             name='weights')
        # Convolve x with stride dh*dw and kernel size kh*kw;
        # SAME padding pads the borders
        conv = tf.nn.conv2d(x, kernel, [1, dh, dw, 1], padding='SAME')
        # Initialize biases to 0
        biases = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=tf.float32),
                             trainable=True, name='biases')
        # conv + biases
        bias = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(bias, name=scope)
        # Collect the trainable parameters kernel and biases in p
        p += [kernel, biases]
        # Print the structure of the activation tensor
        print(activation.op.name, ' ', activation.get_shape().as_list())
        return activation, p

# Fully connected layer
def fcLayer(x, inputData, outputData, reluFlag, name):
    with tf.variable_scope(name) as scope:
        w = tf.get_variable('w', shape=[inputData, outputData], dtype='float')
        b = tf.get_variable('b', [outputData], dtype='float')
        out = tf.nn.xw_plus_b(x, w, b, name=scope.name)
        if reluFlag:
            return tf.nn.relu(out)
        else:
            return out

# Take images as input; return the last pooling layer pool5 and
# all model parameters of this AlexNet that need training
def AlexNet(images, classNum=None, dropoutrate=None):
    parameters = []
    # Convolutional layer 1
    conv1, parameters = convLayer(images, name='conv1', kh=11, kw=11,
                                  n_out=64, dh=4, dw=4, p=parameters)
    # Apply LRN to conv1
    lrn1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn1')
    # Max-pool lrn1: 3*3 window, stride 2*2; VALID padding keeps the
    # pooling window inside the image
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool1')
    # Print the structure of pool1
    print(pool1.op.name, ' ', pool1.get_shape().as_list())
    # Convolutional layer 2
    conv2, parameters = convLayer(pool1, name='conv2', kh=5, kw=5,
                                  n_out=192, dh=1, dw=1, p=parameters)
    # LRN
    lrn2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn2')
    # Max pooling
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool2')
    # Print the structure of pool2
    print(pool2.op.name, ' ', pool2.get_shape().as_list())
    # Convolutional layer 3
    conv3, parameters = convLayer(pool2, name='conv3', kh=3, kw=3,
                                  n_out=384, dh=1, dw=1, p=parameters)
    # Convolutional layer 4
    conv4, parameters = convLayer(conv3, name='conv4', kh=3, kw=3,
                                  n_out=256, dh=1, dw=1, p=parameters)
    # Convolutional layer 5
    conv5, parameters = convLayer(conv4, name='conv5', kh=3, kw=3,
                                  n_out=256, dh=1, dw=1, p=parameters)
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='VALID', name='pool5')
    print(pool5.op.name, ' ', pool5.get_shape().as_list())
    fc_in = tf.reshape(pool5, [-1, 256*6*6])
    fc6 = fcLayer(fc_in, 256*6*6, 4096, True, 'fc6')
    dropout6 = tf.nn.dropout(fc6, dropoutrate)
    fc7 = fcLayer(dropout6, 4096, 4096, True, 'fc7')
    dropout7 = tf.nn.dropout(fc7, dropoutrate)
    # Final layer outputs logits: no ReLU here
    fc8 = fcLayer(dropout7, 4096, classNum, False, 'fc8')
    return pool5, parameters

# Evaluate the time each AlexNet iteration takes.
# Inputs: the TensorFlow Session, the op to evaluate (target),
# and the name of the test (info_string)
def time_tensorflow_run(session, target, info_string):
    # Warm-up rounds (ignore the first 10 iterations, which are skewed
    # by memory loading and other one-off costs)
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        # Duration of this iteration
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            # Only count iterations after the 10 warm-up rounds
            if not i % 10:
                print('%s: step %d, duration = %.3f' %
                      (datetime.now().strftime('%X'), i - num_steps_burn_in, duration))
            # Accumulate the total time
            total_duration += duration
            total_duration_squared += duration * duration
    # Mean time per iteration mn and standard deviation sd
    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)
    # Print the per-iteration timing
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
          (datetime.now().strftime('%X'), info_string, num_batches, mn, sd))

# Benchmark forward and backward passes with random image data
def run_benchmark():
    with tf.Graph().as_default():
        batch_size = 32
        image_size = 224
        # Generate random image data
        images = tf.Variable(tf.random_normal([batch_size,             # samples per batch
                                               image_size, image_size, # 224*224 images
                                               3],                     # channels
                                              dtype=tf.float32,        # data type
                                              stddev=1e-1))            # standard deviation
        # Build AlexNet; get pool5 and the set of trainable parameters
        pool5, parameters = AlexNet(images, classNum=1000, dropoutrate=0.5)
        # Initialize all variables
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        # Benchmark the forward pass of AlexNet
        time_tensorflow_run(sess, pool5, 'Forward')
        # Give pool5 an optimization objective: its L2 loss
        objective = tf.nn.l2_loss(pool5)
        # Gradients of the loss w.r.t. all model parameters,
        # simulating a training step
        grad = tf.gradients(objective, parameters)
        # Benchmark the backward pass of AlexNet
        time_tensorflow_run(sess, grad, 'Forward-backward')

if __name__ == '__main__':
    run_benchmark()
```
Printed output:
```
conv1   [32, 56, 56, 64]
pool1   [32, 27, 27, 64]
conv2   [32, 27, 27, 192]
pool2   [32, 13, 13, 192]
conv3   [32, 13, 13, 384]
conv4   [32, 13, 13, 256]
conv5   [32, 13, 13, 256]
pool5   [32, 6, 6, 256]
19:43:26: step 0, duration = 1.526
19:43:43: step 10, duration = 2.018
19:44:03: step 20, duration = 1.618
19:44:19: step 30, duration = 1.583
19:44:37: step 40, duration = 1.808
19:44:56: step 50, duration = 1.749
19:45:13: step 60, duration = 1.849
19:45:32: step 70, duration = 1.837
19:45:49: step 80, duration = 1.587
19:46:06: step 90, duration = 1.663
19:46:23: Forward across 100 steps, 1.789 +/- 0.210 sec / batch
19:47:30: step 0, duration = 5.831
19:48:34: step 10, duration = 5.831
19:49:49: step 20, duration = 8.383
19:50:57: step 30, duration = 6.152
19:52:48: step 40, duration = 13.673
19:54:44: step 50, duration = 10.054
19:56:32: step 60, duration = 11.055
19:58:17: step 70, duration = 10.246
20:00:06: step 80, duration = 12.227
20:02:01: step 90, duration = 10.946
20:03:31: Forward-backward across 100 steps, 9.666 +/- 2.279 sec / batch
```
The output lists the five convolutional layers and the final pooling layer, together with the shape of each layer's output tensor.
It also shows the forward and backward timings. No GPU was used here, so each iteration takes fairly long.
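To confirm where the ops actually run (for example, to verify whether a GPU is being picked up), TF 1.x can log device placement when the session is created, e.g.:

```python
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
```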
AlexNet Implementation and Image Recognition
The implementation below builds AlexNet with grouped convolutions, loads the pretrained weights from bvlc_alexnet.npy (converted from the Caffe model; see the links at the end), and classifies test images:
```python
# AlexNet implementation
import os
import cv2
import numpy as np
import tensorflow as tf
import caffe_classes

# Convolutional layer
# groups=2 corresponds to AlexNet's split into an upper and a lower half
def convLayer(x, kHeight, kWidth, strideX, strideY, featureNum, name,
              padding="SAME", groups=1):
    # Number of input channels
    channel = int(x.get_shape()[-1])
    # Anonymous convolution helper
    conv = lambda a, b: tf.nn.conv2d(a, b, strides=[1, strideY, strideX, 1],
                                     padding=padding)
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[kHeight, kWidth, channel // groups, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        # Split the input and the weights into per-group sub-tensors
        xNew = tf.split(value=x, num_or_size_splits=groups, axis=3)
        wNew = tf.split(value=w, num_or_size_splits=groups, axis=3)
        # Compute a feature map per group
        featureMap = [conv(t1, t2) for t1, t2 in zip(xNew, wNew)]
        # Merge the feature maps
        mergeFeatureMap = tf.concat(axis=3, values=featureMap)
        out = tf.nn.bias_add(mergeFeatureMap, b)
        # Return the ReLU-activated result
        return tf.nn.relu(tf.reshape(out, mergeFeatureMap.get_shape().as_list()),
                          name=scope.name)

# Fully connected layer
def fcLayer(x, inputD, outputD, reluFlag, name):
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[inputD, outputD], dtype="float")
        b = tf.get_variable("b", [outputD], dtype="float")
        out = tf.nn.xw_plus_b(x, w, b, name=scope.name)
        if reluFlag:
            return tf.nn.relu(out)
        else:
            return out

# AlexNet model
class alexNet(object):
    def __init__(self, x, keepPro, classNum, modelPath="bvlc_alexnet.npy"):
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.MODELPATH = modelPath
        self.buildCNN()

    def buildCNN(self):
        # Convolutional layer 1
        conv1 = convLayer(self.X, 11, 11, 4, 4, 96, "conv1", "VALID")
        # Max pooling: 3*3 window, stride 2*2
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='VALID', name='pool1')
        lrn1 = tf.nn.lrn(pool1, depth_radius=2, alpha=2e-05, beta=0.75,
                         bias=1.0, name='norm1')
        # Convolutional layer 2
        conv2 = convLayer(lrn1, 5, 5, 1, 1, 256, "conv2", groups=2)
        # Max pooling: 3*3 window, stride 2*2
        pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='VALID', name='pool2')
        lrn2 = tf.nn.lrn(pool2, depth_radius=2, alpha=2e-05, beta=0.75,
                         bias=1.0, name='lrn2')
        # Convolutional layer 3
        conv3 = convLayer(lrn2, 3, 3, 1, 1, 384, "conv3")
        # Convolutional layer 4
        conv4 = convLayer(conv3, 3, 3, 1, 1, 384, "conv4", groups=2)
        # Convolutional layer 5
        conv5 = convLayer(conv4, 3, 3, 1, 1, 256, "conv5", groups=2)
        # Max pooling: 3*3 window, stride 2*2
        pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='VALID', name='pool5')
        # Fully connected layer 1
        fcIn = tf.reshape(pool5, [-1, 256 * 6 * 6])
        fc1 = fcLayer(fcIn, 256 * 6 * 6, 4096, True, "fc6")
        dropout1 = tf.nn.dropout(fc1, self.KEEPPRO)
        # Fully connected layer 2
        fc2 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = tf.nn.dropout(fc2, self.KEEPPRO)
        # Fully connected layer 3 (logits: no ReLU, softmax is applied outside)
        self.fc3 = fcLayer(dropout2, 4096, self.CLASSNUM, False, "fc8")

    # Load the pretrained model
    def loadModel(self, sess):
        wDict = np.load(self.MODELPATH, encoding="bytes").item()
        # Iterate over the layers stored in the model file
        for name in wDict:
            if name not in []:  # list of layers to skip (empty here)
                with tf.variable_scope(name, reuse=True):
                    for p in wDict[name]:
                        if len(p.shape) == 1:
                            # biases are one-dimensional
                            sess.run(tf.get_variable('b', trainable=False).assign(p))
                        else:
                            # weights
                            sess.run(tf.get_variable('w', trainable=False).assign(p))

# AlexNet test
if __name__ == '__main__':
    dropoutPro = 1
    classNum = 1000
    testPath = "testimage"
    # Read the test images
    testImg = []
    for f in os.listdir(testPath):
        testImg.append(cv2.imread(testPath + "/" + f))

    imgMean = np.array([104, 117, 124], np.float)
    x = tf.placeholder("float", [1, 227, 227, 3])
    # Build the alexNet model
    model = alexNet(x, dropoutPro, classNum)
    score = model.fc3
    print(score)
    softmax = tf.nn.softmax(score)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Load the pretrained weights
        model.loadModel(sess)
        for i, img in enumerate(testImg):
            # Resize to the network input size and subtract the mean
            test = cv2.resize(img.astype(np.float), (227, 227)) - imgMean
            # Reshape test into an input tensor
            test = test.reshape((1, 227, 227, 3))
            # Index of the class with the highest probability
            maxx = np.argmax(sess.run(softmax, feed_dict={x: test}))
            # Name of that class
            res = caffe_classes.class_names[maxx]
            print(res)
            # Set the font
            font = cv2.FONT_HERSHEY_SIMPLEX
            # Draw the class name on the image
            cv2.putText(img, res, (int(img.shape[0] / 3), int(img.shape[1] / 3)),
                        font, 1, (0, 0, 255), 2)
            # Show the image
            cv2.imshow("test", img)
            cv2.waitKey(0)
```
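The `groups=2` path in `convLayer` mirrors the paper's two-GPU split: the input channels and the filters are each split in half, convolved independently, and concatenated back together. A quick shape check (a sketch that reuses the `convLayer` defined above; the scope name is arbitrary):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [1, 27, 27, 96])
# Two groups: each convolves 48 of the 96 input channels with 128 filters,
# and the two 128-channel outputs are concatenated into 256 channels.
y = convLayer(x, 5, 5, 1, 1, 256, "conv2_demo", groups=2)
print(y.get_shape().as_list())  # [1, 27, 27, 256]
```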
The test results show the zebra ("zebra") and the crane ("crane") being recognized:
AlexNet links:
1. Paper: ImageNet Classification with Deep Convolutional Neural Networks; download link for the pretrained weights file bvlc_alexnet.npy and the matching class-name file caffe_classes.py