python搭建简易神经网络结构
2017-12-04 21:04
866 查看
本节使用 Python 环境,在不使用深度学习工具箱的情况下搭建一个简单的全连接神经网络(非 CNN 卷积网络),用来训练 MNIST 手写体数据库。
网络的结构可以很简单,比如 [784,200,100,10]:输入维度为 784,对应每个样本 28*28 的像素数;网络包含 dropout 操作。本节的重点是理解最基础的反向传播机制的实现过程。
完整的项目点击github主页获取
下面看下可运行的包含训练测试的代码:
网络的结构可以很简单,比如 [784,200,100,10]:输入维度为 784,对应每个样本 28*28 的像素数;网络包含 dropout 操作。本节的重点是理解最基础的反向传播机制的实现过程。
完整的项目点击github主页获取
下面看下可运行的包含训练测试的代码:
# -*- coding: utf-8 -*-
"""
Train a small fully-connected sigmoid network on MNIST using only NumPy.

@author: chen
"""
import struct
from datetime import datetime

import numpy as np


# Read images
def read_image(filename):
    """Load an IDX3 image file into a 2-D float array.

    The 16-byte big-endian header holds (magic, image count, rows, columns);
    the pixels follow as unsigned bytes.

    Args:
        filename: path of the IDX3 file.

    Returns:
        float64 array of shape (numImages, numRows * numColumns) holding the
        raw pixel values (not rescaled).
    """
    with open(filename, 'rb') as binfile:
        buf = binfile.read()
    header_fmt = '>IIII'
    _magic, num_images, num_rows, num_columns = struct.unpack_from(
        header_fmt, buf, 0)
    # The image size comes from the header instead of a hard-coded 784.
    pixels_per_image = num_rows * num_columns
    pixels = np.frombuffer(buf, dtype=np.uint8,
                           count=num_images * pixels_per_image,
                           offset=struct.calcsize(header_fmt))
    return pixels.reshape(num_images, pixels_per_image).astype(np.float64)


# Read image labels
def read_label(filename):
    """Load an IDX1 label file as one-hot rows.

    The 8-byte big-endian header holds (magic, label count); one unsigned
    byte per label follows.

    Args:
        filename: path of the IDX1 file.

    Returns:
        float64 array of shape (numLabels, 10) with a single 1 per row at the
        column given by the label value.

    Raises:
        IndexError: if a label value is 10 or larger.
    """
    with open(filename, 'rb') as binfile:
        buf = binfile.read()
    header_fmt = '>II'
    _magic, num_labels = struct.unpack_from(header_fmt, buf, 0)
    labels = np.frombuffer(buf, dtype=np.uint8, count=num_labels,
                           offset=struct.calcsize(header_fmt))
    data = np.zeros((num_labels, 10))
    data[np.arange(num_labels), labels] = 1
    return data


# Build the network and initialise its parameters
class nn_setup():
    """Container for the weights, activations and gradients of the network.

    Attributes:
        net: 1-D array of layer widths, input layer first.
        size: number of layers (len(net)).
        W, dW: per-layer weight / gradient matrices of shape
            (net[i], net[i - 1] + 1); the extra column is the bias.
        a, d: per-layer activation / delta buffers; every layer except the
            last one carries an extra leading bias column.
        dropoutMask: per-layer dropout masks (empty when dropout is off).
        e: output error of the last batch.
        L: loss of the last batch.
    """

    def __init__(self, net, learningRate=2, epochs=100, batch=100,
                 dropoutFraction=0.05):
        self.net = np.asarray(net)
        self.size = self.net.size
        self.learningRate = learningRate
        self.dropoutFraction = dropoutFraction
        self.epochs = epochs
        self.batch = batch
        # Parameters are kept in lists so each layer can be indexed directly.
        self.W = list()
        self.a = list()
        self.d = list()
        self.dW = list()
        self.dropoutMask = list()
        self.L = 0

        # Initialise the weights uniformly in +/- 4 * sqrt(6 / (fan_in + fan_out)).
        for i in range(1, self.size):
            fan_out = self.net[i]
            fan_in = self.net[i - 1]
            scale = 4 * np.sqrt(6 / (fan_out + fan_in))
            self.W.append(
                (np.random.rand(fan_out, fan_in + 1) - 0.5) * 2 * scale)
            self.dW.append(np.zeros([fan_out, fan_in + 1]))

        last = self.size - 1

        def layer_buffers():
            # One (batch, width [+ bias]) zero buffer per layer.
            return [
                np.zeros([self.batch, width if i == last else width + 1])
                for i, width in enumerate(self.net)
            ]

        self.a = layer_buffers()
        if self.dropoutFraction > 0:
            self.dropoutMask = layer_buffers()
        self.d = layer_buffers()
        # The shape must be one sequence; a second positional argument to
        # np.zeros is interpreted as the dtype.
        self.e = np.zeros([self.batch, self.net[last]])


def sigmoid(inputs):
    """Return the element-wise logistic function 1 / (1 + exp(-x)).

    The input array is left untouched; a new float array is returned.
    """
    inputs = np.asarray(inputs, dtype=float)
    return 1 / (1 + np.exp(-inputs))


def nn_forward(nn, x, training=True):
    """Run a forward pass and store every layer's output in ``nn.a``.

    Args:
        nn: an ``nn_setup`` instance.
        x: input array of shape (m, nn.net[0]).
        training: when True a fresh dropout mask is drawn for each hidden
            layer; when False hidden activations are scaled by
            (1 - dropoutFraction) instead.

    Returns:
        The output layer activations, shape (m, nn.net[-1]).
    """
    m = x.shape[0]
    bias = np.ones([m, 1])
    nn.a[0] = np.hstack((bias, x))
    for i in range(1, nn.size - 1):
        hidden = sigmoid(np.dot(nn.a[i - 1], nn.W[i - 1].T))
        if nn.dropoutFraction > 0:
            if training:
                # A unit is kept when its random draw exceeds the fraction.
                keep = np.random.rand(*hidden.shape) > nn.dropoutFraction
                nn.dropoutMask[i] = keep.astype(float)
                hidden = hidden * nn.dropoutMask[i]
            else:
                hidden = hidden * (1 - nn.dropoutFraction)
        nn.a[i] = np.hstack((bias, hidden))
    nn.a[nn.size - 1] = sigmoid(np.dot(nn.a[nn.size - 2], nn.W[nn.size - 2].T))
    return nn.a[nn.size - 1]


def nn_backward(nn, y):
    """Back-propagate the squared error and fill ``nn.dW`` for every layer.

    Must follow a ``nn_forward(..., training=True)`` call on the same batch,
    because it reads ``nn.a`` and (with dropout) ``nn.dropoutMask``.

    Args:
        nn: an ``nn_setup`` instance.
        y: target array with the same shape as the output layer.

    Returns:
        The batch loss, sum(e * e) / 2 / m, also stored in ``nn.L``.
    """
    last = nn.size - 1
    m = y.shape[0]
    nn.e = y - nn.a[last]
    nn.L = 0.5 * np.sum(nn.e * nn.e) / m

    nn.d[last] = -nn.e * (nn.a[last] * (1 - nn.a[last]))
    # Walk from the last hidden layer back to the first one.
    for i in range(last - 1, 0, -1):
        d_act = nn.a[i] * (1 - nn.a[i])
        # Hidden deltas carry a bias column that must not be propagated.
        upstream = nn.d[i + 1] if i + 1 == last else nn.d[i + 1][:, 1:]
        nn.d[i] = np.dot(upstream, nn.W[i]) * d_act
        if nn.dropoutFraction > 0:
            mask = np.hstack((np.ones([nn.d[i].shape[0], 1]),
                              nn.dropoutMask[i]))
            nn.d[i] = nn.d[i] * mask

    # Every weight matrix gets a gradient, including the output layer's
    # (index size - 2), hence range(size - 1).
    for i in range(last):
        upstream = nn.d[i + 1] if i + 1 == last else nn.d[i + 1][:, 1:]
        nn.dW[i] = np.dot(upstream.T, nn.a[i]) / nn.d[i + 1].shape[0]
    return nn.L


def nn_apply_gradients(nn):
    """Apply one plain gradient-descent step to every weight matrix."""
    for i in range(nn.size - 1):
        nn.W[i] = nn.W[i] - nn.learningRate * nn.dW[i]


def nn_test(nn, data, labels):
    """Classify ``data`` and compare against one-hot ``labels``.

    Returns:
        (accuracy, count): fraction and number of rows whose arg-max output
        equals the arg-max of the label row.
    """
    res = nn_forward(nn, data, training=False)
    pre_y = np.argmax(res, axis=1)
    y_label = np.argmax(labels, axis=1)
    count = int(np.sum(pre_y == y_label))
    return count / y_label.size, count


def main():
    """Train on the MNIST training set and report test accuracy per epoch."""
    # Dataset location
    filename_traindata = 'MNIST_data/train-images.idx3-ubyte'
    filename_trainlabel = 'MNIST_data/train-labels.idx1-ubyte'
    filename_testdata = 'MNIST_data/t10k-images.idx3-ubyte'
    filename_testlabel = 'MNIST_data/t10k-labels.idx1-ubyte'
    train_data = read_image(filename_traindata) / 255
    train_label = read_label(filename_trainlabel)
    test_data = read_image(filename_testdata) / 255
    test_label = read_label(filename_testlabel)

    # Network structure and hyper-parameters
    net = np.array([784, 200, 100, 10])
    learningRate = 2        # learning rate
    batch = 100             # batch size
    epochs = 100            # number of epochs
    dropoutFraction = 0.05  # dropout rate

    # Initialise the network
    nn = nn_setup(net, learningRate=learningRate, batch=batch, epochs=epochs,
                  dropoutFraction=dropoutFraction)
    plot_flag = 0  # plot intermediate results? 0 = no
    Loss = []
    accuracy_all = []

    # ---------------------- training ----------------------
    for epoch in range(nn.epochs):
        time_start = datetime.now()  # start time of this epoch
        num = train_data.shape[0] // nn.batch
        for num_batch in range(num):
            # Sample with replacement; the lower bound 0 keeps the first
            # training sample reachable.
            choose = np.random.randint(0, train_data.shape[0], nn.batch)
            batch_x = train_data[choose, :]
            batch_y = train_label[choose, :]

            nn_forward(nn, batch_x, training=True)
            Loss.append(nn_backward(nn, batch_y))
            nn_apply_gradients(nn)

            # Progress output
            if num_batch % 100 == 0:
                print('epochs = ', epoch, ' / ', nn.epochs,
                      '; batch = ', num_batch, ' / ', num,
                      '; error_batch = ', nn.L)
        time_end = datetime.now()
        # timedelta handles minute/hour roll-over correctly.
        print('time using for this epoch = ',
              (time_end - time_start).total_seconds(), 's')

        # ------------------- test accuracy -------------------
        accuracy, count = nn_test(nn, test_data, test_label)
        accuracy_all.append(accuracy)
        print('-----------------------------------------\n',
              'test accuracy = ', accuracy,
              '(', count, '/', test_label.shape[0], ')',
              '\n-----------------------------------------\n')

        if plot_flag:
            # Imported lazily: plotting is optional and off by default.
            import matplotlib.pyplot as plt
            plt.figure(1)
            plt.plot(Loss)
            plt.title("training batch error")
            plt.figure(2)
            plt.plot(accuracy_all)
            plt.title("testing accuracy in different epochs")
            plt.show()


# ----------------------------------------------------------------
if __name__ == '__main__':
    main()
相关文章推荐
- Tensorflow 搭建简单神经网络 | Python
- Keras学习笔记01——快速搭建神经网络结构
- 【神经网络与深度学习】【python开发】caffe-windows使能python接口使用draw_net.py绘制网络结构图过程
- 使用python定义一个神经网络结构
- 从零开始:用Python搭建神经网络
- [置顶] 【python keras实战】用keras搭建卷起神经网络训练模型
- Python基于numpy灵活定义神经网络结构的方法
- 9行Python代码搭建神经网络
- Python + Graphviz 绘制神经网络结构图
- 如何用9行Python代码编写一个简易神经网络
- python搭建循环神经网络
- Python搭建Web服务器,与Ajax交互,接收处理Get和Post请求的简易结构
- TensorFlow练习(二)——搭建神经网络结构
- Ch2r_ood_understanding 本文档为论文限定领域口语对话系统中超出领域话语的对话行为识别的部分实验代码。代码基于Python,需要用到的外部库有: Keras(搭建神经网络) S
- Python手动搭建神经网络
- Python + Graphviz绘制神经网络结构图--简化版本实现
- TensorFlow搭建自己的神经网络(一)
- 神经网络结构简单小结
- [转]使用 Python 构造神经网络
- lecture2.a --- 神经网络结构的分类