从头到尾用Python实现一个深度神经网络
2017-09-17 21:19
846 查看
from copy import deepcopy
import math

import numpy as np

# Lower/upper guard applied to the network output before taking log() or
# dividing by it, so a fully saturated sigmoid cannot produce inf/nan.
_LOG_EPS = 1e-12


def ReLu(X):
    """Return the element-wise rectified linear unit of X (negatives -> 0)."""
    return X * (X > 0)


def dReLu(X):
    """Return the element-wise derivative of ReLu at X (1.0 where X > 0, else 0.0)."""
    return 1. * (X > 0)


def Sigmod(X):
    """Return the element-wise logistic sigmoid 1 / (1 + exp(-X))."""
    return 1.0 / (1.0 + np.exp(-X))


def dSigmod(X):
    """Return the element-wise derivative of Sigmod at X."""
    s = Sigmod(X)
    return s * (1 - s)


def forward_pass(W, b, f, X):
    """Run the network forward on X.

    W, b and f are lists indexed by layer; index 0 is a placeholder so that
    W[l], b[l], f[l] belong to layer l. X has one column per sample.

    Returns (Z, A): lists of pre-activations and activations per layer, with
    Z[0] = A[0] = X.
    """
    Z = [X]
    A = [X]
    for l in range(1, len(W)):
        Z.append(np.dot(W[l], A[l - 1]) + b[l])
        A.append(f[l](Z[l]))
    return Z, A


def compute_cost(A_out, Y, W, lambd):
    """Return the mean binary cross-entropy plus the L2 weight penalty.

    A_out is the (1, m) output of the last layer, Y the (1, m) labels.
    The penalty is lambd / (2 * m) * sum of squared weights; pass lambd=0.0
    to get the plain cross-entropy. The result is a Python float.
    """
    m = Y.shape[1]
    # Clip so that log(0) never occurs when the output saturates.
    A_safe = np.clip(A_out, _LOG_EPS, 1.0 - _LOG_EPS)
    cross_entropy = -(np.dot(np.log(A_safe), Y.T)
                      + np.dot(np.log(1 - A_safe), (1 - Y).T)) / m
    l2_norm = sum(np.sum(w ** 2) for w in W[1:]) * lambd / (2.0 * m)
    return float(cross_entropy[0, 0] + l2_norm)


def backward_pass(W, Z, A, Y, df, lambd):
    """Back-propagate the cost of compute_cost through the network.

    Z and A are the lists returned by forward_pass; df[l] is the derivative
    of the activation of layer l. The L2 penalty only contributes to dW.

    Returns (dW, db), lists indexed like W and b (index 0 is a placeholder).
    """
    m = Y.shape[1]
    last = len(W) - 1
    dW = [0] * len(W)
    db = [0] * len(W)

    # Derivative of the cross-entropy w.r.t. the output activation; the same
    # clipping as in compute_cost avoids division by zero.
    A_safe = np.clip(A[last], _LOG_EPS, 1.0 - _LOG_EPS)
    dA = -Y / A_safe + (1 - Y) / (1 - A_safe)

    for l in range(last, 0, -1):
        # For a sigmoid output layer this simplifies to A - Y.
        dZ = dA * df[l](Z[l])
        assert dZ.shape == Z[l].shape
        dW[l] = np.dot(dZ, A[l - 1].T) / m + lambd * W[l] / m
        assert dW[l].shape == W[l].shape
        db[l] = np.sum(dZ, axis=1, keepdims=True) / m
        dA = np.dot(W[l].T, dZ)
    return dW, db


def _pack_params(weights, biases):
    """Flatten per-layer weights and biases (layer 1..L) into one vector."""
    parts = []
    for l in range(1, len(weights)):
        parts.append(weights[l].ravel())
        parts.append(biases[l].ravel())
    return np.concatenate(parts)


def _unpack_params(theta, n):
    """Inverse of _pack_params for a network with layer sizes n."""
    W = [0] * len(n)
    b = [0] * len(n)
    pos = 0
    for l in range(1, len(n)):
        size = n[l] * n[l - 1]
        W[l] = theta[pos:pos + size].reshape((n[l], n[l - 1]))
        pos += size
        b[l] = theta[pos:pos + n[l]].reshape((n[l], 1))
        pos += n[l]
    return W, b


def gradient_check(W, b, dW, db, f, X, Y, lambd, epsilon):
    """Compare analytic gradients with central finite differences.

    Every parameter is shifted by +/- epsilon in turn and the cost is
    re-evaluated. Returns the relative difference
    ||g - g_num|| / (||g|| + ||g_num||); a small value means that dW and db
    agree with the numerical gradient.
    """
    n = [W[1].shape[1]] + [W[l].shape[0] for l in range(1, len(W))]
    theta = _pack_params(W, b)
    dtheta = _pack_params(dW, db)
    dtheta_debug = np.zeros(dtheta.shape)

    for t in range(len(theta)):
        costs = []
        for sign in (1.0, -1.0):
            shifted = theta.copy()
            shifted[t] = theta[t] + sign * epsilon
            W_s, b_s = _unpack_params(shifted, n)
            _, A_s = forward_pass(W_s, b_s, f, X)
            costs.append(compute_cost(A_s[-1], Y, W_s, lambd))
        dtheta_debug[t] = (costs[0] - costs[1]) / (2.0 * epsilon)

    numerator = np.sqrt(np.sum((dtheta - dtheta_debug) ** 2))
    denominator = (np.sqrt(np.sum(dtheta ** 2))
                   + np.sqrt(np.sum(dtheta_debug ** 2)))
    return numerator / denominator


def main():
    """Train a small fully connected classifier on synthetic 2-D data and plot it."""
    # Only the demo needs scikit-learn and matplotlib, so they are imported
    # here; the functions above can then be imported with NumPy alone.
    from sklearn.datasets import make_classification
    from sklearn import preprocessing
    from matplotlib import pyplot as plt

    X, Y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               n_clusters_per_class=1, n_samples=6000)

    # 70 % / 30 % train/test split.
    split = int(X.shape[0] * 0.7)
    X_train, Y_train = X[0:split, :], Y[0:split]
    X_test, Y_test = X[split:, :], Y[split:]

    # Standardise with statistics of the training set only.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Switch to the n * m layout (features x samples).
    X_train = X_train.T
    Y_train = Y_train.reshape(1, Y_train.shape[0])
    X_test = X_test.T
    Y_test = Y_test.reshape(1, Y_test.shape[0])
    X = X.T

    # Network definition.
    m = X_train.shape[1]
    n = [X_train.shape[0], 8, 4, 2, 1]      # units per layer, input layer included
    f = [0, ReLu, ReLu, ReLu, Sigmod]       # activation per layer, 0 is a placeholder
    df = [0, dReLu, dReLu, dReLu, dSigmod]  # matching derivatives, 0 is a placeholder
    # For plain logistic regression use instead:
    #   n = [X_train.shape[0], 1]; f = [0, Sigmod]; df = [0, dSigmod]
    layers = len(n) - 1

    # Parameters; index 0 is padding so that W[l] belongs to layer l.
    W = [0] * len(n)
    b = [0] * len(n)
    for l in range(1, len(n)):
        # Scaling by sqrt(1 / n[l-1]) counters vanishing/exploding gradients;
        # the factor 2.0 works better with ReLU.
        W[l] = np.random.randn(n[l], n[l - 1]) * np.sqrt(2.0 / n[l - 1])
        b[l] = np.random.randn(n[l], 1)

    # Hyper-parameters.
    rate = 0.01
    iteration = 5000
    lambd = 0.01

    # Collected for the loss plot.
    loss_train = []
    loss_test = []
    accuracy_train = 0
    accuracy_test = 0

    # When debug is True, every iteration runs a numerical gradient check.
    debug = False
    epsilon = 0.00001

    for _ in range(iteration):
        # Forward pass on the training set.
        Z, A = forward_pass(W, b, f, X_train)
        assert A[layers].shape == (n[layers], m)
        assert not (A[layers] < 0).any()

        # The L2 term is part of the training loss; it only affects dW.
        J_train = compute_cost(A[layers], Y_train, W, lambd)

        Y_pred = 1 * (A[layers] > 0.5)
        accuracy_train = (Y_pred == Y_train).mean()

        # Backward pass.
        dW, db = backward_pass(W, Z, A, Y_train, df, lambd)

        if debug:
            grad_diff = gradient_check(W, b, dW, db, f, X_train, Y_train,
                                       lambd, epsilon)
            print("dtheta diff: %f" % grad_diff)

        # Gradient descent step.
        for l in range(len(n) - 1, 0, -1):
            W[l] -= rate * dW[l]
            b[l] -= rate * db[l]

        # Evaluate on the test set (cross-entropy only, no L2 term).
        _, A_test = forward_pass(W, b, f, X_test)
        J_test = compute_cost(A_test[layers], Y_test, W, 0.0)
        Y_pred = 1 * (A_test[layers] > 0.5)
        accuracy_test = (Y_pred == Y_test).mean()

        loss_train.append(J_train)
        loss_test.append(J_test)

    # Final accuracy.
    print("accuracy_train: %lf" % accuracy_train)
    print("accuracy_test: %lf" % accuracy_test)

    # Loss curves: training in blue, test in red.
    plt.figure(num=0, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
    plt.plot(range(iteration), loss_train, c="blue")
    plt.plot(range(iteration), loss_test, c="red")
    plt.show()

    # Scatter plot of the raw (unscaled) data set coloured by class.
    plt.figure(num=None, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
    plt.scatter(X[0], X[1], marker='o', c=Y, s=5, edgecolor='k')
    plt.show()


if __name__ == "__main__":
    main()
相关文章推荐
- Python实现深度学习之-神经网络识别手写数字(更新中,更新日期:2017-07-12)
- 用python实现一个神经网络
- 十一行Python代码实现一个神经网络(第一部分)
- 神经网络与深度学习 使用Python实现基于梯度下降算法的神经网络和自制仿MNIST数据集的手写数字分类可视化程序 web版本
- 【深度学习】1.2:简单神经网络的python实现
- 一个 11 行 Python 代码实现的神经网络
- 一个 11 行 Python 代码实现的神经网络
- 深度学习与神经网络-吴恩达(Part1Week3)-单隐层神经网络编程实现(python)
- 基于深度学习神经网络等机器学习技术实现一个医学辅助诊断的专家系统原型
- 深度学习笔记(五)用Torch实现RNN来制作一个神经网络计时器
- 深度学习论文-神经网络的代码实现(python版本)
- 一个 11 行 Python 代码实现的神经网络
- 十一行Python代码实现一个神经网络(第一部分)
- 神经网络与深度学习笔记(二)python 实现随机梯度下降
- 【Python开发】【神经网络与深度学习】网络爬虫之python实现
- 使用python实现深度神经网络 3
- 一个 11 行 Python 代码实现的神经网络
- 十一行Python代码实现一个神经网络(第一部分)
- 一个 11 行 Python 代码实现的神经网络
- 一个 11 行 Python 代码实现的神经网络