
Implementing a Deep Neural Network from Scratch in Python

2017-09-17 21:19
from sklearn.datasets import make_classification
from sklearn import preprocessing
import numpy as np
import math
from matplotlib import pyplot as plt
from copy import deepcopy

def ReLu(X):
    return X * (X > 0)

def dReLu(X):
    return 1. * (X > 0)

def Sigmoid(X):
    return 1.0 / (1.0 + np.exp(-X))

def dSigmoid(X):
    return Sigmoid(X) * (1 - Sigmoid(X))
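
# Optional sanity check (not in the original post): the analytic derivatives above can be
# compared against a central finite difference; the test points avoid 0, where ReLu is not differentiable.
_x = np.array([-2.0, -0.5, 0.3, 1.7])
_eps = 1e-6
assert np.allclose(dSigmoid(_x), (Sigmoid(_x + _eps) - Sigmoid(_x - _eps)) / (2 * _eps))
assert np.allclose(dReLu(_x), (ReLu(_x + _eps) - ReLu(_x - _eps)) / (2 * _eps))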

X, Y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           n_clusters_per_class=1, n_samples=6000)

X_train = X[0:int(X.shape[0]*0.7),:]
Y_train = Y[0:int(X.shape[0]*0.7)]
X_test = X[int(X.shape[0]*0.7):,:]
Y_test = Y[int(X.shape[0]*0.7):]

scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train = X_train.T  # n*m: number of features x number of samples
Y_train = Y_train.reshape(1, Y_train.shape[0])
X_test = X_test.T  # n*m
Y_test = Y_test.reshape(1, Y_test.shape[0])

X = X.T
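
# Shape check (not in the original post): after the transposes above, the data is laid out as
# features x samples, so with n_samples=6000 and a 70/30 split the shapes should be:
assert X_train.shape == (2, 4200) and Y_train.shape == (1, 4200)
assert X_test.shape == (2, 1800) and Y_test.shape == (1, 1800)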

# Build the neural network
# hyperparameters
m = X_train.shape[1]

n = [X_train.shape[0], 8, 4, 2, 1]  # number of units in each layer, including the input layer
f = [0, ReLu, ReLu, ReLu, Sigmoid]  # activation function for each layer; the leading 0 is a placeholder
df = [0, dReLu, dReLu, dReLu, dSigmoid]  # derivative of each activation; the leading 0 is a placeholder
# n = [X_train.shape[0], 1]  # alternative: a single sigmoid unit (logistic regression)
# f = [0, Sigmoid]
# df = [0, dSigmoid]
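
# Consistency check (not in the original post): there must be one activation and one
# derivative entry per layer, including the placeholder for the input layer.
assert len(f) == len(n) and len(df) == len(n)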

layers = len(n) - 1
# parameters
W = [0 for i in range(len(n))]  # leading 0 is a placeholder so that W[l] holds the parameters of layer l
b = [0 for i in range(len(n))]
for l in range(1, len(n)):
    W[l] = np.random.randn(n[l], n[l-1])*np.sqrt(2.0/n[l-1])  # scaled to avoid vanishing/exploding gradients; np.sqrt(1.0/n[l-1]) also works, but 2.0 is better for ReLu
    b[l] = np.random.randn(n[l], 1)
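
# Shape check (not in the original post): W[l] maps the n[l-1] units of layer l-1 to the
# n[l] units of layer l, and b[l] holds one bias per unit.
for l in range(1, len(n)):
    assert W[l].shape == (n[l], n[l-1]) and b[l].shape == (n[l], 1)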

# intermediate values (activations and gradients)
Z = [0 for i in range(len(n))]
A = [0 for i in range(len(n))]
dZ = [0 for i in range(len(n))]
dA = [0 for i in range(len(n))]
dW = [0 for i in range(len(n))]
db = [0 for i in range(len(n))]

# training
# hyperparameters
rate = 0.01       # learning rate
iteration = 5000  # number of gradient-descent iterations
lambd = 0.01      # L2 regularization strength

# for plotting
loss_train = []
loss_test = []
accuracy_train = 0
accuracy_test = 0

# if debug is True, run a numerical gradient check each iteration
debug = False
epsilon = 0.00001  # perturbation for the finite-difference gradient check

for i in range(iteration):
    i += 1
    # forward pass
    Z[0] = X_train
    A[0] = X_train
    for l in range(1, len(n)):
        Z[l] = np.dot(W[l], A[l-1]) + b[l]
        A[l] = f[l](Z[l])
    assert(A[layers].shape == (n[layers], m))
    assert(True not in (A[layers] < 0)[:])
    # cross-entropy loss with L2 regularization; the L2 term only affects dW[l]
    l2_norm = sum([np.sum(w**2) for w in W])*lambd/(2.0*m)
    J_train = -(np.dot(np.log(A[layers]), Y_train.T) + np.dot(np.log(1-A[layers]), (1-Y_train).T))/m + l2_norm
    # predictions and accuracy on the training set
    Y_pred = 1*(A[layers] > 0.5)
    accuracy_train = (Y_pred == Y_train).mean()
    # print(J_train)

    # backward pass
    dA[layers] = -Y_train/A[layers] + (1-Y_train)/(1-A[layers])  # gradient of the loss w.r.t. the output activation
    for l in range(len(n)-1, 0, -1):
        dZ[l] = dA[l]*df[l](Z[l])  # for the sigmoid output layer this works out to A[layers] - Y_train
        assert(dZ[l].shape == Z[l].shape)
        dW[l] = np.dot(dZ[l], A[l-1].T)/m + lambd*W[l]/m
        assert(dW[l].shape == W[l].shape)
        db[l] = np.sum(dZ[l], axis=1, keepdims=True)/m
        assert(db[l].shape == b[l].shape)
        dA[l-1] = np.dot(W[l].T, dZ[l])
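
    # Optional check (not in the original post): for a sigmoid output with cross-entropy loss,
    # the output-layer error simplifies to A - Y; the chain rule above should reproduce it.
    if np.all(np.isfinite(dZ[layers])):
        assert np.allclose(dZ[layers], A[layers] - Y_train)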

    # gradient check
    if debug:
        W_big = deepcopy(W)
        b_big = deepcopy(b)
        W_small = deepcopy(W)
        b_small = deepcopy(b)
        dW_diff = deepcopy(W)
        db_diff = deepcopy(b)
        Z_big = deepcopy(Z)
        A_big = deepcopy(A)
        Z_small = deepcopy(Z)
        A_small = deepcopy(A)
        # flatten all parameters into a single vector
        theta = np.array([])
        dtheta = np.array([])  # stores dW and db for the check
        for l in range(1, len(n)):
            theta = np.concatenate([theta, W[l].flatten()])
            theta = np.concatenate([theta, b[l].flatten()])
            dtheta = np.concatenate([dtheta, dW[l].flatten()])
            dtheta = np.concatenate([dtheta, db[l].flatten()])
        # numerically estimate the gradient for every component of theta
        dtheta_debug = np.zeros(dtheta.shape)
        for t in range(len(theta)):
            # perturb one component up and down by epsilon
            theta_big = theta.copy()
            theta_small = theta.copy()
            theta_big[t] = theta[t] + epsilon
            theta_small[t] = theta[t] - epsilon
            node_cnt = 0
            # restore the perturbed vectors back into W and b
            for l in range(1, len(n)):
                W_big[l] = theta_big[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
                W_small[l] = theta_small[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
                node_cnt = node_cnt + n[l]*n[l-1]
                b_big[l] = theta_big[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
                b_small[l] = theta_small[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
                node_cnt = node_cnt + n[l]*1
            # forward pass with the perturbed parameters
            Z_big[0] = X_train
            A_big[0] = X_train
            Z_small[0] = X_train
            A_small[0] = X_train
            for l in range(1, len(n)):
                Z_big[l] = np.dot(W_big[l], A_big[l-1]) + b_big[l]
                A_big[l] = f[l](Z_big[l])
                Z_small[l] = np.dot(W_small[l], A_small[l-1]) + b_small[l]
                A_small[l] = f[l](Z_small[l])
            l2_norm_big = sum([np.sum(w**2) for w in W_big])*lambd/(2.0*m)
            J_train_big = -(np.dot(np.log(A_big[layers]), Y_train.T) + np.dot(np.log(1-A_big[layers]), (1-Y_train).T))/m + l2_norm_big
            l2_norm_small = sum([np.sum(w**2) for w in W_small])*lambd/(2.0*m)
            J_train_small = -(np.dot(np.log(A_small[layers]), Y_train.T) + np.dot(np.log(1-A_small[layers]), (1-Y_train).T))/m + l2_norm_small
            dtheta_debug[t] = (J_train_big - J_train_small)[0][0]/(2.0*epsilon)
        d_diff = dtheta - dtheta_debug
        node_cnt = 0
        # restore the difference vector back into per-layer dW_diff and db_diff
        for l in range(1, len(n)):
            dW_diff[l] = d_diff[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
            node_cnt = node_cnt + n[l]*n[l-1]
            db_diff[l] = d_diff[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
            node_cnt = node_cnt + n[l]*1
        # relative difference between analytic and numerical gradients (should be very small)
        grad_diff = np.sqrt(np.sum((dtheta-dtheta_debug)**2))/(np.sqrt(np.sum(dtheta**2))+np.sqrt(np.sum(dtheta_debug**2)))
        # print("dtheta diff: %f" % grad_diff)

    # gradient descent update
    for l in range(len(n)-1, 0, -1):
        W[l] -= rate*dW[l]
        b[l] -= rate*db[l]

    # print("Iteration %d Loss: %lf" % (i, J_train[0][0]))

    # predict on the test set
    A_tmp = X_test
    for l in range(1, len(n)):
        Z_tmp = np.dot(W[l], A_tmp) + b[l]
        A_tmp = f[l](Z_tmp)
    J_test = -(np.dot(np.log(A_tmp), Y_test.T) + np.dot(np.log(1-A_tmp), (1-Y_test).T))/X_test.shape[1]
    Y_pred = 1*(A_tmp > 0.5)
    accuracy_test = (Y_pred == Y_test).mean()

    # save the losses for plotting
    loss_train.append(J_train[0][0])
    loss_test.append(J_test[0][0])

# final accuracy
print("accuracy_train: %lf" % accuracy_train)
print("accuracy_test: %lf" % accuracy_test)

# training loss (blue) and test loss (red) over iterations
plt.figure(num=0, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(range(iteration), loss_train, c="blue")
plt.plot(range(iteration), loss_test, c="red")
plt.show()

# scatter plot of the raw data, colored by class
plt.figure(num=None, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.scatter(X[0], X[1], marker='o', c=Y, s=5, edgecolor='k')
plt.show()