机器学习-周志华-课后习题答案5.5
2017-03-17 21:39
405 查看
5.5 试编程实现标准BP算法和累积BP算法,在西瓜数据集3.0上分别用这两个算法训练一个单隐层网络,并进行比较。
通过编程实践发现,在本例下要达到某一限定的均方误差时,标准BP算法比累积BP算法明显收敛更快,特别在本例中,将ABP算法误差设定到0.01时,其更新权重次数十分庞大。
本人采用标准BP算法(隐层10个神经元)获取数据集在误差小于0.01时的各项权重,算得其错误率为2/17,训练291轮,更新权重4947次(标准BP每轮对17个样本逐一更新,291×17=4947);相应地,用累积BP(ABP)算法(隐层10个神经元)误差小于0.2时的权重系数算得其错误率为2/17,训练1884轮,更新权重1884次(每轮只更新一次)。由此可见,虽然ABP可能收敛更慢,但是其分类精度比同等条件下的BP算法要高。
下面附上代码:
# -*- coding: utf-8 -*-
# STANDARD BP-NN & ACCUMULATED BP-NN
import numpy as np
class Data(object):
    """Dataset wrapper that trains a single-hidden-layer sigmoid network.

    The last column of ``data`` holds the label and every other column is an
    input attribute. Targets are built as ``[label, 1 - label]``, so the
    labels are assumed to be binary 0/1 (which yields two output neurons).
    """

    def __init__(self, data):
        """Store the samples and derive the network's input/output sizes."""
        self.data = np.array(data)
        self.rows = len(self.data[:, 0])
        self.cols = len(self.data[0, :])  # includes the label column
        self.__eta = 0.1  # learning rate, initially 0.1
        self.__in = self.cols - 1  # number of input neurons
        # number of output neurons = number of distinct label values
        self.__out = len(np.unique(self.data[:, -1]))

    def set_eta(self, n):
        """Set the learning rate used by both training methods."""
        self.__eta = n

    def get_eta(self):
        """Return the current learning rate."""
        return self.__eta

    def get_in(self):
        """Return the number of input neurons."""
        return self.__in

    def get_out(self):
        """Return the number of output neurons."""
        return self.__out

    @staticmethod
    def _sigmoid(x):
        """Logistic activation function."""
        return 1 / (1 + np.exp(-x))

    def _init_network(self, q):
        """Build inputs, targets and random initial weights for q hidden units.

        A constant -1 column is prepended to X so that row 0 of ``v`` acts as
        the hidden-layer threshold (gamma); row 0 of ``w`` plays the same
        role for the output layer (theta).
        """
        X = np.insert(self.data[:, :-1].astype(float), 0, -1.0, axis=1)
        labels = self.data[:, -1]
        Y = np.array([labels, 1 - labels]).transpose()
        # v is drawn before w, matching the original draw order.
        v = np.random.random((self.__in + 1, q))  # v[0] = gamma
        w = np.random.random((q + 1, self.__out))  # w[0] = theta
        return X, Y, v, w

    def _sample_gradients(self, x, y, v, w):
        """Return (grad_v, grad_w, error) for one sample at the given weights.

        The gradients are the un-scaled update directions; callers multiply
        them by the learning rate.
        """
        hidden = self._sigmoid(x @ v)  # hidden outputs, shape (q,)
        # Prepend b_0 = -1 so that w[0] is applied as the output threshold.
        b = np.concatenate(([-1.0], hidden))  # shape (q+1,)
        y_cal = self._sigmoid(b @ w)  # network outputs, shape (l,)
        g = y_cal * (1 - y_cal) * (y - y_cal)  # output-layer gradient term
        # The threshold row w[0] is skipped: it has no hidden unit behind it.
        e = hidden * (1 - hidden) * (w[1:, :] @ g)  # hidden-layer term
        error = 0.5 * np.sum((y - y_cal) ** 2)
        return np.outer(x, e), np.outer(b, g), error

    def BP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with standard BP: weights are updated after every sample.

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error of an
                epoch is no greater than this value.
            max_iter: optional cap on the number of epochs; ``None`` (the
                default) means no cap.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape ``(d+1, q)`` and
            ``(q+1, l)``, with the threshold terms in row 0.
        """
        X, Y, v, w = self._init_network(q)
        eta = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            epoch_error = 0.0
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_error = self._sample_gradients(x, y, v, w)
                w += eta * grad_w
                v += eta * grad_v
                epoch_error += sample_error
            # Average over the whole epoch; judging convergence by the last
            # sample alone would stop as soon as that one sample is fitted.
            gap = epoch_error / self.rows
        print('BP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)

    def ABP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with accumulated BP: one averaged update per epoch.

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error of an
                epoch is no greater than this value.
            max_iter: optional cap on the number of epochs; ``None`` (the
                default) means no cap.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape ``(d+1, q)`` and
            ``(q+1, l)``, with the threshold terms in row 0.
        """
        X, Y, v, w = self._init_network(q)
        eta = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            sum_v = np.zeros_like(v)
            sum_w = np.zeros_like(w)
            epoch_error = 0.0
            # Every sample in the epoch is evaluated at the same weights.
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_error = self._sample_gradients(x, y, v, w)
                sum_v += grad_v
                sum_w += grad_w
                epoch_error += sample_error
            w += eta * sum_w / self.rows
            v += eta * sum_v / self.rows
            gap = epoch_error / self.rows
        print('ABP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)
def test_NN(a,v,w):
X = a.data[:,:-1]
X = np.insert(X,[0],-1,axis=1)
Y = np.array([a.data[:,-1], 1-a.data[:,-1]]).transpose()
y_cal = np.zeros((a.rows,2))
def f(x): # sigmoid function
s = 1 / (1 + np.exp(-x))
return s
for i in range(a.rows):
alpha = np.mat(X[i,:]) * v # 1*q matrix
b_init = f(alpha) # 1*q matrix
b = np.insert(b_init.T,[0],-1,axis=0) # (q+1)*1 matrix
beta = b.T * w # 1*l matrix
y_cal[i,:] = np.array(f(beta)) # 1*l array
print(y_cal)
# Watermelon dataset 3.0 with its categorical attributes encoded as integers.
# Each row holds six integer-coded attributes, two continuous attributes and
# the 0/1 label in the last column (presumably colour, root, knock sound,
# texture, navel, touch, density, sugar content, good-melon flag -- confirm
# against the textbook table).
D = np.array(
    [
        [1, 1, 1, 1, 1, 1, 0.697, 0.460, 1],
        [2, 1, 2, 1, 1, 1, 0.774, 0.376, 1],
        [2, 1, 1, 1, 1, 1, 0.634, 0.264, 1],
        [1, 1, 2, 1, 1, 1, 0.608, 0.318, 1],
        [3, 1, 1, 1, 1, 1, 0.556, 0.215, 1],
        [1, 2, 1, 1, 2, 2, 0.403, 0.237, 1],
        [2, 2, 1, 2, 2, 2, 0.481, 0.149, 1],
        [2, 2, 1, 1, 2, 1, 0.437, 0.211, 1],
        [2, 2, 2, 2, 2, 1, 0.666, 0.091, 0],
        [1, 3, 3, 1, 3, 2, 0.243, 0.267, 0],
        [3, 3, 3, 3, 3, 1, 0.245, 0.057, 0],
        [3, 1, 1, 3, 3, 2, 0.343, 0.099, 0],
        [1, 2, 1, 2, 1, 1, 0.639, 0.161, 0],
        [3, 2, 2, 2, 1, 1, 0.657, 0.198, 0],
        [2, 2, 1, 1, 2, 2, 0.360, 0.370, 0],
        [3, 1, 1, 3, 3, 1, 0.593, 0.042, 0],
        [1, 1, 2, 2, 2, 1, 0.719, 0.103, 0],
    ]
)
a = Data(D)

# Train the accumulated-BP network first, then the standard-BP network,
# each with its own error threshold.
v, w = a.ABP_NN(err=0.2)
v1, w1 = a.BP_NN(err=0.01)

# Print the outputs of both trained networks on the training samples,
# accumulated BP first.
test_NN(a, v, w)
test_NN(a, v1, w1)
运行结果:
ABP_round: 1884
BP_round: 291
[[ 0.52207288 0.45324987]
[ 0.52987926 0.44755556]
[ 0.54584984 0.42441809]
[ 0.4985367 0.48468109]
[ 0.56875787 0.39464855]
[ 0.52142392 0.47297261]
[ 0.46626988 0.53539895]
[ 0.50013411 0.49477303]
[ 0.41035128 0.60548034]
[ 0.42516587 0.59000489]
[ 0.3507589 0.67957016]
[ 0.40119524 0.61470023]
[ 0.43723545 0.57121177]
[ 0.46565608 0.532883 ]
[ 0.54464163 0.43843949]
[ 0.37772451 0.64457881]
[ 0.40085134 0.61430352]]
[[ 0.84115947 0.13747515]
[ 0.80969383 0.17228699]
[ 0.86565802 0.11538309]
[ 0.6917523 0.2886161 ]
[ 0.8867624 0.09633574]
[ 0.80368707 0.17604059]
[ 0.4655449 0.52490606]
[ 0.53996253 0.44998827]
[ 0.07757502 0.9308757 ]
[ 0.10231002 0.90658563]
[ 0.03851867 0.96698173]
[ 0.12009371 0.88737141]
[ 0.16490322 0.8421109 ]
[ 0.17730987 0.83332648]
[ 0.84579538 0.13594652]
[ 0.05885429 0.94756339]
[ 0.10192718 0.90597301]]
通过编程实践发现,在本例下要达到某一限定的均方误差时,标准BP算法比累积BP算法明显收敛更快,特别在本例中,将ABP算法误差设定到0.01时,其更新权重次数十分庞大。
本人采用标准BP算法(隐层10个神经元)获取数据集在误差小于0.01时的各项权重,算得其错误率为2/17,训练291轮,更新权重4947次(标准BP每轮对17个样本逐一更新,291×17=4947);相应地,用累积BP(ABP)算法(隐层10个神经元)误差小于0.2时的权重系数算得其错误率为2/17,训练1884轮,更新权重1884次(每轮只更新一次)。由此可见,虽然ABP可能收敛更慢,但是其分类精度比同等条件下的BP算法要高。
下面附上代码:
# -*- coding: utf-8 -*-
# STANDARD BP-NN & ACCUMULATED BP-NN
import numpy as np
class Data(object):
    """Dataset wrapper that trains a single-hidden-layer sigmoid network.

    The last column of ``data`` holds the label and every other column is an
    input attribute. Targets are built as ``[label, 1 - label]``, so the
    labels are assumed to be binary 0/1 (which yields two output neurons).
    """

    def __init__(self, data):
        """Store the samples and derive the network's input/output sizes."""
        self.data = np.array(data)
        self.rows = len(self.data[:, 0])
        self.cols = len(self.data[0, :])  # includes the label column
        self.__eta = 0.1  # learning rate, initially 0.1
        self.__in = self.cols - 1  # number of input neurons
        # number of output neurons = number of distinct label values
        self.__out = len(np.unique(self.data[:, -1]))

    def set_eta(self, n):
        """Set the learning rate used by both training methods."""
        self.__eta = n

    def get_eta(self):
        """Return the current learning rate."""
        return self.__eta

    def get_in(self):
        """Return the number of input neurons."""
        return self.__in

    def get_out(self):
        """Return the number of output neurons."""
        return self.__out

    @staticmethod
    def _sigmoid(x):
        """Logistic activation function."""
        return 1 / (1 + np.exp(-x))

    def _init_network(self, q):
        """Build inputs, targets and random initial weights for q hidden units.

        A constant -1 column is prepended to X so that row 0 of ``v`` acts as
        the hidden-layer threshold (gamma); row 0 of ``w`` plays the same
        role for the output layer (theta).
        """
        X = np.insert(self.data[:, :-1].astype(float), 0, -1.0, axis=1)
        labels = self.data[:, -1]
        Y = np.array([labels, 1 - labels]).transpose()
        # v is drawn before w, matching the original draw order.
        v = np.random.random((self.__in + 1, q))  # v[0] = gamma
        w = np.random.random((q + 1, self.__out))  # w[0] = theta
        return X, Y, v, w

    def _sample_gradients(self, x, y, v, w):
        """Return (grad_v, grad_w, error) for one sample at the given weights.

        The gradients are the un-scaled update directions; callers multiply
        them by the learning rate.
        """
        hidden = self._sigmoid(x @ v)  # hidden outputs, shape (q,)
        # Prepend b_0 = -1 so that w[0] is applied as the output threshold.
        b = np.concatenate(([-1.0], hidden))  # shape (q+1,)
        y_cal = self._sigmoid(b @ w)  # network outputs, shape (l,)
        g = y_cal * (1 - y_cal) * (y - y_cal)  # output-layer gradient term
        # The threshold row w[0] is skipped: it has no hidden unit behind it.
        e = hidden * (1 - hidden) * (w[1:, :] @ g)  # hidden-layer term
        error = 0.5 * np.sum((y - y_cal) ** 2)
        return np.outer(x, e), np.outer(b, g), error

    def BP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with standard BP: weights are updated after every sample.

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error of an
                epoch is no greater than this value.
            max_iter: optional cap on the number of epochs; ``None`` (the
                default) means no cap.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape ``(d+1, q)`` and
            ``(q+1, l)``, with the threshold terms in row 0.
        """
        X, Y, v, w = self._init_network(q)
        eta = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            epoch_error = 0.0
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_error = self._sample_gradients(x, y, v, w)
                w += eta * grad_w
                v += eta * grad_v
                epoch_error += sample_error
            # Average over the whole epoch; judging convergence by the last
            # sample alone would stop as soon as that one sample is fitted.
            gap = epoch_error / self.rows
        print('BP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)

    def ABP_NN(self, q=10, err=0.1, max_iter=None):
        """Train with accumulated BP: one averaged update per epoch.

        Args:
            q: number of hidden neurons.
            err: training stops once the mean per-sample squared error of an
                epoch is no greater than this value.
            max_iter: optional cap on the number of epochs; ``None`` (the
                default) means no cap.

        Returns:
            ``(v, w)`` as ``np.matrix`` objects of shape ``(d+1, q)`` and
            ``(q+1, l)``, with the threshold terms in row 0.
        """
        X, Y, v, w = self._init_network(q)
        eta = self.__eta
        gap = 1
        counter = 0
        while gap > err and (max_iter is None or counter < max_iter):
            counter += 1
            sum_v = np.zeros_like(v)
            sum_w = np.zeros_like(w)
            epoch_error = 0.0
            # Every sample in the epoch is evaluated at the same weights.
            for x, y in zip(X, Y):
                grad_v, grad_w, sample_error = self._sample_gradients(x, y, v, w)
                sum_v += grad_v
                sum_w += grad_w
                epoch_error += sample_error
            w += eta * sum_w / self.rows
            v += eta * sum_v / self.rows
            gap = epoch_error / self.rows
        print('ABP_round:', counter)
        return np.asmatrix(v), np.asmatrix(w)
def test_NN(a,v,w):
X = a.data[:,:-1]
X = np.insert(X,[0],-1,axis=1)
Y = np.array([a.data[:,-1], 1-a.data[:,-1]]).transpose()
y_cal = np.zeros((a.rows,2))
def f(x): # sigmoid function
s = 1 / (1 + np.exp(-x))
return s
for i in range(a.rows):
alpha = np.mat(X[i,:]) * v # 1*q matrix
b_init = f(alpha) # 1*q matrix
b = np.insert(b_init.T,[0],-1,axis=0) # (q+1)*1 matrix
beta = b.T * w # 1*l matrix
y_cal[i,:] = np.array(f(beta)) # 1*l array
print(y_cal)
# Watermelon dataset 3.0 with its categorical attributes encoded as integers.
# Each row holds six integer-coded attributes, two continuous attributes and
# the 0/1 label in the last column (presumably colour, root, knock sound,
# texture, navel, touch, density, sugar content, good-melon flag -- confirm
# against the textbook table).
D = np.array(
    [
        [1, 1, 1, 1, 1, 1, 0.697, 0.460, 1],
        [2, 1, 2, 1, 1, 1, 0.774, 0.376, 1],
        [2, 1, 1, 1, 1, 1, 0.634, 0.264, 1],
        [1, 1, 2, 1, 1, 1, 0.608, 0.318, 1],
        [3, 1, 1, 1, 1, 1, 0.556, 0.215, 1],
        [1, 2, 1, 1, 2, 2, 0.403, 0.237, 1],
        [2, 2, 1, 2, 2, 2, 0.481, 0.149, 1],
        [2, 2, 1, 1, 2, 1, 0.437, 0.211, 1],
        [2, 2, 2, 2, 2, 1, 0.666, 0.091, 0],
        [1, 3, 3, 1, 3, 2, 0.243, 0.267, 0],
        [3, 3, 3, 3, 3, 1, 0.245, 0.057, 0],
        [3, 1, 1, 3, 3, 2, 0.343, 0.099, 0],
        [1, 2, 1, 2, 1, 1, 0.639, 0.161, 0],
        [3, 2, 2, 2, 1, 1, 0.657, 0.198, 0],
        [2, 2, 1, 1, 2, 2, 0.360, 0.370, 0],
        [3, 1, 1, 3, 3, 1, 0.593, 0.042, 0],
        [1, 1, 2, 2, 2, 1, 0.719, 0.103, 0],
    ]
)
a = Data(D)

# Train the accumulated-BP network first, then the standard-BP network,
# each with its own error threshold.
v, w = a.ABP_NN(err=0.2)
v1, w1 = a.BP_NN(err=0.01)

# Print the outputs of both trained networks on the training samples,
# accumulated BP first.
test_NN(a, v, w)
test_NN(a, v1, w1)
运行结果:
ABP_round: 1884
BP_round: 291
[[ 0.52207288 0.45324987]
[ 0.52987926 0.44755556]
[ 0.54584984 0.42441809]
[ 0.4985367 0.48468109]
[ 0.56875787 0.39464855]
[ 0.52142392 0.47297261]
[ 0.46626988 0.53539895]
[ 0.50013411 0.49477303]
[ 0.41035128 0.60548034]
[ 0.42516587 0.59000489]
[ 0.3507589 0.67957016]
[ 0.40119524 0.61470023]
[ 0.43723545 0.57121177]
[ 0.46565608 0.532883 ]
[ 0.54464163 0.43843949]
[ 0.37772451 0.64457881]
[ 0.40085134 0.61430352]]
[[ 0.84115947 0.13747515]
[ 0.80969383 0.17228699]
[ 0.86565802 0.11538309]
[ 0.6917523 0.2886161 ]
[ 0.8867624 0.09633574]
[ 0.80368707 0.17604059]
[ 0.4655449 0.52490606]
[ 0.53996253 0.44998827]
[ 0.07757502 0.9308757 ]
[ 0.10231002 0.90658563]
[ 0.03851867 0.96698173]
[ 0.12009371 0.88737141]
[ 0.16490322 0.8421109 ]
[ 0.17730987 0.83332648]
[ 0.84579538 0.13594652]
[ 0.05885429 0.94756339]
[ 0.10192718 0.90597301]]
相关文章推荐
- 《机器学习》周志华 习题答案5.5
- 机器学习-周志华-课后习题答案5.7
- 《机器学习(周志华)》 习题5.5答案
- 机器学习-周志华-课后习题答案-决策树
- 机器学习--周志华--课后习题3.3答案
- 《机器学习》(周志华)课后习题参考答案
- 习题答案探讨 - 《机器学习》周志华 - 第一章 绪论
- 周志华机器学习第一章总结及课后答案参考
- 《机器学习》周志华 课后习题3.3:编程实现对率回归,并给出西瓜数据集 3.0α 上的结果.
- 周志华《机器学习》课后习题解答系列(六):Ch5.8 - SOM网络实验
- 周志华《机器学习》课后习题解答系列(四):Ch3.3 - 编程实现对率回归
- 《机器学习》周志华 课后习题3.3:编程实现对率回归,并给出西瓜数据集 3.0α 上的结果.
- 机器学习(周志华) 参考答案 第五章 神经网络 5.5
- 周志华《机器学习》课后习题解答系列(四):Ch3.5 - 编程实现线性判别分析
- 周志华机器学习第二章总结及课后答案
- 周志华《机器学习》课后答案——第4章.决策树
- 周志华《机器学习》课后习题解答系列(六):Ch5.10 - 卷积神经网络实验
- 《机器学习(周志华)》 习题4.3答案
- 《机器学习(周志华)》习题10.1 答案
- 周志华《机器学习》课后习题解答系列(六):Ch5 - 神经网络