吴恩达深度学习笔记一:神经网络和深度学习
2018-03-01 16:05
453 查看
1. 梯度下降中的一些计算
第一个框:logistic回归方程的代价函数的导数
dj/dz可认为是
a-y
第二个框:前一级变量的导数为后一级变量导数乘以其系数。
另外求dZ和dA不需要进行 ”/m” 操作
# dA/dA_pre = (dA/dZ * dZ/dA_pre) = (dA/dZ * W), 为了表示方便去掉了"dA/", 故乘法不变: dA_pre = np.dot(W.T, dZ)
或者说在起前面的变量通过其系数来“放大”对最终结果的影响。例如:
y=2a + b, 令v=2a+b, 则 y=v, dy/dv=1 , dy/da = (dy/dv) * (dv/da) = 1*2 = 2
有关维度的计算,假设有[5,3,2,1]的网络,m个样本则:
W1[3,5], W2[2,3], W3[1,2]; Z1=A1=[3,m], Z2=A2=[2,m], Z3=A3=[1,m], 所以:
dZ3 = A3 − Y → [1,m];  dW3[1,2] = dZ3[1,m] ⋅ A2[2,m].T;  dA2[2,m] = W3[1,2].T ⋅ dZ3[1,m]
可以看出dZ的位置及转置情况有所不同,因为1、W的行列分别为本层的单元数和上一层的单元数且与m无关,而A的行为上一层的单元数,故求dW时转置取A行为列,其列消去。2、A的列和Z的列总为m, 故求dA时Z不变取其列。3、总结为:Z为纽带不转置
2. 激活函数的选择
sigmoid函数一般只用于二分类的输出层,tanh函数类似,其缺点在于当z的绝对值太大时导数很小,训练的过程就会很慢
ReLU函数是在神经网络中应用最多的激活函数,其优点在于当z为正值时导数恒为1,不易出现梯度消失,训练过程较快;当z为负值时导数为0(注意:函数仅在z=0这一点不可导),而实际中大部分隐藏单元的z为正值,整体训练仍然较快。
3. 损失函数和准确度的计算
一般来说,损失函数是对单个样本而言,使用 L 表示,代价(成本)函数是损失函数在整个样本空间的扩展,用 J 表示,但为了方便也可以使用损失函数来表达代价函数的意思。交叉熵损失函数:
J(θ) = −(1/m) ∑_{i=1}^{m} [ y^{(i)} log(h_θ(x^{(i)})) + (1 − y^{(i)}) log(1 − h_θ(x^{(i)})) ]
m = Y.shape[1]
cost = -1 / m * np.sum(np.dot(Y, np.log(AL).T) + np.dot(1 - Y, np.log(1 - AL).T))
cost = np.squeeze(cost)
准确率,AL表示最后一层的输出:
acc = (np.dot(y_train, AL.T) + np.dot(1 - y_train, 1 - AL.T)) / y_train.shape[1] * 100
4. 神经网络模型的建立
代码示例:
def initialize_deep_parameters(layer_dims=np.array([5, 4, 1])):
    """
    Initialize the parameters of an L-layer fully-connected network.

    :param layer_dims: array of unit counts per layer, including input and output.
        E.g. [2, 4, 1] gives input size 2, W1 (4,2), b1 (4,1), W2 (1,4), b2 (1,1);
        the final 1 means a single output unit (binary classification).
    :return: dict mapping 'W1'..'WL' and 'b1'..'bL' to the initialized arrays
    """
    parameters = {}
    num_layers = len(layer_dims)
    for l in range(1, num_layers):
        # Bug fix: use randn (zero-mean Gaussian), not rand (uniform [0,1)).
        # rand makes every weight positive, which hurts symmetry breaking
        # and slows learning; small Gaussian weights are the standard init.
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        # Biases can safely start at zero.
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
def linear_forward(A, W, b):
    """
    Linear part of one layer's forward propagation: Z = W·A + b.

    :param A: activations from the previous layer (the input X for layer 1)
    :param W: weight matrix of the current layer
    :param b: bias column vector of the current layer
    :return: (Z, cache) where cache stores (A, W, b) for the backward pass
    """
    pre_activation = W @ A + b
    assert (pre_activation.shape == (W.shape[0], A.shape[1]))
    return pre_activation, (A, W, b)
def linear_activation_forward(A_pre, W, b, activation='relu'):
    """
    Forward step for one layer: linear transform followed by an activation.

    :param A_pre: output of the previous layer
    :param W: weight matrix of this layer
    :param b: bias vector of this layer
    :param activation: 'sigmoid' or 'relu'
    :return: (A, cache) where cache = (linear_cache, activation_cache)
    """
    Z, linear_cache = linear_forward(A_pre, W, b)
    if activation == 'sigmoid':
        A, activation_cache = dnn_utils_v2.sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = dnn_utils_v2.relu(Z)
    else:
        # Mirrors the original behaviour: an unknown activation name falls
        # through and the function returns None.
        return None
    assert (A.shape == (W.shape[0], A_pre.shape[1]))
    return A, (linear_cache, activation_cache)
def L_model_forward(X, parameters):
    """
    Full forward pass: (L-1) ReLU layers followed by one sigmoid output layer.

    :param X: input data, shape (n_x, m)
    :param parameters: dict of 'Wl'/'bl' arrays, as produced by initialization
    :return: (AL, caches) — output-layer activations and the per-layer caches
    """
    caches = []
    # Two entries (W and b) per layer, so the layer count is half the dict size.
    num_layers = len(parameters) // 2
    A = X
    for layer in range(1, num_layers):
        A, cache = linear_activation_forward(
            A, parameters['W' + str(layer)], parameters['b' + str(layer)], activation='relu')
        caches.append(cache)
    # The output layer uses sigmoid for binary classification.
    AL, cache = linear_activation_forward(
        A, parameters['W' + str(num_layers)], parameters['b' + str(num_layers)], activation='sigmoid')
    caches.append(cache)
    assert (AL.shape == (1, X.shape[1]))
    return AL, caches
def compute_cost(AL, Y):
    """
    Compute the cross-entropy cost averaged over all m examples.

    :param AL: activations of the output (sigmoid) layer, shape (1, m)
    :param Y: true labels, shape (1, m)
    :return: scalar cross-entropy cost
    """
    # Bug fix: the original had three statements collapsed onto one line,
    # which is a SyntaxError; restored as separate statements.
    m = Y.shape[1]
    # J = -(1/m) * sum(y*log(a) + (1-y)*log(1-a)); each dot product
    # sums over the m examples.
    cost = -1 / m * np.sum(np.dot(Y, np.log(AL).T) + np.dot(1 - Y, np.log(1 - AL).T))
    # Turn the resulting 1x1 array into a plain scalar.
    cost = np.squeeze(cost)
    return cost
def linear_backward(dZ, cache):
    """
    Backward pass through the linear part of one layer.

    :param dZ: gradient of the cost w.r.t. this layer's pre-activation Z
    :param cache: (A_pre, W, b) tuple stored during the forward pass
    :return: (dA_pre, dW, db) — gradients for the previous activation, W and b
    """
    A_pre, W, _ = cache
    m = A_pre.shape[1]
    # dW and db are averaged over the m examples; dA_pre is not, because the
    # 1/m factor is applied once per layer when dW/db are formed.
    dW = (dZ @ A_pre.T) / m
    db = dZ.sum(axis=1, keepdims=True) / m
    # dA/dA_pre = (dA/dZ) * (dZ/dA_pre) = (dA/dZ) * W
    dA_pre = W.T @ dZ
    return dA_pre, dW, db
def linear_activation_backward(dA, cache, activation='relu'):
    """
    Backward step for one layer: activation gradient, then linear gradient.

    :param dA: gradient of the cost w.r.t. this layer's activation output
    :param cache: (linear_cache, activation_cache) from the forward pass
    :param activation: 'sigmoid' or 'relu'
    :return: (dA_pre, dW, db)
    """
    linear_cache, activation_cache = cache
    if activation == 'relu':
        backward_fn = dnn_utils_v2.relu_backward
    elif activation == 'sigmoid':
        backward_fn = dnn_utils_v2.sigmoid_backward
    else:
        # Mirrors the original: an unknown activation name returns None.
        return None
    dZ = backward_fn(dA, activation_cache)
    return linear_backward(dZ, linear_cache)
def L_model_backward(AL, Y, caches):
    """
    Full backward pass: sigmoid output layer, then the ReLU layers in reverse.

    :param AL: output-layer activations, shape (1, m)
    :param Y: true labels (reshaped below to match AL)
    :param caches: list of per-layer caches from L_model_forward
    :return: dict of gradients. NOTE: the 'dA' keys are shifted by one —
        grads['dA' + str(l)] holds the gradient w.r.t. the activations of
        layer l-1; the scheme is internally consistent with the loop below.
    """
    grads = {}
    L = len(caches)  # number of layers (one cache per layer)
    m = AL.shape[1]  # NOTE(review): m is unused here; averaging happens in linear_backward
    Y = Y.reshape(AL.shape)
    # Derivative of the cross-entropy cost w.r.t. AL: -(y/a - (1-y)/(1-a)).
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    current_cache = caches[L - 1]
    # Output (sigmoid) layer; per the shifted naming, 'dA'+str(L) stores dA_{L-1}.
    grads['dA' + str(L)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_backward(
        dAL, current_cache, activation='sigmoid')
    # Hidden (ReLU) layers, from layer L-1 down to layer 1 (l = layer index - 1).
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        # grads['dA' + str(l + 2)] was written by the step for the layer above.
        dA_pre, dW, db = linear_activation_backward(grads['dA' + str(l + 2)], current_cache, activation='relu')
        grads['dA' + str(l + 1)] = dA_pre
        grads['dW' + str(l + 1)] = dW
        grads['db' + str(l + 1)] = db
    return grads
def update_parameters(parametes, grads, learning_rate=0.1):
    """
    One gradient-descent step on every W and b.

    :param parametes: dict of 'Wl'/'bl' parameter arrays (mutated and returned)
    :param grads: dict of 'dWl'/'dbl' gradients
    :param learning_rate: step size
    :return: the updated parameter dict (the same object that was passed in)
    """
    num_layers = len(parametes) // 2  # each layer contributes one W and one b
    for layer in range(1, num_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            parametes[key] = parametes[key] - learning_rate * grads['d' + key]
    return parametes
if __name__ == '__main__':
    # Bug fix: np.random.rand(1) only draws one random sample and discards it;
    # seeding the RNG requires np.random.seed(1).
    np.random.seed(1)
    # Linear-unit test
    # A, W, b = testCases_v2.linear_forward_test_case()
    # Z, linear_cache = linear_forward(A, W, b)
    # print(Z)
    # Activation-function test
    # A_pre, W, b = testCases_v2.linear_activation_forward_test_case()
    # A, cache = linear_activation_forward(A_pre, W, b, activation='sigmoid')
    # print('sigmoid: ' + str(A))
    # A, cache = linear_activation_forward(A_pre, W, b, activation='relu')
    # print('relu: ' + str(A))
    # Forward-propagation test
    # X, parameters = testCases_v2.L_model_forward_test_case()
    # AL, caches = L_model_forward(X, parameters)
    # print(str(AL), str(len(caches)))
    # Cost-function test
    # Y, AL = testCases_v2.compute_cost_test_case()
    # print(compute_cost(AL, Y))
    # Gradient tests
    # dZ, linear_cache = testCases_v2.linear_backward_test_case()
    # print(linear_backward(dZ, linear_cache))
    # AL, cache = testCases_v2.linear_activation_backward_test_case()
    # print(linear_activation_backward(AL, cache, activation='sigmoid'))
    # print(linear_activation_backward(AL, cache, activation='relu'))
    # Full backward-propagation test
    # AL, Y, caches = testCases_v2.L_model_backward_test_case()
    # grads = L_model_backward(AL, Y, caches)
    # print(grads)
    # Parameter-update test
    parameters, grads = testCases_v2.update_parameters_test_case()
    parameters = update_parameters(parameters, grads, learning_rate=0.1)
    print(parameters)
使用:
def load_data():
    """
    Load the cat / non-cat dataset from the HDF5 files under datasets/.

    :return: (train_x_orig, train_y, test_x_orig, test_y, classes); the label
        arrays are reshaped into row vectors of shape (1, m).
    """
    # Bug fix: this whole span was collapsed onto a single line in the page
    # (invalid Python, split mid-assignment); restored to proper statements.
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def L_layer_model_train(x_train, y_train, layers_dims, train_num=2000, learning_rate=0.02, print_cost=True):
    """
    Train an L-layer model with plain batch gradient descent.

    :param x_train: training features, shape (n_x, m)
    :param y_train: training labels, shape (1, m)
    :param layers_dims: array of unit counts for the whole network
    :param train_num: number of gradient-descent iterations
    :param learning_rate: step size
    :param print_cost: if True, log cost/accuracy every 100 steps and plot costs
    :return: the trained parameter dict
    """
    parameters = dnn_app_utils_v2.initialize_parameters_deep(layers_dims)
    costs = []
    for i in range(1, train_num + 1):
        AL, cache = deep_neural_network.L_model_forward(x_train, parameters)
        cost = deep_neural_network.compute_cost(AL, y_train)
        grads = deep_neural_network.L_model_backward(AL, y_train, cache)
        parameters = deep_neural_network.update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 100 == 0:
            costs.append(cost)
            # Training accuracy: fraction of (predicted-positive AND positive)
            # plus (predicted-negative AND negative), as a percentage.
            acc = (np.dot(y_train, AL.T) + np.dot(1 - y_train, 1 - AL.T)) / y_train.shape[1] * 100
            print("num {} cost is: {} and acc is : {}".format(i, cost, np.squeeze(acc)))
    if print_cost:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()
    return parameters


def L_layers_model_test(x_test, y_test, parameters):
    """
    Evaluate a trained L-layer model on a held-out set.

    :param x_test: test features, shape (n_x, m)
    :param y_test: test labels, shape (1, m)
    :param parameters: trained parameter dict
    :return: (cost, acc) on the test set
    """
    y_pre, cache = deep_neural_network.L_model_forward(x_test, parameters)
    cost = deep_neural_network.compute_cost(y_pre, y_test)
    acc = (np.dot(y_test, y_pre.T) + np.dot(1 - y_test, 1 - y_pre.T)) / y_test.shape[1] * 100
    print(cost, np.squeeze(acc))
    return cost, acc


if __name__ == '__main__':
    # Bug fix: np.random.rand(1) does not seed the RNG; use np.random.seed(1).
    np.random.seed(1)
    # Read the dataset from file.
    train_x_orig, train_y, test_x_orig, test_y, classes = deep_neural_network.load_data()
    print(train_x_orig.shape, test_x_orig.shape)
    # Flatten [m, px, px, 3] into [px*px*3, m] and normalize to [0, 1].
    x_train = train_x_orig.reshape(train_x_orig.shape[0], -1).T
    x_test = test_x_orig.reshape(test_x_orig.shape[0], -1).T
    x_train = x_train / 255
    x_test = x_test / 255
    print(x_train.shape)
    # Two-layer model
    # parameters = two_layer_model_train(x_train, train_y)
    # two_layer_model_test(x_train, train_y, parameters)
    # two_layer_model_test(x_test, test_y, parameters)
    # Arbitrary-depth model
    layer_dims = np.array([12288, 20, 10, 5, 1])
    parameters = L_layer_model_train(x_train, train_y, layer_dims, learning_rate=0.01)
    L_layers_model_test(x_test, test_y, parameters)
相关文章推荐
- 吴恩达神经网络和深度学习课程自学笔记(六)之优化算法
- 吴恩达Coursera深度学习课程 DeepLearning.ai 提炼笔记(1-4)-- 深层神经网络(转载)
- 吴恩达深度学习笔记之改善神经网络(三)
- 吴恩达深度学习视频笔记1-2:《神经网络和深度学习》之《神经网络基础》
- 吴恩达Coursera深度学习课程 DeepLearning.ai 提炼笔记(1-2)-- 神经网络基础
- 吴恩达深度学习笔记 (补)1.1~1.5 神经网络概述
- 吴恩达(Andrew Ng)深度学习工程师笔记 - 第一门课-神经网络和深度学习-第一周深度学习概论-第二节:什么是神经网络
- 吴恩达(Andrew Ng)深度学习工程师笔记 - 第一门课-神经网络和深度学习-第一周深度学习概论-第三节:用神经网络进行监督学习
- 吴恩达深度学习笔记之改善神经网络(一)
- 吴恩达神经网络和深度学习课程自学笔记(七)之超参数调试,Batch正则化和程序框架
- 吴恩达神经网络和深度学习课程自学笔记(三)之浅层神经网络
- 吴恩达(Andrew Ng)深度学习工程师笔记 - 第一门课-神经网络和深度学习-第一周深度学习概论-第四节:为什么深度学习会兴起?
- 神经网络与深度学习_吴恩达 学习笔记(一)
- 吴恩达老师深度学习视频课笔记:单隐含层神经网络公式推导及C++实现(二分类)
- 吴恩达(Andrew Ng)深度学习工程师笔记 - 第一门课-神经网络和深度学习-第一周深度学习概论-第五节:关于这门课
- 吴恩达深度学习入门学习笔记之神经网络和深度学习(第二周:神经网络基础)
- 吴恩达深度学习课程笔记 1.2什么是神经网络?
- 吴恩达深度学习入门学习笔记之神经网络和深度学习(第二周:神经网络基础)
- 吴恩达深度学习课程笔记之卷积神经网络基本操作详解
- 吴恩达(Andrew Ng)深度学习工程师笔记 - 第一门课-神经网络和深度学习-第一周深度学习概论-第六节:课程资源