算法流程:
Python代码实现:
- import numpy as np
- np.set_printoptions(suppress=True) #取消使用科学计数法
-
def Sigmoid(x):
    """Logistic activation function.

    Args:
        x: A scalar or NumPy array of pre-activation values.

    Returns:
        ``1 / (1 + exp(-x))``, evaluated element-wise when ``x`` is an array.
    """
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator
-
-
def forward(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4):
    """Forward pass of the 2-2-2 network over every sample in ``X``.

    Args:
        X: Array of samples; the loop runs over its first axis.
        v1, v2: Weight vectors dotted with a sample to give the inputs of
            hidden neurons 1 and 2.
        w1, w2: Weight vectors dotted with the hidden outputs to give the
            inputs of output neurons 1 and 2.
        yz1, yz2: Thresholds subtracted from the hidden-neuron inputs.
        yz3, yz4: Thresholds subtracted from the output-neuron inputs.

    Returns:
        Array holding one ``(y_hat1, y_hat2)`` prediction row per sample.
    """
    predictions = []
    for sample in X:
        # Hidden-layer outputs, which are also the output layer's inputs.
        hidden = np.array([
            Sigmoid(np.dot(sample, v1) - yz1),
            Sigmoid(np.dot(sample, v2) - yz2),
        ])
        # Predicted values of the two output neurons for this sample.
        predictions.append((
            Sigmoid(np.dot(hidden, w1) - yz3),
            Sigmoid(np.dot(hidden, w2) - yz4),
        ))
    return np.array(predictions)
-
-
def loss(y, y_hat):
    """Half squared error, averaged over the output neurons.

    Computes ``sum((y - y_hat) ** 2) / (2 * n)`` where ``n = y.shape[0]`` is
    the number of output neurons. The previous expression ``... / 2 * n`` was
    evaluated left to right as ``(... / 2) * n`` and therefore multiplied the
    error by ``n`` instead of dividing by it.

    Note: this differs from the un-normalised ``1/2 * sum(...)`` error only by
    the constant factor ``1 / n``.

    Args:
        y: NumPy array of target values for one sample.
        y_hat: NumPy array of predicted values, same shape as ``y``.

    Returns:
        The scalar error; ``0.0`` when ``y`` has no elements.
    """
    n_outputs = y.shape[0]
    if n_outputs == 0:
        # Nothing to compare; avoid a 0/0 division.
        return 0.0
    return np.sum((y - y_hat) ** 2) / (2 * n_outputs)
-
# Error back-propagation (standard BP: parameters are updated after every sample)
def BackPropagation(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4, Y, learnrate):
    """Run one pass of standard (per-sample) BP over the training set.

    The weight layout is the one used by ``forward``: ``v1``/``v2`` are the
    incoming weights of hidden neurons 1/2 (``h = dot(x, v)``) and ``w1``/``w2``
    are the incoming weights of output neurons 1/2 (``out = dot(h_out, w)``).
    The gradient terms and updates below follow that layout; the previous
    version mixed it with an "outgoing weights" layout, which swapped the
    cross terms (``v1[1]``/``v2[0]`` and ``w1[1]``/``w2[0]``).

    Each sample's prediction is computed with the parameters as they are at
    that moment, so the output and hidden gradient terms are consistent with
    each other after earlier samples have already updated the weights.

    Args:
        X: Array of samples with two features each.
        v1, v2, w1, w2: Two-element weight vectors; updated in place.
        yz1, yz2: Thresholds of the hidden neurons.
        yz3, yz4: Thresholds of the output neurons.
        Y: Array of two-element target vectors, aligned with ``X``.
        learnrate: Learning rate.

    Returns:
        ``(v1, v2, w1, w2, yz1, yz2, yz3, yz4)`` after the pass. The summed
        per-sample error is printed as a side effect.
    """
    Ek_sum = 0
    for i in range(X.shape[0]):
        x = X[i]
        target = Y[i]
        # Forward pass for this sample with the current parameters.
        h_out = np.array([
            Sigmoid(np.dot(x, v1) - yz1),
            Sigmoid(np.dot(x, v2) - yz2),
        ])
        y1 = Sigmoid(np.dot(h_out, w1) - yz3)
        y2 = Sigmoid(np.dot(h_out, w2) - yz4)
        Ek_sum = Ek_sum + loss(target, np.array([y1, y2]))
        # Gradient terms of the output neurons.
        g1 = y1 * (1 - y1) * (target[0] - y1)
        g2 = y2 * (1 - y2) * (target[1] - y2)
        # Gradient terms of the hidden neurons; w1[h] / w2[h] connect hidden
        # neuron h to output neurons 1 / 2. Must use the pre-update weights.
        e1 = h_out[0] * (1 - h_out[0]) * (w1[0] * g1 + w2[0] * g2)
        e2 = h_out[1] * (1 - h_out[1]) * (w1[1] * g1 + w2[1] * g2)
        # Update connection weights (in place) and thresholds.
        for k in range(2):
            w1[k] = w1[k] + learnrate * g1 * h_out[k]
            w2[k] = w2[k] + learnrate * g2 * h_out[k]
            v1[k] = v1[k] + learnrate * e1 * x[k]
            v2[k] = v2[k] + learnrate * e2 * x[k]
        yz1 = yz1 + (-learnrate * e1)
        yz2 = yz2 + (-learnrate * e2)
        yz3 = yz3 + (-learnrate * g1)
        yz4 = yz4 + (-learnrate * g2)
    print(Ek_sum)
    return v1, v2, w1, w2, yz1, yz2, yz3, yz4
-
# Error back-propagation (accumulated BP: one update per pass over the training set)
def BackPropagation_accumulate(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4, Y, learnrate):
    """Run one accumulated-BP step using the mean gradient over all samples.

    For every sample the per-sample updates (``g * h_out`` for the output
    weights, ``e * x`` for the hidden weights, ``g`` / ``e`` for the
    thresholds) are summed; the parameters are then moved once by
    ``learnrate`` times the mean of those sums. The previous version averaged
    ``g`` and ``e`` first and multiplied the averages by the input and hidden
    output of the *last* sample only, which is not the gradient of the
    accumulated error.

    The weight layout is the one used by ``forward``: ``v1``/``v2`` are the
    incoming weights of hidden neurons 1/2 and ``w1``/``w2`` the incoming
    weights of output neurons 1/2.

    Args:
        X: Array of samples with two features each.
        v1, v2, w1, w2: Two-element weight vectors; updated in place.
        yz1, yz2: Thresholds of the hidden neurons.
        yz3, yz4: Thresholds of the output neurons.
        Y: Array of two-element target vectors, aligned with ``X``.
        learnrate: Learning rate.

    Returns:
        ``(v1, v2, w1, w2, yz1, yz2, yz3, yz4)`` after the step; unchanged when
        ``X`` contains no samples. The summed per-sample error is printed as a
        side effect.
    """
    n_samples = X.shape[0]
    Ek_sum = 0
    if n_samples == 0:
        # No data: nothing to average, so leave every parameter as it is.
        print(Ek_sum)
        return v1, v2, w1, w2, yz1, yz2, yz3, yz4

    # Parameters stay fixed during the pass, so one batch forward call is valid.
    Y_hat = forward(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4)

    # Running sums of the per-sample update directions.
    grad_v1 = np.array([0.0, 0.0])
    grad_v2 = np.array([0.0, 0.0])
    grad_w1 = np.array([0.0, 0.0])
    grad_w2 = np.array([0.0, 0.0])
    g1_sum = 0.0
    g2_sum = 0.0
    e1_sum = 0.0
    e2_sum = 0.0

    for i in range(n_samples):
        x = X[i]
        # Hidden-layer outputs for this sample.
        h_out = np.array([
            Sigmoid(np.dot(x, v1) - yz1),
            Sigmoid(np.dot(x, v2) - yz2),
        ])
        y1 = Y_hat[i][0]
        y2 = Y_hat[i][1]
        Ek_sum = Ek_sum + loss(Y[i], Y_hat[i])
        # Gradient terms of the output neurons.
        g1 = y1 * (1 - y1) * (Y[i][0] - y1)
        g2 = y2 * (1 - y2) * (Y[i][1] - y2)
        # Gradient terms of the hidden neurons; w1[h] / w2[h] connect hidden
        # neuron h to output neurons 1 / 2.
        e1 = h_out[0] * (1 - h_out[0]) * (w1[0] * g1 + w2[0] * g2)
        e2 = h_out[1] * (1 - h_out[1]) * (w1[1] * g1 + w2[1] * g2)
        # Accumulate the products per sample (not the bare gradient terms).
        grad_w1 = grad_w1 + g1 * h_out
        grad_w2 = grad_w2 + g2 * h_out
        grad_v1 = grad_v1 + e1 * x
        grad_v2 = grad_v2 + e2 * x
        g1_sum = g1_sum + g1
        g2_sum = g2_sum + g2
        e1_sum = e1_sum + e1
        e2_sum = e2_sum + e2

    # Single update with the mean gradient: weights in place, thresholds rebound.
    step = learnrate / n_samples
    for k in range(2):
        v1[k] = v1[k] + step * grad_v1[k]
        v2[k] = v2[k] + step * grad_v2[k]
        w1[k] = w1[k] + step * grad_w1[k]
        w2[k] = w2[k] + step * grad_w2[k]
    yz1 = yz1 + (-step * e1_sum)
    yz2 = yz2 + (-step * e2_sum)
    yz3 = yz3 + (-step * g1_sum)
    yz4 = yz4 + (-step * g2_sum)
    print(Ek_sum)
    return v1, v2, w1, w2, yz1, yz2, yz3, yz4
-
-
# ---------------------main-------------------
# Output encoding: a good melon has target [1, 0], a bad melon has target [0, 1].
# Draw all twelve connection weights and thresholds at random from [0, 1).
init = np.random.random(12)
v1 = init[0:2].copy()
v2 = init[2:4].copy()
w1 = init[4:6].copy()
w2 = init[6:8].copy()
yz1, yz2, yz3, yz4 = init[8:12]
# Learning rate
learnrate = 0.1
# Density of each melon
p = [
    0.634, 0.608, 0.556, 0.403, 0.481, 0.437,
    0.666, 0.639, 0.657, 0.593, 0.719,
]
# Sugar content of each melon
sug = [
    0.264, 0.318, 0.215, 0.237, 0.149, 0.211,
    0.091, 0.161, 0.198, 0.042, 0.103,
]
# Targets: the first six samples are good melons, the last five are bad ones.
Y = np.array([[1, 0]] * 6 + [[0, 1]] * 5)
# One (density, sugar) row per sample.
X = np.column_stack((p, sug))
# Predictions produced by the initial weights and thresholds.
a = forward(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4)
print(a)
# Training: 10000 passes over the training set.
for _ in range(10000):
    v1, v2, w1, w2, yz1, yz2, yz3, yz4 = BackPropagation_accumulate(
        X, v1, v2, w1, w2, yz1, yz2, yz3, yz4, Y, learnrate
    )
# Predictions produced by the trained weights and thresholds.
b = forward(X, v1, v2, w1, w2, yz1, yz2, yz3, yz4)
print(b)
本次实验中,使用一个隐层,且该隐层只包含2个神经元;在输出层,我使用[0, 1]表示坏瓜,[1, 0]表示好瓜。
训练集:
训练之前根据初始的权值和阈值 算出来的预测值:
训练时:误差函数不断下降
训练10000次后 输出的预测值:
累积BP:误差函数下降得很慢(我认为可能是分类编码的原因)
训练了一万次:
下降得非常慢,以我目前的水平还不知道原因。