标签:tor 结果 网络 导数 需要 lam 基础 情况 activator
前向传播:
\begin{align}\notag x_j^l = f\Big(\sum_{i\in M_j} x_i^{l-1} \ast k_{ij}^l + b_j^l\Big) \end{align}
def forward(self, input_array):
    """
    Compute the output of the convolution layer.

    The input is remembered on the layer, passed through ``padding`` with
    ``self.zero_padding``, and fed to ``conv`` once per filter.  Finally
    ``self.activator.forward`` is applied through ``element_wise_op``.
    The result is stored in ``self.output_array``; nothing is returned.
    """
    self.input_array = input_array
    padded = padding(input_array, self.zero_padding)
    self.padded_input_array = padded
    for index in range(self.filter_number):
        kernel = self.filters[index]
        # conv writes its result into the output slice for this filter.
        conv(
            padded,
            kernel.get_weights(),
            self.output_array[index],
            self.stride,
            kernel.get_bias(),
        )
    element_wise_op(self.output_array, self.activator.forward)
# Compute a convolution
def conv(input_array, kernel_array, output_array, stride, bias):
    """
    Compute a convolution; works for both 2D and 3D inputs.

    For every output position (i, j) the matching input patch is multiplied
    element-wise with ``kernel_array``, summed, and ``bias`` is added.

    input_array: array the patches are taken from (via ``get_patch``)
    kernel_array: kernel; its last two axes are (height, width)
    output_array: 2D array that is filled IN PLACE; its shape determines how
        many positions are evaluated
    stride: step between neighbouring patches
    bias: scalar added to every output element

    Returns nothing; the result is written into ``output_array``.
    """
    output_height, output_width = output_array.shape[:2]
    kernel_height, kernel_width = kernel_array.shape[-2:]
    for i in range(output_height):
        for j in range(output_width):
            patch = get_patch(input_array, i, j, kernel_width, kernel_height, stride)
            output_array[i, j] = (patch * kernel_array).sum() + bias
# Extract the region used by one convolution step
def get_patch(input_array, i, j, filter_width, filter_height, stride):
    """
    Return the region of ``input_array`` covered by the filter at output
    position (i, j).

    The window is taken over the LAST two axes (height, width); any leading
    axes (e.g. channels) are kept whole, so 2D and 3D inputs are both
    handled, as is any higher rank.

    input_array: array with at least 2 dimensions
    i, j: output row / column index
    filter_width, filter_height: size of the window
    stride: step between neighbouring windows

    Returns a slice (view) of ``input_array``.
    Raises ValueError if ``input_array`` has fewer than 2 dimensions.
    """
    if input_array.ndim < 2:
        # Previously this case fell through and returned None silently.
        raise ValueError(
            "get_patch expects an array with at least 2 dimensions, got %d"
            % input_array.ndim
        )
    start_i = i * stride
    start_j = j * stride
    return input_array[
        ...,
        start_i:start_i + filter_height,
        start_j:start_j + filter_width,
    ]
前向传播:
\begin{align}\notag x_j^l = f(\beta_j^l down(x_j^{l-1}) + b_j^l) \end{align}
def forward(self, input_array):
    """
    Max-pooling forward pass.

    For every channel and every output position, the maximum of the
    corresponding input window (obtained with ``get_patch``) is written to
    ``self.output_array``.  Nothing is returned.
    """
    rows = int(self.output_height)
    cols = int(self.output_width)
    for channel in range(self.channel_number):
        plane = input_array[channel]
        for row in range(rows):
            for col in range(cols):
                window = get_patch(
                    plane, row, col,
                    self.filter_width, self.filter_height, self.stride,
                )
                self.output_array[channel, row, col] = window.max()
反向传播:
\begin{align}\notag \delta_j^l = f^\prime(u_j^l)\circ \operatorname{conv2}\big(\delta_j^{l+1},\ \operatorname{rot180}(k_j^{l+1}),\ \text{'full'}\big) \end{align}
def bp_sensitivity_map(self, sensitivity_array, activator):
    """
    Compute the sensitivity map that is passed to the previous layer.

    sensitivity_array: sensitivity map of this layer
    activator: activation function of the previous layer

    The result is stored in ``self.delta_array``; nothing is returned.
    """
    # Account for the stride: expand the original sensitivity map.
    expanded_array = self.expand_sensitivity_map(sensitivity_array)
    # Full convolution: zero-pad the sensitivity map.
    # The zero-padding cells of the original input would also receive a
    # residual, but it never needs to be propagated further, so it is not
    # computed.
    # NOTE(review): the pad width is derived from the width only, which
    # assumes height and width need the same padding.
    expanded_width = expanded_array.shape[2]
    # Floor division: `/` yields a float on Python 3, but the pad width is
    # presumably used for array shapes / slice bounds inside padding()
    # (not visible here), which require an integer.
    zp = int((self.input_width + self.filter_width - 1 - expanded_width) // 2)
    padded_array = padding(expanded_array, zp)
    # Initialise delta_array, which holds the sensitivity map passed to the
    # previous layer.
    self.delta_array = self.create_delta_array()
    # For a layer with several filters, the sensitivity map passed upstream
    # is the sum of the sensitivity maps of all filters.
    for f in range(self.filter_number):
        current_filter = self.filters[f]
        # Rotate the filter weights by 180 degrees in the (height, width)
        # plane of every channel.
        flipped_weights = np.rot90(current_filter.get_weights(), 2, (1, 2))
        # delta_array belonging to this single filter; conv overwrites its
        # output, hence the separate buffer before accumulating.
        delta_array = self.create_delta_array()
        for d in range(delta_array.shape[0]):
            conv(padded_array[f], flipped_weights[d], delta_array[d], 1, 0)
        self.delta_array += delta_array
    # Element-wise multiplication with the derivative of the activation
    # function (computed on a copy of the stored input).
    derivative_array = np.array(self.input_array)
    element_wise_op(derivative_array, activator.backward)
    self.delta_array *= derivative_array
def expand_sensitivity_map(self, sensitivity_array):
    """
    Expand a strided sensitivity map to the size it would have with
    stride 1.

    Entry (i, j) of every channel of ``sensitivity_array`` is copied to
    position (i * stride, j * stride) of a zero-filled array; all other
    entries stay zero.

    sensitivity_array: array of shape (depth, output_height, output_width)

    Returns a new array of shape (depth, expanded_height, expanded_width)
    where expanded_x = input_x - filter_x + 2 * zero_padding + 1.
    """
    depth = sensitivity_array.shape[0]
    # Size of the sensitivity map for stride 1.  Cast to int so np.zeros
    # gets integer dimensions, matching the int() casts on the output size.
    expanded_width = int(
        self.input_width - self.filter_width + 2 * self.zero_padding + 1
    )
    expanded_height = int(
        self.input_height - self.filter_height + 2 * self.zero_padding + 1
    )
    expand_array = np.zeros((depth, expanded_height, expanded_width))
    out_h = int(self.output_height)
    out_w = int(self.output_width)
    stride = self.stride
    # Guard: with a zero-sized output the slice stops below would become
    # negative and select the wrong elements.
    if out_h > 0 and out_w > 0:
        # One strided assignment instead of a Python loop over (i, j).
        expand_array[
            :,
            :(out_h - 1) * stride + 1:stride,
            :(out_w - 1) * stride + 1:stride,
        ] = sensitivity_array[:, :out_h, :out_w]
    return expand_array
def create_delta_array(self):
    """
    Return a new zero-filled array of shape
    (channel_number, input_height, input_width).
    """
    shape = (self.channel_number, self.input_height, self.input_width)
    return np.zeros(shape)
反向传播:
\begin{align}\notag \delta_j^l = \beta_j^{l+1}(f^\prime(u_j^l) \circ up(\delta_j^{l+1})) \end{align}
def backward(self, input_array, sensitivity_array):
    """
    Max-pooling backward pass.

    For every pooling window the incoming sensitivity is routed to the
    position that ``get_max_index`` reports for that window (presumably the
    (row, col) of the window's maximum — confirm against its definition).
    All other positions stay zero.

    input_array: the input that was given to the forward pass
    sensitivity_array: sensitivity map of this layer, indexed
        [channel, out_row, out_col]

    The result is stored in ``self.delta_array`` (same shape as
    ``input_array``); nothing is returned.
    """
    self.delta_array = np.zeros(input_array.shape)
    out_h = int(self.output_height)
    out_w = int(self.output_width)
    for d in range(self.channel_number):
        for i in range(out_h):
            for j in range(out_w):
                patch_array = get_patch(
                    input_array[d], i, j,
                    self.filter_width, self.filter_height, self.stride,
                )
                max_row, max_col = get_max_index(patch_array)
                # Accumulate rather than assign: when windows overlap
                # (stride < filter size) one input element can be the max
                # of several windows and must receive the sum of their
                # gradients.  For non-overlapping windows this is identical
                # to plain assignment because the array starts at zero.
                self.delta_array[
                    d,
                    i * self.stride + max_row,
                    j * self.stride + max_col,
                ] += sensitivity_array[d, i, j]
标签:tor 结果 网络 导数 需要 lam 基础 情况 activator
原文地址:https://www.cnblogs.com/ratels/p/12312938.html