稀疏自编码器和矢量化编程

时间：2016-08-29 12:52:03 阅读：292 评论：0 收藏：0 [点我收藏+]

标签：

相关的公式

技术分享

证明参考PPT: http://wenku.baidu.com/link?url=dBZZq7TYJOnIw2mwilKsJT_swT52I0OoikmvmgBaYE_NvP_KChFZ-HOURH5LMiLEuSVFcGmJ0bQfkG-ZYk-IRJf7D-w6P9PBec8EZ9IxgFS

Python实现代码参考

@author: Paul Rothnie
email : paul.rothnie@googlemail.com
https://github.com/siddharth950/Sparse-Autoencoder

  1 # -*- coding: utf-8 -*-
  2 # Refer to https://github.com/siddharth950/Sparse-Autoencoder
  3 
  4 import numpy as np
  5 import numpy.linalg as la
  6 import scipy.io
  7 import scipy.optimize
  8 import matplotlib.pyplot
  9 import time
 10 import struct
 11 import array
 12 
 13 class sparse_autoencoder(object):  #稀疏自编码类
 14     def __init__(self, visible_size, hidden_size, lambda_, rho, beta):
 15         self.visible_size = visible_size  
 16         self.hidden_size = hidden_size 
 17         self.lambda_ = lambda_ 
 18         self.rho = rho 
 19         self.beta = beta
 20         w_max = np.sqrt(6.0 / (visible_size + hidden_size + 1.0))
 21         w_min = -w_max
 22         W1 = (w_max - w_min) * np.random.random_sample(size = (hidden_size, 
 23                                                         visible_size)) + w_min
 24         W2 = (w_max - w_min) * np.random.random_sample(size = (visible_size, 
 25                                                         hidden_size)) + w_min
 26         b1 = np.zeros(hidden_size)
 27         b2 = np.zeros(visible_size)
 28         self.idx_0 = 0
 29         self.idx_1 = hidden_size * visible_size # 64*25
 30         self.idx_2 = self.idx_1 +  hidden_size * visible_size # 25*64
 31         self.idx_3 = self.idx_2 + hidden_size # 64
 32         self.idx_4 = self.idx_3 + visible_size # 25
 33         self.initial_theta = np.concatenate((W1.flatten(), W2.flatten(), 
 34                                              b1.flatten(), b2.flatten()))
 35         
 36     def sigmoid(self, x):  # sigmoid函数
 37         return 1.0 / (1.0 + np.exp(-x))
 38     
 39     def unpack_theta(self, theta):  # 获取传递给scipy.optimize.minimize的theta
 40         W1 = theta[self.idx_0 : self.idx_1]
 41         W1 = np.reshape(W1, (self.hidden_size, self.visible_size))
 42         W2 = theta[self.idx_1 : self.idx_2]
 43         W2 = np.reshape(W2, (self.visible_size, self.hidden_size))
 44         b1 = theta[self.idx_2 : self.idx_3]
 45         b1 = np.reshape(b1, (self.hidden_size, 1))
 46         b2 = theta[self.idx_3 : self.idx_4]
 47         b2 = np.reshape(b2, (self.visible_size, 1))
 48         return W1, W2, b1, b2     
 49 
 50     def cost(self, theta, visible_input):  # cost函数
 51         W1, W2, b1, b2 = self.unpack_theta(theta)
 52         # layer=f(w*l+b)
 53         hidden_layer = self.sigmoid(np.dot(W1, visible_input) + b1)
 54         output_layer = self.sigmoid(np.dot(W2, hidden_layer) + b2)
 55         m = visible_input.shape[1]
 56         error = -(visible_input - output_layer)
 57         sum_sq_error =  0.5 * np.sum(error * error, axis = 0)
 58         avg_sum_sq_error = np.mean(sum_sq_error)
 59         reg_cost =  self.lambda_ * (np.sum(W1 * W1) + np.sum(W2 * W2)) / 2.0  # L2正则化
 60         rho_bar = np.mean(hidden_layer, axis=1) # 平均激活程度
 61         KL_div = np.sum(self.rho * np.log(self.rho / rho_bar) +
 62                         (1 - self.rho) * np.log((1-self.rho) / (1- rho_bar)))   # 相对熵
 63         cost = avg_sum_sq_error + reg_cost + self.beta * KL_div  # 损失函数
 64         KL_div_grad = self.beta * (- self.rho / rho_bar + (1 - self.rho) / 
 65                                     (1 - rho_bar))
 66         del_3 = error * output_layer * (1.0 - output_layer)
 67         del_2 = np.transpose(W2).dot(del_3) + KL_div_grad[:, np.newaxis]
 68 
 69         del_2 *= hidden_layer * (1 - hidden_layer)  # *=残差项
 70         W1_grad = del_2.dot(visible_input.transpose()) / m  # delt_w=del*(l.T)
 71         W2_grad = del_3.dot(hidden_layer.transpose()) / m
 72         b1_grad = del_2   # delt_b=del
 73         b2_grad = del_3
 74         W1_grad += self.lambda_ * W1
 75         W2_grad += self.lambda_ * W2
 76         b1_grad = b1_grad.mean(axis = 1)
 77         b2_grad = b2_grad.mean(axis = 1)
 78         theta_grad = np.concatenate((W1_grad.flatten(), W2_grad.flatten(), 
 79                                      b1_grad.flatten(), b2_grad.flatten()))        
 80         return [cost, theta_grad]
 81 
 82     def train(self, data, max_iterations):  # 训练令cost最小
 83         opt_soln = scipy.optimize.minimize(self.cost, 
 84                                            self.initial_theta, 
 85                                            args = (data,), method = ‘L-BFGS-B‘,
 86                                            jac = True, options =
 87                                            {‘maxiter‘:max_iterations} )
 88         opt_theta = opt_soln.x
 89         return opt_theta
 90    
 91 
 92 def normalize_data(data):  # 0.1<=data[i][j]<=0.9
 93     data = data - np.mean(data)
 94     pstd = 3 * np.std(data)
 95     data = np.maximum(np.minimum(data, pstd), -pstd) / pstd
 96     data = (data + 1.0) * 0.4 + 0.1
 97     return data
 98 
 99 def loadMNISTImages(file_name):  # 获取mnist数据
100     image_file = open(file_name, ‘rb‘)
101     head1 = image_file.read(4)
102     head2 = image_file.read(4)
103     head3 = image_file.read(4)
104     head4 = image_file.read(4)
105     num_examples = struct.unpack(‘>I‘, head2)[0]
106     num_rows     = struct.unpack(‘>I‘, head3)[0]
107     num_cols     = struct.unpack(‘>I‘, head4)[0]
108     dataset = np.zeros((num_rows*num_cols, num_examples))
109     images_raw  = array.array(‘B‘, image_file.read())
110     image_file.close()
111     for i in range(num_examples):    
112         limit1 = num_rows * num_cols * i
113         limit2 = num_rows * num_cols * (i + 1)        
114         dataset[:, i] = images_raw[limit1: limit2]
115     return dataset / 255
116 
117 
def load_data(num_patches, patch_side):
    """Sample square patches from ``IMAGES.mat`` and normalise them.

    Reads the ``'IMAGES'`` array from ``IMAGES.mat`` in the current working
    directory and cuts ``num_patches`` patches of ``patch_side`` x
    ``patch_side`` pixels at positions drawn from a fixed-seed generator.

    Args:
        num_patches: number of patches to extract.
        patch_side: side length of each patch in pixels.

    Returns:
        Array of shape (patch_side * patch_side, num_patches), one flattened
        patch per column, passed through ``normalize_data``.
    """
    # NOTE(review): the bounds below assume a 512 x 512 x 10 image stack;
    # confirm against the IMAGES.mat actually used.
    image_side = 512
    num_images = 10

    images = scipy.io.loadmat('IMAGES.mat')['IMAGES']
    patches = np.zeros((patch_side * patch_side, num_patches))
    rand = np.random.RandomState(1234)  # fixed seed for a repeatable sample

    # randint's upper bound is exclusive, so "+ 1" keeps the inclusive range
    # that the deprecated random_integers call used.
    image_index = rand.randint(0, image_side - patch_side + 1,
                               size=(num_patches, 2))
    image_number = rand.randint(0, num_images, size=num_patches)

    for i in range(num_patches):
        row, col = image_index[i]
        patch = images[row:row + patch_side, col:col + patch_side,
                       image_number[i]]
        patches[:, i] = patch.flatten()
    return normalize_data(patches)
137 
def visualizeW1(opt_W1, vis_patch_side, hid_patch_side):
    """Show rows of ``opt_W1`` as grayscale images in a square grid.

    Args:
        opt_W1: 2-D weight array; each row is reshaped to
            (vis_patch_side, vis_patch_side) and drawn in its own subplot.
        vis_patch_side: side length of each row when viewed as an image.
        hid_patch_side: number of subplot rows and columns in the grid.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1 x 1 grid, where
    # subplots() would otherwise return a bare Axes with no `.flat`.
    figure, axes = matplotlib.pyplot.subplots(nrows=hid_patch_side,
                                              ncols=hid_patch_side,
                                              squeeze=False)
    for axis in axes.flat:
        axis.set_frame_on(False)
        axis.set_axis_off()
    # zip stops at the shorter of the two, so a grid larger than the number
    # of weight rows leaves the extra cells blank instead of raising.
    for axis, weights in zip(axes.flat, opt_W1):
        axis.imshow(weights.reshape(vis_patch_side, vis_patch_side),
                    cmap=matplotlib.pyplot.cm.gray,
                    interpolation='nearest')
    matplotlib.pyplot.show()
150 
def run_sparse_ae():
    """Train a sparse autoencoder on image patches and plot its W1 weights.

    Uses 10000 patches of 8 x 8 pixels from ``load_data`` and a 5 x 5 hidden
    layer, trains for at most 400 iterations, then shows each hidden unit's
    input weights as an 8 x 8 image.
    """
    beta = 3.0
    lamda = 0.0001
    rho = 0.01
    visible_side = 8
    hidden_side = 5
    visible_size = visible_side * visible_side
    hidden_size = hidden_side * hidden_side
    m = 10000
    max_iterations = 400

    training_data = load_data(num_patches=m, patch_side=visible_side)
    sae = sparse_autoencoder(visible_size, hidden_size, lamda, rho, beta)
    opt_theta = sae.train(training_data, max_iterations)
    # Let the model split theta rather than repeating its layout here.
    opt_W1 = sae.unpack_theta(opt_theta)[0]
    visualizeW1(opt_W1, visible_side, hidden_side)
167 
def run_sparse_ae_MNIST():
    """Train a sparse autoencoder on MNIST images and plot its W1 weights.

    Loads ``train-images.idx3-ubyte`` from the current working directory,
    keeps the first 10000 images (28 x 28 pixels each), trains a 14 x 14
    hidden layer for at most 400 iterations, then shows each hidden unit's
    input weights as a 28 x 28 image.
    """
    beta = 3.0
    lamda = 3e-3
    rho = 0.1
    visible_side = 28
    hidden_side = 14
    visible_size = visible_side * visible_side
    hidden_size = hidden_side * hidden_side
    m = 10000
    max_iterations = 400

    training_data = loadMNISTImages('train-images.idx3-ubyte')
    training_data = training_data[:, 0:m]  # one image per column
    sae = sparse_autoencoder(visible_size, hidden_size, lamda, rho, beta)
    opt_theta = sae.train(training_data, max_iterations)
    # Let the model split theta rather than repeating its layout here.
    opt_W1 = sae.unpack_theta(opt_theta)[0]
    visualizeW1(opt_W1, visible_side, hidden_side)
185 
if __name__ == "__main__":
    # Default experiment: natural-image patches. Swap the calls to run the
    # MNIST experiment instead.
    run_sparse_ae()
    #run_sparse_ae_MNIST()

技术分享

稀疏自编码器和矢量化编程

标签：

原文地址：http://www.cnblogs.com/qw12/p/5817530.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行