标签:
本篇代码来自:
https://github.com/rhololkeolke/lspi-python
这是lspi文件夹中的basisfunction.py文件。
(1)python ABC(abstract base class)用法:
https://mozillazg.com/2014/06/python-define-abstract-base-classes.html
http://blog.csdn.net/nixawk/article/details/42970321
# -*- coding: utf-8 -*-
"""Abstract Base Class for Basis Function and some common implementations."""

import abc
from functools import reduce  # builtin on Python 2, functools-only on Python 3

import numpy as np


# Base class built by calling the metaclass directly.  A class-level
# ``__metaclass__`` attribute is silently ignored on Python 3 (abstract
# methods would not be enforced); this spelling works on Python 2 and 3.
_AbstractBase = abc.ABCMeta('_AbstractBase', (object,), {})


class BasisFunction(_AbstractBase):

    r"""ABC for basis functions used by LSPI Policies.

    A basis function is a function that takes in a state vector and an action
    index and returns a vector of features. The resulting feature vector is
    referred to as :math:`\phi` in the LSPI paper (pg 9 of the PDF referenced
    in this package's documentation). The :math:`\phi` vector is dotted with
    the weight vector of the Policy to calculate the Q-value.

    The dimensions of the state vector are usually smaller than the dimensions
    of the :math:`\phi` vector. However, the dimensions of the :math:`\phi`
    vector are usually much smaller than the dimensions of an exact
    representation of the state which leads to significant savings when
    computing and storing a policy.

    """

    # Note: a basis function does not update the state; it only maps a
    # (state, action) pair to a feature vector.  Because Q(s, a) is the dot
    # product of that vector with the weights, Q is approximated linearly.

    @abc.abstractmethod  # concrete subclasses must implement this
    def size(self):
        r"""Return the vector size of the basis function.

        Returns
        -------
        int
            The size of the :math:`\phi` vector.
            (Referred to as k in the paper).

        """
        pass  # pragma: no cover

    @abc.abstractmethod
    def evaluate(self, state, action):
        r"""Calculate the :math:`\phi` matrix for the given state-action pair.

        The way this value is calculated depends entirely on the concrete
        implementation of BasisFunction.

        Parameters
        ----------
        state : numpy.array
            The state to get the features for.
            When calculating Q(s, a) this is the s.
        action : int
            The action index to get the features for.
            When calculating Q(s, a) this is the a.


        Returns
        -------
        numpy.array
            The :math:`\phi` vector. Used by Policy to compute Q-value.

        """
        pass  # pragma: no cover

    @abc.abstractproperty  # concrete subclasses must provide this property
    def num_actions(self):
        """Return number of possible actions.

        Returns
        -------
        int
            Number of possible actions.
        """
        pass  # pragma: no cover

    @staticmethod
    def _validate_num_actions(num_actions):
        """Return num_actions if valid. Otherwise raise ValueError.

        Return
        ------
        int
            Number of possible actions.

        Raises
        ------
        ValueError
            If num_actions < 1

        """
        if num_actions < 1:
            raise ValueError('num_actions must be >= 1')
        return num_actions


class FakeBasis(BasisFunction):

    r"""Basis that ignores all input. Useful for random sampling.

    When creating a purely random Policy a basis function is still required.
    This basis function just returns a :math:`\phi` equal to [1.] for all
    inputs. It will however, still throw exceptions for impossible values like
    negative action indexes.

    """

    def __init__(self, num_actions):
        """Initialize FakeBasis."""
        # Validation is shared with the other bases via the parent helper.
        self.__num_actions = BasisFunction._validate_num_actions(num_actions)

    def size(self):
        r"""Return size of 1.

        Returns
        -------
        int
            Size of :math:`\phi` which is always 1 for FakeBasis

        Example
        -------

        >>> FakeBasis(1).size()
        1

        """
        return 1

    def evaluate(self, state, action):
        r"""Return :math:`\phi` equal to [1.].

        Parameters
        ----------
        state : numpy.array
            The state to get the features for.
            When calculating Q(s, a) this is the s. FakeBasis ignores these
            values.
        action : int
            The action index to get the features for.
            When calculating Q(s, a) this is the a. FakeBasis only checks
            that it is a valid index.

        Returns
        -------
        numpy.array
            :math:`\phi` vector equal to [1.].

        Raises
        ------
        IndexError
            If action index is < 0 or >= num_actions

        Example
        -------

        >>> FakeBasis(1).evaluate(np.arange(10), 0)
        array([1.])

        """
        if action < 0:
            raise IndexError('action index must be >= 0')
        if action >= self.num_actions:
            raise IndexError('action must be < num_actions')
        return np.array([1.])

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            If value < 1.

        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value


class OneDimensionalPolynomialBasis(BasisFunction):

    """Polynomial features for a state with one dimension.

    Takes the value of the state and constructs a vector proportional
    to the specified degree and number of actions. The polynomial is first
    constructed as [..., 1, value, value^2, ..., value^k, ...]
    where k is the degree. The rest of the vector is 0.

    Parameters
    ----------
    degree : int
        The polynomial degree.
    num_actions: int
        The total number of possible actions

    Raises
    ------
    ValueError
        If degree is less than 0
    ValueError
        If num_actions is less than 1

    """

    def __init__(self, degree, num_actions):
        """Initialize polynomial basis function."""
        self.__num_actions = BasisFunction._validate_num_actions(num_actions)

        if degree < 0:
            raise ValueError('Degree must be >= 0')
        self.degree = degree

    def size(self):
        """Calculate the size of the basis function.

        The base size will be degree + 1 (the extra entry is the zeroth
        power). This basic matrix is then duplicated once for every action.
        Therefore the size is equal to (degree + 1) * number of actions

        Returns
        -------
        int
            The size of the phi matrix that will be returned from evaluate.


        Example
        -------

        >>> basis = OneDimensionalPolynomialBasis(2, 2)
        >>> basis.size()
        6

        """
        return (self.degree + 1) * self.num_actions

    def evaluate(self, state, action):
        r"""Calculate :math:`\phi` matrix for given state action pair.

        The :math:`\phi` matrix is used to calculate the Q function for the
        given policy.

        Parameters
        ----------
        state : numpy.array
            The state to get the features for.
            When calculating Q(s, a) this is the s.
        action : int
            The action index to get the features for.
            When calculating Q(s, a) this is the a.

        Returns
        -------
        numpy.array
            The :math:`\phi` vector. Used by Policy to compute Q-value.

        Raises
        ------
        IndexError
            If :math:`0 \le action < num\_actions` does not hold.
        ValueError
            If the state vector has any shape other than ``(1,)``.

        Example
        -------

        >>> basis = OneDimensionalPolynomialBasis(2, 2)
        >>> basis.evaluate(np.array([2]), 0)
        array([1., 2., 4., 0., 0., 0.])

        """
        if action < 0 or action >= self.num_actions:
            raise IndexError('Action index out of bounds')

        if state.shape != (1, ):
            raise ValueError('This class only supports one dimensional states')

        phi = np.zeros((self.size(), ))

        # Each action owns a contiguous run of (degree + 1) entries.  The
        # offset is computed with integer arithmetic so it is a valid slice
        # index on Python 3 (true division would produce a float).
        block = self.degree + 1
        offset = block * action

        value = state[0]

        # Fill this action's run with [1, value, value^2, ..., value^degree].
        phi[offset:offset + block] = \
            np.array([pow(value, i) for i in range(block)])

        return phi

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            If value < 1.

        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value


class RadialBasisFunction(BasisFunction):

    r"""Gaussian Multidimensional Radial Basis Function (RBF).

    Given a set of k means :math:`(\mu_1 , \ldots, \mu_k)` produce a feature
    vector :math:`(1, e^{-\gamma || s - \mu_1 ||^2}, \cdots,
    e^{-\gamma || s - \mu_k ||^2})` where `s` is the state vector and
    :math:`\gamma` is a free parameter. This vector will be padded with
    0's on both sides proportional to the number of possible actions
    specified.

    Parameters
    ----------
    means: list(numpy.array)
        List of numpy arrays representing :math:`(\mu_1, \ldots, \mu_k)`.
        Each :math:`\mu` is a numpy array with dimensions matching the state
        vector this basis function will be used with. If the dimensions of each
        vector are not equal then an exception will be raised. If no means are
        specified then a ValueError will be raised
    gamma: float
        Free parameter which controls the size/spread of the Gaussian "bumps".
        This parameter is best selected via tuning through cross validation.
        gamma must be > 0.
    num_actions: int
        Number of actions. Must be in range [1, :math:`\infty`] otherwise
        an exception will be raised.

    Raises
    ------
    ValueError
        If means list is empty
    ValueError
        If dimensions of each mean vector do not match.
    ValueError
        If gamma is <= 0.
    ValueError
        If num_actions is less than 1.

    Note
    ----

    The numpy arrays specifying the means are not copied.

    """

    def __init__(self, means, gamma, num_actions):
        """Initialize RBF instance."""
        self.__num_actions = BasisFunction._validate_num_actions(num_actions)

        if len(means) == 0:
            raise ValueError('You must specify at least one mean')

        # The reduce collapses to None as soon as two shapes disagree.
        if reduce(RadialBasisFunction.__check_mean_size, means) is None:
            raise ValueError('All mean vectors must have the same dimensions')

        self.means = means

        if gamma <= 0:
            raise ValueError('gamma must be > 0')

        self.gamma = gamma

    @staticmethod
    def __check_mean_size(left, right):
        """Apply f if the value is not None.

        This method is meant to be used with reduce. It will return either the
        right most numpy array or None if any of the array's had
        differing sizes. I wanted to use a Maybe monad here,
        but Python doesn't support that out of the box.

        Return
        ------
        None or numpy.array
            None values will propagate through the reduce automatically.

        """
        if left is None or right is None:
            return None
        else:
            if left.shape != right.shape:
                return None
        return right

    def size(self):
        r"""Calculate size of the :math:`\phi` matrix.

        The size is equal to the number of means + 1 times the number of
        actions. The "+ 1" accounts for the constant 1 that leads each
        action's run of features.

        Returns
        -------
        int
            The size of the phi matrix that will be returned from evaluate.

        """
        return (len(self.means) + 1) * self.num_actions

    def evaluate(self, state, action):
        r"""Calculate the :math:`\phi` matrix.

        Matrix will have the following form:

        :math:`[\cdots, 1, e^{-\gamma || s - \mu_1 ||^2}, \cdots,
        e^{-\gamma || s - \mu_k ||^2}, \cdots]`

        where the matrix will be padded with 0's on either side depending
        on the specified action index and the number of possible actions.

        Returns
        -------
        numpy.array
            The :math:`\phi` vector. Used by Policy to compute Q-value.

        Raises
        ------
        IndexError
            If :math:`0 \le action < num\_actions` does not hold.
        ValueError
            If the shape of the state does not match the shape of the means.

        """
        if action < 0 or action >= self.num_actions:
            raise IndexError('Action index out of bounds')

        if state.shape != self.means[0].shape:
            raise ValueError('Dimensions of state must match '
                             'dimensions of means')

        phi = np.zeros((self.size(), ))

        # Each action owns (number of means + 1) entries, matching size().
        # The stride must be the number of means, not the dimensionality of
        # a single mean, otherwise features land in the wrong slot whenever
        # the two differ.
        offset = (len(self.means) + 1) * action

        rbf = [RadialBasisFunction.__calc_basis_component(state,
                                                          mean,
                                                          self.gamma)
               for mean in self.means]
        phi[offset] = 1.  # leading constant feature
        phi[offset + 1:offset + 1 + len(rbf)] = rbf

        return phi

    @staticmethod
    def __calc_basis_component(state, mean, gamma):
        """Return exp(-gamma * ||state - mean||^2) for a single mean."""
        mean_diff = state - mean
        return np.exp(-gamma * np.sum(mean_diff * mean_diff))

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            If value < 1.

        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value


class ExactBasis(BasisFunction):

    """Basis function with no functional approximation.

    This can only be used in domains with finite, discrete state-spaces. For
    example the Chain domain from the LSPI paper would work with this basis,
    but the inverted pendulum domain would not.

    Parameters
    ----------
    num_states: list
        A list containing integers representing the number of possible values
        for each state variable.
    num_actions: int
        Number of possible actions.

    Raises
    ------
    ValueError
        If any entry of num_states is <= 0
    ValueError
        If num_actions is less than 1
    """

    def __init__(self, num_states, num_actions):
        """Initialize ExactBasis."""
        # np.asarray lets the check work for plain lists as well as arrays
        # (comparing a list with an int raises TypeError on Python 3).
        if np.any(np.asarray(num_states) <= 0):
            raise ValueError('num_states value\'s must be > 0')

        self.__num_actions = BasisFunction._validate_num_actions(num_actions)
        self._num_states = num_states

        # Mixed-radix strides: _offsets[i] is the product of the sizes of
        # all state variables before i, so each joint state gets its own
        # index within an action's run.
        self._offsets = [1]
        for i in range(1, len(num_states)):
            self._offsets.append(self._offsets[-1] * num_states[i - 1])

    def size(self):
        r"""Return the vector size of the basis function.

        Returns
        -------
        int
            The size of the :math:`\phi` vector.
            (Referred to as k in the paper).
        """
        # Number of joint states (product of per-variable counts) times the
        # number of actions.
        return reduce(lambda x, y: x * y, self._num_states, 1) \
            * self.__num_actions

    def get_state_action_index(self, state, action):
        """Return the non-zero index of the basis.

        Parameters
        ----------
        state: numpy.array
            The state to get the index for.
        action: int
            The action to get the index for.

        Returns
        -------
        int
            The non-zero index of the basis

        Raises
        ------
        IndexError
            If action index < 0 or action index >= num_actions
        """
        if action < 0:
            raise IndexError('action index must be >= 0')
        if action >= self.num_actions:
            raise IndexError('action must be < num_actions')

        # Start of this action's run: action * (number of joint states).
        base = action * int(self.size() // self.__num_actions)

        # Position of the joint state inside the run.
        offset = 0
        for i, value in enumerate(state):
            offset += self._offsets[i] * value

        return base + offset

    def evaluate(self, state, action):
        r"""Return a :math:`\phi` vector that has a single non-zero value.

        Parameters
        ----------
        state: numpy.array
            The state to get the features for. When calculating Q(s, a) this is
            the s.
        action: int
            The action index to get the features for.
            When calculating Q(s, a) this is the a.

        Returns
        -------
        numpy.array
            :math:`\phi` vector

        Raises
        ------
        IndexError
            If action index < 0 or action index >= num_actions
        ValueError
            If the size of the state does not match the size of the
            num_states list used during construction.
        ValueError
            If any of the state variables are < 0 or >= the corresponding
            value in the num_states list used during construction.
        """
        if len(state) != len(self._num_states):
            raise ValueError('Number of state variables must match '
                             + 'size of num_states.')
        # np.asarray lets the check work for plain sequences as well.
        if np.any(np.asarray(state) < 0):
            raise ValueError('state cannot contain negative values.')
        for state_var, num_state_values in zip(state, self._num_states):
            if state_var >= num_state_values:
                # Message states the bound actually enforced (strictly less).
                raise ValueError('state values must be < corresponding '
                                 + 'num_states value.')

        phi = np.zeros(self.size())
        phi[self.get_state_action_index(state, action)] = 1

        return phi

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            if value < 1.
        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value
(六)Value Function Approximation-LSPI code (1)
标签:
原文地址:http://www.cnblogs.com/lijiajun/p/5486490.html