标签:cfa 对象 -- 标准 cos pytho 访问 适用于 总结
pip install numpy
import numpy as np
a = np.array(range(10))
a.dtype # 数据类型
In [23]: import numpy as np
...: data = np.array([[2,3,4],[4,5,6]])
In [24]: data
Out[24]:
array([[2, 3, 4],
[4, 5, 6]])
In [26]: data.size
Out[26]: 6 # 元素个数
In [27]: data.shape
Out[27]: (2, 3) # 2行三列
In [28]: data.dtype
Out[28]: dtype('int64')
In [29]: data.T
Out[29]:
array([[2, 4],
[3, 5],
[4, 6]])
In [22]: data.ndim
Out[22]: 2 # 2维
创建一个全是0的数组
In [30]: np.zeros(10)
Out[30]: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
In [31]: np.zeros(10,dtype='int')
Out[31]: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
创建一个全1的数组
In [41]: a = np.ones(10)
Out[41]: array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
In [44]: a = np.ones(10,dtype='int')
Out[45]: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
创建一个空数组
In [48]: b = np.empty(100)
Out[48]:
array([-3.10503618e+231, -3.10503618e+231, 6.94694804e-310,
6.94694804e-310, 6.94694804e-310, 5.43472210e-323,
0.00000000e+000, 3.95252517e-323, 0.00000000e+000,
0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
......
0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
0.00000000e+000])
创建一个连续数组
In [50]: np.arange(10)
Out[50]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [51]: np.arange(1,5)
Out[51]: array([1, 2, 3, 4])
In [52]: np.arange(1,100,2)
Out[52]:
array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99])
In [53]: np.arange(1,10,0.5)
Out[53]:
array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. ,
7.5, 8. , 8.5, 9. , 9.5])
指定长度平均拆分
In [54]: np.linspace(1,10,100) # 在1到10之间(包含两端点)生成100个等间隔的数
Out[54]:
array([ 1. , 1.09090909, 1.18181818, 1.27272727, 1.36363636,
1.45454545, 1.54545455, 1.63636364, 1.72727273, 1.81818182,
1.90909091, 2. , 2.09090909, 2.18181818, 2.27272727,
2.36363636, 2.45454545, 2.54545455, 2.63636364, 2.72727273,
2.81818182, 2.90909091, 3. , 3.09090909, 3.18181818,
3.27272727, 3.36363636, 3.45454545, 3.54545455, 3.63636364,
3.72727273, 3.81818182, 3.90909091, 4. , 4.09090909,
4.18181818, 4.27272727, 4.36363636, 4.45454545, 4.54545455,
4.63636364, 4.72727273, 4.81818182, 4.90909091, 5. ,
5.09090909, 5.18181818, 5.27272727, 5.36363636, 5.45454545,
5.54545455, 5.63636364, 5.72727273, 5.81818182, 5.90909091,
6. , 6.09090909, 6.18181818, 6.27272727, 6.36363636,
6.45454545, 6.54545455, 6.63636364, 6.72727273, 6.81818182,
6.90909091, 7. , 7.09090909, 7.18181818, 7.27272727,
7.36363636, 7.45454545, 7.54545455, 7.63636364, 7.72727273,
7.81818182, 7.90909091, 8. , 8.09090909, 8.18181818,
8.27272727, 8.36363636, 8.45454545, 8.54545455, 8.63636364,
8.72727273, 8.81818182, 8.90909091, 9. , 9.09090909,
9.18181818, 9.27272727, 9.36363636, 9.45454545, 9.54545455,
9.63636364, 9.72727273, 9.81818182, 9.90909091, 10. ])
线性代数的单位矩阵
In [56]: np.eye(10)
Out[56]:
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
In [57]: a = np.arange(1,10)
Out[57]: array([1, 2, 3, 4, 5, 6, 7, 8, 9])
In [59]: b = np.arange(11,20)
Out[59]: array([11, 12, 13, 14, 15, 16, 17, 18, 19])
In [61]: a + b
Out[61]: array([12, 14, 16, 18, 20, 22, 24, 26, 28])
In [62]: a - b
Out[62]: array([-10, -10, -10, -10, -10, -10, -10, -10, -10])
In [63]: a * b
Out[63]: array([ 11, 24, 39, 56, 75, 96, 119, 144, 171])
In [64]: a / b
Out[64]:
array([0.09090909, 0.16666667, 0.23076923, 0.28571429, 0.33333333,
0.375 , 0.41176471, 0.44444444, 0.47368421])
In [65]: a ** b
Out[65]:
array([ 1, 4096, 1594323,
268435456, 30517578125, 2821109907456,
232630513987207, 18014398509481984, 1350851717672992089])
In [66]: a > b
Out[66]: array([False, False, False, False, False, False, False, False, False])
In [72]: a = np.arange(10)
Out[72]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [74]: a[0]
Out[74]: 0
In [75]: a[1]
Out[75]: 1
二维数组
In [77]: a = np.arange(15).reshape(3,5) # 把一个0-14的一维数组变成一个3*5的二维数组
In [78]: a
Out[78]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
In [78]: a
Out[78]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
In [79]: a[0][0] # 类似于嵌套列表
Out[79]: 0
In [80]: a[0,0] # numpy独有
Out[80]: 0
一维数组的切片
In [85]: a = np.arange(20)
In [86]: a
Out[86]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
In [87]: a[1:10]
Out[87]: array([1, 2, 3, 4, 5, 6, 7, 8, 9])
二维数组的切片
In [92]: b
Out[92]:
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
In [93]: b[2:4,1:] # 2:4 表示第2、3行,但b只有3行,所以只有第2行;1: 表示从第1列切到最后
Out[93]: array([[11, 12, 13, 14]])
In [97]: c = b[2:4,1:].copy()
Out[97]: array([[11, 12, 13, 14]])
# 给定一个随机数组,筛选出大于5的数
# 方法1 filter()函数
In [99]: import random
In [100]: a = [random.randint(0,20) for i in range(20)]
In [101]: a
Out[101]: [8, 15, 6, 14, 6, 10, 19, 3, 11, 13, 10, 9, 2, 2, 3, 7, 5, 16, 19, 12]
In [102]: filter(lambda x:x>5,a)
Out[102]: <filter at 0x7fe1cfafcd90>
In [103]: list(filter(lambda x:x>5,a))
Out[103]: [8, 15, 6, 14, 6, 10, 19, 11, 13, 10, 9, 7, 16, 19, 12]
# 方法2 array布尔索引
In [104]: a
Out[104]: [8, 15, 6, 14, 6, 10, 19, 3, 11, 13, 10, 9, 2, 2, 3, 7, 5, 16, 19, 12]
In [105]: c = np.array(a)
In [106]: c[c>5]
Out[106]: array([ 8, 15, 6, 14, 6, 10, 19, 11, 13, 10, 9, 7, 16, 19, 12])
In [107]: a = np.arange(4)
In [108]: a
Out[108]: array([0, 1, 2, 3])
In [109]: a[[True,True,False,False]]
Out[109]: array([0, 1]) # 只返回了索引为True的
# 给定一个随机数组,筛选出大于5的数 并且为偶数
# 方法一
In [115]: b = a[a>5]
Out[116]: array([ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
In [117]: c = b[b%2==0]
Out[118]: array([ 6, 8, 10, 12, 14, 16, 18])
# 方法二
In [113]: a
Out[113]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19])
In [114]: a[(a>5) & (a%2==0)]
Out[114]: array([ 6, 8, 10, 12, 14, 16, 18])
In [125]: a = np.arange(10)
In [126]: a[[1,2,4,6,8]]
Out[126]: array([1, 2, 4, 6, 8])
In [127]: a = np.arange(-5,5)
Out[127]: array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4])
In [129]: abs(a)
Out[129]: array([5, 4, 3, 2, 1, 0, 1, 2, 3, 4])
In [130]: np.abs(a)
Out[130]: array([5, 4, 3, 2, 1, 0, 1, 2, 3, 4])
In [131]: a = np.arange(1,20)
In [132]: np.sqrt(a)
Out[132]:
array([1. , 1.41421356, 1.73205081, 2. , 2.23606798,
2.44948974, 2.64575131, 2.82842712, 3. , 3.16227766,
3.31662479, 3.46410162, 3.60555128, 3.74165739, 3.87298335,
4. , 4.12310563, 4.24264069, 4.35889894])
In [134]: a
Out[134]: array([1, 2, 3, 4])
In [135]: np.exp(a)
Out[135]: array([ 2.71828183, 7.3890561 , 20.08553692, 54.59815003])
In [134]: a
Out[134]: array([1, 2, 3, 4])
In [136]: np.log(a)
Out[136]: array([0. , 0.69314718, 1.09861229, 1.38629436])
sin()
cos()
tan()
floor()
ceil()
round() 四舍五入(注意:numpy 对恰好为 .5 的值向最近的偶数舍入,例如 np.round(2.5) 得到 2.0)
trunc()
rint() == round()
np.modf() 小数和整数分开
add 加
subtract 减
multiply 乘
divide 除
power
mod
maximum 取两个array中对应位置的较大值
minimum 取两个array中对应位置的较小值
In [137]: a
Out[137]: array([1, 2, 3, 4])
In [138]: a.sum()
Out[138]: 10
In [137]: a
Out[137]: array([1, 2, 3, 4])
In [139]: a.mean()
Out[139]: 2.5
In [137]: a
Out[137]: array([1, 2, 3, 4])
In [141]: a.std()
Out[141]: 1.118033988749895
In [137]: a
Out[137]: array([1, 2, 3, 4])
In [140]: a.var()
Out[140]: 1.25
In [142]: import random
In [143]: random.random()
Out[143]: 0.08129507565064709
In [144]: random.random()
Out[144]: 0.544656798663411
In [145]: random.randint(0,10)
Out[145]: 9
In [146]: random.randint(0,10)
Out[146]: 10
In [147]: random.randint(0,10)
Out[147]: 3
In [149]: random.choice([1,2,3,4,5])
Out[149]: 4
In [150]: random.choice([1,2,3,4,5])
Out[150]: 5
In [157]: a = [1,2,3,4,5]
In [158]: random.shuffle(a)
In [159]: a
Out[159]: [3, 1, 5, 4, 2]
np.random方法
np.random.randint(1,10) --- 取[1,10)之间的随机整数(不包含10)
np.random.randint(1,10,10) --- 取[1,10)之间的随机整数 取10个组成一个数组
In [162]: np.random.randint(0,10,10)
Out[162]: array([4, 4, 0, 0, 9, 2, 8, 6, 9, 4])
In [164]: np.random.randint(0,100,(2,5,5))
Out[164]:
array([[[74, 91, 75, 12, 13],
[59, 99, 15, 42, 33],
[ 9, 62, 60, 63, 55],
[81, 45, 80, 88, 86],
[10, 96, 3, 77, 92]],
[[68, 83, 93, 18, 46],
[55, 86, 31, 87, 6],
[53, 91, 58, 78, 37],
[ 4, 70, 68, 45, 82],
[23, 73, 53, 75, 53]]])
?
有两种索引
In [166]: pd.Series([1,2,3,4])
Out[166]:
0 1
1 2
2 3
3 4
dtype: int64
In [167]: pd.Series([1,2,3,4],index=['a','b','c','d']) # 指定索引为abcd;原来的整数位置并没有消失,仍然可以按位置访问
Out[167]:
a 1
b 2
c 3
d 4
dtype: int64
In [168]: pd.Series(np.arange(5)) # 与numpy结合使用
Out[168]:
0 0
1 1
2 2
3 3
4 4
dtype: int64
和标量运算 / Series之间的运算(下面的例子是两个Series按索引对应相加;和标量运算同理,例如 sr*2)
In [166]: sr = pd.Series([1,2,3,4])
Out[166]:
0 1
1 2
2 3
3 4
dtype: int64
In [173]: sr+sr
Out[173]:
0 2
1 4
2 6
3 8
dtype: int64
可以传递一个字典
In [174]: sr = pd.Series({'a':1,'b':2,'c':3})
In [175]: sr
Out[175]:
a 1
b 2
c 3
dtype: int64
获取索引/获取值
In [177]: sr.index
Out[177]: Index(['a', 'b', 'c'], dtype='object')
In [178]: sr.values
Out[178]: array([1, 2, 3])
In [6]: sr = pd.Series(np.arange(20))
In [7]: sr
Out[7]:
0 0
1 1
2 2
......
18 18
19 19
dtype: int64
In [8]: sr2 = sr[10:].copy()
In [9]: sr2
Out[9]:
10 10
11 11
......
18 18
19 19
dtype: int64
In [12]: sr2[10]
Out[12]: 10
In [14]: sr2.loc[10] # loc解析为标签
Out[14]: 10
In [15]: sr2.iloc[0] # iloc解析为下标(位置)
Out[15]: 10
In [16]: sr1 = pd.Series([1,2,3],index=['c','b','a'])
In [17]: sr2 = pd.Series([4,5,6],index=['b','a','c'])
In [18]: sr1 + sr2
Out[18]:
a 8
b 6
c 7
dtype: int64
# Series会按照标签('a','b','c')对齐后再相加
缺失数据的方法
In [10]: sr4 = pd.Series([4,5,6,7],index=['c','a','b','d'])
In [11]: sr3 = pd.Series([1,2,3],index=['a','b','c'])
In [12]: sr5 = sr3 + sr4
In [14]: sr5
Out[14]:
a 6.0
b 8.0
c 7.0
d NaN
dtype: float64
判断一行数据里面是不是有NaN
In [15]: sr5.isnull()
Out[15]:
a False
b False
c False
d True # 表示为NaN
dtype: bool
In [17]: sr5.notnull()
Out[17]:
a True
b True
c True
d False
dtype: bool
sr5[sr5.notnull()] 判断NaN的方法+布尔索引
In [18]: sr6 = sr5[sr5.notnull()]
In [19]: sr6
Out[19]:
a 6.0
b 8.0
c 7.0
dtype: float64
dropna()方法去掉NaN
In [21]: sr5.dropna()
Out[21]:
a 6.0
b 8.0
c 7.0
dtype: float64
fillna(0)给NaN赋值
In [22]: sr5.fillna(0) # 赋值为0
Out[22]:
a 6.0
b 8.0
c 7.0
d 0.0
dtype: float64
In [25]: sr5.fillna(sr5.mean()) # sr5.mean() 赋值为平均值
Out[25]:
a 6.0
b 8.0
c 7.0
d 7.0
dtype: float64
数组+字典的结合体
整数索引出现两种情况
NaN缺失数据的处理
# 方式1 不指定索引默认为数字
In [28]: pd.DataFrame({'one':[1,2,3],'two':[4,5,6]})
Out[28]:
one two
0 1 4
1 2 5
2 3 6
# 方式1 指定索引
In [29]: pd.DataFrame({'one':[1,2,3],'two':[4,5,6]},index=['a','b','c'])
Out[29]:
one two
a 1 4
b 2 5
c 3 6
# 方式2 要根据索引对齐
In [32]: df = pd.DataFrame({'one':pd.Series([1,2,3],index=['a','b','c']),'two':pd.Series([4,5,6,7],index=['c','a','b','d'])})
In [33]: df
Out[33]:
one two
a 1.0 5
b 2.0 6
c 3.0 4
d NaN 7
In [35]: pd.read_csv('/Users/huhao/1.csv')
Out[35]:
a b c
0 1 2 3
1 4 5 6
2 7 8 9
In [36]: df.to_csv('/Users/huhao/2.csv')
"""获取行索引"""
In [44]: df.index
Out[44]: Index(['a', 'b', 'c', 'd'], dtype='object')
"""获取列索引"""
In [48]: df.columns
Out[48]: Index(['one', 'two'], dtype='object')
"""获取数据"""
In [45]: df.values
Out[45]:
array([[ 1., 5.],
[ 2., 6.],
[ 3., 4.],
[nan, 7.]])
"""转置"""
In [46]: df.T
Out[46]:
a b c d
one 1.0 2.0 3.0 NaN
two 5.0 6.0 4.0 7.0
"""返回一些描述"""
In [50]: df.describe()
Out[50]:
one two
count 3.0 4.000000
mean 2.0 5.500000
std 1.0 1.290994
min 1.0 4.000000
25% 1.5 4.750000
50% 2.0 5.500000
75% 2.5 6.250000
max 3.0 7.000000
"""df对象对比"""
In [51]: df
Out[51]:
one two
a 1.0 5
b 2.0 6
c 3.0 4
d NaN 7
"""df对象对比"""
In [51]: df
Out[51]:
one two
a 1.0 5
b 2.0 6
c 3.0 4
d NaN 7
# 先行后列
In [57]: df.loc['a']['one']
Out[57]: 1.0
# 先行后列
In [58]: df.loc['a','one']
Out[58]: 1.0
# 传递一个列表
In [64]: df.loc[['a','b','c'],:]
Out[64]:
one two
a 1.0 5
b 2.0 6
c 3.0 4
数据对齐需要行索引和列索引都对齐
fillna(0)
dropna(how='all',axis=0)
参数how
"any" 一行数据中只要有一个是NaN就会删除(默认值)
"all" 一行数据全部是NaN才会删除
参数axis
丢弃数据默认删除行
In [15]: df = pd.read_csv(‘/Users/huhao/1.csv‘)
Out[15]:
a b c
0 1 2 3
1 4 5 6
2 7 8 9
In [18]: df.mean(axis=1) # 按行求平均值
Out[18]:
0 2.0
1 5.0
2 8.0
dtype: float64
In [19]: df.mean(axis=0) # 按列求平均值
Out[19]:
a 4.0
b 5.0
c 6.0
dtype: float64
In [20]: df
Out[20]:
a b c
0 1 2 3
1 4 5 6
2 7 8 9
In [21]: df.sum() # 默认按列求和
Out[21]:
a 12
b 15
c 18
dtype: int64
In [22]: df.sum(axis=0) # 按列求和
Out[22]:
a 12
b 15
c 18
dtype: int64
In [23]: df.sum(axis=1) # 按行求和
Out[23]:
0 6
1 15
2 24
dtype: int64
python处理时间
datetime&time
dateutil
import dateutil
dateutil.parser.parse('2020-12-12')
dateutil.parser.parse('2020/12/12')
dateutil.parser.parse('2020/DEC/12')
pandas处理时间
In [12]: pd.to_datetime(['2020-12-13','2020/02/12'])
Out[12]: DatetimeIndex(['2020-12-13', '2020-02-12'], dtype='datetime64[ns]', freq=None)
pandas.date_range()
参数 start 起始时间
参数 end 终止时间
参数 periods 生成的时间点个数(时间跨度)
参数 freq
freq = "D" 按天
freq = "W" 按周
freq = "H" 按小时
freq = "B" 工作日
In [14]: pd.date_range('2020-01-01','2020-01-30') # 设置起始时间和终止时间
Out[14]:
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
'2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
'2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
'2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
'2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
'2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
'2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
'2020-01-29', '2020-01-30'],
dtype='datetime64[ns]', freq='D')
In [15]: pd.date_range('2020-01-01',periods=30) # 设置开始时间和时间跨度
Out[15]:
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
'2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
'2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
'2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
'2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
'2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
'2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
'2020-01-29', '2020-01-30'],
dtype='datetime64[ns]', freq='D')
字符串切片
In [9]: sr = pd.Series(np.arange(1000),index=pd.date_range('2020-01-01',periods=1000))
In [10]: sr
Out[10]:
2020-01-01 0
2020-01-02 1
2020-01-03 2
2020-01-04 3
2020-01-05 4
...
2022-09-22 995
2022-09-23 996
2022-09-24 997
2022-09-25 998
2022-09-26 999
Freq: D, Length: 1000, dtype: int64
In [14]: sr['2020':'2021-03'] # 截取2020年至2021年3月的数据
Out[14]:
2020-01-01 0
2020-01-02 1
2020-01-03 2
2020-01-04 3
2020-01-05 4
...
2021-03-27 451
2021-03-28 452
2021-03-29 453
2021-03-30 454
2021-03-31 455
Freq: D, Length: 456, dtype: int64
函数的使用
resample()
In [16]: sr2
Out[16]:
2020-01-01 0
2020-01-02 1
2020-01-03 2
2020-01-04 3
......
2020-01-30 29
2020-01-31 30
2020-02-01 31
Freq: D, dtype: int64
In [17]: sr2.resample('W')
Out[17]: <pandas.core.resample.DatetimeIndexResampler object at 0x7fe58d136d50>
In [18]: sr2.resample('W').sum()
Out[18]:
2020-01-05 10
2020-01-12 56
2020-01-19 105
2020-01-26 154
2020-02-02 171
Freq: W-SUN, dtype: int64
truncate()
In [19]: sr.truncate(before='2021-06-01')
Out[19]:
2021-06-01 517
2021-06-02 518
2021-06-03 519
2021-06-04 520
2021-06-05 521
...
2022-09-22 995
2022-09-23 996
2022-09-24 997
2022-09-25 998
2022-09-26 999
Freq: D, Length: 483, dtype: int64
dataframe操作时间序列
"""
1. set index
res = df.set_index(keys=['date'])
2. 索引转为时间对象
res.index = pd.to_datetime(res.index)
3. 用loc切片
res.loc['2010-05-09':'2010-05-16']
"""
标签:cfa 对象 -- 标准 cos pytho 访问 适用于 总结
原文地址:https://www.cnblogs.com/gokublog/p/14610065.html