标签:
问题导读:
1.合并数据集
2.重塑和轴向旋转
3.数据转换(待续)
解决方案:
In [3]: df1 = pd.DataFrame(
...: {'key':['b','b','a','c','a','a','b'],
...: 'data1':range(7)}
...: )
In [4]: df1
Out[4]:
data1 key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
[7 rows x 2 columns]
In [5]: df2 = pd.DataFrame(
...: {'key':['a','b','d'],
...: 'data2':range(3)}
...: )
In [6]: df2
Out[6]:
data2 key
0 0 a
1 1 b
2 2 d
[3 rows x 2 columns]
In [7]: pd.merge(df1,df2)
Out[7]:
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
[6 rows x 3 columns]
In [8]: pd.merge(df1,df2, on='key')
Out[8]:
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
[6 rows x 3 columns]
In [10]: df3 = pd.DataFrame({'lkey':['b','b','a','c','a','a','b'],
....: 'data1':range(7)})
In [11]: df4 = pd.DataFrame({'rkey':['a','b','d'],'data2':range(3)})
In [12]: pd.merge(df3,df4,left_on='lkey',right_on='rkey')
Out[12]:
data1 lkey data2 rkey
0 0 b 1 b
1 1 b 1 b
2 6 b 1 b
3 2 a 0 a
4 4 a 0 a
5 5 a 0 a
[6 rows x 4 columns]
In [13]: pd.merge(df1,df2,how='outer')
Out[13]:
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
6 3 c NaN
7 NaN d 2
[8 rows x 3 columns]
In [15]: df1 = pd.DataFrame({'key':['b','b','a','c','a','b'],
....: 'data1':range(6)})
In [16]: df2 = pd.DataFrame({'key':['a','b','a','b','d'],
....: 'data2':range(5)})
In [17]: pd.merge(df1,df2,on='key', how = 'left')
Out[17]:
data1 key data2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
10 3 c NaN
[11 rows x 3 columns]
In [18]: pd.merge(df1,df2,on='key', how = 'right')
Out[18]:
data1 key data2
0 0 b 1
1 1 b 1
2 5 b 1
3 0 b 3
4 1 b 3
5 5 b 3
6 2 a 0
7 4 a 0
8 2 a 2
9 4 a 2
10 NaN d 4
[11 rows x 3 columns]
In [19]: pd.merge(df1,df2,on='key',how='inner')
Out[19]:
data1 key data2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
[10 rows x 3 columns]
In [21]: pd.merge(df1,df2,on='key',how='outer')
Out[21]:
data1 key data2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
10 3 c NaN
11 NaN d 4
[12 rows x 3 columns]
In [27]: left = pd.DataFrame({'key1':['foo','foo','bar'],
'key2':['one','two','one'],
'key3':[1,2,3]})
In [28]: right = pd.DataFrame({'key1':['foo','foo','foo','bar'],
'key2':['one','one','one','two'],
'rval':[4,5,6,7]})
In [29]: pd.merge(left,right,on=['key1','key2'],how='outer')
Out[29]:
key1 key2 key3 rval
0 foo one 1 4
1 foo one 1 5
2 foo one 1 6
3 foo two 2 NaN
4 bar one 3 NaN
5 bar two NaN 7
[6 rows x 4 columns]
In [30]: pd.merge(left,right,on='key1')
Out[30]:
key1 key2_x key3 key2_y rval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo one 1 one 6
3 foo two 2 one 4
4 foo two 2 one 5
5 foo two 2 one 6
6 bar one 3 two 7
[7 rows x 5 columns]
In [31]: pd.merge(left,right,on='key1',suffixes=('_left','_right'))
Out[31]:
key1 key2_left key3 key2_right rval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo one 1 one 6
3 foo two 2 one 4
4 foo two 2 one 5
5 foo two 2 one 6
6 bar one 3 two 7
[7 rows x 5 columns]
In [37]: left1 = pd.DataFrame({'key':['a','b','a','a','b','c'],
'value':range(6)})
In [38]: right1 = pd.DataFrame({'group_val':[3.5,7]},index = ['a','b'])
In [39]: pd.merge(left1,right1,left_on='key',right_index=True)
Out[39]:
key value group_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0
[5 rows x 3 columns]
In [40]: right1
Out[40]:
group_val
a 3.5
b 7.0
[2 rows x 1 columns]
In [41]: left1
Out[41]:
key value
0 a 0
1 b 1
2 a 2
3 a 3
4 b 4
5 c 5
[6 rows x 2 columns]
In [48]: lefth = pd.DataFrame({'key1':['Ohio','Ohio','Ohio','Nevada','Nevada'],
'key2':[2000,2001,2002,2001,2002],
'data':np.arange(5.)})
In [49]: righth = pd.DataFrame(np.arange(12).reshape((6,2)),
index = [['Nevada','Nevada','Ohio','Ohio','Ohio','Ohio'],
[2001,2000,2000,2000,2001,2002]],
columns = ['event1','event2'])
In [50]: lefth
Out[50]:
data key1 key2
0 0 Ohio 2000
1 1 Ohio 2001
2 2 Ohio 2002
3 3 Nevada 2001
4 4 Nevada 2002
[5 rows x 3 columns]
In [52]: righth
Out[52]:
event1 event2
Nevada 2001 0 1
2000 2 3
Ohio 2000 4 5
2000 6 7
2001 8 9
2002 10 11
[6 rows x 2 columns]
In [53]: pd.merge(lefth,righth,left_on=['key1','key2'],right_index=True)
Out[53]:
data key1 key2 event1 event2
0 0 Ohio 2000 4 5
0 0 Ohio 2000 6 7
1 1 Ohio 2001 8 9
2 2 Ohio 2002 10 11
3 3 Nevada 2001 0 1
[5 rows x 5 columns]
In [54]: pd.merge(lefth,righth,left_on=['key1','key2'],right_index=True,how='outer')
Out[54]:
data key1 key2 event1 event2
0 0 Ohio 2000 4 5
0 0 Ohio 2000 6 7
1 1 Ohio 2001 8 9
2 2 Ohio 2002 10 11
3 3 Nevada 2001 0 1
4 4 Nevada 2002 NaN NaN
4 NaN Nevada 2000 2 3
[7 rows x 5 columns]
In [55]: left2 = pd.DataFrame([[1.,2.],[3.,4.],[5.,6.]], index = ['a','c','e'],
....: columns= ['Ohio','Nevada'])
In [56]: right2 = pd.DataFrame([[7.,8.],[9.,10.],[11.,12],[13,14]],
....: index = ['b','c','d','e'],columns=['Missouri','Alabama'])
In [57]: left2
Out[57]:
Ohio Nevada
a 1 2
c 3 4
e 5 6
[3 rows x 2 columns]
In [58]: right2
Out[58]:
Missouri Alabama
b 7 8
c 9 10
d 11 12
e 13 14
[4 rows x 2 columns]
In [59]: pd.merge(left2,right2,how='outer',left_index=True,right_index=True)
Out[59]:
Ohio Nevada Missouri Alabama
a 1 2 NaN NaN
b NaN NaN 7 8
c 3 4 9 10
d NaN NaN 11 12
e 5 6 13 14
[5 rows x 4 columns]
In [60]: left2.join(right2,how='outer')
Out[60]:
Ohio Nevada Missouri Alabama
a 1 2 NaN NaN
b NaN NaN 7 8
c 3 4 9 10
d NaN NaN 11 12
e 5 6 13 14
[5 rows x 4 columns]
In [61]: left2.join(right2)
Out[61]:
Ohio Nevada Missouri Alabama
a 1 2 NaN NaN
c 3 4 9 10
e 5 6 13 14
[3 rows x 4 columns]
In [62]: left1
Out[62]:
key value
0 a 0
1 b 1
2 a 2
3 a 3
4 b 4
5 c 5
[6 rows x 2 columns]
In [63]: right1
Out[63]:
group_val
a 3.5
b 7.0
[2 rows x 1 columns]
In [64]: left1.join(right1,on='key')
Out[64]:
key value group_val
0 a 0 3.5
1 b 1 7.0
2 a 2 3.5
3 a 3 3.5
4 b 4 7.0
5 c 5 NaN
[6 rows x 3 columns]
In [65]: another = pd.DataFrame([[7.,8.],[9.,10.],[11.,12.],[16.,17.]],
....: index=['a','c','e','f'],columns=['New York','Oregon'])
In [66]: left2.join([right2,another])
Out[66]:
Ohio Nevada Missouri Alabama New York Oregon
a 1 2 NaN NaN 7 8
c 3 4 9 10 9 10
e 5 6 13 14 11 12
[3 rows x 6 columns]
# coding=utf-8
import numpy as np
import pandas as pd

# NumPy's concatenate function joins raw ndarrays along a chosen axis.
arr = np.arange(12).reshape((3, 4))
print(arr)
'''
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
'''
print(np.concatenate([arr, arr], axis=1))
'''
[[ 0 1 2 3 0 1 2 3]
[ 4 5 6 7 4 5 6 7]
[ 8 9 10 11 8 9 10 11]]
'''
# For pandas objects (Series and DataFrame) the labelled axes let concat
# generalize plain array concatenation.
#
# (1) When the inputs' other-axis indexes differ, does the result use their
#     intersection or their union?  Answer: the union, by default.

# concat works along axis=0 by default
s1 = pd.Series([0, 1], index=['a', 'b'])
# s1 = pd.Series([1,2,2], index=['d','b','f'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])
print(pd.concat([s1, s2, s3]))
'''
a 0
b 1
c 2
d 3
e 4
f 5
g 6
dtype: int64
'''
# Asking for axis=1 instead turns the result into a DataFrame
print(pd.concat([s1, s2, s3], axis=1))
'''
0 1 2
a 0 NaN NaN
b 1 NaN NaN
c NaN 2 NaN
d NaN 3 NaN
e NaN 4 NaN
f NaN NaN 5
g NaN NaN 6
'''
# Passing join='inner' yields the intersection of the indexes instead
s4 = pd.concat([s1 * 5, s3])
print(pd.concat([s1, s4], axis=1))
'''
dtype: int64
0 1
a 0 0
b 1 5
f NaN 5
g NaN 6
[4 rows x 2 columns]
'''
print(pd.concat([s1, s4], axis=1, join='inner'))
'''
0 1
a 0 0
b 1 5
[2 rows x 2 columns]
'''
# Choose the result's row labels explicitly; labels absent from both inputs
# come out as NaN.  (The original used join_axes=[...], which was removed in
# pandas 0.25 -- reindex after concat is the supported replacement.)
print(pd.concat([s1, s4], axis=1).reindex(['a', 'c', 'b', 'e']))
'''
0 1
a 0 0
c NaN NaN
b 1 5
e NaN NaN
[4 rows x 2 columns]
'''
# The concatenated pieces are indistinguishable in the result; the keys
# parameter builds a hierarchical index that tags each piece.
result = pd.concat([s1, s2, s3], keys=['one', 'two', 'three'])
print(result)
'''
one a 0
b 1
two c 2
d 3
e 4
three f 5
g 6
dtype: int64
'''
"""
s1 = pd.Series([1,2,6], index=['a','b','f'])
s2 = pd.Series([3,4], index=['c','d'])
s3 = pd.Series([5,6], index=['e','f'])
result = pd.concat([s1,s2,s3], keys=['one','two','three'])
print result
'''
one a 1
b 2
f 6
two c 3
d 4
three e 5
f 6
dtype: int64
'''
"""
# print result.unstack()
# When concatenating Series along axis=1, keys become the DataFrame's
# column headers instead.
print(pd.concat([s1, s2, s3], axis=1, keys=['one', 'two', 'three']))
print(pd.concat([s1, s2, s3], axis=1))
'''
dtype: int64
one two three
a 0 NaN NaN
b 1 NaN NaN
c NaN 2 NaN
d NaN 3 NaN
e NaN 4 NaN
f NaN NaN 5
g NaN NaN 6
[7 rows x 3 columns]
0 1 2
a 0 NaN NaN
b 1 NaN NaN
c NaN 2 NaN
d NaN 3 NaN
e NaN 4 NaN
f NaN NaN 5
g NaN NaN 6
[7 rows x 3 columns]
'''
# coding=utf-8
import pandas as pd
import numpy as np

df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'], columns=['one', 'two'])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'], columns=['three', 'four'])
# keys= labels each concatenated piece with a hierarchical column level.
# (Fixed the original's 'levle2' typo -> 'level2'.)
print(pd.concat([df1, df2], axis=1, keys=['level1', 'level2']))
'''
level1 level2
one two three four
a 0 1 5 6
b 2 3 NaN NaN
c 4 5 7 8
[3 rows x 4 columns]
'''
# If a dict is passed instead of a list, the dict's keys are used as the
# keys option automatically.
dic = {'level1': df1, 'level2': df2}
print(pd.concat(dic, axis=1))
'''
level1 level2
one two three four
a 0 1 5 6
b 2 3 NaN NaN
c 4 5 7 8
[3 rows x 4 columns]
'''
# coding=utf-8
import pandas as pd
import numpy as np

# Row-wise concat of frames whose columns only partially overlap.
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
print(pd.concat([df1, df2]))
# ignore_index=True discards the inputs' row labels and builds a fresh
# 0..n-1 index on the result.
print(pd.concat([df1, df2], ignore_index=True))
print(pd.concat([df1, df2], ignore_index=False))
# coding=utf-8
import pandas as pd
import numpy as np

a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan], index=['f', 'e', 'd', 'c', 'b', 'a'])
b = pd.Series(np.arange(len(a), dtype=np.float64), index=['f', 'e', 'd', 'c', 'b', 'a'])
# Blank out the last element.  The original wrote b[-1] = np.nan; on a
# modern pandas label index that would *insert* a new key -1 rather than
# set the last position, so iloc makes the positional intent explicit.
b.iloc[-1] = np.nan
# np.where(cond, x, y): element-wise "x where cond holds, else y"
print(np.where(pd.isnull(a), b, a))
# combine_first: keep the caller's values, fill its holes from the argument
print(b[:-2].combine_first(a[2:]))
'''
[ 0. 2.5 2. 3.5 4.5 nan]
a NaN
b 4.5
c 3.0
d 2.0
e 1.0
f 0.0
dtype: float64
'''
# For a DataFrame, combine_first does the same thing column by column:
# think of it as patching the caller's missing data with the argument's.
df1 = pd.DataFrame({
    'a': [1., np.nan, 5., np.nan],
    'b': [np.nan, 2., np.nan, 6.],
    'c': range(2, 18, 4)
})
df2 = pd.DataFrame({
    'a': [5., 4., np.nan, 3., 7.],
    'b': [np.nan, 3., 4., 6., 8.]
})
print(df1.combine_first(df2))
# coding=utf-8
import pandas as pd
import numpy as np

# Hierarchical indexing gives DataFrame reshaping a consistent vocabulary:
# (1) stack   "rotates" the columns into rows
# (2) unstack "rotates" the rows into columns
data = pd.DataFrame(np.arange(6).reshape((2, 3)),
                    index=pd.Index(['Ohio', 'Colorado'], name='state'),
                    columns=pd.Index(['one', 'two', 'three'], name='number')
                    )
print(data)
# stack turns the DataFrame's columns into rows, producing a Series
result = data.stack()
print(result)
# unstack does the inverse: a hierarchically indexed Series back to a DataFrame
print(result.unstack())
'''
number one two three
state
Ohio 0 1 2
Colorado 3 4 5
[2 rows x 3 columns]
state number
Ohio one 0
two 1
three 2
Colorado one 3
two 4
three 5
dtype: int64
number one two three
state
Ohio 0 1 2
Colorado 3 4 5
[2 rows x 3 columns]
'''
# stack/unstack operate on the innermost level by default; passing a level
# number or name unstacks that level instead.
print(result.unstack(0) == result.unstack('state'))
'''
state Ohio Colorado
number
one True True
two True True
three True True
[3 rows x 2 columns]
'''
# coding=utf-8
import pandas as pd
import numpy as np
s1 = pd.Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([4, 5, 6], index=['c', 'd', 'e'])
data2 = pd.concat([s1, s2], keys=['one','two'])
print data2
# concat -> hierarchically indexed Series -> unstack to a DataFrame
# (rows: one per input Series; columns: the union of the Series' indexes)
print data2.unstack()
'''
one a 0
b 1
c 2
d 3
two c 4
d 5
e 6
dtype: int64
a b c d e
one 0 1 2 3 NaN
two NaN NaN 4 5 6
[2 rows x 5 columns]
'''
# stack drops NaN entries by default; dropna=False keeps every combination
print data2.unstack().stack()
print data2.unstack().stack(dropna=False)
'''
one a 0
b 1
c 2
d 3
two c 4
d 5
e 6
dtype: float64
one a 0
b 1
c 2
d 3
e NaN
two a NaN
b NaN
c 4
d 5
e 6
dtype: float64
'''
# Unstacking a DataFrame: the rotated level becomes the lowest level of the
# result's columns.
# NOTE(review): `result` is not defined in this snippet -- it is the
# data.stack() Series from the previous example; run standalone this block
# raises NameError.  Confirm against the original blog's snippet boundaries.
df = pd.DataFrame(
{'left': result, 'right': result + 5},
columns=pd.Index(['left', 'right'], name='side')
)
print df.unstack('state')
print df.unstack('state').stack('side')
# coding=utf-8
import pandas as pd
import numpy as np
# Round-trip exercise: select columns, write CSV, re-read with a chosen
# index, stack, and write the long-format result back out.
# NOTE(review): depends on hard-coded local paths from the author's machine.
data = pd.read_csv("/home/peerslee/py/pydata/pydata-book-master/ch07/macrodata.csv")
frame01 = pd.DataFrame(data, columns=['year', 'realgdp', 'infl', 'unemp'])
path01 = '/home/peerslee/py/pydata/pydata-book-master/ch07/macrodata01.csv'
frame01.to_csv(path01, index=False, header=False) # drops the index and header
names02 = ['year', 'realgdp', 'infl', 'unemp']
frame02 = pd.read_table(path01, sep=',', names=names02, index_col='year') # use the 'year' column as the row index
path02 = '/home/peerslee/py/pydata/pydata-book-master/ch07/macrodata02.csv'
frame02.stack().to_csv(path02) # write the pivoted data; the year column is forward-filled automatically
names03 = ['date','item','value']
frame03 = pd.read_table(path02, sep=',', names=names03,)
print frame03
'''
date item value
0 1959 realgdp 2710.349
1 1959 infl 0.000
2 1959 unemp 5.800
3 1959 realgdp 2778.801
4 1959 infl 2.340
5 1959 unemp 5.100
6 1959 realgdp 2775.488
7 1959 infl 2.740
8 1959 unemp 5.300
9 1959 realgdp 2785.204
10 1959 infl 0.270
'''
result_path = '/home/peerslee/py/pydata/pydata-book-master/ch07/result_data.csv'
frame03.to_csv(result_path) # save the result
但是我在进行数据pivot 的时候,出现了错误:
raise ValueError('Index contains duplicate entries, '
ValueError: Index contains duplicate entries, cannot reshape
# coding=utf-8
import pandas as pd
import numpy as np

# DataFrame.pivot rotates "long" (stacked) data into "wide" format.
# Hand-rolled sample data, since the book's dataset isn't available here.
ldata = pd.DataFrame({'date': ['03-31', '03-31', '03-31', '06-30', '06-30', '06-30'],
                      'item': ['real', 'infl', 'unemp', 'real', 'infl', 'unemp'],
                      'value': ['2710.', '000.', '5.8', '2778.', '2.34', '5.1']
                      })
print(ldata)
# date becomes the row index, item the column index, value fills the cells.
# (pivot's positional arguments were removed in pandas 2.0 -- keywords only.)
pivoted = ldata.pivot(index='date', columns='item', values='value')
print(pivoted)
'''
item infl real unemp
date
03-31 000. 2710. 5.8
06-30 2.34 2778. 5.1
[2 rows x 3 columns]
'''
# Widen the data to two value columns to reshape at once
ldata['value2'] = np.random.randn(len(ldata))
print(ldata)
# Omitting values= pivots every remaining column, producing hierarchical columns
pivoted = ldata.pivot(index='date', columns='item')
print(pivoted)
'''
value value2
item infl real unemp infl real unemp
date
03-31 000. 2710. 5.8 1.059406 0.437246 0.106987
06-30 2.34 2778. 5.1 -1.087665 -0.811100 -0.579266
[2 rows x 6 columns]
'''
print(pivoted['value'][:5])
'''
item infl real unemp
date
03-31 000. 2710. 5.8
06-30 2.34 2778. 5.1
[2 rows x 3 columns]
'''
# pivot is equivalent to set_index followed by unstack
unstacked = ldata.set_index(['date', 'item']).unstack('item')
print(unstacked)
'''
value value2
item infl real unemp infl real unemp
date
03-31 000. 2710. 5.8 -1.018416 -1.476397 1.579151
06-30 2.34 2778. 5.1 0.863437 1.606538 -1.147549
[2 rows x 6 columns]
'''
# coding=utf-8
import pandas as pd
import numpy as np

data = pd.DataFrame({'k1': ['one'] * 3 + ['two'] * 4,
                     'k2': [1, 1, 2, 3, 3, 4, 4]})
print(data)
'''
k1 k2
0 one 1
1 one 1
2 one 2
3 two 3
4 two 3
5 two 4
6 two 4
[7 rows x 2 columns]
'''
# DataFrame.duplicated returns a boolean Series flagging rows that repeat
# an earlier row.
print(data.duplicated())
'''
0 False
1 True
2 False
3 False
4 True
5 False
6 True
dtype: bool
'''
# DataFrame.drop_duplicates returns the DataFrame with those duplicate
# rows removed.
print(data.drop_duplicates())
'''
k1 k2
0 one 1
2 one 2
3 two 3
5 two 4
[4 rows x 2 columns]
'''
# Duplicates can also be judged on a subset of the columns
data['v1'] = range(7)
print(data.drop_duplicates(['k1']))
'''
k1 k2 v1
0 one 1 0
3 two 3 3
[2 rows x 3 columns]
'''
# The first occurrence is kept by default; keep='last' keeps the last one.
# (The original used take_last=True, the pre-0.17 spelling of keep='last'.)
print(data.drop_duplicates(['k1', 'k2'], keep='last'))
'''
k1 k2 v1
1 one 1 1
2 one 2 2
4 two 3 4
6 two 4 6
[4 rows x 3 columns]
'''
# coding=utf-8
import pandas as pd
import numpy as np

# A table of foods and their weights
data = pd.DataFrame({'food': ['bacon', 'pulled pork', 'bacon', 'Pastrami', 'corned beef',
                              'Bacon', 'pastrami', 'honey ham', 'nova lox'],
                     'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})
# Food -> animal mapping (renamed from the original's misspelled
# 'meat_to_ainmal').  NOTE(review): 'corned beef' maps to 'pig' here while
# the book's version maps it to 'cow' -- confirm which is intended; the
# expected output below matches 'pig'.
meat_to_animal = {
    'bacon': 'pig',
    'pulled pork': 'pig',
    'pastrami': 'cow',
    'corned beef': 'pig',
    'nova lox': 'salmon'
}
# Series.map accepts a function or a dict-like mapping.
# Lower-case the food names first so 'Bacon'/'Pastrami' hit the mapping too;
# foods absent from the mapping ('honey ham') come out as NaN.
data['animal'] = data['food'].map(str.lower).map(meat_to_animal)
print(data)
'''
food ounces animal
0 bacon 4.0 pig
1 pulled pork 3.0 pig
2 bacon 12.0 pig
3 Pastrami 6.0 cow
4 corned beef 7.5 pig
5 Bacon 8.0 pig
6 pastrami 3.0 cow
7 honey ham 5.0 NaN
8 nova lox 6.0 salmon
[9 rows x 3 columns]
'''
# A single function (e.g. a lambda that lower-cases and then looks the food
# up) could do all of this in one map call.
# coding=utf-8
import pandas as pd
import numpy as np
data = pd.Series([1., -999., 2., -999., -1000., 3.])
"""
我们要使用pandas 将-999 这样的数据替换成 NA 值
"""
# Replace the -999 sentinel with NaN.
# NOTE(review): every replace() below returns a new Series that is
# discarded; `data` itself is never modified (interactive demo lines).
data.replace(-999,np.nan)
"""
如果希望一次性替换多个值,可以传入一个由待替换值组成的列表以及一个替换值
"""
# Replace several values at once: list of targets, one replacement.
data.replace([-999,-1000],np.nan)
"""
如果希望对不同的值进行不同的替换,则传入一个由替换关系组成的列表即可
"""
# Different replacement per target: two parallel lists.
data.replace([-999,-1000],[np.nan,0])
"""
传入的参数也可以是字典
"""
# The target->replacement pairs can also be given as a dict.
data.replace({-999:np.nan, -1000:0})
# coding=utf-8
import pandas as pd
import numpy as np

# Axis labels can be transformed through a function or mapping to produce a
# new object; they can also be modified in place.
data = pd.DataFrame(np.arange(12).reshape((3, 4)),
                    index=['Ohio', 'Colorado', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
# Index objects have a map method too
print(data.index.map(str.upper))
# The mapped result can be assigned straight back to index
data.index = data.index.map(str.upper)
print(data)
# rename returns a transformed copy, leaving the original untouched
data.rename(index=str.title, columns=str.upper)
# rename also accepts dict-like objects to relabel a subset of the labels.
# (Fixed the original's columns={'three','peekaboo'} -- a set literal, which
# is not a mapping -- to the intended {'three': 'peekaboo'}.)
data.rename(index={'OHIO': 'INDIANA'},
            columns={'three': 'peekaboo'})
# With inplace=True rename modifies the DataFrame in place and returns None
_ = data.rename(index={'OHIO': 'INDIANA'}, inplace=True)
标签:
原文地址:http://blog.csdn.net/peerslee/article/details/51498713