码迷,mamicode.com
首页 > 编程语言 > 详细

Python 清洗数据

时间:2018-07-17 20:17:26      阅读:222      评论:0      收藏:0      [点我收藏+]

标签:数据   4.0   from   axis   创建   max   type   字典   class   

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Walk-through of basic pandas Series / DataFrame operations used when
# cleaning data: construction, naming, column selection, re-indexing,
# filling missing rows, dropping rows/columns and transposing.
#
# NOTE: the string literals below were reconstructed from the sample output
# shown in the comments (labels a/b/c, columns name/rank/score/class).

# A Series is a one-dimensional array with an explicit index.
s = Series([1, 2, 3], index=["a", "b", "c"])
print(s)
# Output:
# a    1
# b    2
# c    3
# dtype: int64
print(np.max(s))  # NumPy reductions can be applied to a Series

# Both the Series itself and its index can carry a name.
s.name = "rank"
s.index.name = "name"
print(s)

# Create a DataFrame from a dict of equal-length lists.
sdata1 = {"name": ["a", "b", "c"], "rank": [1, 2, 3], "score": [98, 89, 54]}
print(sdata1)  # plain dict
df1 = DataFrame(sdata1)
print(df1)
# Output:
#   name  rank  score
# 0    a     1     98
# 1    b     2     89
# 2    c     3     54

# Passing `columns` selects/reorders the dict keys.
df2 = DataFrame(sdata1, columns=["score", "name", "rank"])
print(df2)
# Output (data stays aligned to its column; only the position changes):
#    score name  rank
# 0     98    a     1
# 1     89    b     2
# 2     54    c     3

# A column that is not a key of the dict is created as missing values.
df3 = DataFrame(
    sdata1,
    columns=["score", "name", "rank", "class"],
    index=[1, 2, 3],
)
print(df3)
# Output (the "class" column is all missing):
#    score name  rank class
# 1     98    a     1   NaN
# 2     89    b     2   NaN
# 3     54    c     3   NaN

# Re-indexing with a label that does not exist adds an all-NaN row; the
# integer columns are upcast to float so they can hold NaN.
df4 = df3.reindex([1, 2, 3, 4])
print(df4)
# Output:
#    score name  rank class
# 1   98.0    a   1.0   NaN
# 2   89.0    b   2.0   NaN
# 3   54.0    c   3.0   NaN
# 4    NaN  NaN   NaN   NaN

print(df4["score"])  # select a single column
# `.ix` was removed from pandas; the index holds integer labels, so the
# label-based `.loc` is the equivalent lookup for the row labelled 1.
print(df4.loc[1])
print(df2[df2["score"] > 60])  # rows of df2 whose score is greater than 60
# Output:
#    score name  rank
# 0     98    a     1
# 1     89    b     2

del df3["class"]
print(df3)  # the "class" column has been removed

# Rebuild df3 from scratch so the following examples are self-contained.
sdata1 = {"name": ["a", "b", "c"], "rank": [1, 2, 3], "score": [98, 89, 54]}
print(sdata1)
df3 = DataFrame(
    sdata1,
    columns=["score", "name", "rank", "class"],
    index=[1, 2, 3],
)
del df3["class"]
print(df3)
print(df3.reindex([1, 2, 3, 4]))
print(df3.reindex([1, 2, 3, 4], fill_value=0))  # fill missing entries with 0
# Output:
#    score name  rank
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3
# 4      0    0     0

print(df3.reindex([0, 1, 2, 3]))
# Output:
#    score name  rank
# 0    NaN  NaN   NaN
# 1   98.0    a   1.0
# 2   89.0    b   2.0
# 3   54.0    c   3.0

# method="bfill" fills a new label from the next existing label.
print(df3.reindex([0, 1, 2, 3], method="bfill"))
# Output:
#    score name  rank
# 0     98    a     1
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3

print(df3.drop(1))  # drop the row labelled 1 (the first row)
# Drop a column by name; axis=0 addresses rows, axis=1 addresses columns.
print(df3.drop("score", axis=1))

print(df3.T)  # transpose

 

Python 清洗数据

标签:数据   4.0   from   axis   创建   max   type   字典   class   

原文地址:https://www.cnblogs.com/lzxanthony/p/9325541.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!