码迷,mamicode.com
首页 > 其他好文 > 详细

pandas

时间:2017-08-20 21:05:32      阅读:304      评论:0      收藏:0      [点我收藏+]

标签:pip   and   closed   使用   play   数据   分享   排序   ilo   

pandas

a.基本使用

技术分享
#结构化的数据分析

pip3 install jupyter
pip3 install pandas


#import pandas as pd
#import numpy as np

#事例一:

#s = pd.Series([1,3,5,np.NaN,8,4])
#s

    Out[5]: 
    0    1.0
    1    3.0
    2    5.0
    3    NaN
    4    8.0
    5    4.0
    dtype: float64

#事例二:

#创建日期序列,里面有6个日期
#datas = pd.date_range('20170821',periods=6)
#datas

    DatetimeIndex(['2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-26'], dtype='datetime64[ns]', freq='D')


#创建二维数组

#创建6行4列的随机数,索引datas,列ABCD

#data = pd.DataFrame(np.random.randn(6,4),index=datas,columns=list('ABCD'))
#data

                       A         B         C         D
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344
    2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
    2017-08-25  0.186582  0.739702 -1.688458  0.480121
    2017-08-26 -0.519489 -0.002741  0.875164 -0.783657

#data.values

    array([[ -2.45343824e-01,   2.60401419e-01,  -2.00362100e+00,  4.27142120e-01],
           [ -2.77384841e+00,   1.60472878e+00,  -7.11768546e-01,  -6.77211441e-01],
           [  3.96086166e-01,  -7.31910686e-01,  -1.00241967e-01,   9.66344486e-01],
           [  7.61820910e-01,  -1.59621471e-01,  -1.17272904e+00,   -1.31705593e+00],
           [  1.86582291e-01,   7.39702155e-01,  -1.68845777e+00,   4.80121151e-01],
           [ -5.19488872e-01,  -2.74128435e-03,   8.75164076e-01,  -7.83656946e-01]])


#data.head(2)    #看前两行的数据
                       A         B         C         D
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211


#data.T         #行列转置

       2017-08-21  2017-08-22  2017-08-23  2017-08-24  2017-08-25  2017-08-26
    A   -0.245344   -2.773848    0.396086    0.761821    0.186582   -0.519489
    B    0.260401    1.604729   -0.731911   -0.159621    0.739702   -0.002741
    C   -2.003621   -0.711769   -0.100242   -1.172729   -1.688458    0.875164
    D    0.427142   -0.677211    0.966344   -1.317056    0.480121   -0.783657


#data.sort_index(axis=1)        #列正序

                       A         B         C         D
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344
    2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
    2017-08-25  0.186582  0.739702 -1.688458  0.480121
    2017-08-26 -0.519489 -0.002741  0.875164 -0.783657


#data.sort_index(axis=1,ascending=False)        #列倒序

                       D         C         B         A
    2017-08-21  0.427142 -2.003621  0.260401 -0.245344
    2017-08-22 -0.677211 -0.711769  1.604729 -2.773848
    2017-08-23  0.966344 -0.100242 -0.731911  0.396086
    2017-08-24 -1.317056 -1.172729 -0.159621  0.761821
    2017-08-25  0.480121 -1.688458  0.739702  0.186582
    2017-08-26 -0.783657  0.875164 -0.002741 -0.519489


#data.sort_index(axis=0,ascending=False)        #行倒序

                       A         B         C         D
    2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
    2017-08-25  0.186582  0.739702 -1.688458  0.480121
    2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142


#data.sort_values(by='A')        # A列排序

                       A         B         C         D
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
    2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142
    2017-08-25  0.186582  0.739702 -1.688458  0.480121
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344
    2017-08-24  0.761821 -0.159621 -1.172729 -1.317056


#data['A'] 或 data.A                #数据选择
 
    2017-08-21   -0.245344
    2017-08-22   -2.773848
    2017-08-23    0.396086
    2017-08-24    0.761821
    2017-08-25    0.186582
    2017-08-26   -0.519489


#data.loc['2017-08-21':'2017-08-23']        #选择哪几行

                       A         B         C         D
    2017-08-21 -0.245344  0.260401 -2.003621  0.427142
    2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344


#data.iloc[2:4]                            #行

                       A         B         C         D
    2017-08-23  0.396086 -0.731911 -0.100242  0.966344
    2017-08-24  0.761821 -0.159621 -1.172729 -1.317056


#data.loc[:,['B','C']]                #取出列

                       B         C
    2017-08-21  0.260401 -2.003621
    2017-08-22  1.604729 -0.711769
    2017-08-23 -0.731911 -0.100242
    2017-08-24 -0.159621 -1.172729
    2017-08-25  0.739702 -1.688458
    2017-08-26 -0.002741  0.875164


#data.loc['2017-08-21':'2017-08-23',['B','C']]        #指定行和列

                       B         C
    2017-08-21  0.260401 -2.003621
    2017-08-22  1.604729 -0.711769
    2017-08-23 -0.731911 -0.100242


#data.loc['2017-08-21','B']            #访问特定的值

    0.26040141861580018

#data.at[pd.Timestamp('2017-08-21'),'B']    #比上面效率高
    
    #0.26040141861580018


#data.A = range(6)                #修改列数据
#data
                A         B         C         D
    2017-08-21  0  0.260401 -2.003621  0.427142
    2017-08-22  1  1.604729 -0.711769 -0.677211
    2017-08-23  2 -0.731911 -0.100242  0.966344
    2017-08-24  3 -0.159621 -1.172729 -1.317056
    2017-08-25  4  0.739702 -1.688458  0.480121
    2017-08-26  5 -0.002741  0.875164 -0.783657


#data.iloc[:,2:5] = 1000        #修改某几列
#data

                A         B     C     D
    2017-08-21  0  0.260401  1000  1000
    2017-08-22  1  1.604729  1000  1000
    2017-08-23  2 -0.731911  1000  1000
    2017-08-24  3 -0.159621  1000  1000
    2017-08-25  4  0.739702  1000  1000
    2017-08-26  5 -0.002741  1000  1000


#事例三:

#d = {'A':1,'B':pd.Timestamp("20160821"),'C':list(range(4)),'D':np.arange(4)}
#d

    {'A': 1,
     'B': Timestamp('2016-08-21 00:00:00'),
     'C': [0, 1, 2, 3],
     'D': array([0, 1, 2, 3])}

#df = pd.DataFrame(d)
#df
 
       A          B  C  D
    0  1 2016-08-21  0  0
    1  1 2016-08-21  1  1
    2  1 2016-08-21  2  2
    3  1 2016-08-21  3  3

#df.dtypes 
    A             int64
    B    datetime64[ns]
    C             int64
    D             int64
View Code

 

pandas

标签:pip   and   closed   使用   play   数据   分享   排序   ilo   

原文地址:http://www.cnblogs.com/oyoui/p/7401177.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!