标签:pip and closed 使用 play 数据 分享 排序 ilo
a.基本使用
#结构化的数据分析
pip3 install jupyter
pip3 install pandas
#import pandas as pd
#import numpy as np
#事例一:
#s = pd.Series([1,3,5,np.nan,8,4])
#s
Out[5]:
0    1.0
1    3.0
2    5.0
3    NaN
4    8.0
5    4.0
dtype: float64
#事例二:
#创建日期序列,里面有6个日期
#datas = pd.date_range('20170821',periods=6)
#datas
DatetimeIndex(['2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24','2017-08-25', '2017-08-26'], dtype='datetime64[ns]', freq='D')
#创建二维数组
#创建6行4列的随机数,索引datas,列ABCD
#data = pd.DataFrame(np.random.randn(6,4),index=datas,columns=list('ABCD'))
#data
                   A         B         C         D
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
2017-08-25  0.186582  0.739702 -1.688458  0.480121
2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
#data.values
array([[ -2.45343824e-01, 2.60401419e-01, -2.00362100e+00, 4.27142120e-01],
       [ -2.77384841e+00, 1.60472878e+00, -7.11768546e-01, -6.77211441e-01],
       [ 3.96086166e-01, -7.31910686e-01, -1.00241967e-01, 9.66344486e-01],
       [ 7.61820910e-01, -1.59621471e-01, -1.17272904e+00, -1.31705593e+00],
       [ 1.86582291e-01, 7.39702155e-01, -1.68845777e+00, 4.80121151e-01],
       [ -5.19488872e-01, -2.74128435e-03, 8.75164076e-01, -7.83656946e-01]])
#data.head(2) #看前两行的数据
                   A         B         C         D
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
#data.T #行列转置
   2017-08-21  2017-08-22  2017-08-23  2017-08-24  2017-08-25  2017-08-26
A   -0.245344   -2.773848    0.396086    0.761821    0.186582   -0.519489
B    0.260401    1.604729   -0.731911   -0.159621    0.739702   -0.002741
C   -2.003621   -0.711769   -0.100242   -1.172729   -1.688458    0.875164
D    0.427142   -0.677211    0.966344   -1.317056    0.480121   -0.783657
#data.sort_index(axis=1) #列正序
                   A         B         C         D
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
2017-08-25  0.186582  0.739702 -1.688458  0.480121
2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
#data.sort_index(axis=1,ascending=False) #列倒序
                   D         C         B         A
2017-08-21  0.427142 -2.003621  0.260401 -0.245344
2017-08-22 -0.677211 -0.711769  1.604729 -2.773848
2017-08-23  0.966344 -0.100242 -0.731911  0.396086
2017-08-24 -1.317056 -1.172729 -0.159621  0.761821
2017-08-25  0.480121 -1.688458  0.739702  0.186582
2017-08-26 -0.783657  0.875164 -0.002741 -0.519489
#data.sort_index(axis=0,ascending=False) #行倒序
                   A         B         C         D
2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
2017-08-25  0.186582  0.739702 -1.688458  0.480121
2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
#data.sort_values(by='A') #按A列的值排序
                   A         B         C         D
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
2017-08-26 -0.519489 -0.002741  0.875164 -0.783657
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
2017-08-25  0.186582  0.739702 -1.688458  0.480121
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
#data['A'] 或 data.A #数据选择(取出A列)
2017-08-21   -0.245344
2017-08-22   -2.773848
2017-08-23    0.396086
2017-08-24    0.761821
2017-08-25    0.186582
2017-08-26   -0.519489
#data.loc['2017-08-21':'2017-08-23'] #按标签选择哪几行
                   A         B         C         D
2017-08-21 -0.245344  0.260401 -2.003621  0.427142
2017-08-22 -2.773848  1.604729 -0.711769 -0.677211
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
#data.iloc[2:4] #按位置选择行
                   A         B         C         D
2017-08-23  0.396086 -0.731911 -0.100242  0.966344
2017-08-24  0.761821 -0.159621 -1.172729 -1.317056
#data.loc[:,['B','C']] #取出列
                   B         C
2017-08-21  0.260401 -2.003621
2017-08-22  1.604729 -0.711769
2017-08-23 -0.731911 -0.100242
2017-08-24 -0.159621 -1.172729
2017-08-25  0.739702 -1.688458
2017-08-26 -0.002741  0.875164
#data.loc['2017-08-21':'2017-08-23',['B','C']] #指定行和列
                   B         C
2017-08-21  0.260401 -2.003621
2017-08-22  1.604729 -0.711769
2017-08-23 -0.731911 -0.100242
#data.loc['2017-08-21','B'] #访问特定的值
0.26040141861580018
#data.at[pd.Timestamp('2017-08-21'),'B'] #访问单个值时比 loc 效率高
0.26040141861580018
#data.A = range(6) #修改列数据
#data
            A         B         C         D
2017-08-21  0  0.260401 -2.003621  0.427142
2017-08-22  1  1.604729 -0.711769 -0.677211
2017-08-23  2 -0.731911 -0.100242  0.966344
2017-08-24  3 -0.159621 -1.172729 -1.317056
2017-08-25  4  0.739702 -1.688458  0.480121
2017-08-26  5 -0.002741  0.875164 -0.783657
#data.iloc[:,2:5] = 1000 #修改某几列(从第3列起,此处即C、D两列)
#data
            A         B     C     D
2017-08-21  0  0.260401  1000  1000
2017-08-22  1  1.604729  1000  1000
2017-08-23  2 -0.731911  1000  1000
2017-08-24  3 -0.159621  1000  1000
2017-08-25  4  0.739702  1000  1000
2017-08-26  5 -0.002741  1000  1000
#事例三:
#d = {'A':1,'B':pd.Timestamp("20160821"),'C':list(range(4)),'D':np.arange(4)}
#d
{'A': 1, 'B': Timestamp('2016-08-21 00:00:00'), 'C': [0, 1, 2, 3], 'D': array([0, 1, 2, 3])}
#df = pd.DataFrame(d)
#df
   A          B  C  D
0  1 2016-08-21  0  0
1  1 2016-08-21  1  1
2  1 2016-08-21  2  2
3  1 2016-08-21  3  3
#df.dtypes
A             int64
B    datetime64[ns]
C             int64
D             int64
标签:pip and closed 使用 play 数据 分享 排序 ilo
原文地址:http://www.cnblogs.com/oyoui/p/7401177.html