# Data analysis: pandas basics (a fairly detailed walkthrough)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Walkthrough of basic DataFrame inspection and sorting.
# NOTE: the data below comes from np.random.randn without a seed, so the
# sample outputs shown in the comments differ from run to run (and from one
# sample to the next in this file).

# Three consecutive daily timestamps: 2018-01-16, 2018-01-17, 2018-01-18.
dates = pd.date_range('20180116', periods=3)

# A two-dimensional, table-like structure. numpy generates a 3 x 4 array of
# random values; its shape must match the number of row labels (3 dates)
# and column labels (4 letters).
df = pd.DataFrame(np.random.randn(3, 4), index=dates, columns=list('ABCD'))

# print(df)
#                    A         B         C         D
# 2018-01-16 -0.139759  0.857653  0.754470  0.224313
# 2018-01-17  1.565070  0.521973 -1.265168 -0.278524
# 2018-01-18 -0.668574 -0.527155  0.877785 -1.123334

# head(n) returns the first n rows; n defaults to 5.
# print(df.head(1))
#                    A         B         C         D
# 2018-01-16 -0.039203  1.211976  0.664805  0.307147

# tail(n) is the counterpart of head(n): the last n rows.
# The result is not stored or printed here.
df.tail(5)

# Row labels and column labels of the frame.
# print(df.index)
# print(df.columns)
# DatetimeIndex(['2018-01-16', '2018-01-17', '2018-01-18'], dtype='datetime64[ns]', freq='D')
# Index(['A', 'B', 'C', 'D'], dtype='object')

# describe() computes simple summary statistics for each column.
# print(df.describe())
#               A         B         C         D
# count  3.000000  3.000000  3.000000  3.000000
# mean  -0.163883 -0.107242 -0.621706  0.618341
# std    0.360742  0.429078  0.800366  0.609524
# min   -0.505212 -0.502887 -1.352274  0.055032
# 25%   -0.352602 -0.335291 -1.049444  0.294803
# 50%   -0.199991 -0.167695 -0.746613  0.534574
# 75%    0.006782  0.090581 -0.256421  0.899995
# max    0.213556  0.348857  0.233770  1.265416

# Transposing: rows become columns and vice versa.
# print(df.T)
#    2018-01-16  2018-01-17  2018-01-18
# A   -1.137015   -0.067200    0.737709
# B   -1.141811    0.335953    1.023016
# C    2.481266   -0.957599    0.011144
# D    1.485434   -0.605588    0.592746

# sort_index: axis=1 sorts by column labels, axis=0 sorts by row labels.
# The first table is the original frame, the second is the frame with its
# columns in descending label order.
# print(df)
# print(df.sort_index(axis=1, ascending=False))
#                    A         B         C         D
# 2018-01-16 -0.787226  0.321619  1.097938 -0.701082
# 2018-01-17 -0.417257 -0.163390 -0.943166 -0.497475
# 2018-01-18  0.486670 -0.733582  1.923475 -1.145891
#                    D         C         B         A
# 2018-01-16 -0.701082  1.097938  0.321619 -0.787226
# 2018-01-17 -0.497475 -0.943166 -0.163390 -0.417257
# 2018-01-18 -1.145891  1.923475 -0.733582  0.486670

# sort_values: reorder the rows by the values in column 'B' (ascending).
# print(df.sort_values(by='B'))
#                    A         B         C         D
# 2018-01-17  0.817088 -0.792903  1.643429 -0.008784
# 2018-01-18  0.540910  0.662119  0.190846 -0.960926
# 2018-01-16  0.333727  1.196133 -0.527796  0.677337