标签:多重 values color 索引 必须 rop panda 数位 返回结果
import numpy as np
import pandas as pd

a = {'a':[0,1,2],
     'b':[3,4,5],
     'c':[6,7,8]}
df = pd.DataFrame(a)
print(df,type(df),'\n')
print(df.index,type(df.index),'\n')
print(df.columns,type(df.columns),'\n')
print(df.values,type(df.values))
#运行结果
   a  b  c
0  0  3  6
1  1  4  7
2  2  5  8 <class 'pandas.core.frame.DataFrame'>

RangeIndex(start=0, stop=3, step=1) <class 'pandas.indexes.range.RangeIndex'>

Index(['a', 'b', 'c'], dtype='object') <class 'pandas.indexes.base.Index'>

[[0 3 6]
 [1 4 7]
 [2 5 8]] <class 'numpy.ndarray'>
a1 = {‘a‘:[0,1,2], ‘b‘:[3,4,5], ‘c‘:[6,7,8]} df1 = pd.DataFrame(a1) print(df1) a2 = {‘one‘:np.random.rand(2), ‘two‘:np.random.rand(2)} df2 = pd.DataFrame(a2) print(df2) #运行结果 a b c 0 0 3 6 1 1 4 7 2 2 5 8 one two 0 0.964716 0.420940 1 0.356944 0.893939
#接上面 df1 = pd.DataFrame(a1,columns = [‘b‘,‘a‘,‘c‘,‘new‘]) print(df1) #可以重新定义columns的顺序,格式为list,若新增列则赋予NaN df2 = pd.DataFrame(a2,index = list(‘ab‘)) print(df2) #可以重新定义index,格式为list,长度必须与原来保持一致 #运行结果 b a c new 0 3 0 6 NaN 1 4 1 7 NaN 2 5 2 8 NaN one two a 0.110687 0.824890 b 0.634459 0.112624
a = {‘one‘:pd.Series(np.random.rand(2)), ‘two‘:pd.Series(np.random.rand(3))} df = pd.DataFrame(a) print(df) #运行结果 one two 0 0.371240 0.971477 1 0.122308 0.534714 2 NaN 0.306376
a = {‘Jack‘:{‘Math‘:90,‘English‘:90,‘Chinese‘:90}, ‘Tom‘:{‘Math‘:80,‘English‘:80,‘Chinese‘:80}, ‘Marry‘:{‘Math‘:70,‘English‘:70,‘Chinese‘:70}} df1 = pd.DataFrame(a) print(df1) # 由字典组成的字典创建Dataframe,columns为字典的key,index为子字典的key df2 = pd.DataFrame(a,columns = [‘Tom‘,‘Jack‘,‘July‘]) print(df2) #columns参数可以改变顺序、增加、减少现有列,如出现新的列,值为NaN df3 = pd.DataFrame(a,index = [‘Math‘,‘a‘,‘b‘,‘c‘]) print(df3) # index在这里和之前不同,并不能改变原有index,如果指向新的标签,值为NaN #运行结果 Jack Marry Tom Chinese 90 70 80 English 90 70 80 Math 90 70 80 Tom Jack July Chinese 80 90 NaN English 80 90 NaN Math 80 90 NaN Jack Marry Tom Math 90.0 70.0 80.0 a NaN NaN NaN b NaN NaN NaN c NaN NaN NaN
a = [{‘one‘:1,‘two‘:2}, {‘one‘:10,‘two‘:20,‘three‘:30}] df1 = pd.DataFrame(a) df2 = pd.DataFrame(a,index = [‘a‘,‘b‘]) #index是直接改变值的 print(df1) print(df2) #运行结果 one three two 0 1 NaN 2 1 10 30.0 20 one three two a 1 NaN 2 b 10 30.0 20
a = np.random.rand(9).reshape(3,3) df = pd.DataFrame(a,index = list(‘abc‘),columns = [‘one‘,‘two‘,‘three‘]) print(df) #注:这里的index和columns长度必须和二维数组一致 #运行结果 one two three a 0.478309 0.741675 0.953912 b 0.034855 0.561662 0.563623 c 0.139156 0.705862 0.491152
df = pd.DataFrame(np.arange(12).reshape(3,4),
                  columns = list('abcd'),
                  index = ['one','two','three'])
print(df)
data1 = df[['a','b']]   #注:选择多列时要传入由列名组成的list,所以这里是两层方括号
print(data1)
#一般用df[]进行列选择,括号里填列名;如果填的是数字切片(如df[0:2]),则默认按行切片;
#但不能用单个数字选择某一行,如df[0]会被当作列名0去查找,这里没有这一列,所以是不行的
#运行结果
       a  b   c   d
one    0  1   2   3
two    4  5   6   7
three  8  9  10  11
       a  b
one    0  1
two    4  5
three  8  9
df = pd.DataFrame(np.arange(12).reshape(3,4),
                  columns = list('abcd'),
                  index = ['one','two','three'])
print(df)
data1 = df.loc['one']               #单个位置的索引
data2 = df.loc[['one','three']]     #多个位置的索引,这里面要有括号
data3 = df.loc['one':'three']       #还可以切片索引
data4 = df.loc[['one','four']]      #对没有的index进行索引,返回NaN
#注:data4的结果来自旧版pandas;在pandas 1.0及之后的版本中,列表里只要含有不存在的标签就会抛出KeyError,
#此时可以改用 df.reindex(['one','four']) 得到同样带NaN的结果
print(data1,'\n')
print(data2,'\n')
print(data3,'\n')
print(data4,'\n')
#运行结果
       a  b   c   d
one    0  1   2   3
two    4  5   6   7
three  8  9  10  11
a    0
b    1
c    2
d    3
Name: one, dtype: int32

       a  b   c   d
one    0  1   2   3
three  8  9  10  11

       a  b   c   d
one    0  1   2   3
two    4  5   6   7
three  8  9  10  11

        a    b    c    d
one   0.0  1.0  2.0  3.0
four  NaN  NaN  NaN  NaN
df = pd.DataFrame(np.arange(12).reshape(3,4), columns = list(‘abcd‘), index = [‘one‘,‘two‘,‘three‘]) print(df) data1 = df.iloc[1] data2 = df.iloc[[0,2]] data3 = df.iloc[:2] print(data1,‘\n‘) print(data2,‘\n‘) print(data3,‘\n‘) #运行结果 a b c d one 0 1 2 3 two 4 5 6 7 three 8 9 10 11 a 4 b 5 c 6 d 7 Name: two, dtype: int32 a b c d one 0 1 2 3 three 8 9 10 11 a b c d one 0 1 2 3 two 4 5 6 7
df = pd.DataFrame(np.random.rand(16).reshape(4,4)*100,
                  columns = list('abcd'),
                  index = ['one','two','three','four'])
print(df,'\n')
data1 = df[df > 50]                        #返回符合条件的值,不符合条件的值为NaN
print(data1,'\n')
data2 = df[df['a'] > 50]                   #先单列做判断,再返回结果为True的行
print(data2,'\n')
data3 = df[df[['a','b']] > 50]             #多列做判断,没有参与判断的列全部为NaN
print(data3,'\n')
data4 = df[df.loc[['one','two']] > 50]     #多行做判断,没有参与判断的行全部为NaN
print(data4)
#运行结果
               a          b          c          d
one    62.787540  20.666730  91.358127   9.435425
two    53.820915  56.264460  54.644562  75.337214
three  26.538461  48.790031  86.984291   2.213960
four   94.865606  73.560053   5.123474  84.851537

               a          b          c          d
one    62.787540        NaN  91.358127        NaN
two    53.820915  56.264460  54.644562  75.337214
three        NaN        NaN  86.984291        NaN
four   94.865606  73.560053        NaN  84.851537

              a          b          c          d
one   62.787540  20.666730  91.358127   9.435425
two   53.820915  56.264460  54.644562  75.337214
four  94.865606  73.560053   5.123474  84.851537

               a          b   c   d
one    62.787540        NaN NaN NaN
two    53.820915  56.264460 NaN NaN
three        NaN        NaN NaN NaN
four   94.865606  73.560053 NaN NaN

               a         b          c          d
one    62.787540       NaN  91.358127        NaN
two    53.820915  56.26446  54.644562  75.337214
three        NaN       NaN        NaN        NaN
four         NaN       NaN        NaN        NaN
df = pd.DataFrame(np.random.rand(16).reshape(4,4)*100, columns = list(‘abcd‘), index = [‘one‘,‘two‘,‘three‘,‘four‘]) print(df,‘\n‘) print(df[‘a‘].loc[[‘one‘,‘three‘]],‘\n‘) print(df[df[‘a‘] > 50].iloc[:2]) #运行结果 a b c d one 50.411475 31.087751 18.958850 46.813210 two 23.162358 39.535175 45.106366 78.041107 three 91.972419 82.752934 94.821926 86.654188 four 63.743092 62.591314 59.975080 29.558177 one 50.411475 three 91.972419 Name: a, dtype: float64 a b c d one 50.411475 31.087751 18.958850 46.813210 three 91.972419 82.752934 94.821926 86.654188
df = pd.DataFrame(np.random.rand(12).reshape(6,2)*100, columns = [‘one‘,‘two‘]) print(df.head(2)) #查看头部数据 print(df.tail()) #查看尾部数据,不填数字默认5条 print(df.T) #转置,行列互换 #运行结果 one two 0 53.489385 31.202920 1 5.997141 3.141106 one two 1 5.997141 3.141106 2 96.424950 29.471567 3 27.111331 80.542447 4 35.198373 62.578070 5 28.974724 40.596728 0 1 2 3 4 5 one 53.489385 5.997141 96.424950 27.111331 35.198373 28.974724 two 31.202920 3.141106 29.471567 80.542447 62.578070 40.596728
df = pd.DataFrame(np.random.rand(12).reshape(3,4)*100, columns = list(‘abcd‘)) print(df) df[‘e‘] = 10 #添加列 print(df) df.loc[4] = 20 #添加行 print(df) #运行结果 a b c d 0 77.383301 91.582829 12.947135 33.315974 1 86.272310 41.139458 86.445219 99.999344 2 89.409235 28.999194 22.190588 13.010493 a b c d e 0 77.383301 91.582829 12.947135 33.315974 10 1 86.272310 41.139458 86.445219 99.999344 10 2 89.409235 28.999194 22.190588 13.010493 10 a b c d e 0 77.383301 91.582829 12.947135 33.315974 10 1 86.272310 41.139458 86.445219 99.999344 10 2 89.409235 28.999194 22.190588 13.010493 10 4 20.000000 20.000000 20.000000 20.000000 20
df = pd.DataFrame(np.random.rand(12).reshape(3,4)*100,
                  columns = list('abcd'))
print(df)
df1 = df.copy()
df2 = df.copy()
df1[['a','b']] = 20                 #直接索引修改
print(df1)
df2.loc[df2['b'] > 50,'a'] = 0      #逗号前面是条件,逗号后面是要改变的列名
print(df2)
#运行结果
           a          b          c          d
0  58.247472  70.337448  63.115804  91.517310
1  59.591559  28.327665  66.339979   0.902682
2  21.920386  17.240483  36.502033  93.849510
      a     b          c          d
0  20.0  20.0  63.115804  91.517310
1  20.0  20.0  66.339979   0.902682
2  20.0  20.0  36.502033  93.849510
           a          b          c          d
0   0.000000  70.337448  63.115804  91.517310
1  59.591559  28.327665  66.339979   0.902682
2  21.920386  17.240483  36.502033  93.849510
df = pd.DataFrame(np.random.rand(12).reshape(3,4)*100, columns = list(‘abcd‘), index = [‘one‘,‘two‘,‘three‘]) print(df) del df[‘d‘] print(df) print(df.drop([‘one‘])) #默认是删除行,括号里填入行名 print(df.drop([‘a‘],axis = 1)) #加了参数后,删除列,括号里填入列名 #运行结果 a b c d one 54.013312 42.523130 6.792826 35.433455 two 18.595228 75.123504 17.026400 10.564516 three 7.532090 76.689347 43.479484 20.220647 a b c one 54.013312 42.523130 6.792826 two 18.595228 75.123504 17.026400 three 7.532090 76.689347 43.479484 a b c two 18.595228 75.123504 17.026400 three 7.532090 76.689347 43.479484 b c one 42.523130 6.792826 two 75.123504 17.026400 three 76.689347 43.479484
df1 = pd.DataFrame(np.random.rand(10,4), columns = list(‘abcd‘)) df2 = pd.DataFrame(np.random.rand(7,3), columns = list(‘abc‘)) print(df1 + df2) #按照列和行标签进行对齐的,对不齐的值为NaN #运行结果 a b c d 0 0.716637 1.150983 1.369721 NaN 1 0.226954 0.821476 0.277249 NaN 2 0.771878 1.078424 0.248526 NaN 3 1.120488 1.107775 0.749390 NaN 4 0.975615 0.515302 0.987700 NaN 5 0.957985 1.459794 1.080611 NaN 6 0.665720 1.114098 0.453194 NaN 7 NaN NaN NaN NaN 8 NaN NaN NaN NaN 9 NaN NaN NaN NaN
df = pd.DataFrame(np.random.rand(4,3)*100, columns = list(‘abc‘)) print(df) df1 = df.sort_values([‘a‘]) #默认是升序,且这个方法不会改变原数据 df2 = df.sort_values([‘a‘],ascending = False) print(df1) print(df2) #运行结果 a b c 0 16.014360 18.315673 4.582076 1 17.572265 12.793833 36.774427 2 82.945503 61.148299 34.235598 3 47.561511 46.960933 18.928759 a b c 0 16.014360 18.315673 4.582076 1 17.572265 12.793833 36.774427 3 47.561511 46.960933 18.928759 2 82.945503 61.148299 34.235598 a b c 2 82.945503 61.148299 34.235598 3 47.561511 46.960933 18.928759 1 17.572265 12.793833 36.774427 0 16.014360 18.315673 4.582076
df1 = pd.DataFrame(np.random.rand(5,2), index = [4,3,1,2,5]) df2 = pd.DataFrame(np.random.rand(5,2), index = [‘d‘,‘c‘,‘a‘,‘b‘,‘e‘]) print(df1.sort_index()) print(df2.sort_index(ascending = False)) #运行结果 0 1 1 0.263742 0.462527 2 0.485418 0.621751 3 0.888531 0.886704 4 0.179775 0.148224 5 0.141401 0.009850 0 1 e 0.743093 0.529084 d 0.546709 0.898403 c 0.092891 0.543375 b 0.685351 0.017085 a 0.768035 0.215217
python-pandas基础数据结构(DataFrame)
标签:多重 values color 索引 必须 rop panda 数位 返回结果
原文地址:https://www.cnblogs.com/sparkingplug/p/12782585.html