pandas的使用

时间：2020-06-22 23:16:15 阅读：66 评论：0 收藏：0 [点我收藏+]

标签：list join() null ace col -- als 信息 head

# Series 一维，带标签数组
# DataFrame二维，Series容器

import string
import pandas as pd
import numpy as np

# t = pd.Series(np.arange(10), index=list(string.ascii_uppercase[:10]))
# string.ascii_uppercase[:10]表示创建10个大写字母，从小到大A-Z
# 创建索引index=
# print(t)
# print(t['A'])
# 类似于字典的方法

# print(t[['A', 'B']])
# 还可以多取

# print(t[t > 4])
# 布尔索引，取键值大于4的


# temp = {'name': '宵夜', 'age': '14'}
# t = pd.Series(temp)
# 通过字典创建带标签的数组


# 如果索引和键值长度不匹配，键值会变为NaN

# df = pd.read_csv('we.csv')
# 读取csv文件
# print(df)
# 24

import pymongo

# Connect to the MongoDB server on localhost and select the 'sites'
# collection of the 'mongodb' database.
myclient = pymongo.MongoClient('mongodb://localhost:27017/')
mydb = myclient['mongodb']
mycol = mydb['sites']
# Sample documents for the collection (kept commented out; presumably they
# were inserted once already -- uncomment to insert them again):
# mylist = [
#     {"name": "Taobao", "alexa": 100, "url": "https://www.taobao.com"},
#     {"name": "QQ", "alexa": 101, "url": "https://www.qq.com"},
#     {"name": "Facebook", "alexa": 10, "url": "https://www.facebook.com"},
#     {"name": "知乎", "alexa": 103, "url": "https://www.zhihu.com"},
#     {"name": "Github", "alexa": 109, "url": "https://www.github.com"}
# ]
# x = mycol.insert_many(mylist)

# find() without a filter yields every document in the collection.
data = mycol.find()
# for i in data:
#     print(i)
# t1 = pd.Series(data[0])
# print(t1)

# --------------------------- Indexing -------------------------------#
# Build a DataFrame from the query result: one row per document.
t2 = pd.DataFrame(data)
# print(t2)

# print(t2[:10])
# print(t2['name'])
# 取10行
# 字符串表示取列索引

# print(t2.loc[2, 'name'])
# 通过标签索引某行某列的数据

# print(t2.loc[:, 'name'])
# 选取多行，某列

# print(t2.loc[[1, 2], :])
# 选取多行，所有列


# print(t2.loc[[1, 2]])
# 索引多行


# print(t2.iloc[3, :])
# 索引某行


# print(t2.iloc[:, 2])
# 索引某列

# print(t2[(t2["alexa"] == 100)])
# 布尔索引

# --------------------------------------字符串操作函数-------------------------------------#
# print(t2.iloc[:, 2].str.split())
# 将读取到的内容切割为一个个的列表


# print(t2.iloc[:, 2].str.split().tolist())
# 将读取到的内容切割为一个大的列表


# ----------------------------------判断是否空--------------------------------#

# print(pd.isnull(t2))
# print(pd.notnull(t2))

# ----------------------------删除行有nan的-----------------------------#
# t2.dropna(axis=0, how='all')
# 全部为nan时删除

# t2.dropna(axis=0, how='any')
# 只要有nan时就删除

# ---------------------------修改nan处的值-------------------------------#
# t2.dropna(axis=0, how='any', inplace=True)
# 原地修改

# t2.fillna(19)
# 将nan处的值换成19

# --------------------------求均值--------------------------------#
# print(t2.mean())
# 求每一列的均值

# -----------------------------------内构建函数------------------------------- #

# print(t2.head(2))
# 显示头2行

# print(t2.info())
# 展示相关信息

# print(t2.describe())
# 展示数据里数字的相关信息

# print(t2.sort_values(by='_id', ascending=False).head(10))
# 通过指定列排序,取前10个

# df = pd.read_sql()
# print(pd.DataFrame(np.arange(12).reshape(3, 4), index=list('abc'), columns=list('defg')))
# 行索引指定index，列索引指定columns


# --------------------------------------合并数组--------------------------------#
# t2.join()
# 索引相同的合并到一起

# print(t2.merge(t2, left_on='name', right_on='alexa'))
# 按照指定的列把数据按照一定的方式合并到一起

# print(t2.merge(t2, on='name', how='inner'))
# 默认是交集

# print(t2.merge(t2, on='name', how='outer'))
# 并集,NaN补全

# print(t2.merge(t2, on='name', how='left'))
# 左边为准，NaN补全

# print(t2.merge(t2, on='name', how='right'))
# 右边为准，NaN补全

# ------------------------------------分组和聚合-------------------------------------#
# Group the rows of t2 by the value of the 'name' column.
t4 = t2.groupby(by='name')

# for i, j in t4:
#     print(i)
#     print('-' * 100)
#     print(j)
#     print('*' * 100)
# 遍历


# print(t4.count())
# print(t4['name'].count())
# print(t4['name'].count()['Facebook'])
# 统计

# Grouping accepts several keys at once: here the rows are grouped by the
# combination of the 'alexa' and 'url' column values.
t6 = t2.groupby(by=[t2['alexa'], t2['url']])
# for i in t6:
#     print(i)

# -------------------------------------索引和复合索引------------------------------#
# print(t2.index)
# 获取index

# t2.index = [i for i in range(0, 39)]
# 指定index

# t2.reindex(list('abcdef'))
# 重设index

# t2.set_index('name', drop=False)
# 将某一列作为index,不删除那一列

# print(t2['name'].unique())
# 获取name列的唯一值

# Use the 'name' column as the index of a new frame and print the distinct
# values of that index (t2 itself is not modified).
print(t2.set_index('name').index.unique())

# ---- MultiIndex (hierarchical index) -----
# swaplevel() takes two level numbers or names and returns a NEW object with
# those index levels exchanged (the data is unchanged). It only works on a
# hierarchical index and raises TypeError on a single-level one, so a
# two-level index is built from 'name' and 'alexa' first and the swapped
# result is kept in t7.
t7 = t2.set_index(['name', 'alexa']).swaplevel()
# print(t7.index)

# t2 was not modified above, so this still prints its original index.
print(t2.index)

# 36 25.03

官方文档：https://www.pypandas.cn/docs/　　

pandas的使用

标签：list join() null ace col -- als 信息 head

原文地址：https://www.cnblogs.com/superSmall/p/13179278.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行