标签:type head code size [1] import idt .data set
# -*- coding:utf-8 -*-
"""Load the training CSV, derive calendar features, and plot dtype counts.

The script reads ``d:/train.csv``, splits the ``datetime`` column into
``date``/``hour``/``weekday``/``month`` columns, replaces the numeric
``season`` and ``weather`` codes with descriptive labels, converts the
categorical columns to the pandas ``category`` dtype, and finally draws a
bar chart showing how many columns use each dtype.
"""
import calendar
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Remove the display-width limit so head() output is not abbreviated
# with ellipses.
pd.set_option("display.width", None)

dailyData = pd.read_csv("d:/train.csv", encoding="gbk")
print(dailyData.shape)
print(dailyData.head())
print(dailyData.columns.tolist())

# Sample raw row:
# 2011-01-01 00:00:00 1 0 0 1 9.84 14.395 81 0.0 3 13 16

# "datetime" is a "YYYY-MM-DD HH:MM:SS" string: the part before the space is
# the date, and the first ":"-separated field after it is the hour.
dailyData["date"] = dailyData.datetime.apply(lambda x: x.split()[0])
dailyData["hour"] = dailyData.datetime.apply(
    lambda x: x.split()[1].split(":")[0]
)
dailyData["weekday"] = dailyData.date.apply(
    lambda dateString: calendar.day_name[
        datetime.strptime(dateString, "%Y-%m-%d").weekday()
    ]
)
dailyData["month"] = dailyData.date.apply(
    lambda dateString: calendar.month_name[
        datetime.strptime(dateString, "%Y-%m-%d").month
    ]
)

# Replace the numeric season / weather codes with readable labels.
dailyData["season"] = dailyData.season.map(
    {1: "Spring", 2: "Summer", 3: "Fall", 4: "Winter"}
)
dailyData["weather"] = dailyData.weather.map(
    {
        1: " Clear + Few clouds + Partly cloudy + Partly cloudy",
        2: " Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist ",
        3: " Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds",
        4: " Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog ",
    }
)

categoryVariableList = [
    "hour", "weekday", "month", "season", "weather", "holiday", "workingday",
]
for var in categoryVariableList:
    dailyData[var] = dailyData[var].astype("category")

# The raw timestamp is no longer needed once the derived columns exist.
dailyData = dailyData.drop(["datetime"], axis=1)

# One row per distinct dtype object, with the number of columns using it.
dataTypeDf = (
    pd.DataFrame(dailyData.dtypes.value_counts())
    .reset_index()
    .rename(columns={"index": "variableType", 0: "count"})
)
print(dataTypeDf)

# ------------------------------------------------------------------
# Several rows can share the same dtype name (presumably because each
# categorical column carries its own category dtype), so merge the counts
# by dtype name before plotting.
new_dic = {}
for i in dataTypeDf.index:
    tempType = dataTypeDf.loc[i, "variableType"].name
    new_dic[tempType] = new_dic.get(tempType, 0) + dataTypeDf.loc[i, "count"]
print(new_dic)

mylist = [[key, value] for key, value in new_dic.items()]
print(mylist)

# Rebuild the frame with the same column names, now keyed by dtype name.
dataTypeDf = pd.DataFrame(mylist, columns=list(dataTypeDf))
# ------------------------------------------------------------------
print(dataTypeDf)

fig, ax = plt.subplots()
fig.set_size_inches(12, 5)
sn.barplot(data=dataTypeDf, x="variableType", y="count", ax=ax)
ax.set(xlabel="variableType", ylabel="Count", title="Variables DataType Count")
plt.show()
标签:type head code size [1] import idt .data set
原文地址:https://www.cnblogs.com/muhe221/p/10888907.html