码迷,mamicode.com
首页 > 其他好文 > 详细

aaa

时间:2019-05-19 14:37:48      阅读:102      评论:0      收藏:0      [点我收藏+]

标签:type   head   code   size   [1]   import   idt   .data   set   

 

 

# -*- coding:utf-8 -*-
import calendar
import pandas as pd
from datetime import datetime
import warnings
import matplotlib.pyplot as plt
import seaborn as sn


# pd.options.mode.chained_assignment = None
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Lift the display-width limit so that head() output is not abbreviated
# with "..." when printed.
# The option name must be a string; a bare display.width is an undefined name.
pd.set_option("display.width", None)

# Read the training set. The encoding is passed as the string "gbk"; the
# unquoted form raised NameError before the file was ever opened.
dailyData = pd.read_csv("d:/train.csv", encoding="gbk")

# Quick look at the raw frame: dimensions, leading rows and column names.
print(dailyData.shape)
print(dailyData.head())
print(dailyData.columns.tolist())

# Example of one raw row as printed by head():
# 2011-01-01 00:00:00       1        0           0        1  9.84  14.395        81        0.0       3          13     16

# Split the "YYYY-MM-DD HH:MM:SS" timestamp text into its date part and
# its hour part (both remain strings).
rawStamps = dailyData["datetime"]
dailyData["date"] = rawStamps.apply(lambda stamp: stamp.split()[0])
dailyData["hour"] = rawStamps.apply(lambda stamp: stamp.split()[1].split(":")[0])

# Derive the weekday name and the month name from the date string.
dayStrings = dailyData["date"]
dailyData["weekday"] = dayStrings.apply(
    lambda day: calendar.day_name[datetime.strptime(day, "%Y-%m-%d").weekday()]
)
dailyData["month"] = dayStrings.apply(
    lambda day: calendar.month_name[datetime.strptime(day, "%Y-%m-%d").month]
)

# Replace the numeric season and weather codes with descriptive labels.
seasonLabels = {1: "Spring", 2: "Summer", 3: "Fall", 4: "Winter"}
weatherLabels = {
    1: " Clear + Few clouds + Partly cloudy + Partly cloudy",
    2: " Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist ",
    3: " Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds",
    4: " Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog ",
}
dailyData["season"] = dailyData["season"].map(seasonLabels)
dailyData["weather"] = dailyData["weather"].map(weatherLabels)

# Cast every categorical column to the pandas "category" dtype in one call.
categoryVariableList = ["hour", "weekday", "month", "season", "weather", "holiday", "workingday"]
dailyData = dailyData.astype({name: "category" for name in categoryVariableList})

# The raw timestamp column is no longer needed once its parts are extracted.
dailyData = dailyData.drop(columns=["datetime"])
dailyData.head()

# Count how many columns share each dtype object and expose the result as a
# two-column frame: "variableType" (the dtype) and "count".
dtypeTally = dailyData.dtypes.value_counts()
dataTypeDf = pd.DataFrame(dtypeTally).reset_index()
dataTypeDf = dataTypeDf.rename(columns={"index": "variableType", 0: "count"})

print(dataTypeDf)
#------------------------------------------------------------------
# Merge rows whose dtype objects differ but whose dtype *name* is the same,
# summing their counts under that shared name.
new_dic = {}
for row in dataTypeDf.index:
    dtypeName = dataTypeDf.loc[row, "variableType"].name
    rowCount = dataTypeDf.loc[row, "count"]
    new_dic[dtypeName] = new_dic[dtypeName] + rowCount if dtypeName in new_dic else rowCount
print(new_dic)

# Flatten the mapping into [name, count] pairs, preserving insertion order.
mylist = [[dtypeName, total] for dtypeName, total in new_dic.items()]
print(mylist)

# Rebuild the frame from the merged pairs, reusing the existing column names.
dataTypeDf = pd.DataFrame(mylist, columns=list(dataTypeDf))
#--------------------------------------------------------------------------
print(dataTypeDf)
# Bar chart of the number of columns per dtype name.
fig, ax = plt.subplots()
fig.set_size_inches(12, 5)
sn.barplot(data=dataTypeDf, x="variableType", y="count", ax=ax)
# Axis labels are string literals; the unquoted names variableType / Count
# are undefined and raised NameError before the figure could be shown.
ax.set(xlabel="variableType", ylabel="Count", title="Variables DataType Count")
plt.show()

 

aaa

标签:type   head   code   size   [1]   import   idt   .data   set   

原文地址:https://www.cnblogs.com/muhe221/p/10888907.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!