标签:mda 日期格式 als may txt site mob com demo
导入包
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

为了方便操作,先对月份、参选人以及其所在政党进行定义
# Month abbreviation (upper case) -> month number, January = 1.
months = {
    abbr: number
    for number, abbr in enumerate(
        ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'),
        start=1,
    )
}

# Candidate names singled out for closer inspection.
of_interest = [
    'Obama, Barack',
    'Romney, Mitt',
    'Santorum, Rick',
    'Paul, Ron',
    'Gingrich, Newt',
]
# Candidate name -> party affiliation.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}

# Load the contribution records into a DataFrame.
df = pd.read_csv('./data/usa_election.txt')
df.head()

C:\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2728: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

|  | cmte_id | cand_id | cand_nm | contbr_nm | contbr_city | contbr_st | contbr_zip | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | receipt_desc | memo_cd | memo_text | form_tp | file_num |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 250.0 | 20-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | 
| 1 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 50.0 | 23-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | 
| 2 | C00410118 | P20002978 | Bachmann, Michelle | SMITH, LANIER | LANETT | AL | 3.68633e+08 | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 05-JUL-11 | NaN | NaN | NaN | SA17A | 749073 | 
| 3 | C00410118 | P20002978 | Bachmann, Michelle | BLEVINS, DARONDA | PIGGOTT | AR | 7.24548e+08 | NONE | RETIRED | 250.0 | 01-AUG-11 | NaN | NaN | NaN | SA17A | 749073 | 
| 4 | C00410118 | P20002978 | Bachmann, Michelle | WARDENBURG, HAROLD | HOT SPRINGS NATION | AR | 7.19016e+08 | NONE | RETIRED | 300.0 | 20-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | 
# Add a new column `party` by mapping each row's candidate name (`cand_nm`)
# through the `parties` dict.
df['party'] = df['cand_nm'].map(parties)
df.head()

|  | cmte_id | cand_id | cand_nm | contbr_nm | contbr_city | contbr_st | contbr_zip | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | receipt_desc | memo_cd | memo_text | form_tp | file_num | party |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 250.0 | 20-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
| 1 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 50.0 | 23-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
| 2 | C00410118 | P20002978 | Bachmann, Michelle | SMITH, LANIER | LANETT | AL | 3.68633e+08 | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 05-JUL-11 | NaN | NaN | NaN | SA17A | 749073 | Republican | 
| 3 | C00410118 | P20002978 | Bachmann, Michelle | BLEVINS, DARONDA | PIGGOTT | AR | 7.24548e+08 | NONE | RETIRED | 250.0 | 01-AUG-11 | NaN | NaN | NaN | SA17A | 749073 | Republican | 
| 4 | C00410118 | P20002978 | Bachmann, Michelle | WARDENBURG, HAROLD | HOT SPRINGS NATION | AR | 7.19016e+08 | NONE | RETIRED | 300.0 | 20-JUN-11 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
# party这一列中有哪些元素
df['party'].unique()

array(['Republican', 'Democrat', 'Reform', 'Libertarian'], dtype=object)

# 统计party列中各个元素出现次数
df['party'].value_counts()

Democrat       292400
Republican     237575
Reform           5364
Libertarian       702
Name: party, dtype: int64

# 查看各个党派收到的政治献金总数contb_receipt_amt
df.groupby(by='party')['contb_receipt_amt'].sum()

party
Democrat       8.105758e+07
Libertarian    4.132769e+05
Reform         3.390338e+05
Republican     1.192255e+08
Name: contb_receipt_amt, dtype: float64

# 查看每天各个党派收到的政治献金总数contb_receipt_amt
df.groupby(by=['contb_receipt_dt','party'])['contb_receipt_amt'].sum()

contb_receipt_dt  party
01-APR-11         Reform              50.00
                  Republican       12635.00
01-AUG-11         Democrat        175281.00
                  Libertarian       1000.00
                  Reform            1847.00
                  Republican      234598.46
01-DEC-11         Democrat        651532.82
                  Libertarian        725.00
                  Reform             875.00
                  Republican      486405.96
01-FEB-11         Republican         250.00
01-JAN-11         Republican        8600.00
01-JAN-12         Democrat         58098.80
                  Reform             515.00
                  Republican       75704.72
01-JUL-11         Democrat        165961.00
                  Libertarian       2000.00
                  Reform             100.00
                  Republican      115848.72
01-JUN-11         Democrat        145459.00
                  Libertarian        500.00
                  Reform              50.00
                  Republican      433109.20
01-MAR-11         Republican        1000.00
01-MAY-11         Democrat         82644.00
                  Reform             480.00
                  Republican       28663.87
01-NOV-11         Democrat        122529.87
                  Libertarian       3000.00
                  Reform            1792.00
                                    ...    
30-OCT-11         Reform            3910.00
                  Republican       43913.16
30-SEP-11         Democrat       3373517.24
                  Libertarian        550.00
                  Reform            2050.00
                  Republican     4886331.76
31-AUG-11         Democrat        374387.44
                  Libertarian      10750.00
                  Reform             450.00
                  Republican     1017735.02
31-DEC-11         Democrat       3553072.57
                  Reform             695.00
                  Republican     1094376.72
31-JAN-11         Republican        6000.00
31-JAN-12         Democrat       1418410.31
                  Reform             150.00
                  Republican      869890.41
31-JUL-11         Democrat         20305.00
                  Reform             966.00
                  Republican       12781.02
31-MAR-11         Reform             200.00
                  Republican       62475.00
31-MAY-11         Democrat        351705.66
                  Libertarian        250.00
                  Reform             100.00
                  Republican      301339.80
31-OCT-11         Democrat        204996.87
                  Libertarian       4250.00
                  Reform            3105.00
                  Republican      734601.83
Name: contb_receipt_amt, Length: 1183, dtype: float64

# 将表中日期格式由 'dd-MON-yy'(如 20-JUN-11)转换为 'yyyy-mm-dd'
def transformDate(d):
    """Convert a ``DD-MON-YY`` date string to ``YYYY-MM-DD``.

    Args:
        d: Date string made of a day, a month abbreviation that is a key of
            the module-level ``months`` dict, and a two-digit year, joined
            by hyphens, e.g. ``'20-JUN-11'``.

    Returns:
        The date as a ``'YYYY-MM-DD'`` string, e.g. ``'2011-06-20'``.
        Month and day are zero-padded to two digits (``'2011-06-20'``, not
        ``'2011-6-20'``) so the result really matches the target format
        and the strings sort in chronological order.

    Raises:
        ValueError: If ``d`` does not split into exactly three parts.
        KeyError: If the month part is not a key of ``months``.
    """
    day, month, year = d.split('-')
    # The input year has two digits; it is always prefixed with '20'.
    return '20{}-{:02d}-{}'.format(year, months[month], day.zfill(2))
# Replace the receipt-date column with the result of running transformDate
# on each of its values.
df['contb_receipt_dt'] = df['contb_receipt_dt'].apply(transformDate)
df.head()

|  | cmte_id | cand_id | cand_nm | contbr_nm | contbr_city | contbr_st | contbr_zip | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | receipt_desc | memo_cd | memo_text | form_tp | file_num | party |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 250.0 | 2011-6-20 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
| 1 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | AL | 3.6601e+08 | RETIRED | RETIRED | 50.0 | 2011-6-23 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
| 2 | C00410118 | P20002978 | Bachmann, Michelle | SMITH, LANIER | LANETT | AL | 3.68633e+08 | INFORMATION REQUESTED | INFORMATION REQUESTED | 250.0 | 2011-7-05 | NaN | NaN | NaN | SA17A | 749073 | Republican | 
| 3 | C00410118 | P20002978 | Bachmann, Michelle | BLEVINS, DARONDA | PIGGOTT | AR | 7.24548e+08 | NONE | RETIRED | 250.0 | 2011-8-01 | NaN | NaN | NaN | SA17A | 749073 | Republican | 
| 4 | C00410118 | P20002978 | Bachmann, Michelle | WARDENBURG, HAROLD | HOT SPRINGS NATION | AR | 7.19016e+08 | NONE | RETIRED | 300.0 | 2011-6-20 | NaN | NaN | NaN | SA17A | 736166 | Republican | 
# Find out whom veterans (identified by contributor occupation) mainly
# support, i.e. which candidate received the most money from them.
# 1. Select the rows whose occupation is 'DISABLED VETERAN'.
# The bare comparison below only evaluates the boolean row mask; its result
# is not stored anywhere.
df['contbr_occupation'] == 'DISABLED VETERAN'
old_bing = df.loc[df['contbr_occupation'] == 'DISABLED VETERAN']
# 2. Group by candidate
old_bing.groupby(by='cand_nm')['contb_receipt_amt'].sum()

cand_nm
Cain, Herman       300.00
Obama, Barack     4205.00
Paul, Ron         2425.49
Santorum, Rick     250.00
Name: contb_receipt_amt, dtype: float64

df['contb_receipt_amt'].max()

1944042.43

# 捐赠金额最大的人的职业以及捐献额:通过 query("查询条件") 来查找捐献人职业
df.query('contb_receipt_amt == 1944042.43')

|  | cmte_id | cand_id | cand_nm | contbr_nm | contbr_city | contbr_st | contbr_zip | contbr_employer | contbr_occupation | contb_receipt_amt | contb_receipt_dt | receipt_desc | memo_cd | memo_text | form_tp | file_num | party |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 176127 | C00431445 | P80003338 | Obama, Barack | OBAMA VICTORY FUND 2012 - UNITEMIZED | CHICAGO | IL | 60680 | NaN | NaN | 1944042.43 | 2011-12-31 | NaN | X | * | SA18 | 763233 | Democrat | 
标签:mda 日期格式 als may txt site mob com demo
原文地址:https://www.cnblogs.com/zyyhxbs/p/11708549.html