码迷,mamicode.com
首页 > 其他好文 > 详细

关于alzheimer disease论文的统计

时间:2016-12-14 13:50:43      阅读:287      评论:0      收藏:0      [点我收藏+]

标签:app   append   pre   res   python   from   use   sql   country   

1.获取2016年的所有关键字,保存到keyword_2016.json中

import pymysql
import json

# Connect to the local MySQL database that holds the `alzheimer` table.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='',
    db='python',
)
cursor = conn.cursor()

# Keyword string and PMC id of every 2016 record that has a non-empty
# keyword string.
sql = "SELECT union_kwd_str,pmc_id FROM alzheimer WHERE pub_year = '2016' AND union_kwd_str != ''"
a = cursor.execute(sql)  # row count reported by execute(); reused as 'count' below
print(a)
b = cursor.fetchall()  # the original author observed 7887 rows here

# The result set is fully in memory now, so release the database handles.
cursor.close()
conn.close()

# Column 0 is the keyword string; column 1 is the PMC id, keyed by row index.
abstract_list = [row[0] for row in b]
pmc_id_dict = {index: row[1] for index, row in enumerate(b)}



def output_to_json(data, filename):
    """Serialize *data* as JSON, write it to *filename*, and return the text.

    Args:
        data: Any object accepted by ``json.dumps``.
        filename: Path of the file to create or overwrite.

    Returns:
        The JSON string that was written to the file.
    """
    payload = json.dumps(data)  # serialize once; reused for the return value
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(filename, 'w') as fh:
        fh.write(payload)
    return payload

# Bundle the query results and dump them to keyword_2016.json, the file the
# keyword-counting script reads its 'keyword' list from.
output_data = {
    'pub_year': "2016",
    'count': a,
    'keyword': abstract_list,
}
output_to_json(output_data, 'keyword_2016.json')

从keyword_2016.json中读取关键词,统计词频并选出排名前25的关键词

import re  
import collections  
import json

def input_from_json(filename):
    """Read *filename* and return its parsed JSON content.

    Args:
        filename: Path of a JSON file.

    Returns:
        The decoded JSON value (dict, list, ...).
    """
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(filename, 'r') as fh:
        return json.load(fh)

def count_word(path):
    """Count how often each keyword occurs in the JSON file at *path*.

    The file must hold an object whose 'keyword' entry is a list of strings,
    each string being a comma-separated run of keywords.

    Args:
        path: Path of the JSON file to read.

    Returns:
        A dict mapping each keyword (exactly as it appears between commas)
        to its number of occurrences.
    """
    result = {}
    keyword_list = input_from_json(path)['keyword']
    for all_the_text in keyword_list:
        for word in all_the_text.split(','):
            result[word] = result.get(word, 0) + 1
    return result
  
      

 
def sort_by_count(d):
    """Return the items of *d* as an OrderedDict, highest value first."""

    def _negated_count(item):
        # Negating the count makes an ascending sort yield descending counts.
        return -item[1]

    ranked_items = sorted(d.items(), key=_negated_count)
    return collections.OrderedDict(ranked_items)

 
if __name__ == '__main__':
    file_name = "keyword_2016.json"
    top_n = 25  # number of most frequent keywords to export

    dword = count_word(file_name)
    dword = sort_by_count(dword)

    # One JSON object per keyword; underscores in a keyword are replaced
    # with spaces for the 'name' field.
    entries = [
        json.dumps({'name': key.replace("_", " "), 'value': value})
        for key, value in list(dword.items())[:top_n]
    ]

    # The file holds the objects separated by commas, without enclosing
    # brackets. Joining avoids a trailing comma when fewer than top_n
    # keywords exist, and the ``with`` block makes sure the file is closed.
    with open('sort_keyword_2016.json', 'w') as fobj2:
        fobj2.write(','.join(entries))
        

  

2.获取发表论文量排名前十的国家

1)把所有第一作者的信息保存到authorinfor.json中

import pymysql
import json

# Connect to the local MySQL database that holds the `alzheimer` table.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='',
    db='python',
)
cursor = conn.cursor()

# Author-info string and PMC id of every record with non-empty author info.
sql = "SELECT authorinfor,pmc_id FROM alzheimer WHERE authorinfor != ''"
a = cursor.execute(sql)  # row count reported by execute(); reused as 'count' below
print(a)
b = cursor.fetchall()  # one (authorinfor, pmc_id) row per matching record

# The result set is fully in memory now, so release the database handles.
cursor.close()
conn.close()

# Column 0 is the author-info string; column 1 is the PMC id, keyed by row
# index.
authorinfor_list = [row[0] for row in b]
pmc_id_dict = {index: row[1] for index, row in enumerate(b)}

def output_to_json(data, filename):
    """Write *data* to *filename* as JSON and return the JSON text.

    Args:
        data: Any object accepted by ``json.dumps``.
        filename: Path of the file to create or overwrite.

    Returns:
        The JSON string that was written to the file.
    """
    text = json.dumps(data)  # serialize once; reused for the return value
    # Leaving the ``with`` block closes the file; no explicit close() needed.
    with open(filename, 'w') as out:
        out.write(text)
    return text

# Bundle the query results and dump them to authorinfor.json, the file the
# country-ranking script reads its 'authorinfor' list from.
# NOTE(review): 'pub_year' is hard-coded to "2016" although the query above
# does not filter by year -- confirm whether this label is intended.
output_data = {
    'pub_year': "2016",
    'count': a,
    'authorinfor': authorinfor_list,
    'pmc_id': pmc_id_dict,
}
output_to_json(output_data, 'authorinfor.json')

2)选出排名前十的国家

import re  
import collections  
import json

def input_from_json(filename):
    """Load the JSON document stored in *filename*.

    Args:
        filename: Path of a JSON file.

    Returns:
        The decoded JSON value (dict, list, ...).
    """
    # Leaving the ``with`` block closes the file; no explicit close() needed.
    with open(filename, 'r') as source:
        return json.load(source)

def count_word(path):
    """Count records per country in the JSON file at *path*.

    The file must hold an object whose 'authorinfor' entry is a list of
    strings. The text after the last comma of each string is taken as the
    country, with every '.' and newline character removed.

    Args:
        path: Path of the JSON file to read.

    Returns:
        A dict mapping each country string to its number of occurrences.
    """
    result = {}
    authorinfor_list = input_from_json(path)['authorinfor']
    for all_the_text in authorinfor_list:
        country = all_the_text.split(',')[-1]
        # Plain replacements are enough for these literal characters.
        # NOTE(review): surrounding whitespace is kept as-is, so " USA" and
        # "USA" are counted separately -- confirm whether stripping is wanted.
        country = country.replace(".", "").replace("\n", "")
        # The name is kept as text: encoding it to UTF-8 bytes would make the
        # keys unserializable by json.dumps on Python 3.
        result[country] = result.get(country, 0) + 1
    return result
      
 
def sort_by_count(d):
    """Build an OrderedDict of *d*'s items ordered by descending value."""
    ordered = collections.OrderedDict()
    # Sorting on the negated value puts the largest counts first.
    for name, total in sorted(d.items(), key=lambda entry: -entry[1]):
        ordered[name] = total
    return ordered

 
if __name__ == '__main__':
    file_name = "authorinfor.json"

    dword = count_word(file_name)
    dword = sort_by_count(dword)

    # Materialize the ranking once so it can be sliced on Python 2 and 3.
    ranked = list(dword.items())

    # Write the 49 most frequent countries, one JSON object per line. The
    # ``with`` block makes sure the file is flushed and closed.
    with open('sort_country.json', 'w') as fobj2:
        for country, value in ranked[:49]:
            fobj2.write(json.dumps({'name': country, 'value': value}))
            fobj2.write('\n')

    countrylist = [country for country, _ in ranked]
    valuelist = [value for _, value in ranked]

    # NOTE(review): the first 11 entries are printed although the section
    # heading speaks of the top ten -- confirm the intended cut-off.
    print(countrylist[:11])
    print(valuelist[:11])

 

关于alzheimer disease论文的统计

标签:app   append   pre   res   python   from   use   sql   country   

原文地址:http://www.cnblogs.com/lovely7/p/6178829.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!