码迷,mamicode.com
首页 > 其他好文 > 详细

你是啥成份?

时间:2018-09-26 01:09:52      阅读:145      评论:0      收藏:0      [点我收藏+]

标签:return   href   add   +=   hog   mat   append   type   visit   

各种编程语言我都很喜欢,但平时用的最多的是什么呢?

一个github小爬虫,获取全部repo及其主要语言,画出饼图。

技术分享图片

"""
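What are you made of?

A small GitHub crawler: collect every repository of a user together with its
main programming language, then draw the language distribution as a pie chart.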
"""

import requests
from pyquery import PyQuery as pq
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np


def parse_page(url):
    """Fetch one repository-listing page and extract its repos and page links.

    Args:
        url: Address of the listing page to download.

    Returns:
        A ``(repos, sons)`` tuple. ``repos`` is a list of dicts with the keys
        ``'name'`` (text of the entry's ``h3``) and ``'language'`` (text of
        its ``itemprop='programmingLanguage'`` element; may be empty).
        ``sons`` is a list of absolute URLs, one per distinct link found
        inside ``.pagination``.
    """
    # Local import so this block does not rely on a file-level import.
    from urllib.parse import urljoin

    print(url)
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    resp = requests.get(url, timeout=30)
    html = pq(resp.text)

    repos = [
        {
            'name': repo('h3').text(),
            'language': repo("[itemprop='programmingLanguage']").text(),
        }
        for repo in html("#user-repositories-list li").items()
    ]

    # Return every pagination link, not only the first one: the first anchor
    # is not guaranteed to be the "next" link (on later pages it may point
    # backwards), and following only it can end the crawl early. The caller
    # already filters out pages it has seen.
    sons = []
    for link in html(".pagination a").items():
        href = link.attr('href')
        if not href:
            continue
        # Resolve relative hrefs against the current page; absolute hrefs
        # pass through unchanged.
        target = urljoin(url, href)
        if target not in sons:
            sons.append(target)
    return repos, sons


def analyze(repos):
    """Draw a pie chart of how many repositories use each language.

    Args:
        repos: Iterable of dicts carrying the keys ``'name'`` and
            ``'language'``.

    Entries sharing a name are collapsed to the last one seen, and entries
    with an empty language are left out of the count. The chart is shown
    with ``plt.show()``; nothing is returned.
    """
    # De-duplicate by repository name (a later entry overrides an earlier one).
    by_name = {entry['name']: entry for entry in repos}
    language_count = Counter(
        entry['language'] for entry in by_name.values() if entry['language']
    )

    labels = language_count.keys()
    sizes = np.array(list(language_count.values()))

    # An explode offset of 0.1 pulls a wedge out of the pie; apply it to the
    # three largest wedges so the most-used languages stand out.
    explode = np.zeros(sizes.shape, dtype=np.float32)
    top_three = np.argsort(sizes)[-3:]
    explode[top_three] = 0.1

    # startangle is the angle at which the first wedge starts.
    plt.pie(
        sizes,
        explode=explode,
        labels=labels,
        autopct='%1.1f%%',
        shadow=False,
        startangle=90,
    )
    plt.show()


def schedule(user="weiyinfu"):
    """Crawl a GitHub user's repository listing and collect every repo found.

    Starts at the user's ``?tab=repositories`` page and keeps following the
    links reported by ``parse_page`` until no unvisited link is left.

    Args:
        user: GitHub account name whose repositories are listed. Defaults to
            the account the script was originally written for.

    Returns:
        A list with the repo dicts of every crawled page, in crawl order.
    """
    seed = "https://github.com/" + user + "?tab=repositories"
    pending = [seed]
    # The seed counts as visited from the start, so a link that leads back to
    # it is not queued and downloaded a second time.
    visited = {seed}
    repos = []
    while pending:
        now = pending.pop()
        repo_list, url_list = parse_page(now)
        repos += repo_list
        for link in url_list:
            if link not in visited:
                visited.add(link)
                pending.append(link)
    return repos


def main():
    """Crawl the repositories, print them, and plot the language pie chart."""
    repos = schedule()
    print(repos)
    analyze(repos)


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

你是啥成份?

标签:return   href   add   +=   hog   mat   append   type   visit   

原文地址:https://www.cnblogs.com/weiyinfu/p/9704368.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!