爬取微信文章

时间：2019-05-03 18:14:46 阅读：203 评论：0 收藏：0 [点我收藏+]

标签：pass ima == get try val style 去除 type

1.抓包

　　打开微信网页版

　　技术图片

　　　　抓包：

　　　　技术图片

　　　　根据抓包得到的接口地址和请求参数（__biz、uin、key、pass_ticket）构造请求，便能获取公众号文章了！

2.构造请求，获取数据

import requests
import json
import time


def _normalize_article_url(raw_url):
    """Drop escape backslashes from a link and upgrade plain HTTP to HTTPS."""
    cleaned = raw_url.replace("\\", "")
    # Only rewrite a leading "http://": a blanket replace("http", "https")
    # would turn an already-secure "https://" link into "httpss://".
    if cleaned.startswith("http://"):
        cleaned = "https://" + cleaned[len("http://"):]
    return cleaned


def _article_row(msg):
    """Build one CSV line for a message entry, or return None if it is not an article."""
    try:
        info = msg["app_msg_ext_info"]
        # ASCII commas would break the hand-built CSV, so swap them for
        # full-width commas.
        title = info["title"].replace(",", "，")
        digest = info["digest"].replace(",", "，")
        link = _normalize_article_url(info["content_url"])
        date = msg["comm_msg_info"]["datetime"]
    except (KeyError, TypeError, AttributeError):
        # Entries lacking the article fields (e.g. plain-text messages) are
        # skipped on purpose; anything else is allowed to propagate.
        return None
    print(title, digest, link, date)
    return title + "," + digest + "," + link + "," + str(date) + "\n"


def parse(__biz, uin, key, pass_ticket, appmsg_token="", offset="0"):
    """
    Fetch an official account's article list page by page and append it to article.csv.

    Each article becomes one line "title,digest,url,datetime" appended to
    ``article.csv`` (UTF-8) in the current working directory. Paging continues
    for as long as the response reports ``can_msg_continue == 1``, sleeping
    3 seconds between requests.

    Args:
        __biz: value sent as the ``__biz`` query parameter.
        uin: value sent as the ``uin`` query parameter.
        key: value sent as the ``key`` query parameter.
        pass_ticket: value sent as the ``pass_ticket`` query parameter.
        appmsg_token: value sent as the ``appmsg_token`` query parameter.
        offset: paging offset of the first page to request.

    Raises:
        ValueError: if the response is not valid JSON or carries no
            ``general_msg_list`` field.
        requests.RequestException: if the HTTP request fails or times out.
        OSError: if article.csv cannot be written.
    """
    endpoint = "https://mp.weixin.qq.com/mp/profile_ext?"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat QBCore/3.43.901.400 QQBrowser/9.0.2524.400",
    }

    # Iterate instead of recursing once per page, so the call stack does not
    # grow with the number of pages.
    while True:
        params = {
            "action": "getmsg",
            "__biz": __biz,
            "f": "json",
            "offset": str(offset),
            "count": "10",
            "is_ok": "1",
            "scene": "124",
            "uin": uin,
            "key": key,
            "pass_ticket": pass_ticket,
            "wxtoken": "",
            "appmsg_token": appmsg_token,
            "x5": "0",
        }

        res = requests.get(endpoint, headers=headers, params=params, timeout=3)
        data = json.loads(res.text)
        print(data)

        raw_list = data.get("general_msg_list")
        if raw_list is None:
            # NOTE(review): presumably this means the captured credentials
            # were rejected or have expired -- confirm against a real response.
            raise ValueError(
                "response has no 'general_msg_list' field: %r" % (data,)
            )
        # The field is JSON text nested inside the JSON response. Decode it
        # with json.loads; eval() would execute server-supplied text and also
        # fails on the JSON literals true/false/null.
        msg_list = json.loads(raw_list).get("list", [])

        rows = []
        for msg in msg_list:
            row = _article_row(msg)
            if row is not None:
                rows.append(row)
        if rows:
            # Explicit UTF-8 so non-ASCII titles do not depend on the platform
            # default encoding.
            with open("article.csv", "a", encoding="utf-8") as f:
                f.writelines(rows)

        # can_msg_continue: 1 - more pages available, 0 - reached the end.
        if data.get("can_msg_continue", 0) != 1:
            print("爬取完毕")
            return
        time.sleep(3)
        offset = data["next_offset"]


if __name__ == '__main__':
    # Prompt for the request parameters; these are sent verbatim as the
    # __biz / uin / key / pass_ticket query parameters of the request.
    __biz = input('biz: ')
    uin = input('uin: ')
    key = input('key: ')
    pass_ticket = input('passtick: ')
    # Start crawling from the first page.
    parse(__biz, uin, key, pass_ticket, appmsg_token="", offset="0")

　　数据：

　　技术图片

爬取微信文章

标签：pass ima == get try val style 去除 type

原文地址：https://www.cnblogs.com/tjp40922/p/10805773.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行