码迷,mamicode.com
首页 > 其他好文 > 详细

获取动态IP

时间:2018-12-25 11:24:16      阅读:146      评论:0      收藏:0      [点我收藏+]

标签:sessionid   class   display   pre   sts   需要   zab   find   rom   

import requests
import re
import lxml.html


class Exam_spider:
    """Two-step scraper for the exam page that extracts the visible IP list.

    The first request obtains a ``session`` cookie, from which two further
    cookie values (``c1`` and ``c2``) are derived locally. The second request
    sends them along, and the returned HTML is stripped of its hidden
    elements so only the visible text remains.
    """

    def __init__(self):
        """Set the base URL and open a shared ``requests`` session."""
        self.base_url = "http://datamining.comratings.com/exam"
        self.s = requests.session()

    def down_first(self):
        """Perform the first request.

        :return: value of the ``session`` cookie in the response, or ``None``
            if the response carries no such cookie
        """
        res = self.s.get(self.base_url)
        sessionid = res.cookies.get_dict().get("session")
        return sessionid

    def down_second(self, cookie):
        """Perform the second request against ``base_url + "3"``.

        :param cookie: cookie mapping as produced by :meth:`make_cookie`
        :return: raw response body (bytes)
        """
        res = self.s.get(self.base_url + "3", cookies=cookie)
        return res.content

    def f1(self, a):
        """Encode ``a`` with a hand-rolled Base64 routine.

        Input is consumed three characters at a time and each group is
        mapped to four characters of the Base64 alphabet. A trailing group
        of one or two characters is padded with ``==`` or ``=``.

        :param a: string to encode (the session id or a slice of it)
        :return: encoded string
        """
        encoderchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="

        length = len(a)
        i = 0
        b = ""
        while i < length:
            # Only the low byte of the first character is used.
            c = ord(a[i]) & 0xff
            i += 1
            if i == length:
                # One trailing character: two symbols plus "==" padding.
                b += encoderchars[c >> 2]
                b += encoderchars[(c & 0x3) << 4]
                b += "=="
                break

            c2 = ord(a[i])
            i += 1
            if i == length:
                # Two trailing characters: three symbols plus "=" padding.
                b += encoderchars[c >> 2]
                b += encoderchars[((c & 0x3) << 4) | ((c2 & 0xf0) >> 4)]
                b += encoderchars[(c2 & 0xf) << 2]
                b += "="
                break

            c3 = ord(a[i])
            i += 1
            # Full group of three characters -> four symbols.
            b += encoderchars[c >> 2]
            b += encoderchars[((c & 0x3) << 4) | ((c2 & 0xf0) >> 4)]
            b += encoderchars[((c2 & 0xf) << 2) | ((c3 & 0xc0) >> 6)]
            b += encoderchars[c3 & 0x3f]

        return b

    def make_cookie(self, sessionid):
        """Build the full cookie sent with the second request.

        ``c1`` is the encoding of ``sessionid[1:4]`` and ``c2`` the encoding
        of the whole session id (see :meth:`f1`).

        :param sessionid: session id obtained from the first request
        :return: dict with a single ``"Cookie"`` key holding
            ``"session=...; c1=...; c2=..."``
        """
        lt = []
        lt.append("session=" + sessionid + ";")
        lt.append("c1=" + self.f1(sessionid[1:4]) + ";")
        lt.append("c2=" + self.f1(sessionid))

        # NOTE(review): this header-style string is handed to requests through
        # the ``cookies=`` argument under the key "Cookie"; presumably the
        # server still picks c1/c2 out of it - confirm before restructuring.
        cookie = {
            "Cookie": " ".join(lt)
        }
        return cookie

    def save_result(self, result):
        """Write the response body to ``example_spider_result.html``.

        :param result: response body (bytes) of the second request
        :return: None
        """
        with open("example_spider_result.html", "wb") as fp:
            fp.write(result)

    def analysis_content(self, result):
        """Strip hidden markup from the page and print the extracted IPs.

        :param result: response body (bytes) of the second request
        :return: None; the list of extracted strings and its length are
            printed
        :raises IndexError: if fewer than two ``display:none`` CSS classes
            are found in the page
        """
        test_data = result.decode("utf-8")

        # CSS class names (upper-case letters) whose rule is display:none.
        pattern = re.compile(r"\.([A-Z]+)\{display:none\}")
        class_none_list = pattern.findall(test_data)

        # Drop everything from each "<div " to the end of its line.
        pattern_div = re.compile(r"<div\s.*")
        t = pattern_div.sub("", test_data)

        # Drop spans hidden through an inline style.
        pattern_span_none = re.compile(r'<span\sstyle="display:none">.*?</span>')
        t1 = pattern_span_none.sub("", t)

        # Drop spans hidden through the first two display:none classes.
        pattern_class_none1 = re.compile(r'<span\sclass="' + class_none_list[0] + r'">.*</span>')
        t2 = pattern_class_none1.sub("", t1)
        pattern_class_none2 = re.compile(r'<span\sclass="' + class_none_list[1] + r'">.*</span>')
        t3 = pattern_class_none2.sub("", t2)

        html = lxml.html.fromstring(t3.replace("\n", ""))
        html_data = html.xpath("//body/descendant-or-self::text()")

        # Concatenate the text fragments (skipping the first one). Once the
        # accumulated string holds three dots and does not end with a dot,
        # the next fragment starts a new entry.
        tt = ""
        ln = []
        for i in html_data[1:]:
            if tt.count(".") == 3 and tt[-1] != ".":
                ln.append(tt)
                tt = ""
            tt = tt + i
        ln.append(tt)
        print(ln)
        print(len(ln))

    def run(self):
        """Run the whole flow: fetch, build cookie, fetch again, parse, save.

        :return: None
        """
        sessionid = self.down_first()
        cookie = self.make_cookie(sessionid)
        result = self.down_second(cookie)
        self.analysis_content(result)
        self.save_result(result)


# Run the spider only when this file is executed as a script.
if __name__ == "__main__":
    e = Exam_spider()
    e.run()

 

获取动态IP

标签:sessionid   class   display   pre   sts   需要   zab   find   rom   

原文地址:https://www.cnblogs.com/liangliangzz/p/10172329.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!