码迷,mamicode.com
首页 > Web开发 > 详细

jsoup爬取某网站安全数据

时间:2019-11-02 22:06:49      阅读:97      评论:0      收藏:0      [点我收藏+]

标签:writer   tps   win   tac   pre   web   public   serial   post   

jsoup爬取某网站安全数据

package com.vfsd.net;

import java.io.IOException;
import java.sql.SQLException;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.vfsd.dao.ManageMySQL;

/**
 * Servlet mapped to {@code /GetURL13} that walks the rows of the
 * {@code data_bjszfhcxjswyh} table page by page, downloads each stored news
 * link with jsoup, extracts the text of the {@code #content_list} element and
 * writes that text back through {@link ManageMySQL}.
 *
 * <p>The whole crawl runs synchronously inside the request thread and nothing
 * is written to the HTTP response, so a single GET/POST blocks until every
 * page has been visited.
 */
@WebServlet("/GetURL13")
public class GetURL13 extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /** User-Agent header sent with every page fetch. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";

    /** Table that is both read (links) and updated (extracted text). */
    private static final String TABLE_NAME = "data_bjszfhcxjswyh";

    /** Number of rows requested from {@link ManageMySQL} per iteration. */
    private static final int PAGE_SIZE = 10;

    /** Exclusive upper bound of the page counter; pages 1..923 are processed. */
    private static final int PAGE_LIMIT = 924;

    /** CSS selector of the element whose text is stored as the article body. */
    private static final String CONTENT_SELECTOR = "#content_list";

    private String message;

    /**
     * @see HttpServlet#HttpServlet()
     */
    public GetURL13() {
        super();
    }

    /**
     * Prints a start-up banner and asks {@link ManageMySQL} for a connection.
     * A failure is logged but does not prevent the servlet from being placed
     * into service (same best-effort behaviour as before).
     */
    @Override
    public void init() throws ServletException {
        message = "Hello world, this message is from servlet!";
        System.out.println("------"+message);
        try {
            ManageMySQL.getConnection();
        } catch (Exception e) {
            log("GetURL13: ManageMySQL.getConnection() failed during init", e);
        }
    }

    /**
     * Runs the crawl over every page of links. A failure while loading one
     * page, or while processing one link, is logged and the crawl carries on
     * with the next page or link instead of abandoning the rest of the page.
     *
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        for (int pageNum = 1; pageNum < PAGE_LIMIT; pageNum++) {
            // NOTE(review): the first argument looks like a row offset - confirm against ManageMySQL.
            int start = (pageNum - 1) * PAGE_SIZE;

            Map<Integer, String> links;
            try {
                links = ManageMySQL.getNewsLinkInTable(start, PAGE_SIZE, TABLE_NAME);
            } catch (Exception e) {
                log("GetURL13: could not load links for page " + pageNum + " (start=" + start + ")", e);
                continue;
            }
            if (links == null) {
                continue;
            }

            for (Map.Entry<Integer, String> entry : links.entrySet()) {
                System.out.println(entry.getKey()+"  "+entry.getValue());
                refreshContent(entry.getKey(), entry.getValue());
            }
        }
    }

    /**
     * Fetches one link and stores the extracted text for the given row key.
     * Links that are null, contain {@code "void"} or do not end in
     * {@code "html"} are skipped, as are pages where the content selector does
     * not match exactly one element. Any exception is logged with the key and
     * link so that one bad link never stops the remaining ones.
     *
     * @param key      key under which the link was returned by {@link ManageMySQL}
     * @param newsLink URL stored for that key; may be null
     */
    private void refreshContent(Integer key, String newsLink) {
        if (newsLink == null || newsLink.contains("void") || !newsLink.endsWith("html")) {
            return;
        }
        try {
            Document documentRoot = Jsoup.connect(newsLink).userAgent(USER_AGENT).get();
            Elements matches = documentRoot.select(CONTENT_SELECTOR);
            if (matches.size() != 1) {
                return;
            }
            Element contentDiv = matches.get(0);
            // NOTE(review): the first literal is U+2018 (left single quotation mark);
            // it may have been intended as an ASCII apostrophe - confirm before changing.
            String cleaned = contentDiv.text().replace("‘", "").replace("\"", "");
            // Source and publish date are not extracted here; empty strings are passed as before.
            ManageMySQL.updateContextAndPublishDate2(key, cleaned, "", "", TABLE_NAME);
        } catch (Exception e) {
            log("GetURL13: failed to process key " + key + " (" + newsLink + ")", e);
        }
    }

    /**
     * Delegates to {@link #doGet(HttpServletRequest, HttpServletResponse)}.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }

}

 

jsoup爬取某网站安全数据

标签:writer   tps   win   tac   pre   web   public   serial   post   

原文地址:https://www.cnblogs.com/herd/p/11784128.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!