码迷,mamicode.com
首页 > 其他好文 > 详细

寒假学习进度-8(热词爬取)

时间:2020-02-05 13:33:12      阅读:80      评论:0      收藏:0      [点我收藏+]

标签:hot   lin   image   jsoup   main   select   ESS   图片   address   

package hotword;

import java.io.IOException;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.Dao;

/**
 * Small jsoup-based crawler for Baidu Baike entries.
 *
 * <p>Starting from one entry page, it follows every hyperlink found inside
 * elements with CSS class {@code para}, and prints the title and the summary
 * of each linked entry to standard output.
 */
public class Baidu1 {

    /** CSS class of the container that holds an entry's {@code h1}/{@code h2} headings. */
    private static final String TITLE_CLASS = "lemmaWgt-lemmaTitle-title";

    /** CSS class of the element(s) that hold an entry's summary text. */
    private static final String SUMMARY_CLASS = "lemma-summary";

    /**
     * Runs the crawler against a fixed Baidu Baike entry page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        lianjie("https://baike.baidu.com/item/%E6%89%8B%E6%9C%BA/6342");
    }

    /**
     * Downloads {@code address}, follows every {@code a[href]} link located in
     * elements with class {@code para}, and prints the title followed by the
     * summary of each linked page.
     *
     * <p>Each linked page is downloaded once and both values are extracted from
     * that single document. Links that do not resolve to an http(s) URL are
     * skipped, and a linked page that cannot be downloaded is reported on
     * standard error and skipped without aborting the remaining links.
     *
     * @param address URL of the page whose links are followed
     */
    public static void lianjie(String address) {
        Document document;
        try {
            document = Jsoup.connect(address).get();
        } catch (IOException e) {
            System.err.println("Failed to fetch " + address + ": " + e);
            return;
        }

        Elements links = document.getElementsByClass("para").select("a[href]");
        for (Element link : links) {
            // absUrl resolves relative hrefs against the page's base URI and leaves
            // already-absolute hrefs intact; blindly prefixing the site origin
            // would corrupt the latter.
            String url = link.absUrl("href");
            if (!url.startsWith("http://") && !url.startsWith("https://")) {
                continue;
            }

            Document linked;
            try {
                linked = Jsoup.connect(url).get();
            } catch (IOException e) {
                System.err.println("Failed to fetch " + url + ": " + e);
                continue;
            }

            String title = extractTitle(linked);
            String explanation = extractSummary(linked);
            System.out.println(title);
            System.out.println(explanation);
            // NOTE(review): persistence was disabled in the original code
            // (commented-out call: dao.Hotword(title, explanation, url)).
            // Re-enable it here, with its own error handling, if storing is wanted.
        }
    }

    /**
     * Downloads {@code address} and returns the entry title.
     *
     * @param address URL of the entry page
     * @return the text of the {@code h1} elements followed by the text of the
     *         {@code h2} elements inside the title container, or an empty string
     *         if the page could not be downloaded
     */
    public static String timu(String address) {
        try {
            return extractTitle(Jsoup.connect(address).get());
        } catch (IOException e) {
            System.err.println("Failed to fetch " + address + ": " + e);
            return "";
        }
    }

    /**
     * Downloads {@code address} and returns the entry summary.
     *
     * @param address URL of the entry page
     * @return the combined text of the elements with class {@code lemma-summary},
     *         or an empty string if the page could not be downloaded
     */
    public static String ex(String address) {
        try {
            return extractSummary(Jsoup.connect(address).get());
        } catch (IOException e) {
            System.err.println("Failed to fetch " + address + ": " + e);
            return "";
        }
    }

    /** Concatenates the h1 text and the h2 text found inside the title container. */
    private static String extractTitle(Document document) {
        Elements titleContainer = document.getElementsByClass(TITLE_CLASS);
        return titleContainer.select("h1").text() + titleContainer.select("h2").text();
    }

    /** Returns the combined text of the summary element(s) of the page. */
    private static String extractSummary(Document document) {
        return document.getElementsByClass(SUMMARY_CLASS).text();
    }

}

截图:

技术图片

寒假学习进度-8(热词爬取)

标签:hot   lin   image   jsoup   main   select   ESS   图片   address   

原文地址:https://www.cnblogs.com/liujinxin123/p/12263238.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!