码迷,mamicode.com
首页 > 编程语言 > 详细

java通过Jsoup爬取网页(入门教程)

时间:2019-09-26 23:49:07      阅读:94      评论:0      收藏:0      [点我收藏+]

标签:获取   static   oid   blog   creat   pcl   test   des   org   

一,导入依赖

     <!-- Java crawler: jsoup, the HTML parser used by the demo class -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.3</version>
        </dependency>
        <!-- Apache HttpClient dependency, used by the demo class to download the page -->
        <!-- NOTE(review): no <version> is declared for this artifact; presumably it is managed by a parent POM or BOM. Confirm, or add an explicit version. -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
        </dependency>

二,编写demo类

注意不要导错包:代码中的 Document 和 Element 必须使用 org.jsoup.nodes 包下的类(不要误导入 org.w3c.dom 等其他包中的同名类)。

package com.taotao.entity;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Minimal Jsoup crawling demo: downloads the cnblogs.com home page with Apache HttpClient,
 * parses the returned HTML with Jsoup, and prints the page title plus the text of the
 * element whose id is {@code site_nav_top}.
 *
 * Author: TaoTao  2019/9/26
 */
public class intefaceTest {
    /**
     * Fetches the page, parses it and prints the extracted values to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response body fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpGet = new HttpGet("http://www.cnblogs.com/"); // GET request for the page

        String content;
        // try-with-resources closes the response first and then the client, even when
        // reading the body throws, so no connection or pool is leaked.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            // A response may carry no entity; treat that as an empty page instead of failing.
            content = (entity == null) ? "" : EntityUtils.toString(entity, "utf-8");
        }

        // Parse the HTML once into a document object.
        Document doc = Jsoup.parse(content);

        // All elements whose tag is <title>; the list is empty when the page has none.
        Elements elements = doc.getElementsByTag("title");
        // text() returns the plain text; html() would return the inner HTML instead.
        String title = elements.isEmpty() ? "" : elements.get(0).text();
        System.out.println("网页标题:" + title);

        // getElementById returns null when no element carries the requested id.
        Element navTop = doc.getElementById("site_nav_top");
        if (navTop == null) {
            System.err.println("未找到id=site_nav_top的元素");
            return;
        }
        System.out.println("str:" + navTop.text());
    }
}

 

java通过Jsoup爬取网页(入门教程)

标签:获取   static   oid   blog   creat   pcl   test   des   org   

原文地址:https://www.cnblogs.com/book-mountain/p/11595018.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!