码迷,mamicode.com
首页 > Web开发 > 详细

Node.js抓取新浪新闻标题

时间:2018-04-10 23:59:22      阅读:353      评论:0      收藏:0      [点我收藏+]

标签:world   cti   news   抓取   let   als   cat   tle   UNC   

"use strict";

let cheerio = require("cheerio");
let http = require("http");
let iconv = require("iconv-lite");

// Page to scrape: the Sina news "world" section.
const mainUrl = "http://news.sina.com.cn/world/";

// Download the page, decode it, and print one "title|url" line for every
// qualifying link found under elements with class "content".
http.get(mainUrl, (sres) => {
    // Only a 200 response is parsed; other statuses (redirects, errors) are
    // reported instead of being scanned for links.
    if (sres.statusCode !== 200) {
        console.error(`Request to ${mainUrl} failed with HTTP status ${sres.statusCode}`);
        sres.resume(); // discard the body so the connection can be released
        return;
    }

    const chunks = [];
    sres.on('data', (chunk) => {
        chunks.push(chunk);
    });

    sres.on('error', (err) => {
        console.error(`Error while reading response from ${mainUrl}: ${err.message}`);
    });

    sres.on('end', () => {
        // Join the raw buffers before decoding so a multi-byte character
        // split across two chunks is decoded correctly.
        const html = iconv.decode(Buffer.concat(chunks), 'utf8');
        const $ = cheerio.load(html, {decodeEntities: false});

        $('.content a').each((idx, element) => {
            const ele = $(element);
            const title = (ele.text() || '').trim();
            // attr() yields undefined when the anchor has no href.
            const url = (ele.attr('href') || '').trim();

            // Keep links whose text is longer than 4 characters and whose
            // href is non-empty and does not contain "javascript"
            // (presumably to drop short navigation labels and script
            // pseudo-links — confirm against the live page).
            if (title.length > 4 && url.length > 0 && !url.includes('javascript')) {
                console.log(`${title}|${url}`);
            }
        });
    });
}).on('error', (err) => {
    // Connection-level failures (DNS, refused socket, ...) are emitted on the
    // request object; without this handler they would crash the process.
    console.error(`Request to ${mainUrl} failed: ${err.message}`);
});

 

Node.js抓取新浪新闻标题

标签:world   cti   news   抓取   let   als   cat   tle   UNC   

原文地址:https://www.cnblogs.com/sheryee/p/8783446.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!