标签:save 信息 pos tin www. desc image 爬虫 date
const request = require('request');
const cheerio = require('cheerio');

/**
 * Crawler for the cnblogs blog "flyings".
 *
 * Flow: getInit() reads the pager to learn how many list pages exist, calls
 * getInfo(page) for each of them, and getInfo() in turn calls
 * getInforDetail(bid) for every post found on a list page. Results are
 * printed with console.log; the persistence hooks are left as comments.
 */
(function () {
  const BLOG_ROOT = 'http://www.cnblogs.com/flyings';

  /**
   * Extracts the numeric post id from a post URL such as ".../p/8016639.html".
   *
   * @param {string|undefined} href - Value of the post link's href attribute.
   * @returns {string|null} The digits preceding ".html", or null when the
   *   href is missing or does not contain such an id.
   */
  const parsePostId = function (href) {
    // The dot is escaped and at least one digit is required, so an arbitrary
    // "xhtml" suffix or an empty id is not accepted.
    const match = /(\d+)\.html/.exec(href || '');
    return match ? match[1] : null;
  };

  /**
   * Fetches one list page and logs the summary (id, title, description) of
   * every post on it, then requests the detail page of each post.
   *
   * @param {number} i - Page number passed as the "page" query parameter.
   * @returns {void}
   */
  const getInfo = function (i) {
    const url = `${BLOG_ROOT}/default.html?page=${i}`;
    request(url, function (err, res, body) {
      if (err) {
        console.error(`Failed to fetch list page ${i}:`, err);
        return;
      }
      const $ = cheerio.load(body);
      const titles = $('.postTitle');
      const summaries = $('.postCon');
      for (let idx = 0; idx < titles.length; idx++) {
        const link = titles.eq(idx).find('a');
        const bid = parsePostId(link.attr('href'));
        if (bid === null) {
          // Skip entries whose link does not carry a numeric post id instead
          // of crashing the whole page on a null regex result.
          continue;
        }
        const title = link.text();
        // Strip the "read more" and "summary:" labels that surround the text.
        const desc = summaries.eq(idx).find('.c_b_p_desc').text()
          .replace(/阅读全文/ig, '')
          .replace(/摘要:/ig, '');
        const blog = { bid: bid, title: title, desc: desc };
        console.log(blog);
        // Persistence hook (not wired up), e.g.:
        //   blogModel.saveOrUpdate(
        //     { where: { bid: blog.bid }, data: blog, option: { upsert: true } },
        //     function (error) {});

        // Crawl the full article for this post id.
        getInforDetail(bid);
      }
    });
  };

  /**
   * Fetches a single post page and logs its id, title and HTML content.
   *
   * @param {string} j - Post id used to build ".../p/<id>.html".
   * @returns {void}
   */
  const getInforDetail = function (j) {
    const url = `${BLOG_ROOT}/p/${j}.html`;
    request(url, function (err, res, body) {
      if (err) {
        console.error(`Failed to fetch post ${j}:`, err);
        return;
      }
      const $ = cheerio.load(body);
      const blogDetail = {
        bid: j,
        title: $('.postTitle').find('a').text(),
        content: $('#post_body').html(),
      };
      console.log(blogDetail);
      // Persistence hook (not wired up), e.g.:
      //   blogDetailModel.saveOrUpdate(
      //     { where: { bid: blogDetail.bid }, data: blogDetail, option: { upsert: true } },
      //     function (error) {});
    });
  };

  /**
   * Reads the total page count from the pager on list page 2 and starts
   * crawling every list page.
   *
   * @returns {void}
   */
  const getInit = function () {
    request(`${BLOG_ROOT}/default.html?page=2`, function (err, res, body) {
      if (err) {
        console.error('Failed to fetch the pager page:', err);
        return;
      }
      const $ = cheerio.load(body);
      const pagerText = $('.pager').eq(1).text();
      // The pager text contains "共N页" ("N pages in total").
      const match = /共(\d+)页/.exec(pagerText);
      // NOTE(review): when no pager is found we assume a single list page
      // rather than throwing on a null match — confirm this fallback.
      const pageCount = match ? Number.parseInt(match[1], 10) : 1;
      // NOTE(review): list pages are requested as 1..pageCount; the previous
      // 0..pageCount-1 range skipped the last page — confirm page numbering.
      for (let page = 1; page <= pageCount; page++) {
        getInfo(page);
      }
    });
  };

  getInit();
})();
标签:save 信息 pos tin www. desc image 爬虫 date
原文地址:http://www.cnblogs.com/flyings/p/8016639.html