码迷,mamicode.com
首页 > Web开发 > 详细

nodejs的爬虫

时间:2018-12-19 15:39:12      阅读:170      评论:0      收藏:0      [点我收藏+]

标签:str   field   htm   roo   mysql   dirname   find   user   await   

// Scrape a static page: fetch the product listing, download each product
// image into ./downloadimg/, and insert one row per product into MySQL.
const fs = require('fs');
const path = require('path');
const request = require('request');
const cheerio = require('cheerio');
const mysql = require('mysql');

const conn = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: 'root',
    port: '3306',
    database: 'xiaomi'
});

request('https://www.epet.com/cleargoodsmdog.html', function (err, res, body) {
    if (err) {
        // No query has been issued yet, so there is no connection to close.
        console.error('Failed to fetch listing page:', err);
        return;
    }

    const $ = cheerio.load(body);
    const list = $('.qcGoodsBox.bgwhite .fl.rela');
    console.log(list);

    list.each(function () {
        const item = $(this);
        // The image URL is read from the `src0` attribute, not `src`.
        const pic = item.find('.cloud-zoom img').attr('src0');
        const title = item.find('.qcGoodsTit a').text();
        const price = item.find('.qcPriceBox .ft20').text();
        const yprice = item.find('.qcPriceBox .ft12').text();

        // Download the image for this item. The original referenced an
        // undefined `imgsrc` outside the loop, which threw a ReferenceError.
        if (pic) {
            const target = path.join(__dirname, 'downloadimg', path.parse(pic).base);
            request(pic)
                .on('error', function (downloadErr) {
                    console.error('Failed to download', pic, downloadErr);
                })
                .pipe(fs.createWriteStream(target))
                .on('error', function (writeErr) {
                    // e.g. the downloadimg directory does not exist.
                    console.error('Failed to write', target, writeErr);
                });
        }

        // NOTE(review): `yprice` is stored in the `goodsclass` column, as in
        // the original; confirm this mapping against the table schema.
        conn.query(
            'insert into goods(goodsname,price,pic,goodsclass) values(?,?,?,?)',
            [title, price, pic, yprice],
            function (queryErr, results) {
                if (queryErr) {
                    console.error('Insert failed for', title, queryErr);
                    return;
                }
                console.log(results);
            }
        );
    });

    // end() lets the queries enqueued above finish before the connection closes.
    conn.end();
});
// Scrape dynamically rendered data with phantom: open the page in PhantomJS,
// take the rendered HTML, and print each product title found in the list.
const phantom = require('phantom');

(async function () {
  // Required in function scope: a second top-level `const cheerio` would be a
  // redeclaration SyntaxError when this runs in the same file as the
  // static-page scraper.
  const cheerio = require('cheerio');

  const instance = await phantom.create();
  try {
    const page = await instance.createPage();
    await page.on('onResourceRequested', function (requestData) {
      console.info('Requesting', requestData.url);
    });

    const status = await page.open('http://you.163.com/item/list?categoryId=1065000&subCategoryId=1065001');
    if (status !== 'success') {
      throw new Error(`Failed to open page (status: ${status})`);
    }

    const content = await page.property('content');
    const $ = cheerio.load(content);
    const list = $('.m-itemList.m-itemList-level2Category .item');
    list.each(function () {
      const title = $(this).find('.name a span:nth-of-type(3)').text();
      console.log(title);
    });
  } finally {
    // Always shut the PhantomJS process down, even if a step above threw.
    await instance.exit();
  }
})().catch(function (err) {
  console.error('Dynamic scrape failed:', err);
});

 

nodejs的爬虫

标签:str   field   htm   roo   mysql   dirname   find   user   await   

原文地址:https://www.cnblogs.com/bao2333/p/10142910.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!