// 标签: 爬图片 wait require settime code att url begin 爬取
// Simple image crawler: downloads every image found in the comment list of a
// page, then follows the "previous comment page" link until `total` pages
// have been visited, pausing `waitTime` milliseconds between pages.
const path = require('path');
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');

let page = 0; // number of pages requested so far
const total = 10; // total number of pages to crawl
const waitTime = 30000; // delay between two page requests, in milliseconds
const savePath = './image'; // directory the downloaded images are written to

begin();

/**
 * Entry point: makes sure the output directory exists, then starts crawling
 * from the first page.
 */
function begin() {
  if (!fs.existsSync(savePath)) {
    fs.mkdirSync(savePath);
  }
  const url = 'http://www.图片网址.com';
  getImageByUrl(url);
}

/**
 * Resolves a URL found in the page against the URL of the page itself.
 *
 * Protocol-relative ("//host/a.jpg"), path-relative and absolute values are
 * all handled by the WHATWG URL parser.
 *
 * @param {string|undefined} raw - Attribute value read from the document.
 * @param {string} base - URL of the page the attribute was read from.
 * @returns {string|null} Absolute http(s) URL, or null when `raw` is missing,
 *   empty, unparsable or not an http(s) URL.
 */
function resolveUrl(raw, base) {
  if (typeof raw !== 'string' || raw.trim() === '') {
    return null;
  }
  let resolved;
  try {
    resolved = new URL(raw.trim(), base);
  } catch (err) {
    return null;
  }
  // Anything else (data:, javascript:, ...) cannot be fetched with `request`.
  if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
    return null;
  }
  return resolved.href;
}

/**
 * Streams one image to disk. Failures are logged instead of thrown so that a
 * single broken image does not abort the whole crawl.
 *
 * @param {string} imageUrl - Absolute URL of the image.
 * @param {string} filePath - Destination file path.
 */
function downloadImage(imageUrl, filePath) {
  request(imageUrl)
    // Without a listener, an 'error' event on either stream would be
    // rethrown and crash the process.
    .on('error', (err) => console.log(err))
    .pipe(fs.createWriteStream(filePath))
    .on('error', (err) => console.log(err));
}

/**
 * Crawls the images of one page, then schedules the previous comment page
 * until `total` pages have been visited or no such link is found.
 *
 * @param {string} _url - Absolute URL of the page to crawl.
 */
function getImageByUrl(_url) {
  page++;
  console.log(`开始第${page}页`);
  request(_url, (err, resp, body) => {
    if (err) {
      console.log(err);
      return;
    }

    const $ = cheerio.load(body);
    $('.commentlist > li .text img').each((_index, img) => {
      const imageUrl = resolveUrl($(img).attr('src'), _url);
      if (imageUrl === null) {
        return; // <img> without a usable src: skip it, keep iterating
      }
      // Name the file after the URL path only, so a query string never ends
      // up in the file name.
      const baseName = path.posix.basename(new URL(imageUrl).pathname);
      if (baseName === '') {
        return;
      }
      downloadImage(imageUrl, `${savePath}/${baseName}`);
    });

    // `attr` yields undefined when the link is absent (e.g. on the last
    // page); resolveUrl maps that to null instead of throwing.
    const prevUrl = resolveUrl($('.previous-comment-page').attr('href'), _url);
    if (page < total && prevUrl !== null) {
      setTimeout(() => {
        getImageByUrl(prevUrl);
      }, waitTime);
    }
  });
}
// 标签: 爬图片 wait require settime code att url begin 爬取
// 原文地址: https://www.cnblogs.com/qiuxd/p/13168799.html