标签:class load text blank cti rda rar course 课程
代码:
var http = require("http");
var cheerio = require("cheerio");

// Course page whose chapter/video listing is scraped.
var url = 'http://www.imooc.com/learn/348';

// Download the course page, then parse and print its chapter/video structure.
http.get(url, function (res) {
    // Anything other than 200 (redirect, error page) would only be parsed
    // into an empty result, so report it instead of scraping the body.
    if (res.statusCode !== 200) {
        res.resume(); // drain the response so the socket is released
        console.log("获取课程数据出错!", "HTTP " + res.statusCode);
        return;
    }

    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not corrupted by per-chunk string conversion.
    res.setEncoding('utf8');

    var html = '';

    res.on('data', function (chunk) {
        html += chunk;
    });

    res.on('end', function () {
        var courseData = filterChapters(html);

        printCourseInfo(courseData);
        console.log(courseData);
    });
}).on('error', function (err) {
    console.log("获取课程数据出错!", err && err.message);
});

/**
 * Extract the chapter/video structure from the course page markup.
 *
 * Every `.chapter` element becomes one entry; its title is read from
 * `h3 strong`, and its videos from the `li` children of `.video`, each of
 * which is expected to contain a `.J-media-item` link.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One entry per chapter, in document order. A video's `id` is the part of
 *   the link's href after `video/`, or `undefined` when the link or that
 *   marker is missing.
 */
function filterChapters(html)
{
    var $ = cheerio.load(html);

    // all chapters
    var chapters = $('.chapter');

    var courseData = [];

    chapters.each(function () {
        var chapter = $(this);
        var chapterTitle = chapter.find('h3 strong').text().replace(/[\r\n]/g, "").trim();
        var videos = chapter.find(".video").children('li');

        var chapterData = {
            chapterTitle: chapterTitle,
            videos: []
        };

        videos.each(function () {
            var video = $(this).find('.J-media-item');
            var videoTitle = video.text().replace(/[\r\n]/g, "").trim();
            // attr() yields undefined when the li has no media link; guard so
            // one malformed entry does not abort the whole scrape.
            var href = video.attr('href');
            var id = typeof href === 'string' ? href.split('video/')[1] : undefined;

            chapterData.videos.push({
                title: videoTitle,
                id: id
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}

/**
 * Print every chapter title followed by its videos as `[id]title` lines.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>} courseData
 *   Structure produced by filterChapters.
 */
function printCourseInfo(courseData)
{
    courseData.forEach(function (item) {
        var chapterTitle = item.chapterTitle;
        console.log(chapterTitle + '\n');

        item.videos.forEach(function (video) {
            console.log(' [' + video.id + ']' + video.title);
        });
    });
}
运行:
----------------------------------------------------------------------
参考链接:
Node.js http 模块、cheerio 模块实现小爬虫。
标签:class load text blank cti rda rar course 课程
原文地址:http://www.cnblogs.com/cbza/p/7281367.html