标签:
HTTP源码解读
HTTP性能测试
// Minimal HTTP server: answers every request with a plain-text greeting.
var http = require('http');

http
  .createServer(function (request, response) {
    response.writeHead(200, {'Content-type': 'text/plain'});
    response.write('Hello Nodejs');
    response.end();
  })
  .listen(2016, function () {
    // Log only once the server is actually listening, so "success" is never
    // printed for a server that failed to bind the port.
    console.log('success');
  });

// Visiting localhost:2016 returns "Hello Nodejs".
HTTP小爬虫
//安装cheerio
npm install cheerio
/**********************************/
// Fetch the course page over HTTP and print its raw HTML.
var http = require('http');
var url = 'http://www.imooc.com/learn/348';

http.get(url, function (response) {
  var html = '';

  // Decode at the stream level. Appending raw Buffer chunks converts each
  // chunk to a string on its own, which corrupts a multi-byte UTF-8 character
  // that happens to be split across two chunks.
  response.setEncoding('utf8');

  response.on('data', function (data) {
    html += data;
  });

  response.on('end', function () {
    console.log(html);
  });
}).on('error', function (err) {
  console.log('获取课程数据出错');
  // Keep the underlying cause visible instead of discarding it.
  console.error(err);
});
/**********************************/
/**
 * Node.js crawler example: scrapes the chapter/video list of an imooc course page.
 */

var http = require('http');
// cheerio is a third-party package; install it first (npm install cheerio).
var cheerio = require('cheerio');
var url = 'http://www.imooc.com/learn/348';

/**
 * Extract chapter and video information from the course page markup.
 *
 * Result shape:
 *   [{ chapterTitle: '', videos: [{ videoTitle: '', id: '' }] }]
 *
 * @param {string} html - HTML of the course page.
 * @returns {Array<Object>} One entry per `.chapter` element found in the page.
 */
function filterChapter(html) {
  var $ = cheerio.load(html);
  var chapters = $('.chapter');
  var courseData = [];

  chapters.each(function () {
    var chapter = $(this);
    var chapterTitle = chapter.find('strong').text();
    var videos = chapter.find('.video').children('li');

    var chapterData = {
      chapterTitle: chapterTitle,
      videos: []
    };

    videos.each(function () {
      var video = $(this).find('a');

      chapterData.videos.push({
        videoTitle: video.text(),
        // `id` holds the link's href attribute as-is.
        id: video.attr('href')
      });
    });

    courseData.push(chapterData);
  });

  return courseData;
}

/**
 * Print every chapter title, then every video as "[id]title".
 *
 * @param {Array<Object>} courseData - Array produced by filterChapter.
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (item) {
    console.log(item.chapterTitle + '\n');
  });

  courseData.forEach(function (item) {
    // Iterate all videos: indexing videos[0] threw on a chapter without
    // videos and skipped every video after the first.
    item.videos.forEach(function (video) {
      console.log('[' + video.id + ']' + video.videoTitle);
    });
  });
}

http.get(url, function (response) {
  var html = '';

  // Decode at the stream level so a multi-byte UTF-8 character split across
  // two chunks is not corrupted by per-chunk Buffer-to-string conversion.
  response.setEncoding('utf8');

  response.on('data', function (data) {
    html += data;
  });

  response.on('end', function () {
    var courseData = filterChapter(html);

    printCourseInfo(courseData);
  });
}).on('error', function (err) {
  console.log('获取课程数据出错');
  // Keep the underlying cause visible instead of discarding it.
  console.error(err);
});
标签:
原文地址:http://www.cnblogs.com/lqcdsns/p/5354353.html