爬蟲目標:獲取 http://www.imooc.com/learn/348 網頁中的章節標題和視頻信息。
var http = require('http');
var cheerio = require('cheerio');

// Course page whose chapter titles and video entries are scraped.
var url = 'http://www.imooc.com/learn/348';

/**
 * Extracts the chapter titles and video entries from the course page HTML.
 *
 * Each `.chapter` element yields one entry; its title is the text of the
 * `strong` descendants and its videos are the `li` children of `.video`.
 *
 * @param {string} html - Raw HTML of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: string}>}>}
 *     One object per chapter, in document order.
 */
function filterChapters(html) {
    var $ = cheerio.load(html);
    var chapters = $('.chapter');
    var courseData = [];

    // Walk every chapter. A regular function (not an arrow) is required
    // because the current element is read from `this`.
    chapters.each(function() {
        var chapter = $(this);
        var chapterTitle = chapter.find('strong').text();
        var videos = chapter.find('.video').children('li');
        var chapterData = {
            'chapterTitle': chapterTitle,
            'videos': []
        };

        // Walk every video item of this chapter.
        videos.each(function() {
            var video = $(this).find('.studyvideo');
            var href = video.attr('href');

            // A list item without a link has no id to extract; skip it
            // instead of throwing on `undefined.split(...)`.
            if (!href) {
                return;
            }

            chapterData.videos.push({
                'title': video.text(),
                // The id is whatever follows "video/" in the link target.
                'id': href.split('video/')[1]
            });
        });

        // Append this chapter's data to the course information.
        courseData.push(chapterData);
    });

    return courseData;
}

/**
 * Prints every chapter title followed by its videos as "[id]title" lines.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{title: string, id: string}>}>} courseData
 *     Data in the shape returned by filterChapters.
 */
function printCourseInfo(courseData) {
    courseData.forEach(function(item) {
        console.log(item.chapterTitle + '\n');

        item.videos.forEach(function(video) {
            console.log(' [' + video.id + ']' + video.title + '\n');
        });
    });
}

// Fetch the page HTML, then parse and print it once the response has ended.
http.get(url, function(res) {
    if (res.statusCode !== 200) {
        console.error('error!! unexpected status code: ' + res.statusCode);
        // Drain the response so the socket is released.
        res.resume();
        return;
    }

    var html = '';

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by per-chunk string conversion.
    res.setEncoding('utf8');

    res.on('data', function(data) {
        html += data;
    });

    res.on('end', function() {
        var courseData = filterChapters(html);
        printCourseInfo(courseData);
    });
}).on('error', function(err) {
    // Report the underlying cause rather than only a generic marker.
    console.error('error!!', err && err.message);
});