[一步一步構建一個react應用-開篇](https://segmentfault.com/a/11...html)
git地址
自己寫表單錄入電影信息比較費勁,因此選擇爬取豆瓣電影的信息,主要爬取電影的封面、主演、年份、產地、簡介等信息。前端有個輸入框,輸入電影名,然後去爬取電影信息並錄入到MongoDB中。
打開豆瓣電影,搜索千與千尋
jquery
經過分析,發現:
PS:比較有意思的是,最開始我爬取的時候,想要的信息就在這個接口的響應中,從響應裏面提取出來就行;但後來我發現響應裏沒有了,豆瓣電影搜索後的列表信息變成了由js動態渲染,全部的信息存放在響應的window_data中,因此又把代碼修改了一下,使用phantom來渲染爬取到的頁面。
因此大致邏輯就是:經過接口A獲取一個電影列表,從列表中提取出咱們須要的電影詳情的url B,再爬取B接口獲取詳情,最後從詳情中提取信息。
項目接口 /api/reptile/:name
代碼
const express = require('express');
const router = express.Router();
const CONFIG = require('../config/config');
const cheerio = require('cheerio');
const rq = require('request-promise');
const phantom = require('phantom');

// Douban search page; the URL-encoded movie name is appended as the query value.
const SEARCH_URL = 'https://movie.douban.com/subject_search?search_text=';

// Value forwarded to `next()` when the search page yields no usable result.
// NOTE(review): how this code is turned into a response is decided by error
// middleware outside this file — confirm it is mapped as intended.
const ERR_NO_SEARCH_RESULT = 10001;

// Marker substituted for <br> so the #info text can be split back into rows.
const ROW_SEPARATOR = '**';

/**
 * Fetch the movie list: open the Douban search page for `name` in PhantomJS
 * (the result list is rendered by client-side JS) and return the rendered HTML.
 *
 * The page and the PhantomJS process are released on both the success and the
 * failure path, and failures reject the returned promise instead of being
 * logged and swallowed.
 *
 * @param {string} name - Movie name to search for.
 * @returns {Promise<string>} Rendered HTML of the search page.
 */
function getMovieSubjectUrl(name) {
  let instance = null;
  let page = null;

  const cleanup = () => {
    if (page) {
      page.close();
    }
    if (instance) {
      instance.exit();
    }
  };

  return phantom.create()
    .then(ph => {
      instance = ph;
      return ph.createPage();
    })
    .then(createdPage => {
      page = createdPage;
      return page.open(SEARCH_URL + encodeURIComponent(name));
    })
    .then(() => page.property('content'))
    .then(
      content => {
        cleanup();
        return content;
      },
      err => {
        cleanup();
        throw err;
      }
    );
}

/**
 * Download a movie detail page, extract its fields and send them as the JSON
 * response `{ code: CONFIG.ERR_OK, data }`.
 *
 * Any download or parsing failure is forwarded to `next` so the request does
 * not hang on an unhandled rejection.
 *
 * @param {string} href - URL of the movie detail page.
 * @param {object} res - Express response.
 * @param {Function} next - Express `next` callback, used for error forwarding.
 * @returns {Promise<void>} Settles once the response was sent or the error forwarded.
 */
function getMovieDetail(href, res, next) {
  return rq(href)
    .then(html => {
      const data = fillData(cheerio.load(html));
      res.json({
        code: CONFIG.ERR_OK,
        data,
      });
    })
    .catch(next);
}

/**
 * Return the text after the first ':' of row `index`, or '' when the row or
 * its value is absent.
 */
function rowValue(rows, index) {
  const row = rows[index];
  if (row === undefined) {
    return '';
  }
  const value = row.split(':')[1];
  return value === undefined ? '' : value;
}

/** Split a '/'-separated value into a list; an empty value gives an empty list. */
function splitList(value) {
  return value === '' ? [] : value.split('/');
}

/**
 * Extract movie information from a loaded detail page.
 *
 * The `#info` block is flattened to one row per <br>; actors, type and time
 * are read from rows 2, 3 and 6 respectively, so the result depends on the
 * row order of the page. Missing rows produce empty values rather than a
 * TypeError.
 *
 * @param {Function} $ - cheerio instance loaded with the detail page.
 * @returns {{thumb: string, actors: string[], type: string[], time: string, instruct: string}}
 * @throws {Error} When the page has no `#info` element.
 */
function fillData($) {
  const infoHtml = $('#info').html();
  if (infoHtml == null) {
    throw new Error('movie detail page has no #info element');
  }

  // Replace line-break tags with a marker so the data is easy to split,
  // then strip all whitespace so labels and values are compact.
  const rows = cheerio
    .load(infoHtml.replace(/<br\s*\/?>/g, ROW_SEPARATOR))
    .text()
    .replace(/\s+/g, '')
    .split(ROW_SEPARATOR);

  return {
    thumb: $('#mainpic img').attr('src') || '',
    actors: splitList(rowValue(rows, 2)),
    type: splitList(rowValue(rows, 3)),
    time: rowValue(rows, 6),
    instruct: $('#link-report').find('span[property]').text(),
  };
}

/**
 * GET /:name — search Douban for `name`, follow the chosen result to its
 * detail page and respond with the extracted movie information.
 */
router.get('/:name', function (req, res, next) {
  getMovieSubjectUrl(req.params.name)
    .then(html => {
      const $ = cheerio.load(html);
      const detail = $('.detail');
      if (!detail.length) {
        next(ERR_NO_SEARCH_RESULT);
        return undefined;
      }

      // Use the first entry when it carries a rating, otherwise the second one.
      const index = detail.eq(0).has('.rating_nums').length ? 0 : 1;
      const href = detail.eq(index).find('.title a').attr('href');
      if (!href) {
        next(ERR_NO_SEARCH_RESULT);
        return undefined;
      }

      return getMovieDetail(href, res, next);
    })
    .catch(next);
});

module.exports = router;
reptile.jsx
import React from 'react'; import { Button, Switch } from "antd-mobile" import cloneDeep from "lodash/cloneDeep" import Util from "../util/Util.js" import MovieInfo from "./movieInfo" import DownForm from "./download-form" export default class Reptile extends React.Component { constructor(props) { super(props) this.state = { m_name: '', m_info: null, } this.reptile = this.reptile.bind(this) } /** * 爬取數據 */ reptile() { if (!this.state.m_name) { return } Util.fetch('/api/reptile/' + this.state.m_name).then(data => { if (data.code) { Util.Toast.info(data.message) return } this.setState({ m_info: data.data, }) }) } render() { return ( <div className='reptile'> <div className='search-form'> <input type="text" placeholder='電影名' value={this.state.m_name} onChange={(e) => { this.setState({ m_name: e.target.value }) }} /> <Button type="primary" size="small" onClick={this.reptile}>搜索</Button> </div> </div> ) } }