爬蟲cheerio亂碼問題

目前遇到的亂碼問題是:

res.write(爬到的內容),在頁面上顯示是正常的html

可是直接在node端打印console.log(爬到的內容)是亂碼

調整亂碼問題的關鍵屬性是cheerio.load中的{decodeEntities: false}屬性

代碼一

// Demo 1: fetch a remote page, pick the first '.r_search_item' element and
// send its inner HTML back to the client. cheerio is loaded with
// {decodeEntities: false}.
var cheerio = require('cheerio')
  , superagent = require('superagent')
  , express = require('express');

var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

app.get('/', function (req, res, next) {
  superagent.get(url)
    .end(function (err, pres) {
      // On a failed request `pres` may be missing, so bail out before
      // touching it and let Express' error handling produce the response.
      if (err) {
        return next(err);
      }

      var html = pres.text;
      var $ = cheerio.load(html, {decodeEntities: false});
      var ans = $('.r_search_item').eq(0).html();

      // .html() yields null when the selector matches nothing, and
      // res.write() rejects non-string/non-buffer chunks.
      if (ans == null) {
        res.status(404).end();
        return;
      }

      res.write(ans);
      // Without end() the response is never finished and the client hangs.
      res.end();
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
複製代碼

代碼二

// Demo 2: same as demo 1 but cheerio is loaded with its default options
// (no {decodeEntities: false}).
var cheerio = require('cheerio')
  , superagent = require('superagent')
  , express = require('express');

var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

app.get('/', function (req, res, next) {
  superagent.get(url)
    .end(function (err, pres) {
      // On a failed request `pres` may be missing, so bail out before
      // touching it and let Express' error handling produce the response.
      if (err) {
        return next(err);
      }

      var html = pres.text;
      var $ = cheerio.load(html);
      var ans = $('.r_search_item').eq(0).html();

      // .html() yields null when the selector matches nothing, and
      // res.write() rejects non-string/non-buffer chunks.
      if (ans == null) {
        res.status(404).end();
        return;
      }

      res.write(ans);
      // Without end() the response is never finished and the client hangs.
      res.end();
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
複製代碼

總結

當爲'代碼一'時,添加了{decodeEntities: false}屬性,node端正常,頁面亂碼。當爲'代碼二'時,沒有{decodeEntities: false}屬性,node端亂碼,頁面正常。

解決方案

www.tuicool.com/articles/6R…

相關文章
相關標籤/搜索