如何瞭解一個城市房價的區域分佈,或者不同城市房價的區域差別?如何獲取一個城市不同板塊的房價數據?本文講述如何爬取鏈家各城市板塊的房價數據,以及如何利用地理可視化引擎將這些數據可視化。
鏈家網站提供了地圖找房功能,我們可以在地圖上瀏覽任意區域的二手房價格信息。透過地圖找房,我們可以獲取每一個房源的價格信息和位置信息,同時鏈家也提供了區縣維度、城市板塊維度的房價統計信息。
我們還可以獲取各個區縣、板塊的範圍數據。
綠色的邊界即爲板塊的邊界數據。
鏈家房價在準確性、權威性上都是很好的數據源,並且有完整的地理信息數據。鏈家房價數據在科學研究、房價區域趨勢研究上也有很大的作用。
如何爬取?鏈家肯定不會讓你輕易將它的數據爬走,肯定做了一些反爬取機制;但理論上講,只要網頁上可以看到的數據都是可以爬取的。那我們研究下如何爬取鏈家各板塊的房價數據。
透過對瀏覽器網絡請求的監測,我們找到了地圖數據請求接口。
返回的數據:
正常來講,我們只需要把一個城市劃分成不同的網格,就可以獲取這個城市所有的房價數據,但是事情往往沒有那麼簡單。
我們可以看到請求中有一個 authorization 參數,這個參數就是鏈家的反爬機制。仔細研究發現,這是對請求參數做了 MD5 編碼,後端會進行校驗,只有校驗正確才會返回數據。
下面的問題就是找到參數md5編碼的方法。
透過代碼跟蹤測試,我們找到了參數 MD5 化的方法:
/**
 * Minified MD5 / HMAC-MD5 implementation wrapped in an IIFE.
 *
 * The IIFE returns the dispatcher `v(e, t, n)` (defined at the bottom), so the
 * public call shape is `md5(string, key, raw)`:
 *   - md5(string)            -> hex-encoded MD5 digest
 *   - md5(string, null, raw) -> MD5 digest as a raw byte string (raw truthy)
 *   - md5(string, key)       -> hex-encoded HMAC-MD5
 *   - md5(string, key, raw)  -> HMAC-MD5 as a raw byte string (raw truthy)
 *
 * All arithmetic is done on 32-bit signed integers held in plain arrays of
 * little-endian words.
 */
const md5 = function() {
// 32-bit addition done in two 16-bit halves so the sum wraps at 2^32.
function e(e, t) {
var n = (65535 & e) + (65535 & t);
return (e >> 16) + (t >> 16) + (n >> 16) << 16 | 65535 & n
}
// Rotate the 32-bit value `e` left by `t` bits.
function t(e, t) {
return e << t | e >>> 32 - t
}
// Shared step of every round: add(rotl(add(add(i, n), add(r, s)), o), a).
// `n` is the round-specific mix of the state words computed by the callers below.
function n(n, i, a, r, o, s) {
return e(t(e(e(i, n), e(r, s)), o), a)
}
// Round 1 step: mix is (t & i) | (~t & a).
function i(e, t, i, a, r, o, s) {
return n(t & i | ~t & a, e, t, r, o, s)
}
// Round 2 step: mix is (t & a) | (i & ~a).
function a(e, t, i, a, r, o, s) {
return n(t & a | i & ~a, e, t, r, o, s)
}
// Round 3 step: mix is t ^ i ^ a.
function r(e, t, i, a, r, o, s) {
return n(t ^ i ^ a, e, t, r, o, s)
}
// Round 4 step: mix is i ^ (t | ~a).
function o(e, t, i, a, r, o, s) {
return n(i ^ (t | ~a), e, t, r, o, s)
}
// Core digest over an array of little-endian 32-bit words.
// `t` is the word array (mutated in place by the padding below), `n` is the
// message length in bits. Returns the four 32-bit state words [g, f, m, p].
function s(t, n) {
// Padding: set the 0x80 marker bit right after the message ...
t[n >> 5] |= 128 << n % 32,
// ... and store the bit length in word 14 of the final 16-word block.
t[14 + (n + 64 >>> 9 << 4)] = n;
// g/f/m/p are the running state words; l/c/d/u hold their values at the start of each block.
var s, l, c, d, u, g = 1732584193, f = -271733879, m = -1732584194, p = 271733878;
// The loop body is ONE comma expression (no braces): every line below up to
// `p = e(p, u);` executes once per 16-word block.
for (s = 0; s < t.length; s += 16)
l = g,
c = f,
d = m,
u = p,
// Round 1 (16 steps using helper `i`).
g = i(g, f, m, p, t[s], 7, -680876936),
p = i(p, g, f, m, t[s + 1], 12, -389564586),
m = i(m, p, g, f, t[s + 2], 17, 606105819),
f = i(f, m, p, g, t[s + 3], 22, -1044525330),
g = i(g, f, m, p, t[s + 4], 7, -176418897),
p = i(p, g, f, m, t[s + 5], 12, 1200080426),
m = i(m, p, g, f, t[s + 6], 17, -1473231341),
f = i(f, m, p, g, t[s + 7], 22, -45705983),
g = i(g, f, m, p, t[s + 8], 7, 1770035416),
p = i(p, g, f, m, t[s + 9], 12, -1958414417),
m = i(m, p, g, f, t[s + 10], 17, -42063),
f = i(f, m, p, g, t[s + 11], 22, -1990404162),
g = i(g, f, m, p, t[s + 12], 7, 1804603682),
p = i(p, g, f, m, t[s + 13], 12, -40341101),
m = i(m, p, g, f, t[s + 14], 17, -1502002290),
f = i(f, m, p, g, t[s + 15], 22, 1236535329),
// Round 2 (16 steps using helper `a`).
g = a(g, f, m, p, t[s + 1], 5, -165796510),
p = a(p, g, f, m, t[s + 6], 9, -1069501632),
m = a(m, p, g, f, t[s + 11], 14, 643717713),
f = a(f, m, p, g, t[s], 20, -373897302),
g = a(g, f, m, p, t[s + 5], 5, -701558691),
p = a(p, g, f, m, t[s + 10], 9, 38016083),
m = a(m, p, g, f, t[s + 15], 14, -660478335),
f = a(f, m, p, g, t[s + 4], 20, -405537848),
g = a(g, f, m, p, t[s + 9], 5, 568446438),
p = a(p, g, f, m, t[s + 14], 9, -1019803690),
m = a(m, p, g, f, t[s + 3], 14, -187363961),
f = a(f, m, p, g, t[s + 8], 20, 1163531501),
g = a(g, f, m, p, t[s + 13], 5, -1444681467),
p = a(p, g, f, m, t[s + 2], 9, -51403784),
m = a(m, p, g, f, t[s + 7], 14, 1735328473),
f = a(f, m, p, g, t[s + 12], 20, -1926607734),
// Round 3 (16 steps using helper `r`).
g = r(g, f, m, p, t[s + 5], 4, -378558),
p = r(p, g, f, m, t[s + 8], 11, -2022574463),
m = r(m, p, g, f, t[s + 11], 16, 1839030562),
f = r(f, m, p, g, t[s + 14], 23, -35309556),
g = r(g, f, m, p, t[s + 1], 4, -1530992060),
p = r(p, g, f, m, t[s + 4], 11, 1272893353),
m = r(m, p, g, f, t[s + 7], 16, -155497632),
f = r(f, m, p, g, t[s + 10], 23, -1094730640),
g = r(g, f, m, p, t[s + 13], 4, 681279174),
p = r(p, g, f, m, t[s], 11, -358537222),
m = r(m, p, g, f, t[s + 3], 16, -722521979),
f = r(f, m, p, g, t[s + 6], 23, 76029189),
g = r(g, f, m, p, t[s + 9], 4, -640364487),
p = r(p, g, f, m, t[s + 12], 11, -421815835),
m = r(m, p, g, f, t[s + 15], 16, 530742520),
f = r(f, m, p, g, t[s + 2], 23, -995338651),
// Round 4 (16 steps using helper `o`).
g = o(g, f, m, p, t[s], 6, -198630844),
p = o(p, g, f, m, t[s + 7], 10, 1126891415),
m = o(m, p, g, f, t[s + 14], 15, -1416354905),
f = o(f, m, p, g, t[s + 5], 21, -57434055),
g = o(g, f, m, p, t[s + 12], 6, 1700485571),
p = o(p, g, f, m, t[s + 3], 10, -1894986606),
m = o(m, p, g, f, t[s + 10], 15, -1051523),
f = o(f, m, p, g, t[s + 1], 21, -2054922799),
g = o(g, f, m, p, t[s + 8], 6, 1873313359),
p = o(p, g, f, m, t[s + 15], 10, -30611744),
m = o(m, p, g, f, t[s + 6], 15, -1560198380),
f = o(f, m, p, g, t[s + 13], 21, 1309151649),
g = o(g, f, m, p, t[s + 4], 6, -145523070),
p = o(p, g, f, m, t[s + 11], 10, -1120210379),
m = o(m, p, g, f, t[s + 2], 15, 718787259),
f = o(f, m, p, g, t[s + 9], 21, -343485551),
// Fold this block's result into the state saved at the top of the iteration.
g = e(g, l),
f = e(f, c),
m = e(m, d),
p = e(p, u);
return [g, f, m, p]
}
// Convert an array of little-endian 32-bit words into a byte string
// (one character per byte, char codes 0-255).
function l(e) {
var t, n = "";
for (t = 0; t < 32 * e.length; t += 8)
n += String.fromCharCode(e[t >> 5] >>> t % 32 & 255);
return n
}
// Convert a byte string into an array of little-endian 32-bit words.
// Only the low 8 bits of each char code are used.
function c(e) {
var t, n = [];
// Assigning the last index first sizes the array, then every slot is zeroed.
for (n[(e.length >> 2) - 1] = void 0,
t = 0; t < n.length; t += 1)
n[t] = 0;
for (t = 0; t < 8 * e.length; t += 8)
n[t >> 5] |= (255 & e.charCodeAt(t / 8)) << t % 32;
return n
}
// Raw MD5 of a byte string, returned as a byte string.
function d(e) {
return l(s(c(e), 8 * e.length))
}
// Raw HMAC-MD5: `e` is the key, `t` is the data (both byte strings).
function u(e, t) {
var n, i, a = c(e), r = [], o = [];
// Keys longer than 16 words (64 bytes) are replaced by their digest.
// 909522486 is 0x36363636 (inner pad) and 1549556828 is 0x5c5c5c5c (outer pad).
for (r[15] = o[15] = void 0,
a.length > 16 && (a = s(a, 8 * e.length)),
n = 0; n < 16; n += 1)
r[n] = 909522486 ^ a[n],
o[n] = 1549556828 ^ a[n];
// Inner hash over (ipad-key || data), then outer hash over (opad-key || inner);
// 640 bits = 512-bit padded key block + 128-bit inner digest.
return i = s(r.concat(c(t)), 512 + 8 * t.length),
l(s(o.concat(i), 640))
}
// Hex-encode a byte string using lowercase digits.
function g(e) {
var t, n, i = "0123456789abcdef", a = "";
for (n = 0; n < e.length; n += 1)
t = e.charCodeAt(n),
a += i.charAt(t >>> 4 & 15) + i.charAt(15 & t);
return a
}
// Encode a JS string as UTF-8, one character per byte.
function f(e) {
return unescape(encodeURIComponent(e))
}
// Raw MD5 of a JS string (UTF-8 encoded first).
function m(e) {
return d(f(e))
}
// Hex MD5 of a JS string.
function p(e) {
return g(m(e))
}
// Raw HMAC-MD5 with key `e` and data `t` (both UTF-8 encoded first).
function _(e, t) {
return u(f(e), f(t))
}
// Hex HMAC-MD5 with key `e` and data `t`.
function h(e, t) {
return g(_(e, t))
}
// Public entry point: `e` = message, `t` = optional HMAC key, `n` = truthy for raw output.
// With a key it computes HMAC-MD5, otherwise plain MD5; `n` selects raw bytes vs hex.
function v(e, t, n) {
return t ? n ? _(t, e) : h(t, e) : n ? m(e) : p(e)
}
return v
}();
/**
 * Build the `authorization` signature for a map-search request.
 *
 * The enumerable keys of `e` are sorted, every key except "filters" is
 * appended as `key=value` (with no separator between pairs), and the result is
 * prefixed with a fixed salt and passed to `md5`.
 *
 * @param {Object} e - Request parameters to sign.
 * @returns {string} Whatever `md5` returns for the salted string, or "" when
 *   there is nothing to sign.
 */
function getMd5(e) {
  const sortedKeys = [];
  for (const key in e) {
    sortedKeys.push(key);
  }
  sortedKeys.sort();

  let payload = "";
  for (const key of sortedKeys) {
    // "filters" is sent with the request but is not part of the signed string.
    if (key === "filters") {
      continue;
    }
    payload += key + "=" + e[key];
  }

  if (!payload) {
    return "";
  }
  return md5("vfkpbin1ix2rb88gfjebs0f60cbvhedl" + payload);
}
複製代碼
到這裏咱們數據獲取的機制就已經搞定了
這裏我們用 Node.js 作爲數據爬取語言。Node.js 的優勢是前端代碼可以直接在本地跑,一些數據加解密的方法可以直接拿來用。
示例代碼
/**
 * Extract and parse the JSON payload of a JSONP (or plain JSON) response body.
 *
 * @param {string} text - Raw response text, e.g. `parserData({...})`.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the payload is not valid JSON.
 */
function parseJsonpBody(text) {
  const body = text.trim();
  // Matches `<callback expression>( ... )` with an optional trailing semicolon.
  // A body that starts with `{` or `[` never matches and is parsed as plain JSON.
  const wrapped = /^[^({\[]*\(([\s\S]*)\)\s*;?\s*$/.exec(body);
  return JSON.parse(wrapped ? wrapped[1] : body);
}

/**
 * Request second-hand housing statistics grouped by "bizcircle" for one
 * bounding box from the Lianjia map-search endpoint.
 *
 * @param {number[]} extent - Bounding box as [min_lng, min_lat, max_lng, max_lat];
 *   each value is rounded to 5 decimal places before it is sent.
 * @param {number|string} city_id - City identifier sent as `city_id`.
 * @param {function(Object, Error=): void} cb - Called exactly once per request
 *   with the parsed response. On a transport error, an empty response or an
 *   unparsable body it is called with `{}` and the error as a second argument.
 */
function getErshoufang(extent, city_id, cb) {
  const round5 = (value) => Number(value.toFixed(5));
  const params = {
    city_id: city_id,
    filters: "{}",
    group_type: "bizcircle",
    max_lat: round5(extent[3]),
    max_lng: round5(extent[2]),
    min_lat: round5(extent[1]),
    min_lng: round5(extent[0]),
    request_ts: Date.now(),
    sug_id: "",
    sug_type: "",
  };
  urllib.request('https://ajax.lianjia.com/map/search/ershoufang/?callback=parserData', {
    method: 'GET',
    data: {
      ...params,
      source: "ljpc",
      // The signature covers `params` only; `source` is added after signing.
      authorization: getMd5(params)
    },
    headers: {
      'Referer': 'https://hz.lianjia.com/ditu/',
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
  }, function (err, data) {
    if (err || !data) {
      cb({}, err || undefined);
      return;
    }
    // SECURITY: this used to `eval` the response body, which executes whatever
    // the remote server (or a man-in-the-middle) returns and requires a global
    // `parserData` function. The JSONP wrapper is now stripped and the payload
    // parsed as data only.
    let json;
    try {
      json = parseJsonpBody(data.toString());
    } catch (parseError) {
      cb({}, parseError);
      return;
    }
    cb(json);
  });
}
複製代碼
就以這樣的方式抓取了全國30個城市板塊級別的房價統計數據。
數據可視化咱們採用AntV 地理可視化引擎L7
官網地址: AntV L7
GitHub 源碼: L7 源碼
110000.json
接下來咱們介紹如何用L7 作出下圖可視化
顏色映射繪製面圖層
// Filled polygon layer: the `unit_price` field of `city` is mapped onto a
// six-step color ramp. The ramp is the original palette in reversed order,
// written out literally ('#2166ac' first, '#b2182b' last).
scene
  .PolygonLayer()
  .source(city)
  .color('unit_price', [
    '#2166ac',
    '#67a9cf',
    '#d1e5f0',
    '#fddbc7',
    '#ef8a62',
    '#b2182b',
  ])
  .shape('fill')
  .active(true)
  .style({ opacity: 1 })
  .render();
複製代碼
繪製標註
// Text label layer: draws the `name` field of each `labeldata` record as text
// at the record's longitude/latitude, above the polygons (zIndex 5).
scene
  .PointLayer({ zIndex: 5 })
  .source(labeldata, {
    parser: {
      type: 'json',
      x: 'longitude',
      y: 'latitude',
    },
  })
  .shape('name', 'text')
  .size(15)
  .color('#fff')
  .style({
    // fontFamily: 'Monaco, monospace', // font family (left disabled)
    fontWeight: 200,
    // Text position relative to the anchor: center|left|right|top|bottom|top-left
    textAnchor: 'center',
    // Offset of the text from the anchor: [horizontal, vertical]
    textOffset: [0, 0],
    // Character spacing
    spacing: 2,
    // Text bounding-box padding [horizontal, vertical]; affects collision
    // detection so neighbouring labels do not sit too close together
    padding: [4, 4],
    // Outline color, width and opacity
    strokeColor: 'white',
    strokeWidth: 2,
    strokeOpacity: 1.0,
  })
  .render();
複製代碼
這樣咱們就完成房價數據的可視化。 demo源碼
藉助L7 咱們也能夠實現3D的效果展現
// Extruded (3D) polygon layer: `unit_price` drives both the color ramp and the
// size, with the size range given as [1000, 1000000]. The ramp is the original
// palette in reversed order, written out literally.
scene
  .PolygonLayer()
  .source(city)
  .color('unit_price', [
    '#2166ac',
    '#67a9cf',
    '#d1e5f0',
    '#fddbc7',
    '#ef8a62',
    '#b2182b',
  ])
  .shape('extrude')
  .size('unit_price', [1000, 1000000])
  .active(true)
  .style({ opacity: 1 })
  .render();
複製代碼
板塊中心點3D圓柱表達
// Cylinder layer: one cylinder per `labeldata` record at its
// longitude/latitude, colored by `unit_price`. The size callback returns
// [4, 4, unit_price / 1000], so only the third component varies with price.
scene
  .PointLayer({ zIndex: 5 })
  .source(labeldata, {
    parser: {
      type: 'json',
      x: 'longitude',
      y: 'latitude',
    },
  })
  .shape('cylinder')
  .color('unit_price', [
    '#2166ac',
    '#67a9cf',
    '#d1e5f0',
    '#fddbc7',
    '#ef8a62',
    '#b2182b',
  ])
  .size('unit_price', (level) => [4, 4, level / 1000])
  .render();
複製代碼