方法一：建立 Request 時以 headers 字典傳入請求報頭
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: urllib2-header.py
"""Fetch a web page with urllib2, passing the request headers as a dict.

Python 2 script (it relies on the ``urllib2`` module). It prints the
response headers followed by the page body.
"""
import sys
import urllib2

# Page to fetch. The original snippet used `url` without ever defining it,
# which raises NameError. The host below matches the 'Host' header that is
# sent; the scheme and path are an assumption -- adjust as needed.
url = 'http://www.jb51.net/'

# Request headers sent along with the request (variant 1: a headers dict).
send_headers = {
    'Host': 'www.jb51.net',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Connection': 'keep-alive',
}

req = urllib2.Request(url, headers=send_headers)
r = urllib2.urlopen(req)
try:
    html = r.read()            # response body (raw bytes)
    receive_header = r.info()  # response headers
finally:
    # Release the underlying socket even if reading fails.
    r.close()

# Transcode the body to the local encoding so the console output is not
# garbled. sys.getfilesystemencoding() may return None on Python 2, so fall
# back to the default encoding; 'replace' keeps characters that are
# undecodable or unencodable from aborting the script.
output_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
html = html.decode('utf-8', 'replace').encode(output_encoding, 'replace')

print(receive_header)
print(html)
|
方法二：使用 Request.add_header() 逐一添加請求報頭
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: urllib2-header.py
"""Fetch a web page with urllib2, adding a header via Request.add_header().

Python 2 script (it relies on the ``urllib2`` module). It prints the
response headers, a separator line, and then the page body.
"""
import sys
import urllib2

# Page to fetch. The original snippet used `url` without ever defining it,
# which raises NameError. This value is an example target -- replace it
# with the page you want to retrieve.
url = 'http://www.jb51.net/'

req = urllib2.Request(url)
# Variant 2: attach headers one at a time after building the request.
req.add_header(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
)

r = urllib2.urlopen(req)
try:
    html = r.read()            # response body (raw bytes)
    receive_header = r.info()  # response headers
finally:
    # Release the underlying socket even if reading fails.
    r.close()

# Transcode the body to the local encoding for console output.
# sys.getfilesystemencoding() may return None on Python 2, so fall back to
# the default encoding; 'replace' keeps a page that is not valid UTF-8 (or
# not representable locally) from raising a Unicode error.
output_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
html = html.decode('utf-8', 'replace').encode(output_encoding, 'replace')

print(receive_header)
print('#####################################')
print(html)
|