判斷了字符集以後,如要顯示中文,需要用 print。示例如下:
# Python 2 script: download one "hot" page from qiushibaike.com, save the raw
# bytes to qiushibaike.txt, then show how `print` renders the extracted
# snippets when given a whole list versus a single item.
import re
import urllib2

page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Send the request with a browser-like User-Agent header and read the body.
request = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(request)
try:
    buf = response.read()
finally:
    # Python 2's urlopen() result is not a context manager; close explicitly.
    response.close()

# Write the file only after the download succeeded, so a failed request does
# not leave an empty file or a dangling handle behind.
with open("qiushibaike.txt", "w") as out_file:
    out_file.write(buf)

# Prints the page source: formatting is correct and Chinese displays normally.
print(buf)

# Non-greedy quantifiers keep each match to a single URL / <span> element even
# when several of them share one line of the page.
list_jpg = re.findall(r'http://.+?\.jpg', buf)
list_joketxt = re.findall(r'<span>.+?</span>', buf)

# Printing the whole list shows each item's repr, so in Python 2 the Chinese
# text appears as byte escape sequences rather than readable characters.
print(list_joketxt)

# Printing a single item writes the string itself: Chinese displays normally.
# Guarded because the page may contain no matching <span> at all.
if list_joketxt:
    print(list_joketxt[0])

for jok in list_joketxt:
    # Printed one at a time, every item displays its Chinese text correctly.
    print(jok)