python 中使用 urllib2 僞造 http 報頭的2個方法

時間 2019-11-09

標籤：python、使用、urllib2、urllib、僞造、http、報頭、方法　｜　欄目：Python（简体版）

原文鏈接

方法一、在建立 urllib2.Request 時透過 headers 參數一次傳入所有報頭

 
         #!/usr/bin/python
         # -*- coding: utf-8 -*-
         # Filename: urllib2-header.py
         #
         # Method 1 (Python 2 / urllib2): fetch a page while sending forged
         # request headers, passing all of them at once through the ``headers``
         # argument of urllib2.Request.

         import sys
         import urllib2

         url = "http://www.jb51.net"

         # Request headers to send; the User-Agent string imitates Firefox 16.
         # NOTE(review): urllib2 reportedly forces "Connection: close" on every
         # request, so the 'Connection' entry may be overridden -- confirm.
         send_headers = {
             'Host': 'www.jb51.net',
             'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Connection': 'keep-alive',
         }

         req = urllib2.Request(url, headers=send_headers)
         r = urllib2.urlopen(req)
         try:
             html = r.read()            # body of the page
             receive_header = r.info()  # headers returned by the server
         finally:
             # Always release the connection, even if reading fails.
             r.close()

         # Transcode to avoid garbled output: decode the body as UTF-8, then
         # re-encode it with sys.getfilesystemencoding(). 'replace' on both
         # steps keeps an undecodable or unencodable character from raising.
         html = html.decode('utf-8', 'replace').encode(
             sys.getfilesystemencoding(), 'replace')

         print(receive_header)
         # print('####################################')  # optional separator
         print(html)

方法二、使用 Request.add_header() 逐一添加報頭

 
         #!/usr/bin/python
         # -*- coding: utf-8 -*-
         # Filename: urllib2-header.py
         #
         # Method 2 (Python 2 / urllib2): fetch a page while sending forged
         # request headers, adding them one at a time with Request.add_header().

         import sys
         import urllib2

         url = 'http://www.jb51.net'

         req = urllib2.Request(url)
         # Each add_header() call sets one request header on the Request object.
         req.add_header('Referer', 'http://www.jb51.net/')
         req.add_header(
             'User-Agent',
             'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')

         r = urllib2.urlopen(req)
         try:
             html = r.read()            # body of the page
             receive_header = r.info()  # headers returned by the server
         finally:
             # Always release the connection, even if reading fails.
             r.close()

         # Decode the body as UTF-8, then re-encode it with
         # sys.getfilesystemencoding(). 'replace' on both steps keeps an
         # undecodable or unencodable character from raising.
         html = html.decode('utf-8', 'replace').encode(
             sys.getfilesystemencoding(), 'replace')

         print(receive_header)
         print('#####################################')
         print(html)