###迷迷糊糊的開端~~ 1.爬取百度貼吧的小旅途html
from urllib import parse
from urllib.request import Request, urlopen

# Browser-like User-Agent sent with every request.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36"
}

# Each result page advances the "pn" query parameter by this many entries.
POSTS_PER_PAGE = 20


def save_page(filename: str, data: bytes) -> None:
    """Write the raw page bytes *data* to *filename*, overwriting any existing file."""
    print("正在保存:", filename)
    with open(filename, "wb") as f:
        f.write(data)


def download_page(full_pn: str) -> bytes:
    """Fetch the URL *full_pn* and return the response body as undecoded bytes.

    Errors raised by ``urlopen`` (e.g. ``urllib.error.URLError``) propagate
    to the caller.
    """
    print("正在下載:", full_pn)
    request = Request(full_pn, headers=REQUEST_HEADERS)
    # Use the response as a context manager so the connection is always closed.
    with urlopen(request) as res:
        return res.read()


def baidu_spider(url: str, start_page: int, end_page: int) -> None:
    """Download pages *start_page*..*end_page* (inclusive) of *url*.

    Each page is requested with ``&pn=<offset>`` appended to *url* and saved
    in the current working directory as ``第<page>頁.html``. Nothing is
    downloaded when *start_page* is greater than *end_page*.
    """
    for page in range(start_page, end_page + 1):
        print(page)
        # Page numbers are 1-based; the "pn" offset is 0-based.
        pn = (page - 1) * POSTS_PER_PAGE
        full_url = url + "&pn=" + str(pn)
        print(full_url)
        html = download_page(full_url)
        filename = "第" + str(page) + "頁.html"
        save_page(filename, html)


def main() -> None:
    """Prompt for a keyword and a page range, then download those pages."""
    # The keyword is free text (a forum name), so it must stay a string;
    # only the page numbers are converted to int.
    keyword = input("請輸入要爬去的內容:")
    start_page = int(input("請輸入首頁:"))
    end_page = int(input("請輸入末尾頁:"))
    # urlencode percent-encodes the (possibly non-ASCII) keyword for the query string.
    query = parse.urlencode({"kw": keyword})
    url = "https://tieba.baidu.com/f?" + query + "&ie=utf-8"
    print(url)
    # Actually run the spider (the call must not be replaced by an assignment).
    baidu_spider(url, start_page, end_page)


if __name__ == "__main__":
    main()

# Original article: https://mp.csdn.net/mdeditor/80897296