Python爬取百度貼吧

# Download a range of listing pages from a Baidu Tieba forum and save each
# one as "<forum>/<page>.html" under the current working directory.
#
# Run the script from your IDE or a terminal; it prompts for the forum name,
# the first page and the last page (inclusive), then fetches every page in
# that range so the saved listings can be browsed offline.
import os
from urllib import parse, request

# Listing endpoint; the url-encoded query string is appended directly to it.
base_url = 'http://tieba.baidu.com/f?'

# Amount the "pn" query parameter advances per page (hard-coded as 50 in the
# original script; presumably the number of threads on one listing page).
PAGE_OFFSET_STEP = 50


def build_page_url(keyword, page):
    """Return the listing URL for the 1-based *page* of forum *keyword*.

    Args:
        keyword: Forum name; it is url-encoded as the ``kw`` parameter, so
            non-ASCII names are safe to pass.
        page: 1-based page number; page 1 maps to ``pn=0``.

    Returns:
        The full request URL as a string.
    """
    query = parse.urlencode({"kw": keyword})
    offset = (page - 1) * PAGE_OFFSET_STEP
    return base_url + query + '&pn=' + str(offset)


def download_pages(keyword, first_page, last_page):
    """Fetch pages ``first_page..last_page`` (inclusive) and write them to disk.

    Each page is stored as ``<keyword>/<page>.html`` encoded as UTF-8.

    Args:
        keyword: Forum name; also used as the output directory name.
        first_page: First 1-based page number to download.
        last_page: Last 1-based page number to download (inclusive).

    Raises:
        urllib.error.URLError: If a request fails.
        UnicodeDecodeError: If a response body is not valid UTF-8.
        OSError: If the directory or a file cannot be created.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(keyword, exist_ok=True)

    for page in range(first_page, last_page + 1):
        url = build_page_url(keyword, page)
        print("downing...第%d頁" % page)

        # The context manager closes the HTTP connection even when read()
        # or decode() raises.
        with request.urlopen(url) as response:
            html = response.read().decode("utf-8")

        target = os.path.join(keyword, str(page) + '.html')
        with open(target, 'w', encoding="utf-8") as f:
            f.write(html)


def main():
    """Prompt for the forum name and page range, then download the pages."""
    a = input("貼吧")
    start = input("開始頁")
    end = input("結束頁")

    download_pages(a, int(start), int(end))
    print("下載完成")


if __name__ == "__main__":
    main()
相關文章
相關標籤/搜索