好了,咱們前面已經將整個實例的功能分爲三大塊,而且也已經分別實現了,如今咱們試着將全部功能整合到一塊兒看看。
我的源碼是這樣的:
python
#!/usr/bin/env python # -*- coding:UTF-8 -*- __author__ = '217LeeSin' ''' 公務員時間源碼 V-0.1 ''' from bs4 import BeautifulSoup import urllib2 import re import os import sys reload(sys) sys.setdefaultencoding("utf-8") # get the source code of html url = 'http://blog.jobbole.com/all-posts/' request = urllib2.Request(url) response = urllib2.urlopen(request) contents = response.read() # get the date search_date = os.popen(r'date -d"yesterday" +"%Y/%m/%d"').read().strip('\n') # get url, title, date of the article pattern = re.compile(r'class="archive-title".*?href="(.*?)".*?title="(.*?)">.*?<br />(.*?)<a.*?', re.S) items = re.findall(pattern, contents) i = 0 url_list = [] title_list = [] for item in items: release_date = re.sub(r'\s', "", item[2]).split(r'&')[0] if search_date == release_date: # 將數據存入列表爲了便於後面的顯示 url_list.append(item[0]) title_list.append(item[1]) continue # 顯示序號,文章網址和文章標題 for i in range(len(url_list)): print i + 1, url_list[i], title_list[i] while(True): order = input("請輸入文章序號查看文章:") art_url = url_list[order-1] art_request = urllib2.Request(art_url) art_response = urllib2.urlopen(art_request) art_soup = BeautifulSoup(art_response.read()) art_title = art_soup.title.string print art_title art_contents = art_soup.find("div", attrs={"class":"entry"}) print art_contents.get_text() while(True): action = raw_input("請選擇下一步的操做(b:返回標題列表;s:保存文章並返回標題列表;q:退出程序):") if action == "b": for i in range(len(url_list)): print i + 1, url_list[i], title_list[i] break if action == "s": path = '/home/ym/'+art_title f = open (path,"w+") f.write(art_contents.get_text()) f.close() for i in range(len(url_list)): print i + 1, url_list[i], title_list[i] break if action == "q": exit(0) continue
運行結果:
(運行結果略)