# -*- coding: utf-8 -*-
"""Scrape image URLs and book titles from a web page into an Excel workbook.

Run as a script, this fetches http://www.allitebooks.com/ and writes every
(image URL, title) pair it finds to ``MyBookMessage.xls``.
"""
import re
import urllib

try:
    import xlwt
except ImportError:
    # Only run() needs xlwt; the scraping helpers stay importable without it.
    xlwt = None

try:  # Python 3
    from urllib.request import urlopen as _urlopen
except ImportError:  # Python 2
    _urlopen = urllib.urlopen

# Compiled once at import time instead of on every call.
_IMAGE_RE = re.compile(r'src="(.+?\.jpg)"')
_TITLE_RE = re.compile(r'rel="bookmark">(.+?)</a>')


def getHtml(url):
    """Return the raw body fetched from *url*.

    The response object is closed even when reading fails.
    """
    page = _urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def _parse_book_message(html):
    """Return a dict mapping each title found in *html* to an image URL.

    Titles are the link texts that follow ``rel="bookmark">``; image URLs are
    ``src`` attribute values ending in ``.jpg``.  The two lists are paired
    purely by position and truncated to the shorter one, so the result is
    only meaningful when every bookmark link has exactly one matching image
    in the same order -- NOTE(review): confirm against the real page markup.
    When a title occurs more than once, the last image URL wins.
    """
    if isinstance(html, bytes):
        # Work on text so non-ASCII titles survive; assumes the page is
        # UTF-8 encoded (undecodable bytes are replaced) -- TODO confirm.
        html = html.decode('utf-8', 'replace')
    return dict(zip(_TITLE_RE.findall(html), _IMAGE_RE.findall(html)))


def get_book_message(url):
    """Fetch *url* and return a ``{title: image_url}`` dict for its books."""
    return _parse_book_message(getHtml(url))


def run():
    """Scrape the allitebooks front page and save it as ``MyBookMessage.xls``.

    Row 0 holds the headers; each following row holds one book, with the
    image URL in column index 1 and the title in column index 2.

    Raises:
        ImportError: if xlwt is not installed.
    """
    if xlwt is None:
        raise ImportError('xlwt is required to write MyBookMessage.xls')

    value = get_book_message('http://www.allitebooks.com/')
    workbook = xlwt.Workbook(encoding='ascii')
    worksheet = workbook.add_sheet('MyBookMessage')
    worksheet.write(0, 1, u'地址')  # header: "address"
    worksheet.write(0, 2, u'標題')  # header: "title"
    for row, (title, image_url) in enumerate(value.items(), 1):
        # Keep each value under its own header: the URL goes in the address
        # column and the title in the title column.
        worksheet.write(row, 1, image_url)
        worksheet.write(row, 2, title)
    workbook.save('MyBookMessage.xls')


if __name__ == '__main__':
    run()