#!/usr/local/bin/python2.7
# -*- coding: utf-8 -*-
"""Load football betting results from saved HTML pages into MySQL.

For every HTML file listed in the names file, the script extracts the match
number, league, match date, team names, win/draw/loss odds and the final
score with its odds, inserts one row per match into the ``results`` table
and finally derives the ``spf`` and ``zjq`` columns from the score.

The script targets Python 2.7 (see the shebang and the ``urllib2`` import).
"""
import os
import re
import sys
import urllib2

import MySQLdb as mdb
from bs4 import BeautifulSoup

# Date as it appears inside the date cells, e.g. "2014-01-31".
_DATE_RE = re.compile(r"\w{4}-\w{2}-\w{2}")

INSERT_SQL = ("insert into results(id,lea,gmd,hos,gue,win,dog,los,res,odd) "
              "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")

# Statements deriving `spf` from the score string stored in `res`.
# NOTE(review): 3/1/0 presumably mean home win / draw / home loss - confirm.
SQL_UPDATE_SPF = (
    "update results set spf=3 where res='1:0' or res='2:0' or res='2:1' "
    "or res='3:0' or res='3:1' or res='3:2' or res='4:0' or res='4:1' "
    "or res='4:2' or res='5:0' or res='5:1' or res='5:2' or res='勝其餘';",
    "update results set spf=1 where res='0:0' or res='1:1' or res='2:2' "
    "or res='3:3' or res='平其餘';",
    "update results set spf=0 where res='0:1' or res='0:2' or res='1:2' "
    "or res='0:3' or res='1:3' or res='2:3' or res='0:4' or res='1:4' "
    "or res='2:4' or res='0:5' or res='1:5' or res='2:5' or res='負其餘';",
)

# Statements deriving `zjq` (total goals, per the original comment) from
# the score string stored in `res`.
SQL_UPDATE_ZJQ = (
    "update results set zjq=0 where res='0:0'",
    "update results set zjq=1 where res='1:0' or res='0:1'",
    "update results set zjq=2 where res='2:0' or res='1:1' or res='0:2'",
    "update results set zjq=3 where res='3:0' or res='2:1' or res='1:2' "
    "or res='0:3'",
    "update results set zjq=4 where res='4:0' or res='3:1' or res='2:2' "
    "or res='1:3' or res='0:4'",
    "update results set zjq=5 where res='5:0' or res='4:1' or res='3:2' "
    "or res='2:3' or res='0:5' or res='1:4'",
    "update results set zjq=6 where res='5:1' or res='3:3' or res='4:2' "
    "or res='2:4' or res='1:5'",
    "update results set zjq=7 where res='勝其餘' or res='負其餘'",
)


def getSoup(url):
    """Parse the HTML file at path ``url`` and return a BeautifulSoup object.

    The file handle is closed as soon as the document has been parsed.
    No parser is named, so bs4 keeps choosing its default one.
    """
    with open(url) as html_file:
        return BeautifulSoup(html_file)


def fileNames(list_path="/root/bet/names.txt", url_dir="/root/bet/urls/"):
    """Return the full paths of all HTML files that are about to be read.

    ``list_path`` is a text file with one file name per line; every name is
    prefixed with ``url_dir``.  Blank lines are skipped so that they do not
    turn into the bare directory path.
    """
    paths = []
    with open(list_path) as list_file:
        for line in list_file:
            name = line.strip()
            if name:
                paths.append(url_dir + name)
    return paths


def getScr(soup):
    """Return the match numbers found in ``soup``.

    Collects ``.string`` of every ``<td width="50">`` inside a ``<tr>`` and
    keeps the 4th, 8th, 12th ... entry (indices 3, 7, 11, ...).
    """
    cells = [td.string
             for tr in soup.findAll("tr")
             for td in tr.findAll("td", {"width": "50"})]
    return cells[3::4]


def getLea(soup):
    """Return the league name (UTF-8 encoded) of every match in ``soup``.

    Taken from ``.string`` of each ``<td width="70">`` inside a ``<tr>``.
    """
    return [td.string.encode('utf-8')
            for tr in soup.findAll("tr")
            for td in tr.findAll("td", {"width": "70"})]


def getGmdate(soup):
    """Return the match dates found in ``soup`` as ``xxxx-xx-xx`` strings.

    The first three ``<td width="61">`` cells are skipped; the original code
    does not say why (presumably header cells - confirm against the pages).
    """
    cells = [td
             for tr in soup.findAll("tr")
             for td in tr.findAll("td", {"width": "61"})]
    return [_DATE_RE.search(str(cell)).group() for cell in cells[3:]]


def getTeam(soup):
    """Return home and away team names (UTF-8 encoded), in document order.

    Taken from the stripped ``.string`` of each ``<a class="dui">`` inside a
    ``<tr>``.  The caller reads the list in pairs (home, away).
    """
    return [link.string.strip().encode('utf-8')
            for tr in soup.findAll("tr")
            for link in tr.findAll("a", {"class": "dui"})]


def getSpfpl(soup):
    """Return the win/draw/loss odds of every match as one flat list.

    Collects ``.string`` of every ``<span>`` inside a ``<tr>``, drops the
    first and the last entry (invalid data, per the original comment) and
    then takes entries 4..6 out of every group of seven.
    """
    spans = [span.string
             for tr in soup.findAll("tr")
             for span in tr.findAll("span")]
    spans = spans[1:-1]
    odds = []
    # Step through the groups directly instead of probing every index and
    # relying on out-of-range slices being empty.
    for start in range(0, len(spans), 7):
        odds.extend(spans[start + 4:start + 7])
    return odds


def getResult(soup):
    """Return score results and their odds (UTF-8 encoded) as one flat list.

    Taken from ``.string`` of each ``<strong>`` inside a
    ``<div align="center">`` inside a ``<tr>``.  The caller reads the list in
    pairs (result, odds).
    """
    return [strong.string.encode('utf-8')
            for tr in soup.findAll("tr")
            for div in tr.findAll("div", {"align": "center"})
            for strong in div.findAll("strong")]


def _buildRows(soup):
    """Assemble one 10-field tuple per match, in ``INSERT_SQL`` column order."""
    scr = getScr(soup)      # 1. match number
    lea = getLea(soup)      # 2. league
    gmd = getGmdate(soup)   # 3. match date
    tea = getTeam(soup)     # 4. teams, two entries per match
    spf = getSpfpl(soup)    # 5. win/draw/loss odds, three entries per match
    bfj = getResult(soup)   # 6. result and its odds, two entries per match

    rows = []
    for i in range(len(scr)):
        rows.append((
            gmd[i] + '-' + scr[i],   # 7. unique id: date + match number
            lea[i],
            gmd[i],
            tea[2 * i],
            tea[2 * i + 1],
            spf[3 * i],
            spf[3 * i + 1],
            spf[3 * i + 2],
            bfj[2 * i],
            bfj[2 * i + 1],
        ))
    return rows


def main():
    """Insert the matches of every listed HTML file, then derive spf/zjq."""
    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    names = fileNames()
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    conn = mdb.connect(host='localhost', user='root', passwd='oracle',
                       db='betdb', port=3306)
    try:
        cur = conn.cursor()
        try:
            for htmlfilename in names:
                soup = getSoup(htmlfilename)
                print("Reading %s now ..." % htmlfilename)
                for row in _buildRows(soup):
                    cur.execute(INSERT_SQL, row)

            for statement in SQL_UPDATE_SPF + SQL_UPDATE_ZJQ:
                cur.execute(statement)

            # Harmless on a non-transactional engine, required on a
            # transactional one, where the inserts would otherwise be lost.
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
# SQL statement that creates the `results` table:
#
# CREATE TABLE `results` (
#   `id` char(20) NOT NULL,
#   `lea` char(100) DEFAULT NULL,
#   `gmd` date DEFAULT NULL,
#   `hos` char(100) DEFAULT NULL,
#   `gue` char(100) DEFAULT NULL,
#   `win` float(5,2) DEFAULT NULL,
#   `dog` float(5,2) DEFAULT NULL,
#   `los` float(5,2) DEFAULT NULL,
#   `res` char(10) DEFAULT NULL,
#   `odd` float(5,2) DEFAULT NULL,
#   `zjq` tinyint(4) DEFAULT NULL,
#   `spf` tinyint(4) DEFAULT NULL
# ) ENGINE=MyISAM DEFAULT CHARSET=utf8;