本文記錄了一個AC自動機的誕生!
以前看過有人用C++寫過AC自動機,也有用C#寫的,還有一個用nodejs寫的。
感覺他們的代碼過於冗長,並且AC率也不是很理想。
恰好在回宿舍的路上和學弟聊起這個事,
隨意想了想思路,覺得還是蠻簡單的,就順手寫了一個,效果還可以接受。
先上個圖吧:
最後應該還可以繼續刷,如果修改代碼或者再添加一下其他搜索引擎,可以AC更多題,
不過我有意控制在3000這個AC量,也有意跟在五虎上將之後。
思路其實非常清晰:
# -*- coding: utf-8 -*-
"""Search CSDN blog posts for HDU OJ solutions and submit them.

For every problem id in the configured range the script searches CSDN for
"HDU <id>", extracts the first code block of each matching post, submits it
to acm.hdu.edu.cn through a logged-in session, and stops trying further posts
for that problem once it appears in the user's solved list.

The module runs on both Python 2 and Python 3.
"""
import getpass
import os
import re
import time

import requests

try:  # Python 3: HTMLParser.unescape was removed, html.unescape replaces it.
    from html import unescape as _unescape
except ImportError:  # Python 2
    import HTMLParser
    html_parser = HTMLParser.HTMLParser()
    _unescape = html_parser.unescape

try:  # Python 2
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input

host_url = 'http://acm.hdu.edu.cn'
post_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login'
sub_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
csdn_url = 'http://so.csdn.net/so/search/s.do'
head = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36'
}

# Inclusive range of problem ids the main loop walks through.
FIRST_PID = 1000
LAST_PID = 5679

# Page-scraping patterns; '.' must also match newlines, hence re.S.
SOLVED_RE = re.compile(
    'List of solved problems</font></h3>.*?<p align=left>'
    '<script language=javascript>(.*?)</script><br></p>', re.S)
LINK_RE = re.compile(
    '<dd class="search-link"><a href="(.*?)" target="_blank">', re.S)
TITLE_RE = re.compile('<title>(.*?)</title>', re.S)
CODE_RE = re.compile('name="code" class="(.*?)">(.*?)</pre>', re.S)

# One shared session so the login cookies are reused for submissions.
s = requests.session()


def login(usr, psw):
    """Log the shared session ``s`` in to HDU OJ with the given credentials.

    The response is not inspected, so a failed login is not detected here.
    """
    # The front page is fetched first, presumably to pick up session cookies.
    s.get(host_url)
    data = {'username': usr, 'userpass': psw, 'login': 'Sign In'}
    s.post(post_url, data=data)


def check_lan(lan):
    """Return the ``language`` form value for a code block's class string.

    Gives '5' when ``lan`` contains 'java' and '0' otherwise.
    NOTE(review): presumably 5 = Java and 0 = G++ on HDU -- confirm.
    """
    return '5' if 'java' in lan else '0'


def parser_code(code):
    """Unescape HTML entities in ``code`` and return it UTF-8 encoded."""
    return _unescape(code).encode('utf-8')


def is_ac(pid, usr):
    """Return True if ``pid`` appears in ``usr``'s list of solved problems.

    Returns False (instead of raising) when the solved-problems section
    cannot be found on the user status page.
    """
    # Passing the user name via params lets requests URL-encode it.
    page = requests.get(host_url + '/userstatus.php',
                        params={'user': usr}).text
    solved = SOLVED_RE.search(page)
    if solved is None:
        return False
    # NOTE(review): plain substring test; it would also match ``pid`` inside
    # a longer number in the script text -- confirm against the page format.
    if pid in solved.group(1):
        print('%s was solved' % pid)
        return True
    return False


def search_csdn(PID, usr):
    """Try CSDN search results for problem ``PID`` until one gets accepted.

    Each result whose title contains both ``PID`` and 'hdu' is scanned for a
    code block; blocks that look like C/C++ or Java are submitted through the
    logged-in session, and the loop stops once ``is_ac(PID, usr)`` is true.
    """
    get_data = {
        'q': 'HDU ' + PID,
        't': 'blog',
        'o': '',
        's': '',
        'l': 'null'
    }
    search_html = requests.get(csdn_url, params=get_data).text
    for link in LINK_RE.findall(search_html):
        print(link)
        post_html = requests.get(link, headers=head).text

        title_match = TITLE_RE.search(post_html)
        if title_match is None:
            # No <title> on the page: nothing to identify the post by.
            continue
        title = title_match.group(1).lower()
        if PID not in title or 'hdu' not in title:
            continue

        code_match = CODE_RE.search(post_html)
        if code_match is None:
            print('code not find')
            continue

        LAN = check_lan(code_match.group(1))
        CODE = parser_code(code_match.group(2))
        # CODE is UTF-8 bytes, so compare against byte strings; only submit
        # text that contains 'include' or 'import java'.
        if b'include' not in CODE and b'import java' not in CODE:
            continue

        print('%s %s' % (PID, LAN))
        print('--------------')
        submit_data = {
            'check': '0',
            'problemid': PID,
            'language': LAN,
            'usercode': CODE
        }
        s.post(sub_url, headers=head, data=submit_data)
        # Pause before polling the solved list (presumably to let the judge
        # finish and to avoid submitting too quickly).
        time.sleep(5)
        if is_ac(PID, usr):
            break


def main():
    """Prompt for credentials, log in, and walk the problem id range."""
    usr = _read_line('input your username:')
    psw = getpass.getpass('input your password:')
    login(usr, psw)
    for pro_cnt in range(FIRST_PID, LAST_PID + 1):
        PID = str(pro_cnt)
        if is_ac(PID, usr):
            continue
        search_csdn(PID, usr)


if __name__ == '__main__':
    main()
代碼不長,僅僅只有78行,是的,就是這樣!
目前沒有打算完善這篇博客,也不推薦去研究這個東西,推薦的是去學習真正的算法,哈哈!
很久很久以前自己寫過的AC自動機,貼一發:
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;

#define clr( a, b ) memset( a, b, sizeof(a) )

const int SIGMA_SIZE = 26;          // alphabet size: lowercase 'a'..'z'
const int NODE_SIZE = 500000 + 10;  // capacity of the node pool

// Aho-Corasick automaton over the lowercase letters 'a'..'z'.
//
// Usage: init(), insert() every pattern, getfail() once, then work().
// insert() must not be called after getfail(), because getfail() overwrites
// the missing trie edges with automaton transitions.
// The caller must keep the total number of trie nodes below NODE_SIZE.
// The arrays are very large, so instantiate only with static storage
// (like the global `ac` below).
struct ac_automaton{
    int ch[ NODE_SIZE ][ SIGMA_SIZE ];  // child / transition table; 0 is the root
    int f[ NODE_SIZE ];                 // failure link of each node
    int val[ NODE_SIZE ];               // number of patterns ending at the node
    int last[ NODE_SIZE ];              // nearest pattern-ending node on the failure chain, 0 if none
    int sz;                             // number of nodes currently in use

    // True when c is one of the SIGMA_SIZE letters the automaton indexes by.
    static bool in_alphabet( char c ){
        return c >= 'a' && c < 'a' + SIGMA_SIZE;
    }

    // Reset to an empty automaton containing only the root.
    void init(){
        sz = 1;
        clr( ch[0], 0 );
        // Only the root needs resetting: insert() clears ch/val of every node
        // it creates, and getfail() assigns f/last of every non-root node.
        val[0] = 0;
        f[0] = 0;
        last[0] = 0;
    }

    // Add pattern s (NUL-terminated) to the trie.
    // A pattern containing a character outside 'a'..'z' is ignored, since
    // indexing ch[][] with it would write out of bounds.
    void insert( const char *s ){
        for( int i = 0; s[i]; ++i )
            if( !in_alphabet( s[i] ) ) return;

        int u = 0;
        for( int i = 0; s[i]; ++i ){
            int c = s[i] - 'a';
            if( !ch[u][c] ){
                clr( ch[sz], 0 );
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u]++;
    }

    // Compute failure links and `last` links by BFS, and fill every missing
    // edge with the transition of the failure node.
    void getfail(){
        queue<int> q;
        f[0] = 0;
        last[0] = 0;  // read below via last[ f[u] ] when f[u] is the root
        for( int c = 0; c < SIGMA_SIZE; ++c ){
            int u = ch[0][c];
            if( u ) f[u] = 0, q.push(u), last[u] = 0;
        }
        while( !q.empty() ){
            int r = q.front(); q.pop();
            for( int c = 0; c < SIGMA_SIZE; ++c ){
                int u = ch[r][c];
                if( !u ){
                    // No trie edge: reuse the failure node's transition.
                    ch[r][c] = ch[ f[r] ][c];
                    continue;
                }
                q.push( u );
                int v = f[r];
                while( v && !ch[v][c] ) v = f[v];
                f[u] = ch[v][c];
                last[u] = val[ f[u] ] ? f[u] : last[ f[u] ];
            }
        }
    }

    // Run text s (NUL-terminated) through the automaton and return how many
    // inserted patterns occur in it. Each pattern is counted at most once:
    // val[] is zeroed as matches are collected, so a later call only reports
    // patterns not seen before.
    // Characters outside 'a'..'z' cannot be part of any pattern and send the
    // automaton back to the root.
    int work( const char* s ){
        int res = 0;
        int u = 0;
        for( int i = 0; s[i]; ++i ){
            if( !in_alphabet( s[i] ) ){
                u = 0;
                continue;
            }
            u = ch[u][ s[i] - 'a' ];
            // Start at u if a pattern ends here, otherwise at the nearest
            // pattern-ending suffix. Starting unconditionally at u would miss
            // patterns that are proper suffixes of a non-terminal node.
            int e = val[u] ? u : last[u];
            while( val[e] ){
                res += val[e];
                val[e] = 0;
                e = last[e];
            }
        }
        return res;
    }
}ac;