HASH算法的本質是特徵提取——將某種不太好表示的特徵,經過某種壓縮的方式映射成一個值。這樣,就能夠優雅解決一部分難以解決的特徵統計問題。
同時考慮到hash算法的本質是個機率算法,所以並不能保證全部的數據都不發生衝突<衝突是指兩個不一樣的特徵計算出了同一個HASH值>,所以能夠考慮使用雙hash的形式,使用兩個不一樣的HASH算法,算出來的HASH值來表示一個特徵量——pair<ull,ull>就是一種實現方式。
一種經常使用的hash算法來自一個遞推式:hash[i] = ( hash[i-1] * HASH_P + val[i] ) % HASH_MOD;
這種方式實際上能夠比喻成爲一個在%HASH_MOD意義下P進制的大數,且每次都添加一個新的個位數進入hash值中。
所以,實際使用中能夠支持%HASH_MOD意義下的加法、減法。
另外hash算法好想好寫,能夠分外暴力的解決至關部分的問題。<甚至能夠直接使用優雅的#define來完成模板的編寫>
Similarity of Subtrees
https://vjudge.net/problem/Aizu-2784
題意:給出一顆樹,詢問以1做爲樹根的樹中,結構相同的子樹個數有多少對。結構相同定義爲,以某點爲根節點,其如下每層的節點個數都與另外一節點相應屬性相同。
Define the depth of a node in a rooted tree by applying the following rules recursively: the depth of the root node is 0, and the depth of a child of a node with depth $d$ is $d+1$.
Let $S(T,d)$ be the number of nodes of $T$ with depth $d$. Two rooted trees $T$ and $T'$ are similar if and only if $S(T,d)$ equals $S(T',d)$ for all non-negative integers $d$.
You are given a rooted tree $T$ with $N$ nodes. The nodes of $T$ are numbered from 1 to $N$. Node 1 is the root node of $T$. Let $T_i$ be the rooted subtree of $T$ whose root is node $i$. Your task is to write a program which calculates the number of pairs $(i,j)$ such that $T_i$ and $T_j$ are similar and $i<j$.
https://cn.vjudge.net/problem/Aizu-2784
題解:能夠發現,子樹的結構其實是能夠經過HASH算法形式的遞推獲得——hash[now] = (∑(hash[child]) * HASH_P + num[now])%HASH_MOD
該遞推式實際上表現了hash值的加法過程。
則,若是支持dfs且不爆棧的話,可使用dfs一發搞定,至關的優雅。
可是反過來,若是不支持dfs,則必須用bfs的方式來搞定樹的遍歷和遞推,實際上也很好想,由於記錄了每一個節點的父節點,也記錄了每一個節點的子節點數量,就能夠很容易的計算出來某個節點的全部子節點是否已經完成了遞推計算。提供兩個版本的代碼:dfs實現和bfs實現。
dfs:
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <vector>
using namespace std;

#define ull unsigned long long
#define ll long long
#define veci vector<int>
#define pp pair<ull, ull>

const int MAXN = 200233;
const ull HASH_MOD1 = 1000000007;
const ull HASH_MOD2 = 1000000009;
const ull HASH_P1 = 100003;
const ull HASH_P2 = 100019;

// One step of the polynomial hash: (x * P + b) mod M, for each of the two
// (P, M) parameter sets.
#define hash1(x, b) (((ull)(x) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x, b) (((ull)(x) * HASH_P2 + (b)) % HASH_MOD2)

veci G[MAXN];          // undirected adjacency lists, nodes are 1-based
int n;                 // number of nodes of the current test case
map<ull, int> mapp;    // packed double hash -> number of subtrees seen with it
ll ans;                // number of similar pairs found so far

/**
 * Computes the double hash of the subtree rooted at `now` and counts how many
 * previously finished subtrees share that hash.
 *
 * The hash satisfies h(now) = (sum of h(child)) * P + 1 (mod M), i.e. it is the
 * polynomial sum over d of S(T, d) * P^d, where S(T, d) is the number of nodes
 * at depth d of the subtree.
 *
 * @param now    node whose subtree is hashed
 * @param father parent of `now` in the rooted tree (0 for the root)
 * @return the pair (hash mod HASH_MOD1, hash mod HASH_MOD2) of the subtree
 *
 * NOTE: the recursion depth equals the height of the tree, so a path-shaped
 * input needs a correspondingly large call stack.
 */
pp dfs_count(int now, int father) {
    pp ret(0, 0);
    for (size_t i = 0; i < G[now].size(); ++i) {
        const int tar = G[now][i];
        if (tar == father) continue;
        const pp child = dfs_count(tar, now);
        // Child hashes are below 2^30 each, so the plain sums cannot overflow
        // 64 bits for any tree that fits in G.
        ret.first += child.first;
        ret.second += child.second;
    }
    ret.first = hash1(ret.first % HASH_MOD1, 1);
    ret.second = hash2(ret.second % HASH_MOD2, 1);

    // Pack both residues into one key. The multiplier must be HASH_MOD2 (the
    // range of the low part); using HASH_MOD1 would let two different pairs
    // map to the same key because ret.second can exceed HASH_MOD1 - 1.
    const ull key = ret.first * HASH_MOD2 + ret.second;

    // Every earlier subtree with the same key forms one new pair with this one.
    ans += mapp[key]++;
    return ret;
}

/**
 * Handles one test case: `n` has already been read; reads the n-1 edges,
 * runs the DFS from node 1 and prints the number of similar pairs.
 */
void init() {
    ans = 0;
    mapp.clear();

    // Clear a little past n as the original did, but never past the array.
    const int used = min(n + 23, MAXN);
    for (int i = 0; i < used; ++i) G[i].clear();

    for (int i = 1; i < n; ++i) {
        int a, b;
        cin >> a >> b;
        G[a].push_back(b);
        G[b].push_back(a);
    }
    dfs_count(1, 0);
    cout << ans << "\n";
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    while (cin >> n) init();
    return 0;
}
bfs:
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <vector>
using namespace std;

#define ull unsigned long long
#define ll long long
#define veci vector<int>
#define pp pair<ull, ull>

const int MAXN = 200233;
const ull HASH_MOD1 = 1000000007;
const ull HASH_MOD2 = 1000000009;
const ull HASH_P1 = 100003;
const ull HASH_P2 = 100019;

// One step of the polynomial hash: (x * P + b) mod M, for each of the two
// (P, M) parameter sets.
#define hash1(x, b) (((ull)(x) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x, b) (((ull)(x) * HASH_P2 + (b)) % HASH_MOD2)

veci G[MAXN];          // undirected adjacency lists, nodes are 1-based
int n;                 // number of nodes of the current test case
map<ull, int> mapp;    // packed double hash -> number of subtrees seen with it
ll ans;                // number of similar pairs found so far
int fa[MAXN];          // parent of each node once rooted at node 1 (0 for the root)
pp anss[MAXN];         // double hash of the subtree rooted at each node
int times[MAXN];       // number of children whose hash is still pending

/**
 * Roots the tree at node 1 with a breadth-first traversal, filling `fa` and
 * setting `times[v]` to the number of children of v.
 */
void bfs() {
    queue<int> que;
    fa[1] = 0;  // node ids start at 1, so 0 never matches a neighbour
    que.push(1);
    while (!que.empty()) {
        const int now = que.front();
        que.pop();
        times[now] = 0;
        for (size_t i = 0; i < G[now].size(); ++i) {
            const int tar = G[now][i];
            if (tar == fa[now]) continue;
            ++times[now];
            fa[tar] = now;
            que.push(tar);
        }
    }
}

/**
 * Computes every subtree hash bottom-up without recursion and accumulates the
 * number of similar pairs in `ans`.
 *
 * Leaves are queued first; a node is queued as soon as its last child has been
 * processed (its `times` counter drops to zero), so all child hashes are ready
 * when the node itself is taken from the queue. The hash recurrence is
 * h(v) = (sum of h(child)) * P + 1 (mod M).
 */
void deal() {
    queue<int> que;
    for (int i = 2; i <= n; ++i) {
        if (times[i] == 0) que.push(i);
    }

    while (!que.empty()) {
        const int now = que.front();
        que.pop();

        // Tell the parent that one more child is finished; the root has none.
        const int parent = fa[now];
        if (parent != 0 && --times[parent] == 0) que.push(parent);

        ull sum1 = 0;
        ull sum2 = 0;
        for (size_t i = 0; i < G[now].size(); ++i) {
            const int tar = G[now][i];
            if (tar == parent) continue;
            sum1 += anss[tar].first;
            sum2 += anss[tar].second;
        }
        anss[now] = pp(hash1(sum1 % HASH_MOD1, 1), hash2(sum2 % HASH_MOD2, 1));

        // Pack both residues into one key. The multiplier must be HASH_MOD2
        // (the range of the low part); HASH_MOD1 would not be injective
        // because the second residue can exceed HASH_MOD1 - 1.
        const ull key = anss[now].first * HASH_MOD2 + anss[now].second;

        // Every earlier subtree with the same key forms one new pair.
        ans += mapp[key]++;
    }
}

/**
 * Handles one test case: `n` has already been read; reads the n-1 edges,
 * roots the tree, hashes all subtrees and prints the number of similar pairs.
 */
void init() {
    ans = 0;
    mapp.clear();

    // Reset only the slots this test case can touch (a little past n, as the
    // original did for G), never past the arrays.
    const int used = min(n + 23, MAXN);
    for (int i = 0; i < used; ++i) {
        G[i].clear();
        fa[i] = 0;
        times[i] = 0;
        anss[i] = pp(0, 0);
    }

    for (int i = 1; i < n; ++i) {
        int a, b;
        cin >> a >> b;
        G[a].push_back(b);
        G[b].push_back(a);
    }
    bfs();
    deal();
    cout << ans << "\n";
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    while (cin >> n) init();
    return 0;
}
Stammering Aliens
https://cn.vjudge.net/problem/UVALive-4513
題意:給一個長串,問至少出現m次的最長連續字串的長度和出現的最右一個字串的起始的位置是多少。
題解:
這道題實際上是劉汝佳藍書上的一道例題,在作的過程當中用到了hash串作減法的思路。
考慮答案中的兩個量:最長長度和最右起始位置。最長長度具備某種意義上的單調性:若是長度爲n的字串能夠符合題目條件,則n-1的也能夠(n>1);所以考慮使用二分的形式來枚舉字串的長度。最右起始位置能夠直觀的求解。
考慮遞推式:hash[i] = (hash[i-1] * HASH_P + str[i]) % HASH_MOD
若簡化爲十進制數字則能夠有以下樣例:
3129741938274 求字串由2到7的hash值
hash[7] = 31297419
hash[2] = 312
hash[2-7] = 97419
觀察可得:hash[2-7] = hash[7] - hash[2]*10^(7-2);
則實際上只要保證上式在%HASH_MOD意義上成當即可。
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <utility>
using namespace std;

#define ll long long
#define ull unsigned long long
#define pp pair<ull, ull>

const int MAXN = 1000233;
const ull HASH_P1 = 233;
const ull HASH_P2 = 241;
const ull HASH_MOD1 = 1000000037;
const ull HASH_MOD2 = 1000000049;

// One step of the polynomial hash: (x * P + b) mod M, for each of the two
// (P, M) parameter sets, and the same step applied to a pair of hashes.
#define hash1(x, b) (((ull)(x) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x, b) (((ull)(x) * HASH_P2 + (b)) % HASH_MOD2)
#define get_next_hash(tmp, b) (make_pair(hash1((tmp).first, b), hash2((tmp).second, b)))

pp hashs[MAXN];     // hashs[i]    = double hash of the prefix str[0..i]
pp hash_hex[MAXN];  // hash_hex[k] = (P1^k mod MOD1, P2^k mod MOD2)
int m;              // required minimum number of occurrences
char str[MAXN];     // current input string
int str_len;        // length of str
int pos;            // set by check(): rightmost qualifying start, or -1
ull mapp[MAXN];     // mapp[s] = packed hash of the window starting at s
int anss[MAXN];     // window start positions, sorted by (hash, start)
int mapp_num;       // number of windows of the length being checked

/** Orders window start positions by their hash, ties broken by position. */
bool cmp(int a, int b) {
    if (mapp[a] != mapp[b]) return mapp[a] < mapp[b];
    return a < b;
}

/**
 * Packed double hash of the substring of `length` characters that ends at
 * index `end` (inclusive).
 *
 * Uses hash[l..r] = hash[r] - hash[l-1] * P^(r-l+1) in each modulus, then
 * combines both residues injectively as a * HASH_MOD2 + b. The combined value
 * is below HASH_MOD1 * HASH_MOD2, which fits in 64 bits, so no information of
 * the double hash is lost.
 */
static ull window_key(int end, int length) {
    ull a = hashs[end].first;
    ull b = hashs[end].second;
    if (end >= length) {
        const pp &prefix = hashs[end - length];
        const ull drop1 = prefix.first * hash_hex[length].first % HASH_MOD1;
        const ull drop2 = prefix.second * hash_hex[length].second % HASH_MOD2;
        a = (a + HASH_MOD1 - drop1) % HASH_MOD1;
        b = (b + HASH_MOD2 - drop2) % HASH_MOD2;
    }
    return a * HASH_MOD2 + b;
}

/**
 * Tests whether some substring of the given length occurs at least `m` times.
 *
 * @param length substring length to test
 * @return true if such a substring exists; `pos` is then the largest start
 *         index at which an m-th (or later) occurrence begins. Returns false
 *         with pos == -1 otherwise, including for lengths outside
 *         [1, str_len].
 */
bool check(int length) {
    pos = -1;
    mapp_num = 0;
    if (length <= 0 || length > str_len) return false;

    // Window number k starts at index k, so the index doubles as the position.
    for (int end = length - 1; end < str_len; ++end) {
        anss[mapp_num] = mapp_num;
        mapp[mapp_num] = window_key(end, length);
        ++mapp_num;
    }
    sort(anss, anss + mapp_num, cmp);

    // Equal hashes are now adjacent with ascending positions; `run` counts
    // how many equal windows end at the current one.
    int run = 1;
    if (m == 1) pos = anss[0];
    for (int i = 1; i < mapp_num; ++i) {
        run = (mapp[anss[i]] == mapp[anss[i - 1]]) ? run + 1 : 1;
        if (run >= m) pos = max(pos, anss[i]);
    }
    return pos != -1;
}

/**
 * Binary search for the largest length in [a, b) that passes check().
 * Requires that length `a` is acceptable (or a == 0) and `b` is not.
 */
int bin_search(int a, int b) {
    while (b - a > 1) {
        const int mid = (a + b) / 2;
        if (check(mid)) {
            a = mid;
        } else {
            b = mid;
        }
    }
    return a;
}

/**
 * Handles one test case: `m` has already been read; reads the string line,
 * builds its prefix hashes and prints "<length> <rightmost start>" or "none".
 */
void init() {
    // gets() no longer exists in C++14 and later; read the line with a bound
    // and drop the line terminator ourselves.
    if (fgets(str, MAXN, stdin) == NULL) str[0] = '\0';
    str[strcspn(str, "\r\n")] = '\0';
    str_len = (int)strlen(str);

    pp running = make_pair(0, 0);
    for (int i = 0; i < str_len; ++i) {
        running = get_next_hash(running, str[i]);
        hashs[i] = running;
    }

    const int ans = bin_search(0, str_len + 1);
    if (ans > 0) {
        check(ans);  // recompute pos for the winning length
        printf("%d %d\n", ans, pos);
    } else {
        puts("none");
    }
}

int main() {
    hash_hex[0] = make_pair(1, 1);
    for (int i = 1; i < MAXN; ++i) {
        hash_hex[i] = get_next_hash(hash_hex[i - 1], 0);
    }
    // Stop on end of input, on malformed input, or on the terminating 0.
    while (scanf("%d\n", &m) == 1 && m) init();
    return 0;
}
Hidden Anagrams
AIZU:https://cn.vjudge.net/problem/Aizu-1370
Gym:https://cn.vjudge.net/problem/Gym-101158D
UVALive:https://cn.vjudge.net/problem/UVALive-7592
題意:給出兩個字符串,求出最大的長度知足,兩個字符串都包含該子串,同時兩個字串包含的字母的種類和個數徹底相同。
題解:思路很簡單,就是枚舉長度,並檢查上面的字符串中是否存在能和下面的串長度相同的,HASH值一致的串。若是有,則檢查經過,沒有則不經過,從高往低枚舉,找到第一個經過的跳出循環<也許會有個常數優化>。
此時HASH算法應當作一個簡單的變化:統計某個字母出現的個數。HASH[I] = HASH[I-1] + HASH_HEX[STR[I]-'a']
此時HASH_HEX表明了HASH_P的在對HASH_MOD作膜法操做的前提下的若干次方。
這道題有4個來源能夠提交,Gym和AIZU可讓N2LOGN甚至更慢的代碼經過,UVALIVE容許N2的代碼加入鄰接鏈表優化經過<此時我已經開了IO掛>,HOJ。。。。。須要在進一步的取消HASH2的膜法操做。
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <vector>
using namespace std;

#define ull unsigned long long
#define pp pair<ull, ull>
#define vecu vector<ull>
#define vevi vector<int>
#define vecp vector<pp >

const ull MAXN = 1000249;
const ull HASH_P1 = 109;
const ull HASH_P2 = 4007;
const ull HASH_MOD1 = 1000249;
const ull HASH_MOD2 = 1000000037;  // declared but not applied: hash2 wraps modulo 2^64

// First component is reduced modulo HASH_MOD1 (it doubles as the bucket index);
// the second component is left to wrap around in 64-bit arithmetic.
#define hash1(x, b) (((ull)(x) * HASH_P1 + (b)) % HASH_MOD1)
#define hash2(x, b) (((ull)(x) * HASH_P2 + (b)))
#define next_hash(tmp, b) (make_pair(hash1((tmp).first, b), hash2((tmp).second, b)))
// Letter -> table index; the input is assumed to consist of lowercase letters.
#define idx(x) ((x) - 'a')

// Buffered I/O helpers. Only the routines this program uses are kept: the
// original pasted template also carried gets() (removed from the language in
// C++14) and a non-void function without a return statement.
namespace fastIO {
const int BUF_SIZE = 100000;

bool IOerror = 0;  // becomes 1 once stdin is exhausted

/** Returns the next byte of stdin, or -1 (and sets IOerror) at end of input. */
inline char nc() {
    static char buf[BUF_SIZE], *p1 = buf + BUF_SIZE, *pend = buf + BUF_SIZE;
    if (p1 == pend) {
        p1 = buf;
        pend = buf + fread(buf, 1, BUF_SIZE, stdin);
        if (pend == p1) {
            IOerror = 1;
            return -1;
        }
    }
    return *p1++;
}

inline bool blank(char ch) { return ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t'; }

/**
 * Reads one whitespace-delimited token into `s` as a NUL-terminated string.
 *
 * @param s   destination buffer
 * @param cap size of the destination in bytes (must be at least 1); characters
 *            beyond cap - 1 are consumed but dropped. Defaults to "unlimited",
 *            which matches the original unbounded behaviour.
 * @return 0 if a token was read, -1 if the input ended before any token
 */
inline int read(char *s, size_t cap = (size_t)-1) {
    char ch = nc();
    while (!IOerror && blank(ch)) ch = nc();
    if (IOerror) return -1;

    size_t len = 0;
    for (; !IOerror && !blank(ch); ch = nc()) {
        if (len + 1 < cap) s[len++] = ch;
    }
    s[len] = 0;
    return 0;
}

/** Output buffer that is written with fwrite when full and on destruction. */
struct Ostream_fwrite {
    char buf[BUF_SIZE];
    char *p1, *pend;

    Ostream_fwrite() : p1(buf), pend(buf + BUF_SIZE) {}

    void out(char ch) {
        if (p1 == pend) {
            fwrite(buf, 1, BUF_SIZE, stdout);
            p1 = buf;
        }
        *p1++ = ch;
    }

    /** Writes `x` in decimal followed by a newline. */
    void println(int x) {
        char digits[15];
        char *top = digits;
        unsigned int mag = x;
        if (x < 0) {
            out('-');
            mag = 0u - mag;  // magnitude without overflowing on INT_MIN
        }
        do {
            *top++ = (char)('0' + mag % 10);
            mag /= 10;
        } while (mag);
        while (top != digits) out(*--top);
        out('\n');
    }

    void flush() {
        if (p1 != buf) {
            fwrite(buf, 1, p1 - buf, stdout);
            p1 = buf;
        }
    }

    ~Ostream_fwrite() { flush(); }
} Ostream;

inline void println(int x) { Ostream.println(x); }
inline void flush() { Ostream.flush(); }
}  // namespace fastIO

const int HASH_HEX_SIZE = 233;  // number of table entries main() fills

char str1[MAXN];
char str2[MAXN];
int str1_len, str2_len;
pp hash_hex[HASH_HEX_SIZE];  // hash_hex[k] = (P1^k mod MOD1, P2^k mod 2^64)
pp str1_hash[MAXN];          // prefix letter-multiset hashes of str1
pp str2_hash[MAXN];          // prefix letter-multiset hashes of str2

/** Adds letter `c` to a multiset hash: each letter contributes hash_hex[c - 'a']. */
static inline pp add_hash(const pp &acc, char c) {
    const pp &weight = hash_hex[idx(c)];
    return make_pair((acc.first + weight.first) % HASH_MOD1, acc.second + weight.second);
}

/** Difference of two prefix hashes, i.e. the hash of the letters in between. */
static inline pp sub_hash(const pp &a, const pp &b) {
    return make_pair((a.first + HASH_MOD1 - b.first) % HASH_MOD1, a.second - b.second);
}

// Chained hash table: hash_table[bucket] is the index of the first node of the
// bucket's list in hash_nodes, or -1 when the bucket is empty.
class hash_node {
public:
    ull val;
    int next;
};
hash_node hash_nodes[MAXN];
int hash_nodes_num;
int hash_table[MAXN];

/** Allocates a node holding `key` that links to the current head of `bucket`. */
inline int new_hash_nodes(int bucket, ull key) {
    hash_nodes[hash_nodes_num].next = hash_table[bucket];
    hash_nodes[hash_nodes_num].val = key;
    return hash_nodes_num++;
}

/** Returns true if `key` is stored in `bucket`. */
inline bool hash_find_key(int bucket, ull key) {
    for (int now = hash_table[bucket]; now != -1; now = hash_nodes[now].next) {
        if (hash_nodes[now].val == key) return true;
    }
    return false;
}

/** Pushes `key` onto the front of `bucket`. */
inline void hash_insert(int bucket, ull key) {
    hash_table[bucket] = new_hash_nodes(bucket, key);
}

/** Empties `bucket` (its nodes are recycled by resetting hash_nodes_num). */
inline void hash_clear(int bucket) {
    hash_table[bucket] = -1;
}

/** Multiset hash of the `length` letters ending at index `end` (inclusive). */
static inline pp window_hash(const pp *prefix, int end, int length) {
    return end >= length ? sub_hash(prefix[end], prefix[end - length]) : prefix[end];
}

/**
 * Tests whether str1 and str2 contain substrings of the given length with the
 * same letter multiset (judged by the double hash).
 *
 * All windows of str1 are stored in the hash table, the windows of str2 are
 * looked up, and the touched buckets are emptied again before returning so the
 * table is always left clean.
 *
 * @param length window length, 1 <= length <= min(str1_len, str2_len)
 */
inline bool check(int length) {
    hash_nodes_num = 0;
    for (int end = length - 1; end < str1_len; ++end) {
        const pp w = window_hash(str1_hash, end, length);
        hash_insert((int)w.first, w.second);
    }

    bool found = false;
    for (int end = length - 1; end < str2_len && !found; ++end) {
        const pp w = window_hash(str2_hash, end, length);
        found = hash_find_key((int)w.first, w.second);
    }

    for (int end = length - 1; end < str1_len; ++end) {
        hash_clear((int)window_hash(str1_hash, end, length).first);
    }
    return found;
}

/** Fills out[i] with the multiset hash of s[0..i]. */
static void build_prefix(const char *s, int len, pp *out) {
    pp acc = make_pair(0, 0);
    for (int i = 0; i < len; ++i) {
        acc = add_hash(acc, s[i]);
        out[i] = acc;
    }
}

/**
 * Handles one test case: str1 and str2 have already been read; prints the
 * largest length for which check() succeeds, or 0 if there is none.
 */
void init() {
    str1_len = (int)strlen(str1);
    str2_len = (int)strlen(str2);
    build_prefix(str1, str1_len, str1_hash);
    build_prefix(str2, str2_len, str2_hash);

    // Try lengths from longest to shortest and stop at the first success.
    int ans = 0;
    for (int len = min(str1_len, str2_len); len > 0; --len) {
        if (check(len)) {
            ans = len;
            break;
        }
    }
    fastIO::println(ans);
}

int main() {
    // check() restores every bucket it touches, so one initial fill suffices.
    memset(hash_table, -1, sizeof(hash_table));

    hash_hex[0] = make_pair(1, 1);
    for (int i = 1; i < HASH_HEX_SIZE; ++i) {
        hash_hex[i] = next_hash(hash_hex[i - 1], 0);
    }

    while (fastIO::read(str1, MAXN) == 0 && fastIO::read(str2, MAXN) == 0) init();
    return 0;
}