Hash

時間 2021-02-01

標籤 ios c++ 函數 spa code 字符串 get string hash 欄目 iOS 简体版

原文原文鏈接

概念

通過一個 hash 函數 H，將一組數據（包括字符串、較大的數等）轉化成可以用變量表示或能直接作爲下標的數；通過 hash 函數轉化得到的數值稱爲 hash 值。hash 可以實現快速查找和匹配，常用的有字符串 hash 和哈希表。

字符串hash

題目

給定一個字符串 \(A\) 和一個字符串 \(B\)，求 \(B\) 在 \(A\) 中的出現次數。\(A\) 和 \(B\) 中的字符均爲英語大寫字母或小寫字母。

\(A\) 中不同位置出現的 \(B\) 可重疊。

我們選取兩個合適的互質的數 \(b\) 和 \(h\)（\(b < h\)），假設字符串 \(C = c_1c_2c_3\cdots c_m\)，則定義

\(H(C) = (c_1b^{m-1} + c_2b^{m-2} + \cdots + c_mb^{0}) \bmod h\)

\(b\) 代表的是基數，相當於把字符串看作 \(b\) 進制數——《一本通提高篇》（~~很詭異的東西~~）

設 \(H(C,k)\) 爲前 \(k\) 個字符組成的字符串的哈希值，則

\(H(C, k+1) = H(C, k) \cdot b + c_{k+1}\)

舉個栗子：

如果字符串 \(C = \text{「ACDA」}\)（令 A 表示 1，B 表示 2，依此類推），則

\(H(C,1) = 1;\)

\(H(C,2) = 1*b + 3;\)

\(H(C,3) = 1*b^2 + 3*b + 4;\)

\(H(C,4) = 1*b^3 + 3*b^2 + 4 * b + 1;\)

判斷主串的一個子串和另一個字符串是否匹配

即判斷字符串 \(C = c_1c_2\cdots c_m\) 從位置 \(k+1\) 開始的長度爲 \(n\) 的子串 \(C' = c_{k+1}c_{k+2}\cdots c_{k+n}\) 的哈希值與另一個匹配串 \(S = s_1s_2\cdots s_n\) 的哈希值是否相等

\(H(C') = H(C, k+n) - H(C, k) \cdot b^{n}\)

所以可得出求字符串區間hash值(l爲左邊界，r爲右邊界)

\(H(C') = H(C, r) - H(C, l-1) \cdot b^{r-l+1}\)

// Hash of the inclusive range [l, r]: start from the prefix value hs[r] and cancel
// the part contributed by the first l-1 characters, which sits r-l+1 digits higher.
// Assumes hs[i] is the hash of the first i characters and pre[k] is base^k, as the
// surrounding text describes; both tables are defined outside this snippet.
int get(int l,int r){
   const int span = r - l + 1;
   return hs[r] - pre[span] * hs[l - 1];
}

類比可以得到字符串區間刪去一個字符後的 hash 值

// Hash of the range [l, r] with the character at index pos left out: the part before
// pos is shifted up by the r-pos characters that follow it, then the part after pos
// is appended.
int del(int l,int r,int pos){
	int head = get(l, pos - 1);
	int tail = get(pos + 1, r);
	return head * pre[r - pos] + tail;
}

而後就是習題了= =

T1 子串查找

模板題

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
// Number of elements in each of the character and hash arrays declared below.
const int M = 1e6 + 10;
// Unsigned type used for all hash values; being unsigned, overflow wraps around
// instead of being undefined.
typedef unsigned long long ull;
// Reads the next decimal integer from stdin. Leading non-digit characters are
// skipped; a '-' seen while skipping makes the result negative.
// Returns 0 if the input ends before any digit is found.
int read(){
  int x = 0,f = 1;
  // getchar() returns int so that EOF stays distinct from every character value;
  // without the EOF test the skip loop would spin forever at end of input.
  int c = getchar();
  while(c != EOF && (c < '0'||c > '9')){if(c == '-')f = -1;c = getchar();}
  while(c >= '0'&&c <= '9'){x = x*10 + (c - '0');c = getchar();}
  return x * f;
}
// Input buffers: sa is the text, sb the pattern; main stores both starting at index 1.
char sa[M],sb[M];
// base: radix of the polynomial hash; sum[i]: hash of the first i characters of sa;
// pow[i]: base^i; falg: hash of the whole pattern sb.
// NOTE(review): `pow` shares its name with the C library function pow(); this
// declaration will clash if <cmath>/<math.h> ever gets included — consider renaming.
ull base = 155,sum[M],pow[M],falg;
// Number of positions in sa at which sb matches.
int ans;
// Reads two whitespace-separated strings and prints how many times the second one
// occurs in the first (overlapping occurrences included), comparing polynomial
// hashes computed in wrap-around unsigned arithmetic.
int main(){
   scanf("%s%s",sa + 1,sb + 1);

   const int lena = strlen(sa + 1),lenb = strlen(sb + 1);

   // Build base^i up to lenb, the only exponent read below. A fixed cutoff short of
   // the buffer size would leave pow[lenb] at 0 for the longest patterns and make
   // every window compare against the wrong value.
   pow[0] = 1;
   for(int i = 1;i <= lenb; i++)
       pow[i] = pow[i - 1]*base;

   // sum[i] = hash of sa[1..i].
   sum[0] = 0;
   for(int i = 1;i <= lena; i++)
       sum[i] = sum[i - 1] * base + (ull)(sa[i] - 'A' + 1);

   // falg = hash of the whole pattern.
   falg = 0;
   for(int i = 1; i <= lenb; i++)
       falg = falg * base + (ull)(sb[i] - 'A' + 1);

   // Slide a window of lenb characters over sa; sum[i+lenb] - sum[i]*base^lenb is
   // the hash of sa[i+1..i+lenb].
   for(int i = 0;i + lenb <= lena; i++){
       if(falg == sum[i + lenb] - sum[i] * pow[lenb]) ans++;
   }
   cout<<ans;
   return 0;
}

T2 圖書管理

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
typedef unsigned long long ull;
// Moduli of the two independent hashes; main treats a title as present only when
// both of its hash slots are set.
const int mod1 = 1e7 + 7;
const int mod2 = 1e7 + 9;
// Radix of the polynomial hash.
const int base = 1e9;
// Capacity of the line buffer S.
const int M = 1e7;
// Number of commands to process.
int n;
// flag: the command word; S: the rest of the command line.
char flag[5], S[M];
// Presence tables indexed by hash value. Each is sized by its own modulus so that
// every residue 0..mod-1 is a valid index (M entries would be too few for both).
bool A[mod1], B[mod2];
int main() {
    cin>>n;

    for (int i = 1; i <= n; i++) {
        cin >> flag;
        ull sum1 = 1, sum2 = 1;
        gets(S);

        for (int j = 0; j < strlen(S); j++)
            sum1 = (sum1 * base % mod1 + S[j]) % mod1, sum2 = (sum2 * base % mod2 + S[j]) % mod2;

        if (flag[0] == 'a')
           A[sum1] = 1, B[sum2] = 1;

        if (flag[0] == 'f'){
            if (A[sum1] && B[sum2])
                puts("yes");
            else
                puts("no");
        }
    }

    return 0;
}

T3 Power Strings

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
// Size presets. Only D is referenced by the declarations and code of this listing.
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
// Number of elements in the character buffer and the hash/power tables.
const int D = 1e6 + 2;
// Large sentinel value; not referenced in this listing.
const int inf = 0x3f3f3f3f;
// Unsigned type used for hash values, so overflow wraps around.
typedef unsigned long long ull;
// Reads the next decimal integer from stdin. Leading non-digit characters are
// skipped; a '-' seen while skipping makes the result negative.
// Returns 0 if the input ends before any digit is found.
int read(){
	int x = 0,f = 1;
	// Keep getchar()'s int result so EOF is distinguishable, and stop skipping
	// at EOF instead of looping forever.
	int c = getchar();
	while(c != EOF && (c < '0'||c > '9')){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + (c - '0');c = getchar();}
	return x*f;
}
// Current input word, stored from index 0.
char c[D];
// hs[i]: hash of the suffix of c starting at index i (filled back to front in main);
// power[i]: base^i; base: radix of the polynomial hash.
ull hs[D],power[D],base = 37;
int main(){
	power[0] = 1; 
	for(int i = 1;i <= D; i++)
	  power[i] = power[i - 1]*base;
	  
	while (scanf("%s", c)){
	  	
	  if (!strcmp(c, "."))break;
	  
		ull ans = 0;int len = strlen(c);
		hs[len] = 0;
		
		for (int i = len - 1; i >= 0; i--){
			
			hs[i] = hs[i + 1] * base + c[i] - 'a' + 1;
		}
		for (int k = 1; k <= len; k++)
		{
			if (len % k != 0)
				continue;
			 
			 ull tomp = hs[0] - hs[k] * power[k];
			 int j = 0;
			 for(j = k;j < len; j = j + k){
			 	
			 	 if(tomp != hs[j] - hs[k + j]*power[k]) break;
			 	 else tomp = hs[j] - hs[k + j]*power[k];
			 }
			 if(j == len){
			 	ans = len / k;break;
			 }	
		}
		cout<<ans<<"\n";	
	}
	return 0;
}

T4 Seek the Name, Seek the Fame

#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
// Size presets. Only D is referenced by the declarations and code of this listing.
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
// Number of elements in the character buffer and the hash/power tables.
const int D = 1e6 + 2;
// Large sentinel value; not referenced in this listing.
const int inf = 0x3f3f3f3f;
// Not referenced in this listing: the hashes rely on unsigned wrap-around instead.
const int mod = 1e9 + 7;
// Unsigned type used for hash values, so overflow wraps around.
typedef unsigned long long ull;
// Reads the next decimal integer from stdin. Leading non-digit characters are
// skipped; a '-' seen while skipping makes the result negative.
// Returns 0 if the input ends before any digit is found.
int read(){
	int x = 0,f = 1;
	// getchar() yields an int; storing it in a char would lose the EOF marker and
	// the skip loop below would never end once input is exhausted.
	int c = getchar();
	while(c != EOF && (c < '0'||c > '9')){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + (c - '0');c = getchar();}
	return x*f;
}
// Current input word; main stores it starting at index 1.
char s[D];
// base: radix of the polynomial hash; power[i]: base^i; hs[i]: hash of s[1..i].
ull base = 34,power[D],hs[D];
// For every word on stdin, prints in increasing order each length i for which the
// first i characters equal the last i characters, then a newline.
int main(){

   // power[i] = base^i. The table has D elements, so stop at index D-1.
   power[0] = 1;
   for(int i = 1;i < D; i++) power[i] = power[i - 1] * base;

   // One word per iteration until no further word can be read.
   while(scanf ("%s", s + 1) == 1){
       const int len = strlen(s + 1);

       // hs[i] = hash of s[1..i].
       hs[0] = 0;
       for(int i = 1; i <= len; i++)
           hs[i] = hs[i - 1] * base + s[i] - 'a' + 1;

       for(int i = 1;i <= len; i++){
           // hs[len] - hs[len-i]*base^i is the hash of the last i characters.
           if(hs[i] == hs[len] - hs[len - i] * power[i])
               printf("%d ",i);
       }
       printf("\n");
   }
   return 0;
}

T5 「BalticOI 2014 Day 1」三個朋友

求區間hash和刪去字符後的hash

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
// NOTE: in this listing `int` is macro-redefined as unsigned long long, so every
// "int" below is really a 64-bit-or-wider unsigned value.
// Size presets. Only D is referenced by the declarations and code of this listing.
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
// Number of elements in the character buffer and the hash/power tables.
const int D = 2e6 + 5;
// Large sentinel value; not referenced in this listing.
const int inf = 0x3f3f3f3f;
// Not referenced in this listing. The literal exceeds 32 bits and only fits because
// of the macro above.
const int mod = 99984198447;
// Reads the next decimal integer from stdin. Leading non-digit characters are
// skipped; a '-' seen while skipping negates the result (with `int` macro-redefined
// as unsigned long long in this listing, the negation wraps around).
// Returns 0 if the input ends before any digit is found.
int read(){
	int x = 0,f = 1;
	// Compare against EOF before classifying the character: a plain char cannot
	// represent EOF, and the skip loop would otherwise never end at end of input.
	int c = getchar();
	while(c != EOF && (c < '0'||c > '9')){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + (c - '0');c = getchar();}
	return x*f;
}

// Input string; main stores it starting at index 1.
char s[D];
// pre[i]: base^i; base: hash radix; hs[i]: hash of s[1..i];
// ans: number of distinct halves found so far; ll / rr: the two half hashes computed
// by the latest check(); flag: hash of the half accepted for the current position;
// n: string length; mid: (n + 1) / 2; mark: last position for which check() held.
// (The second ';' is an empty declaration.)
int pre[D],base = 999983,hs[D],ans,ll,rr,flag,n,mid,mark;;
// Hashes of halves that have already been counted.
map<unsigned long long, int> vis;
// Hash of s[l..r] (inclusive, 1-based): the prefix hash up to r minus the prefix hash
// up to l-1 shifted by the range length. An empty range (l == r + 1) yields 0.
int get(int l,int r){
	int width = r - l + 1;
	return hs[r] - pre[width] * hs[l - 1];
}

// Hash of s[l..r] with the character at index pos removed: the piece before pos is
// shifted up by the r-pos characters that follow it, then the piece after pos is added.
int del(int l,int r,int pos){
	int before = get(l, pos - 1);
	int after = get(pos + 1, r);
	return before * pre[r - pos] + after;
}

// Tests whether removing the character at pos leaves two halves with equal hashes.
// The left-half hash is stored in ll and the right-half hash in rr as a side effect.
bool check(int pos){
	if(pos < mid){
		// Removal falls in the left part: s[1..mid] minus pos against s[mid+1..n].
		ll = del(1, mid, pos);
		rr = get(mid + 1, n);
	}
	else if(pos > mid){
		// Removal falls in the right part: s[1..mid-1] against s[mid..n] minus pos.
		ll = get(1, mid - 1);
		rr = del(mid, n, pos);
	}
	else{
		// Removing the middle character itself.
		ll = get(1, pos - 1);
		rr = get(pos + 1, n);
	}
	return ll == rr;
}
// Fills the tables for s[1..n]: pre[i] = base^i and hs[i] = hash of the first i
// characters.
void itit(){
	pre[0] = 1;
	int idx = 1;
	while(idx <= n){
		hs[idx] = s[idx] + base * hs[idx - 1];
		pre[idx] = base * pre[idx - 1];
		++idx;
	}
}
// Reads the length n and the string, then looks for a position whose removal leaves
// two identical halves. Prints that half if exactly one distinct half qualifies,
// "NOT UNIQUE" if more than one does, and "NOT POSSIBLE" if none does.
signed main(){

	cin >> n >> (s + 1);

	// After one character is removed, the remaining n-1 characters must split into
	// two equal halves, which is impossible for even n. Answer directly instead of
	// relying on the hashes of different-length halves not colliding.
	if(n % 2 == 0){
		cout<<"NOT POSSIBLE"<<endl;
		return 0;
	}

	mid = (n + 1) >> 1; // index of the middle character
	itit();

	for(int i = 1;i <= n;i++){

		// Skip positions whose removal does not leave two equal halves.
		if(!check(i)) continue;

		// check() left the two (equal) half hashes in ll and rr; remember the half
		// that does not contain the removed position.
		mark = i;
		flag = (mark <= mid) ? rr : ll;

		// Count each distinct half only once.
		if(vis[flag] > 0) continue;
		vis[flag] = 1;
		ans++;
		if(ans > 1){
			cout<<"NOT UNIQUE"<<endl;return 0;
		}
	}

	if(!ans){
		cout<<"NOT POSSIBLE"<<endl;
	}
	else{
	   // Print the half that is untouched by the last qualifying removal.
	   if(mark <= mid){
	   	 for(int i = mid + 1;i <= n; i++)cout<<s[i];
	   	 printf("\n");
	   }
	   else{
	   	 for(int i = 1;i <= mid - 1; i++)cout<<s[i];
	   	  printf("\n");
	   }
	}
	return 0;
}

T6 A Horrible Poem

（被困一上午，只因進制沒取質數）

暴力枚舉循環節，如果循環節長度不能整除區間長度，直接跳過，不過顯然會超時（TLE）

正解是數論？？

假設最短循環節長度爲len

則原串長度顯然爲 len*k。若只考慮 k，

並且將 k 分解質因數，每次用一個質因數試除 k，

則得到的商 k' 與 len 的乘積仍是循環節長度。

利用這個性質，依次用區間長度 n 的質因數 i 試除 n，

若除去後仍是循環節，說明 i 屬於 k，將其除去，最後留下的就是 len

#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
// NOTE: in this listing `int` is macro-redefined as unsigned long long, so every
// "int" below is really a 64-bit-or-wider unsigned value.
// Size presets. Only D is referenced by the declarations and code of this listing.
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 5e5 + 2;
// Number of elements in the character buffer and in the hash, power and sieve tables.
const int D = 5e5 + 10;
// Large sentinel value; not referenced in this listing.
const int inf = 0x3f3f3f3f;
// Not referenced in this listing. The literal exceeds 32 bits and only fits because
// of the macro above.
const int mod = 99984198447;
// Reads the next decimal integer from stdin. Leading non-digit characters are
// skipped; a '-' seen while skipping negates the result (with `int` macro-redefined
// as unsigned long long in this listing, the negation wraps around).
// Returns 0 if the input ends before any digit is found.
int read(){
	int x = 0,f = 1;
	// Test for EOF explicitly: a plain char cannot represent it, and without the
	// test the skip loop would spin forever on truncated input.
	int c = getchar();
	while(c != EOF && (c < '0'||c > '9')){if(c == '-')f = -1;c = getchar();}
	while(c >= '0'&&c <= '9'){x = x*10 + (c - '0');c = getchar();}
	return x*f;
}
// q: declared here but main reads the query count into a local of the same name;
// n: string length; pre[i]: base^i; base: hash radix; hs[i]: hash of s[1..i];
// prim[t]: smallest prime factor of t, filled by calc(); ans: current period
// candidate for a query; len: part of the range length still to be factorised.
int  q, n, pre[D], base = 63, hs[D],prim[D],ans,len;
// vis[t]: t already has its smallest prime factor recorded; flag: not referenced
// in this listing.
bool vis[D],flag;
// Input string; main stores it starting at index 1.
char s[D];
// Sieve over 2..n that records, for every t in that range, its smallest prime
// factor in prim[t]; vis[t] marks values that already have one.
void calc(){
    for (int p = 2; p <= n; ++p) {
        // An unmarked p has no smaller prime factor, so it is prime.
        if (!vis[p]) {
            // Claim every multiple of p that no smaller prime has claimed yet.
            for (int m = p; m <= n; m += p) {
                if (!vis[m]) {
                    vis[m] = 1;
                    prim[m] = p;
                }
            }
        }
    }
}

// Reads the string length, the string and a list of queries [l, r]; for each query
// prints the length of the shortest period of s[l..r].
signed main(){
	n = read();calc();
    scanf("%s", s + 1);

	// pre[i] = base^i, hs[i] = hash of s[1..i].
	pre[0] = 1;
	for(int i = 1;i <= n; i++){
		 pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
	}

	int queries = read();
	while(queries--){
		int l = read(), r = read();

		// Start from the full range length and strip its prime factors one at a
		// time; a factor is dropped from the answer whenever the shorter length is
		// still a period.
		len = ans = r - l + 1;
		while(len > 1){
			int p = prim[len];
			int k = ans / p;
			len /= p;
			// k is a period exactly when s[l..r-k] equals s[l+k..r].
			if(hs[r - k] - hs[l - 1] * pre[r - k - l + 1] == hs[r] - hs[l - 1 + k] * pre[r - k - l + 1])
				ans = k;
		}
		// ans is an unsigned long long here (int is macro-redefined), so "%d" would
		// be a mismatched conversion; print with a specifier that matches the type.
		printf("%llu\n", static_cast<unsigned long long>(ans));
	}
	return 0;
}

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。