算法總結篇---哈希和哈希表

時間 2021-02-01

標籤 html ios git 算法數組數據結構函數優化 spa 欄目 HTML 简体版

原文原文鏈接

哈希和哈希表
- 例題

哈希和哈希表

什麼是哈希啊？

哈希是一種用來統計複雜數據的不完美算法，或者說思想：構造一個哈希函數，將原始數據映射成便於統計的信息，在映射過程當中會損失部分信息。類似於離散化，僅保留大小關係。

舉個栗子：

維護一個數據結構，支持插入一個數、查詢一個數在這個數據結構中的個數，數的大小 \(\le 2^{63} - 1\)。

單哈希

把插入的一個數對一個不是很大的數取模，令新數代替原數。
若是兩個數的餘數相等，則認爲它們兩個相等
開一個 \(cnt\) 數組統計個數

多哈希

同時對多個模數取模，判重時判取模後的全部數是否所有相等，
實現時能夠定義一個結構體，用 \(set/map\) 維護，或寫哈希表。

正確性大幅增長，但仍不是徹底正確。
通常寫雙哈希就夠了，卡不掉。

哈希表

考慮單哈希。
不把餘數相等的一些數直接看作相等，開個鏈表把它們鏈起來。
判重時找到查詢的數的餘數對應的鏈表，遍歷全部元素判重。
能夠用鄰接表或 vector 實現。

隨機數據下鏈表最大長度（每次判重的複雜度）期望 \(O(\frac{n}{mod})\)。
犧牲了時間，保證了正確性。

多哈希哈希表

能夠用多哈希使數的分佈更加均勻。
通常做法是對哈希獲得的多個餘數再進行哈希。

關於哈希函數

對應的哈希函數相等是兩元素相等的必要條件

能夠隨便構造

構造哈希函數的方式多種多樣，模一個數，乘一個數，加一個數，甚至更復雜的關係

只要正確性高就行，或者函數符合您的品味

字符串哈希

用於判重字符串
將一串字符映射成一個整數再進行判斷

因爲字符串是具備先後關係的，通常按下述方法構造：
選取兩個合適的互質常數 \(b\) 和 \(h (b < h)\)，假設有一個字符串 \(C = c_1c_2···c_m\)，那麼咱們定義哈希函數：

\[H(C) = (c_1b^{m - 1} + c_2b^{m - 2} + ···+c_mb^{0}) \mod h \]

考慮遞推實現，設 \(H(C, k)\) 爲前 \(k\) 個字符構成的字符串的哈希值，則：

\[H(C, k + 1) = (H(C, k) \times b + c_{k + 1}) \mod h \]

一般，題目要求的是判斷主串的一段字符與另外一個匹配串是否匹配，即判斷字符串 \(C = c_1c_2···c_m\) 從位置 \(k + 1\) 開始的長度爲 \(n\) 的子串 \(C^{'} = c_{k + 1}c_{k + 2}···c_{k + n}\) 的哈希值與另外一匹配串 \(S = s_1s_2···s_n\) 的哈希值是否相等，則：

\[H(C^{'}) = (H(C, k + n) - H(C, k) \times b^{n}) \mod h \]

只要預求得 \(b^{n}\) ，就能 \(O(1)\) 判斷了

總時間複雜度 \(O(n + m)\)

例題

Oulipo

板子題

/*
Work by: Suzt_ilymics
Knowledge: ??
Time: O(??)
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#define LL long long
#define orz cout<<"lkp AK IOI!"<<endl
#define int long long

using namespace std;
// NOTE: `int` is a macro for `long long` in this program (see the #define above),
// so every `int` declared below is really a `long long`.
const int MAXN = 1e6+10; // capacity of the string and hash buffers
const int INF = 1; // not referenced in the visible code
const int mod = 1e9+9; // modulus of the polynomial hash
const int b = 1e9+7; // base of the polynomial hash

// A and B hold the two input strings read by main(); both are stored from index 1.
char A[MAXN], B[MAXN];
// H[i]: hash of the first i characters of A; h: not referenced in the visible code;
// pow: b^strlen(B) modulo mod; cnt: number of hash matches found; sum: hash of B.
int H[MAXN], h[MAXN], pow, cnt = 0, sum = 0;

// Reads one decimal integer from stdin using getchar().
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated.
// Returns 0 when the end of input is reached before any digit is seen
// (the previous version looped forever in that case).
int read(){
	int value = 0;
	bool negative = false;
	int ch = getchar(); // int, so EOF stays distinguishable from real characters
	// Compare against '0'..'9' directly: no <cctype> dependency and no
	// undefined behaviour when a character has a negative value.
	while(ch != EOF && (ch < '0' || ch > '9')){
		if(ch == '-') negative = true;
		ch = getchar();
	}
	while(ch >= '0' && ch <= '9'){
		value = value * 10 + (ch - '0');
		ch = getchar();
	}
	return negative ? -value : value;
}

// Binary exponentiation: returns x raised to the power p, reduced modulo mod.
// The parameter named mod hides the file-level constant of the same name.
// Intermediate products are formed in the type `int` resolves to, so callers
// must make sure (mod - 1)^2 fits in that type.
int quickpow(int x, int p, int mod){
	int acc = 1;
	for(; p != 0; p >>= 1){
		// Fold the current square into the result when the low bit of p is set.
		if(p & 1){
			acc = acc * x % mod;
		}
		x = x * x % mod;
	}
	return acc;
}

#undef int 

// Reads two whitespace-delimited strings A and B, then prints how many
// substrings of A with length strlen(B) have the same polynomial hash as B.
int main()
{
// `int` is #undef'd right before main() so that main keeps a standard return
// type; from here on it is `long long` again, like the globals.
#define int long long
	// Both strings are stored from index 1, so H[i] covers the first i characters.
	cin >> (A + 1);
	cin >> (B + 1);
	int lenA = strlen(A + 1), lenB = strlen(B + 1);
	// b^lenB aligns a shorter prefix hash with a longer one before subtracting.
	pow = quickpow(b, lenB, mod);
	for(int i = 1; i <= lenA; ++i){ H[i] = (H[i - 1] * b % mod + A[i]) % mod; }
	for(int i = 1; i <= lenB; ++i){ sum = (sum * b % mod + B[i]) % mod; }
	cnt = 0;
	for(int i = 0; i + lenB <= lenA; ++i){
		// Hash of A[i+1 .. i+lenB]; "+ mod" keeps the difference non-negative.
		if((H[i + lenB] - H[i] * pow % mod + mod) % mod == sum){
			cnt++;
		}
	}
	// cnt is a long long here, so "%d" was a format/argument mismatch.
	printf("%lld", cnt);
	return 0;
}

圖書管理

發現圖書只有加入沒有拿出，用字符串哈希轉換後和上面的例題相似

用 bool 數組來表示其是否加入，\(O(1)\) 查詢

考慮用雙哈希優化，會使重複的可能性降到很低

我這裏只用了單哈希，開到一億多才卡過去
~~成爲全部提交記錄中用時最長空間最長的一份代碼~~

/*
Work by: Suzt_ilymics
Knowledge: ??
Time: O(??)
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#define LL long long
#define orz cout<<"lkp AK IOI!"<<endl

using namespace std;
const int MAXN = 3e4+4; // size of stc
const int INF = 1; // not referenced in the visible code
const int mod = 101451419; // modulus of the string hash; hash values index vis
const int b = 1e9 + 7; // base of the string hash

// n: number of commands read by main(); stc and sc are not referenced in the visible code.
LL n, stc[MAXN], sc = 0;
// vis[x] is set once a string whose hash is x has been added; the array is slightly larger than mod.
bool vis[101452419];
// nam: rest of the current command line, stored from index 1; opt: the command word.
char nam[210], opt[10];

// Reads one decimal integer from stdin using getchar().
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated.
// Returns 0 when the end of input is reached before any digit is seen
// (the previous version looped forever in that case).
int read(){
	int value = 0;
	bool negative = false;
	int ch = getchar(); // int, so EOF stays distinguishable from real characters
	// Compare against '0'..'9' directly: no <cctype> dependency and no
	// undefined behaviour when a character has a negative value.
	while(ch != EOF && (ch < '0' || ch > '9')){
		if(ch == '-') negative = true;
		ch = getchar();
	}
	while(ch >= '0' && ch <= '9'){
		value = value * 10 + (ch - '0');
		ch = getchar();
	}
	return negative ? -value : value;
}

// Processes n commands, one per line: a command word followed by the rest of
// the line. The rest of the line (including the separating blank) is hashed;
// a command starting with 'a' marks that hash as present, a command starting
// with 'f' prints "yes" or "no" depending on whether the hash was marked.
int main()
{
	n = read();
	for(int i = 1, len; i <= n; ++i){
		LL sum = 0;
		// Limit the extraction to the size of opt so a long token cannot overflow it.
		cin.width(sizeof(opt));
		cin>>opt;
		// gets() is unbounded and no longer part of C++; fgets() is bounded but
		// keeps the line break, which is stripped below to hash the same text.
		if(fgets(nam + 1, sizeof(nam) - 1, stdin) == NULL) nam[1] = '\0';
		len = strlen(nam + 1);
		if(len > 0 && nam[len] == '\n') nam[len--] = '\0';
		// Hash bytes as unsigned so the running value can never become a negative index.
		for(int j = 1; j <= len; ++j){
			sum = (sum * b % mod + static_cast<unsigned char>(nam[j])) % mod;
		}
		if(opt[0] == 'a'){
			vis[sum] = 1;
		}
		else if(opt[0] == 'f'){
			if(vis[sum]) printf("yes\n");
			else printf("no\n");
		}
	}
	return 0;
}

Power Strings

比較好出思路，枚舉重複的字符串的長度，由於長度必須是總長度的因子，因此 \(O(len)\) 枚舉便可，而後掃一遍看看是否符合條件，從小到大最早遇到的必定是答案

/*
Work by: Suzt_ilymics
Knowledge: ??
Time: O(??)
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#define LL long long
#define orz cout<<"lkp AK IOI!"<<endl

using namespace std;
const int MAXN = 1e6+6; // capacity of the string, power and hash buffers
const int INF = 1; // not referenced in the visible code
const int mod = 1e9+9; // modulus of the polynomial hash
const int b = 1e7+7; // base of the polynomial hash

// Current input string, stored from index 1.
char s[MAXN];
// pow[i]: b^i modulo mod (filled by init()); H[i]: hash of the first i characters of s;
// sum: hash of the candidate block, set by check().
LL pow[MAXN], H[MAXN], sum;
// Length of the current input string.
int len;

// Reads one decimal integer from stdin using getchar().
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated.
// Returns 0 when the end of input is reached before any digit is seen
// (the previous version looped forever in that case).
int read(){
	int value = 0;
	bool negative = false;
	int ch = getchar(); // int, so EOF stays distinguishable from real characters
	// Compare against '0'..'9' directly: no <cctype> dependency and no
	// undefined behaviour when a character has a negative value.
	while(ch != EOF && (ch < '0' || ch > '9')){
		if(ch == '-') negative = true;
		ch = getchar();
	}
	while(ch >= '0' && ch <= '9'){
		value = value * 10 + (ch - '0');
		ch = getchar();
	}
	return negative ? -value : value;
}

// Precomputes pow[i] = b^i modulo mod for every i from 0 to 1000000.
void init(){
	const int limit = 1000000;
	pow[0] = 1;
	int i = 1;
	while(i <= limit){
		pow[i] = pow[i - 1] * b % mod;
		++i;
	}
}

bool check(int mid){
	sum = H[mid];
	for(int i = 0; i <= len - 1; i += mid){
		if((H[i + mid] - H[i] * pow[mid] % mod + mod) % mod != sum) return 0;
	}
	return 1;
}

// Reads strings until input ends or a string starting with '.' is read. For
// each string, prints len / u, where u is the smallest divisor of the length
// for which check(u) succeeds.
int main()
{
	init();
	for(;;){
		if(!(cin >> (s + 1))) break;
		if(s[1] == '.') break;
		len = strlen(s + 1);
		// Prefix hashes of the string, which is stored from index 1.
		for(int pos = 1; pos <= len; ++pos){
			H[pos] = (H[pos - 1] * b % mod + s[pos]) % mod;
		}
		// Advance to the first divisor of len accepted by check();
		// check() is only evaluated for divisors, as before.
		int unit = 1;
		while(unit <= len && (len % unit != 0 || !check(unit))){
			++unit;
		}
		if(unit <= len){
			printf("%d\n", len / unit);
		}
	}
	return 0;
}

[BalticOI 2014 Day1] Three Friends

相關 \(Solution\) 請跳轉我的這篇題解

P3538 [POI2012]OKR-A Horrible Poem

來自兩篇題解的思路，能夠結合着理解一下，~~另外loj上這題卡我模數和進制數~~

一、循環節必定是長度的約數
二、若是n是一個循環節，那麼k*n也一定是一個循環節（關鍵所在）
三、n是[l,r]這一段的循環節  的充要條件是  [l,r-n]和[l+n,r]相同（利用這個性質咱們在判斷是否爲循環節時能夠做到O(1)）
因此咱們能夠在求出這個區間的長度以後，判斷它的每一個約數是不是循環節（應用性質3），而且由於性質1，它的約數是循環節，原串必定也是。

循環節的長度和循環次數都必定是總長的約數
我的做法是把總長除掉循環次數
先把len分解質因數
（線性篩質數，並記錄下每一個數的最小質因子加速分解，這已經是常規操作了）
由於最小循環節的倍數也是循環節
因此從len開始試除每一個質因子並判斷（你能夠理解爲len的因子分爲循環節的因子和循環次數的因子，要把循環次數的因子除掉）

/*
Work by: Suzt_ilymics
Knowledge: ??
Time: O(??)
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#define LL long long
#define LLL unsigned long long
#define orz cout<<"lkp AK IOI!"<<endl

using namespace std;
const int MAXN = 5e5+10; // capacity of the string, power, hash and sieve buffers
const int INF = 1; // not referenced in the visible code
const int mod = 1e9+7; // not referenced in the visible code
const int b = 7; // base of the polynomial hash

// n: value read first by main(), used as the string length and sieve bound; m: number of queries.
LL n, m;
// Input string, stored from index 1.
char s[MAXN];
// Pow[i]: b^i; H[i]: hash of the first i characters of s. Both are unsigned and no
// modulus is applied in main(), so the arithmetic wraps around on overflow.
// sum is not referenced in the visible code.
LLL Pow[MAXN], H[MAXN], sum;
// prim[x]: prime factor of x recorded by init(); sc is not referenced in the visible code.
LL prim[MAXN], sc = 0;
// vis[x]: x has already been assigned a prime factor by init(); flag is not referenced in the visible code.
bool vis[MAXN], flag;

// Reads one decimal integer from stdin using getchar().
// Every non-digit character in front of the number is skipped; if any of the
// skipped characters is '-', the result is negated.
// Returns 0 when the end of input is reached before any digit is seen
// (the previous version looped forever in that case).
LL read(){
	LL value = 0;
	bool negative = false;
	int ch = getchar(); // int, so EOF stays distinguishable from real characters
	// Compare against '0'..'9' directly: no <cctype> dependency and no
	// undefined behaviour when a character has a negative value.
	while(ch != EOF && (ch < '0' || ch > '9')){
		if(ch == '-') negative = true;
		ch = getchar();
	}
	while(ch >= '0' && ch <= '9'){
		value = value * 10 + (ch - '0');
		ch = getchar();
	}
	return negative ? -value : value;
}

void init(){
	for(LL i = 2; i <= n; ++i){
		if(vis[i]) continue;
		for(LL j = 1; i * j <= n; ++j){
			LL t = i * j;
			if(vis[t]) continue;
			vis[t] = true;
			prim[t] = i;
		}
	}
}

int main()
{
	n = read();
	init();
	cin >> (s + 1);
	Pow[0] = 1;
	for(LL i = 1; i <= n; ++i){ Pow[i] = Pow[i - 1] * b, H[i] = H[i - 1] * b + s[i]; }
	m = read();
	for(LL i = 1, l, r, len, ans; i <= m; ++i){
		l = read(), r = read();
		ans = len = r - l + 1;
		while(len > 1){
			LL k = ans / prim[len];
			len /= prim[len];
			if(H[r - k] - H[l - 1] * Pow[r - k - l + 1] == H[r] - H[l - 1 + k] * Pow[r - k - l + 1]){
				ans = k;
			}
		}
		printf("%d\n", ans);
	}
	return 0;
}