似乎没写过多少字符串hash
今天补一补
字符串hash重要思想就是把字符串看做一个N进制大整数,进行取模后直接比较
这样子做的优劣很直观:很快很简单,也有取模后蜜汁碰撞的风险
对于i位置的hash值,可以这样求:
for (int i = 1; i <= n; i++) H[i] = H[i - 1] * p + s[i];
我们要取出子串[l,r]的hash值时,显然就是\(H[r] - H[l - 1] * p^{r - l + 1}\)
来道【正解SAM】的例题:
最长公共子串
当然对串a建SAM,用串b在上边匹配就可以了
SAM太深奥了,我们来看看简单暴力的字符串hash
我们二分长度len,对A串的所有位置的长度为len的hash排序,再拿B串所有位置长度为len的hash去查找
复杂度\(O(n\log^2 n)\)【似乎SAM接近\(O(n)\)?】
#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>
// Integer aliases. Declared as real typedefs instead of macros so they obey
// scoping rules and cannot be altered by neighbouring tokens at the use site.
typedef long long int LL;
typedef unsigned long long int ULL;

// Walks k over the edge list of node u. Expects arrays `h` and `ed` to be in
// scope where it is expanded; nothing in this program expands it.
#define Redge(u) for (int k = h[u]; k; k = ed[k].nxt)
// 1-based counting loop: i runs over 1..n inclusive.
#define REP(i,n) for (int i = 1; i <= (n); i++)

using namespace std;

// maxn sizes the string and hash buffers declared below.
const int maxn = 200005;
const int maxm = 100005;
const int INF = 1000000000;
// Reads the next decimal integer from stdin.
// Every non-digit character before the number is skipped; if a '-' occurs
// anywhere in that skipped run the result is negated.
// Returns 0 when end of input is reached before any digit is seen.
inline int read(){
    int out = 0, flag = 1;
    // Keep the result of getchar() in an int so EOF stays distinguishable
    // from every valid character value.
    int c = getchar();
    // Stop skipping at EOF: without this test the loop never terminates on
    // exhausted input, because EOF is below '0'.
    while (c != EOF && (c < '0' || c > '9')){
        if (c == '-') flag = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9'){
        out = out * 10 + (c - '0');
        c = getchar();
    }
    return out * flag;
}
// Input strings; main stores them starting at index 1.
char A[maxn];
char B[maxn];

// Lengths of A and B.
int lena;
int lenb;
// Number of window hashes currently stored in b[1..n].
int n;

// Prefix hashes: Ha[i] / Hb[i] covers the first i characters of A / B.
ULL Ha[maxn];
ULL Hb[maxn];

// Scratch buffer filled and sorted by check().
ULL b[maxn];
bool check(int len){
n = 0;
ULL P = 1;
for (int i = 1; i <= len; i++) P *= 27;
for (int i = len; i <= lena; i++) b[++n] = Ha[i] - Ha[i - len] * P;
sort(b + 1,b + 1 + n);
for (int i = len; i <= lenb; i++){
ULL temp = Hb[i] - Hb[i - len] * P;
if (b[lower_bound(b + 1,b + 1 + n,temp) - b] == temp) return true;
}
return false;
}
int main(){
scanf("%s",A + 1); lena = strlen(A + 1);
scanf("%s",B + 1); lenb = strlen(B + 1);
for (int i = 1; i <= lena; i++) Ha[i] = Ha[i - 1] * 27 + A[i];
for (int i = 1; i <= lenb; i++) Hb[i] = Hb[i - 1] * 27 + B[i];
int l = 0,r = min(lena,lenb),mid;
while (l < r){
mid = l + r + 1 >> 1;
if (check(mid)) l = mid;
else r = mid - 1;
}
printf("%d\n",l);
return 0;
}
BZOJ3207
此题K很小,我们用上hash之后,每个位置就对应一个hash值,问题就转化为了一个区间内是否存在某个值,用可持久化线段树就可以了
#include<iostream>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
// Integer aliases. Declared as real typedefs instead of macros so they obey
// scoping rules and cannot be altered by neighbouring tokens at the use site.
typedef long long int LL;
typedef unsigned long long int uLL;

// 1-based counting loop: i runs over 1..n inclusive.
#define REP(i,n) for (int i = 1; i <= (n); i++)
// Walks k over the edge list of node u. Expects arrays `h` and `ed` to be in
// scope where it is expanded; nothing in this program expands it.
#define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
// Debug helper: prints s[1..n] separated by spaces, then a newline.
// The character literal uses plain ASCII quotes, and the two statements are
// braced so they stay together when the macro follows an `if` or a loop.
#define BUG(s,n) { for (int i = 1; i <= (n); i++) cout<<s[i]<<' '; puts(""); }

using namespace std;

// Largest unsigned 64-bit value; used as the upper end of the hash range
// covered by the segment tree.
const uLL inf = 18446744073709551615ULL;

// maxn sizes the per-position arrays, maxm the segment-tree node pool.
const int maxn = 100010;
const int maxm = 8000005;
// Reads the next decimal integer from stdin.
// Every non-digit character before the number is skipped; if a '-' occurs
// anywhere in that skipped run the result is negated.
// Returns 0 when end of input is reached before any digit is seen.
inline int read(){
    int out = 0, flag = 1;
    // Keep the result of getchar() in an int so EOF stays distinguishable
    // from every valid character value.
    int c = getchar();
    // Stop skipping at EOF: without this test the loop never terminates on
    // exhausted input, because EOF is below '0'.
    while (c != EOF && (c < '0' || c > '9')){
        if (c == '-') flag = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9'){
        out = out * 10 + (c - '0');
        c = getchar();
    }
    return out * flag;
}
// Segment-tree node pool: left child, right child and stored count per node.
int ls[maxm];
int rs[maxm];
int sum[maxm];
// rt[i] is the root of the tree version built for position i.
int rt[maxn];

// Sequence length, number of queries, window length, and nodes allocated so far.
int n;
int m;
int K;
int cnt;

// Input sequence (1-based) and a spare array of the same size.
int A[maxn];
int T[maxn];

// Prefix hashes of A.
uLL H[maxn];
void modify(int& u,int pre,uLL l,uLL r,uLL pos){
u = ++cnt; sum[u] = sum[pre] + 1; ls[u] = ls[pre]; rs[u] = rs[pre];
if (l == r) return;
uLL mid = l / 2 + r / 2;
if (mid >= pos) modify(ls[u],ls[pre],l,mid,pos);
else modify(rs[u],rs[pre],mid + 1,r,pos);
}
int query(int u,int v,uLL l,uLL r,uLL pos){
if (l == r) return sum[u] - sum[v];
uLL mid = l / 2 + r / 2;
if (mid >= pos) return query(ls[u],ls[v],l,mid,pos);
else return query(rs[u],rs[v],mid + 1,r,pos);
}
// Reads n, m, K and a sequence of n integers, then answers m queries.
// Each query gives a range and K values; it prints "No" when the hash of
// those K values matches the hash of some length-K window lying inside the
// range, and "Yes" otherwise.
int main(){
    n = read(); m = read(); K = read();
    REP(i,n) A[i] = read();

    // Prefix hashes in base 107; unsigned overflow acts as the modulus.
    REP(i,n) H[i] = H[i - 1] * 107 + A[i];
    uLL P = 1;
    REP(i,K) P *= 107;

    // Version i holds the hashes of every length-K window ending at or
    // before position i.
    for (int i = K; i <= n; i++)
        modify(rt[i], rt[i - 1], 0, inf, H[i] - H[i - K] * P);

    while (m--){
        // A window inside the range must end in [l, r].
        int l = read() + K - 1, r = read();

        // The pattern is always consumed so the input stays aligned.
        uLL val = 0;
        for (int i = 1; i <= K; i++) val = val * 107 + read();

        // A range shorter than K contains no window at all. Without this
        // guard the difference of counts could be negative (and so truthy),
        // and rt[l - 1] could be indexed past the end of rt.
        bool found = false;
        if (l <= r) found = query(rt[r], rt[l - 1], 0, inf, val) != 0;

        if (found) puts("No");
        else puts("Yes");
    }
    return 0;
}