码迷,mamicode.com
首页 > 其他好文 > 详细

字符串hash

时间:2018-03-04 14:44:57      阅读:168      评论:0      收藏:0      [点我收藏+]

标签:查找   ble   字符串   bug   using   read   cpp   log   void   

似乎没写过多少字符串hash
今天补一补

字符串hash重要思想就是把字符串看做一个N进制大整数,进行取模后直接比较
这样子做的优劣很直观:很快很简单,也有取模后蜜汁碰撞的风险

对于i位置的hash值,可以这样求:

    // Prefix hash: H[i] is built from H[i - 1] by shifting one base-p digit and adding s[i],
    // so H[i] encodes s[1..i] as a base-p number (assumes H[0] == 0 and 1-based s -- confirm at the use site).
    for (int i = 1; i <= n; i++) H[i] = H[i - 1] * p + s[i];

我们要取出子串[l,r]的hash值时,显然就是\(H[r] - H[l - 1] * p^{r - l + 1}\)

来道【正解SAM】的例题:
最长公共子串
当然对串a建SAM,用串b在上边匹配就可以了

SAM太深奥了,我们来看看简单暴力的字符串hash
我们二分长度len,对A串的所有位置的长度为len的hash排序,拿B串所有位置长度为len的hash去查找
复杂度\(O(n\log^2 n)\)【似乎SAM接近\(O(n)\)?】

#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>
// Shorthand for long long (not used by the code shown in this program).
#define LL long long int
// Walks a linked edge list: starts at h[u] and follows ed[k].nxt until it reaches 0.
// h and ed are not declared in this program, so the macro is unused here.
#define Redge(u) for (int k = h[u]; k; k = ed[k].nxt)
// Counts i from 1 to n inclusive (not used by the code shown in this program).
#define REP(i,n) for (int i = 1; i <= (n); i++)
// Unsigned type used for all hash values; overflow wraps, so hashes are implicitly
// reduced modulo 2^(bit width of unsigned long long).
#define ULL unsigned long long int
using namespace std;
// maxn sizes the string buffers and hash arrays below; maxm and INF are not used
// by the code shown in this program.
const int maxn = 200005,maxm = 100005,INF = 1000000000;
// Reads one decimal integer from stdin.
// Every character before the first digit is skipped; a '-' among the skipped
// characters makes the result negative. Digits are then consumed until the
// first non-digit (that character is consumed as well).
// Returns 0 if end of input is hit before any digit is seen.
inline int read(){
    int out = 0,flag = 1;
    int c = getchar();  // int rather than char so EOF stays distinguishable from data
    while (c < '0' || c > '9'){
        if (c == EOF) return 0;  // without this the skip loop would never terminate at EOF
        if (c == '-') flag = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9'){
        out = out * 10 + (c - '0');
        c = getchar();
    }
    return out * flag;
}
// The two input strings; main stores them starting at index 1.
char A[maxn],B[maxn];
// lena / lenb: lengths of A and B. n: number of hashes currently stored in b (reset by check).
int lena,lenb,n;
// Prefix hashes in base 27: Ha[i] / Hb[i] cover the first i characters of A / B.
ULL Ha[maxn],Hb[maxn];
// Scratch buffer for check(): hashes of A's fixed-length windows live in b[1..n].
ULL b[maxn];
bool check(int len){
    n = 0;
    ULL P = 1;
    for (int i = 1; i <= len; i++) P *= 27;
    for (int i = len; i <= lena; i++) b[++n] = Ha[i] - Ha[i - len] * P;
    sort(b + 1,b + 1 + n);
    for (int i = len; i <= lenb; i++){
        ULL temp = Hb[i] - Hb[i - len] * P;
        if (b[lower_bound(b + 1,b + 1 + n,temp) - b] == temp) return true;
    }
    return false;
}
int main(){
    scanf("%s",A + 1); lena = strlen(A + 1);
    scanf("%s",B + 1); lenb = strlen(B + 1);
    for (int i = 1; i <= lena; i++) Ha[i] = Ha[i - 1] * 27 + A[i];
    for (int i = 1; i <= lenb; i++) Hb[i] = Hb[i - 1] * 27 + B[i];
    int l = 0,r = min(lena,lenb),mid;
    while (l < r){
        mid = l + r + 1 >> 1;
        if (check(mid)) l = mid;
        else r = mid - 1;
    }
    printf("%d\n",l);
    return 0;
}

BZOJ3207
此题K很小,我们用上hash之后,每个位置就对应一个hash值,问题就转化为了一个区间内是否存在某个值,用可持久化线段树就可以了

#include<iostream>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
// Shorthand for long long (not used by the code shown in this program).
#define LL long long int
// Counts i from 1 to n inclusive.
#define REP(i,n) for (int i = 1; i <= (n); i++)
// Walks a linked edge list from h[u] via ed[k].nxt; h and ed are not declared
// in this program, so the macro is unused here.
#define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
// Debug helper: prints s[1..n] separated by spaces, then a newline.
// Uses plain ASCII quotes (the typographic ones did not compile) and is wrapped
// in braces so both statements stay together when used under an if/for.
#define BUG(s,n) { for (int i = 1; i <= (n); i++) cout<<s[i]<<' '; puts(""); }
// Largest unsigned 64-bit value: upper end of the hash range covered by the segment tree.
#define inf 18446744073709551615ULL
// Unsigned type used for hash values; overflow wraps.
#define uLL unsigned long long int
using namespace std;
// maxn bounds the per-position arrays; maxm is the size of the segment tree node pool.
const int maxn = 100010,maxm = 8000005;
// Reads one decimal integer from stdin.
// Every character before the first digit is skipped; a '-' among the skipped
// characters makes the result negative. Digits are then consumed until the
// first non-digit (that character is consumed as well).
// Returns 0 if end of input is hit before any digit is seen.
inline int read(){
    int out = 0,flag = 1;
    int c = getchar();  // int rather than char so EOF stays distinguishable from data
    while (c < '0' || c > '9') {
        if (c == EOF) return 0;  // without this the skip loop would never terminate at EOF
        if (c == '-') flag = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') {
        out = out * 10 + (c - '0');
        c = getchar();
    }
    return out * flag;
}
// Segment tree node pool: ls / rs are the child indices and sum the number of
// inserted values under each node. Index 0 is never allocated (cnt is
// pre-incremented) and serves as the empty tree. rt[i] is the root of the
// version built after processing position i.
int ls[maxm],rs[maxm],sum[maxm],rt[maxn];
// n: sequence length, m: number of queries, K: window length, cnt: nodes allocated so far.
int n,m,K,cnt;
// A: the input sequence (1-based). T is not used by the code shown here.
int A[maxn],T[maxn];
// Base-107 prefix hashes of A; H[i] covers A[1..i].
uLL H[maxn];
void modify(int& u,int pre,uLL l,uLL r,uLL pos){
    u = ++cnt; sum[u] = sum[pre] + 1; ls[u] = ls[pre]; rs[u] = rs[pre];
    if (l == r) return;
    uLL mid = l / 2 + r / 2;
    if (mid >= pos) modify(ls[u],ls[pre],l,mid,pos);
    else modify(rs[u],rs[pre],mid + 1,r,pos);
}
int query(int u,int v,uLL l,uLL r,uLL pos){
    if (l == r) return sum[u] - sum[v];
    uLL mid = l / 2 + r / 2;
    if (mid >= pos) return query(ls[u],ls[v],l,mid,pos);
    else return query(rs[u],rs[v],mid + 1,r,pos);
}
// Reads n, m, K and a sequence of n integers, then answers m queries. Each query
// gives an interval and K integers; the program prints "No" if that length-K
// pattern (compared by hash) occurs fully inside the interval, otherwise "Yes".
int main(){
    n = read(); m = read(); K = read();
    REP(i,n) A[i] = read();
    // Base-107 prefix hashes; unsigned wrap-around acts as the modulus.
    REP(i,n) H[i] = H[i - 1] * 107 + A[i];
    uLL P = 1; REP(i,K) P *= 107;  // 107^K, used to cut a window out of the prefix hashes
    // Version i of the tree contains the hashes of all length-K windows ending at or before i.
    for (int i = K; i <= n; i++)
        modify(rt[i],rt[i - 1],0,inf,H[i] - H[i - K] * P);
    while (m--){
        // l is the earliest position at which a window lying inside the interval can end.
        int l = read() + K - 1,r = read();
        uLL val = 0;
        for (int i = 1; i <= K; i++) val = val * 107 + read();
        // An interval shorter than K (l > r) cannot contain the pattern. Without
        // the guard, rt[l - 1] is a later version than rt[r], the difference can
        // be negative, and a negative count would be treated as "found".
        const bool found = l <= r && query(rt[r],rt[l - 1],0,inf,val) > 0;
        puts(found ? "No" : "Yes");
    }
    return 0;
}

字符串hash

标签:查找   ble   字符串   bug   using   read   cpp   log   void   

原文地址:https://www.cnblogs.com/Mychael/p/8504433.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!