字符串匹配算法

http://mindlee.com/2011/11/25/string-matching/
http://www.cnblogs.com/nullzx/p/7499397.html
http://www.matrix67.com/blog/archives/115
https://github.com/topics/string-search
http://blog.csdn.net/zhongwen7710/article/details/39274349

/*
运行结果:
_________________________
朴素算法,匹配位置是:7
*/
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
 
//朴素匹配算法
void NativeStringMatcher(const char *T, const char *P) {
    int n = strlen(T);
    int m = strlen(P);
    for (int j, i = 0; i < n - m; i++) {
        for (j = 0; j < m; j++) {
            if (T[i + j] != P[j]) {
                break;
            }
        }
        if (j == m) {
            printf("朴素算法,匹配位置是:%d\n", i + 1);
        }
    }
}
 
int main() {
    const char *T = "2359023141526739921";
    const char *P = "31415";
    NativeStringMatcher(T, P);
    return 0;
}
/*
运行结果:
_________________________
t1 = 8
t2 = 9
t3 = 3
t4 = 11
t5 = 0
t6 = 1
t7 = 7
匹配位置是:7
t8 = 8
t9 = 4
t10 = 5
t11 = 10
t12 = 11
t13 = 7
伪命中点:13
t14 = 9
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<algorithm>
using namespace std;
 
//朴素匹配算法,用于RabinKarp调用
bool NativeStringMatcher(const char *T, const char *P) {
    int n = strlen(T);
    int m = strlen(P);
    for (int j, i = 0; i < n - m; i++) {
        for (j = 0; j < m; j++) {
            if (T[i + j] != P[j]) {
                break;
            }
        }
        if (j == m) {
            return true;
        }
    }
    return false;
}
 
//RabinKarp算法
void RabinKarp(const char *T, const char *P, int d, int q) {
    int n = strlen(T);
    int m = strlen(P);
 
    int h = 1;
    for (int i = 0; i < m - 1; i++) {
        h *= d;//pow可能会越界,所以用乘法
        if (h >= q) {
            h %= q;
        }
    }
 
    int p = 0;
    int t = 0;
    for (int i = 0; i < m; i++) {
        p = (d * p + (P[i] - '0')) % q;
        t = (d * t + (T[i] - '0')) % q;        
    }
 
    for (int i = 0; i < n - m; i++) {
        printf("t%d = %d\n", i + 1, t);
        if (p == t) {
            if (NativeStringMatcher(T + i, P)) {
                printf("匹配位置是:%d\n", NativeStringMatcher(T + i, P) + i);
            } else {
                printf("伪命中点:%d\n", i + 1);
            }
        } 
 
        if (i < n - m) {
            t = (d * (t - h * (T[i] - '0')) + T[i + m] - '0') % q;
            if (t < 0) {
                t += q;
            }
        }
    }
}
 
int main() {
    const char *T = "2359023141526739921";
    const char *P = "31415";
    RabinKarp(T, P, 10, 13);
    return 0;
}
/*
运行结果:
————————————————
匹配位置: 1
匹配位置: 12
*/
 
#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
using namespace std;
 
//伪代码中的fail数组,用fail来表示
int fail[1000];
 
//预处理fail数组
void ComputePrefixFunction(char *P) {
    int m = strlen(P);
    memset(fail, 0, sizeof(fail));
    fail[0] = 0;
    int k = 0;
    for (int i = 2; i <= m; i++) {
        while (k > 0 && P[k] != P[i - 1]) {
            k = fail[k - 1];
        }
        if (P[k] == P[i - 1]) {
            k = k + 1;
        }
        fail[i - 1] = k;
    }
}
 
void KMPMatcher(char *T, char *P) {
    int n = strlen(T);
    int m = strlen(P);
 
    int q = 0;
    for (int i = 1; i <= n; i++) {
        while (q > 0 && P[q] != T[i - 1]) {
            q = fail[q - 1];
        }
 
        if(P[q] == T[i - 1]) {
            q = q + 1;
        }
 
        if(q == m) {
            printf("匹配位置: %d\n", i - m + 1);
            q = fail[q - 1];
        }
    }
}
 
int main() {
    KMPMatcher("123451233211234561234", "12345");
    return 0;
}
//HDU 1251 代码,字典树模板
#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cmath>
#include<algorithm>
using namespace std;
 
const int MAXN = 100010, MAXM = 11, KIND = 26;
//小写字母->26 ,大小混写->52,大小写+数字->62
int m;
struct node {
    char* s;
    int prefix;
    bool isword;
    node* next[KIND];
    void init() {
        s = NULL;
        prefix = 0;//前缀
        isword = false;
        memset(next, 0, sizeof(next));
    }
}a[MAXN*MAXM], *root;//根
 
void insert(node *root, char *str) {//插入
    node *p = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        p->s = str + i;
        if (p->next[x] == NULL) {
            a[m].init();
            p->next[x] = &a[m++];
        }
        p = p->next[x];
        p->prefix++;
    }
    p->isword = true;
}
 
bool del(node *root, char *str) {//删除
    node *p = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        if (p->next[x] == NULL) {
            return false;
        }
        p = p->next[x];
    }//for(i)
    if (p->isword) {
        p->isword = false;
    } else {
        return false;
    }
    return true;
}
 
bool search(node *root, char* str) {//查找
    node* p = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        if (p->next[x] == NULL) {
            return false;
        }
        p = p->next[x];
    }//for(i)
    return p->isword;
}
 
int count(node *root, char *str) {//统计后缀
    node *p = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        if (p->next[x] == NULL) {
            return 0;
        }
        p = p->next[x];
    }//for(i)
    return p->prefix;
}
 
int main() {
    m = 0;
    a[m].init();
    root = &a[m++];
    char str[MAXM];
 
    while (gets(str), strcmp(str, "")) {
        insert(root, str);
    }
 
    while (gets(str)) {
        printf("%d\n", count(root,str));
    }
}
#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cmath>
#include<algorithm>
using namespace std;
#define MAXN 10001
#define MAXM 51
#define KIND 26
 
struct node {
    int prefix;
    node *fail;
    node *next[26];
    void init() {
        prefix = 0;
        fail = NULL;
        memset(next, 0, sizeof(next));
    }
}*que[MAXN * MAXM], trie[MAXN * MAXM],  *root;
 
int cnt;
char keyword[MAXM];
char str[MAXN * 2];
 
void Insert(node *root, char *str) {
    node *ptr = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        if (ptr->next[x] == NULL) {
            trie[cnt].init();
            ptr->next[x] = &trie[cnt++];
        }
        ptr = ptr->next[x];
    }
    ptr->prefix++;
}//insert
 
void Build(node *root) {
    int head = 0, tail = 0;
    root->fail = NULL;
    que[head++] = root;
    while (head != tail) {
        node *tmp = que[tail++];
        node *ptr = NULL;
        for (int i = 0; i < KIND; i++) {
            if (tmp->next[i] != NULL) {
                if (tmp == root) {
                    tmp->next[i]->fail = root;
                } else {
                    ptr = tmp->fail;
                    while (ptr != NULL) {
                        if (ptr->next[i] != NULL) {
                            tmp->next[i]->fail = ptr->next[i];
                            break;
                        }
                        ptr = ptr->fail;
                    }
                    if (ptr == NULL) {
                        tmp->next[i]->fail = root;
                    }
                }//if_else
                que[head++] = tmp->next[i];
            }//if
        }//for(i)
    }//while (head != tail) 
}//Build
 
int Query(node *root, char *str) {
    int ret = 0;
    node *ptr = root;
    for (int i = 0; str[i]; i++) {
        int x = str[i] - 'a';
        while (ptr->next[x] == NULL && ptr != root) {
            ptr = ptr->fail;
        }
        ptr = ptr->next[x];
        if (ptr == NULL) {
            ptr = root;
        }
        node *tmp = ptr;
        while (tmp != root && tmp->prefix != -1) {
            ret += tmp->prefix;
            tmp->prefix = -1;
            tmp = tmp->fail;
        }
    }//for(i)
    return ret;
}
 
 
int main() {
    int cas, n;
    scanf("%d", &cas);
    while (cas--) {
        // head = tail = 0;
        cnt = 0;
        trie[cnt].init();
        root = &trie[cnt++];
 
        scanf("%d%*c", &n);
        while (n--) {
            gets(keyword);
            Insert(root, keyword);
        }
        Build(root);//构造自动机
        scanf("%s", str);
        printf("%d\n", Query(root, str));
    }
    return 0;
}

发表评论

电子邮件地址不会被公开。 必填项已用*标注