# 字符串匹配算法

http://mindlee.com/2011/11/25/string-matching/
http://www.cnblogs.com/nullzx/p/7499397.html
http://www.matrix67.com/blog/archives/115
https://github.com/topics/string-search
http://blog.csdn.net/zhongwen7710/article/details/39274349

```/*

_________________________

*/
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

//朴素匹配算法
void NativeStringMatcher(const char *T, const char *P) {
int n = strlen(T);
int m = strlen(P);
for (int j, i = 0; i < n - m; i++) {
for (j = 0; j < m; j++) {
if (T[i + j] != P[j]) {
break;
}
}
if (j == m) {
printf("朴素算法，匹配位置是：%d\n", i + 1);
}
}
}

int main() {
const char *T = "2359023141526739921";
const char *P = "31415";
NativeStringMatcher(T, P);
return 0;
}
```
```/*

_________________________
t1 = 8
t2 = 9
t3 = 3
t4 = 11
t5 = 0
t6 = 1
t7 = 7

t8 = 8
t9 = 4
t10 = 5
t11 = 10
t12 = 11
t13 = 7

t14 = 9
*/
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<algorithm>
using namespace std;

//朴素匹配算法，用于RabinKarp调用
bool NativeStringMatcher(const char *T, const char *P) {
int n = strlen(T);
int m = strlen(P);
for (int j, i = 0; i < n - m; i++) {
for (j = 0; j < m; j++) {
if (T[i + j] != P[j]) {
break;
}
}
if (j == m) {
return true;
}
}
return false;
}

//RabinKarp算法
void RabinKarp(const char *T, const char *P, int d, int q) {
int n = strlen(T);
int m = strlen(P);

int h = 1;
for (int i = 0; i < m - 1; i++) {
h *= d;//pow可能会越界，所以用乘法
if (h >= q) {
h %= q;
}
}

int p = 0;
int t = 0;
for (int i = 0; i < m; i++) {
p = (d * p + (P[i] - '0')) % q;
t = (d * t + (T[i] - '0')) % q;
}

for (int i = 0; i < n - m; i++) {
printf("t%d = %d\n", i + 1, t);
if (p == t) {
if (NativeStringMatcher(T + i, P)) {
printf("匹配位置是：%d\n", NativeStringMatcher(T + i, P) + i);
} else {
printf("伪命中点：%d\n", i + 1);
}
}

if (i < n - m) {
t = (d * (t - h * (T[i] - '0')) + T[i + m] - '0') % q;
if (t < 0) {
t += q;
}
}
}
}

int main() {
const char *T = "2359023141526739921";
const char *P = "31415";
RabinKarp(T, P, 10, 13);
return 0;
}
```
```/*

————————————————

*/

#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
using namespace std;

//伪代码中的fail数组，用fail来表示
int fail[1000];

//预处理fail数组
void ComputePrefixFunction(char *P) {
int m = strlen(P);
memset(fail, 0, sizeof(fail));
fail[0] = 0;
int k = 0;
for (int i = 2; i <= m; i++) {
while (k > 0 && P[k] != P[i - 1]) {
k = fail[k - 1];
}
if (P[k] == P[i - 1]) {
k = k + 1;
}
fail[i - 1] = k;
}
}

void KMPMatcher(char *T, char *P) {
int n = strlen(T);
int m = strlen(P);

int q = 0;
for (int i = 1; i <= n; i++) {
while (q > 0 && P[q] != T[i - 1]) {
q = fail[q - 1];
}

if(P[q] == T[i - 1]) {
q = q + 1;
}

if(q == m) {
printf("匹配位置: %d\n", i - m + 1);
q = fail[q - 1];
}
}
}

int main() {
KMPMatcher("123451233211234561234", "12345");
return 0;
}
```
```//HDU 1251 代码，字典树模板
#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cmath>
#include<algorithm>
using namespace std;

const int MAXN = 100010, MAXM = 11, KIND = 26;
//小写字母->26 ，大小混写->52，大小写+数字->62
int m;
struct node {
char* s;
int prefix;
bool isword;
node* next[KIND];
void init() {
s = NULL;
prefix = 0;//前缀
isword = false;
memset(next, 0, sizeof(next));
}
}a[MAXN*MAXM], *root;//根

void insert(node *root, char *str) {//插入
node *p = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
p->s = str + i;
if (p->next[x] == NULL) {
a[m].init();
p->next[x] = &a[m++];
}
p = p->next[x];
p->prefix++;
}
p->isword = true;
}

bool del(node *root, char *str) {//删除
node *p = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
if (p->next[x] == NULL) {
return false;
}
p = p->next[x];
}//for(i)
if (p->isword) {
p->isword = false;
} else {
return false;
}
return true;
}

bool search(node *root, char* str) {//查找
node* p = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
if (p->next[x] == NULL) {
return false;
}
p = p->next[x];
}//for(i)
return p->isword;
}

int count(node *root, char *str) {//统计后缀
node *p = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
if (p->next[x] == NULL) {
return 0;
}
p = p->next[x];
}//for(i)
return p->prefix;
}

int main() {
m = 0;
a[m].init();
root = &a[m++];
char str[MAXM];

while (gets(str), strcmp(str, "")) {
insert(root, str);
}

while (gets(str)) {
printf("%d\n", count(root,str));
}
}
```
```#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cmath>
#include<algorithm>
using namespace std;
#define MAXN 10001
#define MAXM 51
#define KIND 26

struct node {
int prefix;
node *fail;
node *next[26];
void init() {
prefix = 0;
fail = NULL;
memset(next, 0, sizeof(next));
}
}*que[MAXN * MAXM], trie[MAXN * MAXM],  *root;

int cnt;
char keyword[MAXM];
char str[MAXN * 2];

void Insert(node *root, char *str) {
node *ptr = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
if (ptr->next[x] == NULL) {
trie[cnt].init();
ptr->next[x] = &trie[cnt++];
}
ptr = ptr->next[x];
}
ptr->prefix++;
}//insert

void Build(node *root) {
int head = 0, tail = 0;
root->fail = NULL;
node *tmp = que[tail++];
node *ptr = NULL;
for (int i = 0; i < KIND; i++) {
if (tmp->next[i] != NULL) {
if (tmp == root) {
tmp->next[i]->fail = root;
} else {
ptr = tmp->fail;
while (ptr != NULL) {
if (ptr->next[i] != NULL) {
tmp->next[i]->fail = ptr->next[i];
break;
}
ptr = ptr->fail;
}
if (ptr == NULL) {
tmp->next[i]->fail = root;
}
}//if_else
}//if
}//for(i)
}//Build

int Query(node *root, char *str) {
int ret = 0;
node *ptr = root;
for (int i = 0; str[i]; i++) {
int x = str[i] - 'a';
while (ptr->next[x] == NULL && ptr != root) {
ptr = ptr->fail;
}
ptr = ptr->next[x];
if (ptr == NULL) {
ptr = root;
}
node *tmp = ptr;
while (tmp != root && tmp->prefix != -1) {
ret += tmp->prefix;
tmp->prefix = -1;
tmp = tmp->fail;
}
}//for(i)
return ret;
}

int main() {
int cas, n;
scanf("%d", &cas);
while (cas--) {
// head = tail = 0;
cnt = 0;
trie[cnt].init();
root = &trie[cnt++];

scanf("%d%*c", &n);
while (n--) {
gets(keyword);
Insert(root, keyword);
}
Build(root);//构造自动机
scanf("%s", str);
printf("%d\n", Query(root, str));
}
return 0;
}
```