sunday字符串匹配算法_字符串匹配算法是怎么算的

㈠ KMP匹配算法

不懂得话，就自己跟上三四遍就好了，代码附上
有什么不懂的就问，不过还是尽量自己钻研的好
#include<iostream.h>
#include<string.h>
#include<stdlib.h>
const int maxLen = 128;
class String
{
int curLen; //串的当前长度
char *ch; //串的存储数组
public:
String (const String & ob);
String (const char *init);
String ();
~String ()
{
delete [] ch;
}
int Length () const
{
return curLen;
}
String *operator () ( int pos, int len );
int operator == ( const String &ob )const
{
return strcmp (ch, ob.ch) == 0;
}
int operator != ( const String &ob ) const
{
return strcmp (ch, ob.ch) != 0;
}
int operator !() const
{
return curLen == 0;
}
String &operator = (const String &ob);
String &operator += (const String &ob);
char &operator [] (int i);
int fastFind ( String pat ) const;
//void fail (const char *T,int* &f);
void fail (int* &f);
};
String::String ( const String &ob ) //复制构造函数：从已有串ob复制
{
ch = new char[maxLen+1];
if ( !ch )
{
cout << "Allocation Error\n";
exit(1);
}
curLen = ob.curLen;
strcpy ( ch, ob.ch );
}
String::String ( const char *init ) //复制构造函数: 从已有字符数组*init复制
{
ch = new char[maxLen+1];
if ( !ch )
{
cout << "Allocation Error\n";
exit(1);
}
curLen = strlen (init);
strcpy ( ch, init );
}
String::String ( )//构造函数：创建一个空串
{
ch = new char[maxLen+1];
if ( !ch )
{
cout << "Allocation Error\n";
exit(1);
}
curLen = 0;
ch[0] = '\0';
}
String *String::operator ( ) ( int pos, int len )//从串中第pos个位置起连续提取len个字符//形成子串返回

{
String *temp = new String;
if ( pos < 0 || pos+len -1 >= maxLen|| len < 0 ) //返回空串
{

temp->curLen = 0;
temp->ch[0] = '\0';
}
else //提取子串
{
//动态分配
if ( pos+len -1 >= curLen )
len = curLen - pos;
temp->curLen = len; //子串长度
for ( int i=0, j=pos; i<len; i++, j++ )
temp->ch[i] = ch[j]; //传送串数组
temp->ch[len] = '\0'; //子串结束
}
return temp;
}
String &String::operator = ( const String &ob )//串赋值：从已有串ob复制
{
if ( &ob != this )
{
delete [ ] ch;
ch = new char [maxLen+1]; //重新分配
if ( ! ch )
{
cerr << "out of memory!\n ";
exit (1);
}
curLen = ob.curLen; //串复制
strcpy ( ch, ob.ch );
}
else
cout << "Attempted assignment of a String to itself!\n";
return *this;
}
char &String::operator [] ( int i ) //按串名提取串中第i个字符
{
if ( i < 0 && i >= curLen )
{
cout << "Out Of Boundary!\n ";
exit (1) ;
}
return ch[i];
}
String &String::operator += ( const String &ob )
{ //串连接
char * temp =ch; //暂存原串数组
curLen += ob.curLen; //串长度累加
ch = new char [maxLen+1];
if ( ! ch )
{
cerr << "Out Of Memory!\n ";
exit (1) ;
}
strcpy ( ch, temp ); //拷贝原串数组
strcat ( ch, ob.ch ); //连接ob串数组
delete [ ] temp;
return *this;
}
int String :: fastFind ( String pat ) const //带失效函数的KMP匹配算法
{
int posP = 0, posT = 0;
int lengthP = pat.curLen, lengthT = curLen;
int *f=new int[lengthP];
memset(f,-1,lengthP);
pat.fail (f);
while ( posP < lengthP && posT < lengthT )
{
if ( pat.ch[posP] == ch[posT] )
{
posP++;
posT++; //相等继续比较
}
else if ( posP == 0 )
{
posT++;
}//不相等
else
{
posP = f[posP-1]+1;
}
}
delete []f;
if ( posP < lengthP )
return -1;
else
return posT - lengthP;
}
void String::fail (int* &f)//计算失效函数
{
int lengthP = curLen;
f[0] = -1; //直接赋值
for ( int j=1; j<lengthP; j++ ) //依次求f [j]
{
int i = f[j-1];
if ( *(ch+j) != *(ch+i+1) && i >= 0 )
i = f [i]; //递推
if ( *(ch+j) == *(ch+i+1) )
f [j] = i+1;
else
f [j] = -1;
}
}
/**/
void main()
{
int end;
cout<<"hello!\n";
String s1("acabaabaabcacaabc");
String s2=("abaabcac");
end=s1.fastFind(s2);
cout<<end<<endl;
}

㈡编写函数，该函数能在一个字符串中查找某个子串，并返回该子串首字出现的下标位置。

/*
Sunday-字符串匹配算法--一种优于KMP的算法
思想类似于BM算法，只不过是从左向右匹配
遇到不匹配的看大串中匹配范围之外的右侧第一个字符在小串中的最右位置
另外：采用BM/KMP的预处理的做法，事先计算好移动步长，等到遇到不匹配的值直接使用
*/
#include<iostream>
#include<cstdlib>
#include<stdio.h>
#include<time.h>
#include<string.h>
#include<windows.h>
usingnamespacestd;
//一个字符8位最大256种
#defineMAX_CHAR_SIZE256/*设定每个字符最右移动步长，保存每个字符的移动步长
如果大串中匹配字符的右侧一个字符没在子串中，大串移动步长=整个串的距离+1
如果大串中匹配范围内的右侧一个字符在子串中，大串移动距离=子串长度-这个字符在子串中的位置
*/
int*setCharStep(char*subStr)
{
int*charStep=newint[MAX_CHAR_SIZE];
intsubStrLen=strlen(subStr);
for(inti=0;i<MAX_CHAR_SIZE;i++)
charStep[i]=subStrLen+1;
//从左向右扫描一遍保存子串中每个字符所需移动步长
for(inti=0;i<subStrLen;i++)
{
charStep[(unsignedchar)subStr[i]]=subStrLen-i;
}
returncharStep;
}
/*
算法核心思想，从左向右匹配，遇到不匹配的看大串中匹配范围之外的右侧第一个字符在小串中的最右位置
根据事先计算好的移动步长移动大串指针，直到匹配
*/
intsundaySearch(char*mainStr,char*subStr,int*charStep)
{
intmainStrLen=strlen(mainStr);
intsubStrLen=strlen(subStr);
intmain_i=0;
intsub_j=0;
while(main_i<mainStrLen)
{
//保存大串每次开始匹配的起始位置，便于移动指针
inttem=main_i;
while(sub_j<subStrLen)
{
if(mainStr[main_i]==subStr[sub_j])
{
main_i++;
sub_j++;
continue;
}
else{
//如果匹配范围外已经找不到右侧第一个字符，则匹配失败
if(tem+subStrLen>mainStrLen)
return-1;
//否则移动步长重新匹配
charfirstRightChar=mainStr[tem+subStrLen];
main_i=tem+charStep[(unsignedchar)firstRightChar];
sub_j=0;
break;//退出本次失败匹配重新一轮匹配
}
}
if(sub_j==subStrLen)
returnmain_i-subStrLen;
}
return-1;
}
intmain()
{
		unsigneStartTime=GetTickCount();
		unsigneEndTime;
		//随机生成300位的二进制
		inti,j,t,k;
	charbins[1][300];//二维数组来存放
	srand((int)time(0));//种子，防止随机数据不变
		for(i=0;i<1;i++){
		for(j=0;j<300;j++){
		bins[i][j]='0'+rand()%2;//放入随机数
		}
		bins[i][j]=0;//字符串数组，所以最后一位''
		}
		char*mainStr=bins[0];
		printf("字符串
%s
",bins[0]);
char*subStr="010101";//需要查找的子字符串
printf("需要查找的字符串
010101
");
int*charStep=setCharStep(subStr);
cout<<"位置："<<sundaySearch(mainStr,subStr,charStep)<<endl;
uEndTime=GetTickCount();
printf("%umselapsed.
",uEndTime-uStartTime);
system("pause");
return0;
}

㈢ sunday 算法的介绍

Sunday算法是Daniel M.Sunday于1990年提出的字符串模式匹配。其核心思想是：在匹配过程中，模式串发现不匹配时，算法能跳过尽可能多的字符以进行下一步的匹配，从而提高了匹配效率。

㈣【算法笔记】字符串匹配

BF 算法中的 BF 是 Brute Force 的缩写，中文叫作暴力匹配算法，也叫朴素匹配算法：

主串和模式串：
在字符串 A 中查找字符串 B，那字符串 A 就是主串，字符串 B 就是模式串。我们把主串的长度记作 n，模式串的长度记作 m

我们在主串中，检查起始位置分别是 0、1、2…n-m 且长度为 m 的 n-m+1 个子串，看有没有跟模式串匹配的。

BF 算法的时间复杂度是 O(n*m)

等价于

比如匹配Google 和Goo 是最好时间复杂度，匹配Google 和ble是匹配失败的最好时间复杂度。

KMP算法是一种改进的字符串匹配算法，由D.E.Knuth与J.H.Morris和V.R.Pratt同时发现，因此人们称它为克努特—莫里斯—普拉特算法。KMP算法主要分为两个步骤：字符串的自我匹配，目标串和模式串之间的匹配。

看来网上很多的文章，感觉很多的都没有说清楚，这里直接复制阮一峰的内容，讲的很清晰
内容来自 http://www.ruanyifeng.com/blog/

首先，字符串"BBC ABCDAB ABCDABCDABDE"的第一个字符与搜索词"ABCDABD"的第一个字符，进行比较。因为B与A不匹配，所以搜索词后移一位。

因为B与A不匹配，搜索词再往后移。

就这样，直到字符串有一个字符，与搜索词的第一个字符相同为止。

接着比较字符串和搜索词的下一个字符，还是相同。

直到字符串有一个字符，与搜索词对应的字符不相同为止。

这时，最自然的反应是，将搜索词整个后移一位，再从头逐个比较。这样做虽然可行，但是效率很差，因为你要把"搜索位置"移到已经比较过的位置，重比一遍。

一个基本事实是，当空格与D不匹配时，你其实知道前面六个字符是"ABCDAB"。KMP算法的想法是，设法利用这个已知信息，不要把"搜索位置"移回已经比较过的位置，继续把它向后移，这样就提高了效率。

怎么做到这一点呢？可以针对搜索词，算出一张《部分匹配表》（Partial Match Table）。这张表是如何产生的，后面再介绍，这里只要会用就可以了。

已知空格与D不匹配时，前面六个字符"ABCDAB"是匹配的。查表可知，最后一个匹配字符B对应的"部分匹配值"为2，因此按照下面的公式算出向后移动的位数：

因为 6 - 2 等于4，所以将搜索词向后移动4位。

因为空格与C不匹配，搜索词还要继续往后移。这时，已匹配的字符数为2（"AB"），对应的"部分匹配值"为0。所以，移动位数 = 2 - 0，结果为 2，于是将搜索词向后移2位。

因为空格与A不匹配，继续后移一位。

逐位比较，直到发现C与D不匹配。于是，移动位数 = 6 - 2，继续将搜索词向后移动4位。

逐位比较，直到搜索词的最后一位，发现完全匹配，于是搜索完成。如果还要继续搜索（即找出全部匹配），移动位数 = 7 - 0，再将搜索词向后移动7位，这里就不再重复了。

下面介绍《部分匹配表》是如何产生的。

首先，要了解两个概念："前缀"和"后缀"。 "前缀"指除了最后一个字符以外，一个字符串的全部头部组合；"后缀"指除了第一个字符以外，一个字符串的全部尾部组合。

"部分匹配值"就是"前缀"和"后缀"的最长的共有元素的长度。以"ABCDABD"为例，

"部分匹配"的实质是，有时候，字符串头部和尾部会有重复。比如，"ABCDAB"之中有两个"AB"，那么它的"部分匹配值"就是2（"AB"的长度）。搜索词移动的时候，第一个"AB"向后移动4位（字符串长度-部分匹配值），就可以来到第二个"AB"的位置。

BM（Boyer-Moore）算法。它是一种非常高效的字符串匹配算法，有实验统计，它的性能是着名的KMP 算法的 3 到 4 倍。

BM 算法包含两部分，分别是坏字符规则（bad character rule）和好后缀规则（good suffix shift）

未完待续

参考文章：
字符串匹配的Boyer-Moore算法

㈤字符串匹配算法，最快的是哪种

目前在我遇到的字符串匹配算法中，最快的应该是sunday算法了。。
(BF、KMP、BM、sunday)

㈥ C++ 字符串最后出现的位置和取字符串左边的指定几个字符

''在s[]中最后出现的位置是:11

t[] = 123

Press any key to continue

#include<stdio.h>
#include<string.h>

//返回ch在s[]最后出现的位置。返回0表示在s[]中没有发现字符ch
intLastPos(chars[],charch){
	inti,pos=0;//
	for(i=0;s[i];++i)
		if(s[i]==ch)pos=i+1;
	returnpos;
}

//复制s[]中右边的n个字符到t[]中
char*RightStr(chars[],chart[],intn){
	inti,j,len=strlen(s);
	t[0]='';
	if(n>=len)strcpy(t,s);
	elseif(n>0&&n<len){
		i=len-n;
		j=0;
		while(t[j++]=s[i++])
			;
	}
	returnt;
}

intmain(){
	chars[]="C:\WINDOWS\123";
	chart[20];
	printf("'\'在s[]中最后出现的位置是:%d
",LastPos(s,'\'));
	printf("t[]=%s
",RightStr(s,t,3));
	return0;
}

㈦求C++字符串匹配的好方法，时间复杂度小于O（mn）

代码基本都清空了……懒的写……

一个叫Sunday匹配算法一个叫KMP匹配算法请自己网络吧........

㈧ sunday算法解析

例如我们要在"substring searching algorithm"查找"search"，刚开始时，把子
串与文本左边对齐，
substring searching algorithm
search
^
结果在第二个字符处发现不匹配，于是要把子串往后移动。但是该移动多少呢？这
就是各种算法各显神通的地方了，最简单的做法是移动一个字符位置；KMP是利用
已经匹配部分的信息来移动；BM算法是做反向比较，并根据已经匹配的部分来确定
移动量。这里要介绍的方法是看紧跟在当前子串之后的那个字符（上图中的'i'。
显然，不管移动多少，这个字符是肯定要参加下一步的比较的，也就是说，如果下
一步匹配到了，这个字符必须在子串内。所以，可以移动子串，使子串中的最右边
的这个字符与它对齐。现在子串'search'中并不存在'i'，则说明可以直接跳过一
大片，从'i'之后的那个字符开始作下一步的比较，如下图：
substring searching algorithm
search
^
比较的结果，第一个字符就不匹配，再看子串后面的那个字符，是'r',它在子串中
出现在倒数第三位，于是把子串向前移动三位，使两个'r'对齐，如下：
substring searching algorithm
search

㈨编写一个方法，输出在一个字符串中，指定字符串输出的次数

这样写目测应该可以不过复杂度显然是最高的（n*m）

你应该去看看字符串匹配算法比如Sunday和Kmp（m+n）

㈩字符串匹配算法是怎么算的

这是一个毕业老师出的字符串的算法的题目！这是答案可以参考一下！ boyermoore算法的sample程序 TCHAR * BoyerMooreSearch(TCHAR *sSrc, TCHAR *sFind) { // // 声明： // 该段代码只是BoyerMoore(名字也许不准确) 的基本思想，当 // 然不是最优的，具体完善工作就留给你自己乐！嘻嘻。 // 该算法的本质就是从字符串的右端而不是左端开始比较，这 // 样，当查询不匹配时才有可能直接跃过多个字符（最多可以跃过 // strlen(sFind)个字符），如果最右边的字符匹配则回溯。比如： // // pain // ^ 这是第一次比较n和空格比 // The rain in SpainThe rain in Spain // // pain // ^ 这是第二次比较，好爽呀！ // The rain in SpainThe rain in Spain // // 当然，这样比较会产生一些问题，比如： // // pain // ^ （图1） // The rain in SpainThe rain in Spain // // 如果比较到这儿，大家都会看到，只需再向后移到两个字符 // 就匹配成功了，但如果接下去还按上面的方法跳strlen( sFind)的 // 话，就会错过一次匹配！！！！！ // // pain // ^ // The rain in SpainThe rain in Spain // // 怎么办？当然可以解决！大家回头看图1，当时a是pain的子 // 串，说明有可能在不移动strlen(sFind) 的跨度就匹配成功，那就 // 人为地给它匹配成功的机会嘛！串一下pain串，直接让两个a对齐 // 再做比较！呵呵，如果要比较的字符不是pain的子串，当然就可 // 以直接跨过strlen(sFind)个字符了！不知我说明白没？ // // // 查询串的长度 int nLenOfFind = lstrlen(sFind); // 被查询串的长度 int nLenOfSrc = lstrlen(sSrc); // 指向查询串最后一个字符的指针 TCHAR * pEndOfFind = sFind + nLenOfFind -1; // 指向被查询串最后一个字符的指针 TCHAR * pEndOfSrc = sSrc + nLenOfSrc -1; // 在比较过程中要用到的两个指针 TCHAR * pSrc = sSrc; TCHAR * pFind; // 总不能一直让它比较到 win.com 文件的地址去吧？嘻嘻！ while ( pSrc <= pEndOfSrc ) { // 每次匹配都是从右向左，这是本算法的核心。 pFind = pEndOfFind; // 如果比较不成功，被查询串指针将向右串的字符数 int nMoveRightSrc; // 比较被查询串的当前字符是否和查询串的最右边字 // 符匹配，如果匹配则回溯比较，如果全匹配了，该 // 干什么，我就不用说了吧？:-) while ( pFind >= sFind ) { // TNND，白废功夫比了！看看需要向右移动几个 // 字符吧（如果说从右到左是本算法的核心，则 // 判断向右移几个字符则是本算法的技巧）。 if ( *pSrc != *pFind ) { // 被查询串的当前字符是否在查询串里？ TCHAR * p = strrchr( sFind, *pSrc ); // 没在，直接移lstrlen(sFind)个字符 if ( NULL == p ) nMoveRightSrc = nLenOfFind; else // 哇塞！真的在，那就只需... nMoveRightSrc = pEndOfFind - p; break; } // 哈！又匹配成功了一个！接着向左回溯... pFind --; pSrc --; } // 如果在上面的while循环里每一次比较都匹配了 // 那就对了呗！告诉用户找到了 if ( pFind < sFind ) return ( pSrc + 1 ); // 没匹配成功，nMoveRightSrc上面已经算好了 // 直接用就可以了。 pSrc += nMoveRightSrc; } // 程序运行到这儿肯定是没指望了！ return NULL; } 行了，函数写完了，我们可以试一下了！ void CTNNDDlg::OnButton1() { TCHAR sSrc[] = "The rain in Spain"; TCHAR sFind[]= "pain"; TCHAR * pFound = BoyerMooreSearch( sSrc, sFind ); if ( pFound ) MessageBox(pFound); else MessageBox("没找到"); } //另外一个 void preBmBc(char *x, int m, int bmBc[]) { int i; for (i = 0; i < ASIZE; ++i) bmBc[i] = m; for (i = 0; i < m - 1; ++i) bmBc[x[i]] = m - i - 1; } void suffixes(char *x, int m, int *suff) { int f, g, i; suff[m - 1] = m; g = m - 1; for (i = m - 2; i >= 0; --i) { if (i > g && suff[i + m - 1 - f] < i - g) suff[i] = suff[i + m - 1 - f]; else { if (i < g) g = i; f = i; while (g >= 0 && x[g] == x[g + m - 1 - f]) --g; suff[i] = f - g; } } } void preBmGs(char *x, int m, int bmGs[]) { int i, j, suff[XSIZE]; suffixes(x, m, suff); for (i = 0; i < m; ++i) bmGs[i] = m; j = 0; for (i = m - 1; i >= -1; --i) if (i == -1 || suff[i] == i + 1) for (; j < m - 1 - i; ++j) if (bmGs[j] == m) bmGs[j] = m - 1 - i; for (i = 0; i <= m - 2; ++i) bmGs[m - 1 - suff[i]] = m - 1 - i; } void BM(char *x, int m, char *y, int n) { int i, j, bmGs[XSIZE], bmBc[ASIZE]; /* Preprocessing */ preBmGs(x, m, bmGs); preBmBc(x, m, bmBc); /* Searching */ j = 0; while (j <= n - m) { for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i); if (i < 0) { OUTPUT(j); j += bmGs[0]; } else j += MAX(bmGs[i], bmBc[y[i + j]] - m + 1 + i); } }

导航:首页 > 源码编译 > sunday字符串匹配算法

sunday字符串匹配算法

与sunday字符串匹配算法相关的资料