串的模式匹配

    技术2022-05-19  18

    还是算法有意思,感觉就是不一样,今天搞了串的匹配这一节。

     

    串的匹配算法主要有两种:1.Brute-Force算法

    2. KMP算法 D.E.Knuth  J.H.Morris  V.R.Pratt以及其改进的形式

     

    实现代码如下:

     

    #include <cstring>

    #include <iostream>

     

    #define MaxSize 100

     

    typedef std::string SqString;

     

    //Brute-Force算法

    int index(SqString s,SqString t)

    {

             unsigned int i=0,j,k;

             while(i<(int)s.length())

             {

                       for(j=i,k=0;j<s.length() && k<t.length() && s.at(j)==t.at(k);j++,k++);

                       if(k==t.length())

                                return i;

                       i++;

             }

             return -1;

    }

     

    //Brute-Force算法另一种实现

    int index1(SqString s,SqString t)

    {

             int i=0,j=0,k;

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(s[i]==t[j])

                       {

                                i++;

                                j++;

                       }

                       else

                       {

                                i=i-j+1;

                                j=0;

                       }

             }

     

             if(j>=(int)t.length())

                       k=i-t.length();

             else

                       k=-1;

             return k;

    }

     

    //KMP算法 D.E.Knuth J.H.Morris V.R.Pratt

    void GetNext(SqString t,int next[])

    {

             int j,k;

             j=0;k=-1;next[0]=-1;

             while(j<(int)t.length()-1)

             {

                       if(k==-1 || t[j]==t[k])

                       {

                                j++;k++;

                           next[j]=k;

                       }

                       else k=next[k];

             }

    }

     

    int KMPIndex(SqString s,SqString t)

    {

             int next[MaxSize],i=0,j=0,v;

             GetNext(t,next);

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(j==-1 || s[i]==t[j])

                       { i++;j++;}

                       else j=next[j];                   //i不变,j后退

             }

             if (j>=t.length())

                       v=i-t.length();

             else

                       v=-1;

             return v;

    }

     

     

    //改进的KMP算法

    void GetNextval(SqString t,int nextval[])

    {

             int j=0,k=-1;

             nextval[0]=-1;

             while(j<(int)t.length())

             {

                       if(k==-1 || t[j]==t[k])

                       {

                                j++;k++;

                                if(t[j]!=t[k]) nextval[j]=k;

                                else nextval[j]=nextval[k];

                       }

                       else k=nextval[k];

             }

    }

     

    int KMPIndex1(SqString s,SqString t)

    {

             int nextval[MaxSize],i=0,j=0,v;

             GetNextval(t,nextval);

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(j==-1 || s[i]==t[j])

                       {

                                i++;j++;

                       }

                       else j=nextval[j];

             }

             if(j>=t.length()) v=i-t.length();

             else v=-1;

             return v;

    }

     

    //在串str中查找子串substr最后一次出现的位置(不适用任何字符串标准函数)

    int indexpos(SqString str,SqString substr)

    {

             int i,j,k,idx=-1;

             for(i=0;i<(int)str.length();i++)

             {

                       for(j=i,k=0;str[j]==substr[k];j++,k++);//Brute-Force算法部分

                       if(k==(int)substr.length())

                                idx=i;

             }

             return idx;

    }

     

    //设计一个算法int like(SqString str,SqString pattern),实现以下四种匹配

    int like(SqString str,SqString pattern)

    {

             int i,j,k;

             for(i=0;i<str.length();i++)

             {

                       j=i;k=0;

                       while(1)

                       {

                                if(str[j]==pattern[k] || pattern[k]=='?'

                                         || pattern[k]=='_')

                                {

                                         j++;k++;

                                }

                                else if(pattern[k]=='//' && str[j]==pattern[k+1])  // '/'非法的字符

                                {

                                         j++;k=k+2;

                                }

                                else break;

                                if(k==pattern.length())

                                         return i;

                       }

             }

             return -1;

    }

     

    //求串s中出现的第一个最长重复子串的下标和长度

    SqString smaxsubstr(SqString s)

    {

             int index=0,length=0,length1,i=0,j,k;

             SqString t;

             while(i<(int)s.length())

             {

                       j=i+1;

                       while(j<(int)s.length())

                       {

                                if(s[i]==s[j])

                                {

                                         length1=1;

                                         for(k=1;s[i+k]==s[j+k];k++)

                                                   length1++;

                                         if(length1>length)

                                         {

                                                   index=i;

                                                   length=length1;

                                         }

                                         //j+=length1;                    //没有考虑到几个字符相同的情况

                                         j++;

                                }

                                else

                                         j++;

                       }

                       i++;

             }

     

             //测试部分

             std::cout<<"position: "<<index<<" length: "<<length<<std::endl;

             t.resize(length);

             for(i=0;i<length;i++)

                       t[i]=s[index+i];

             return t;

    }

     

    //我自己题意理解错了,认为是具有相同字符才算是重复子串,得到的算法如下:

    void maxsubstr(SqString s,int& lastpos,int& lastlen)

    {

             int pos=0;

             int len=1;

             lastpos=pos;lastlen=len;

            

             for(int i=1; i<(int)s.length();i++)

             {

                       if(s[i]==s[i-1] && i<(int)s.length())

                                len++;

                       else

                       {

                                if(len>lastlen)

                                {

                                         lastlen=len;

                                         lastpos=pos;

                                }

                                pos=i;

                                len=1;

                       }

             }

    }

     

    //两个字符串st的长度分别为m,n,求这两个字符串最大公共子串

    //算法的时间复杂度T(m,n)=O(m*n)

    //估算最优的T(m,n)=O(m+n),采用KMP进行模式匹配

    SqString maxcomstr(SqString s,SqString t)

    {

             SqString str;

             int midx=0,mlen=0,tlen,i=0,j,k;

             while(i<(int)s.length())

             {

                       j=0;

                       while(j<(int)t.length())

                       {

                                if(s[i]==t[j])

                                {

                                         tlen=1;

                                         for(k=1;(i+k)<(int)s.length() && (j+k)<(int)t.length()

                                                   && (s[i+k]==t[j+k]);k++)

                                                   tlen++;

                                         if(tlen>mlen)

                                         {

                                                   midx=i;mlen=tlen;

                                         }

                                         //j+=tlen;

                                         j++;

                                }

                                else j++;

                       }

                       i++;

             }

     

             for(i=0;i<mlen;i++)

                       //str[i]=s[midx+i];

                       std::cout<<s[midx+i];

             std::cout<<std::endl;

             str.resize(mlen);

             return str;

    }

     

    int main()

    {

             //SqString main="aababddfabhenidfhdshfjshfjkhsdkj";

             //SqString pattern="abdd";

     

             //SqString main="abcaabbcaaabababaabca";

             //SqString pattern="babab";

     

             //SqString main="a nice string";

             //SqString pattern="?nice?str_ng";

     

             SqString main="aaaabaaaaab";

     

             SqString main1="hellaaabaaabss";

             SqString qq;

     

             int pos,len;

       

             qq=maxcomstr(main,main1);

     

             //maxsubstr(main,pos,len);

             //std::cout<<"position: "<<pos<<" length: "<<len<<std::endl;

     

             //int match;

             //int intenger=-1;

             //unsigned int unintenger=9;

             //std::cout<<(intenger<unintenger)<<std::endl;            注意int unsigned int的比较

             //pos=index(main,pattern);

             //pos=index1(main,pattern);

             //pos=KMPIndex(main,pattern);

             //pos=KMPIndex1(main,pattern);

             //std::cout<<pos<<std::endl;

     

             //pos=indexpos(main,pattern);

     

        //qq=smaxsubstr(main);

             /*match=like(main,pattern);

             if(match==-1)

                       printf("fail!/n");

             else

                       printf("successful!/n");*/

     

             /*

             if(pos>0)

                       printf("the position is: %d/n",pos);

             else

                       printf("fail!/n");*/

             return 1;

    }

     

    PS:数据结构那本书上的例子代码有的有问题,我都改过来了,并且注明了。

    还是算法有意思,感觉就是不一样,今天搞了串的匹配这一节。

     

    串的匹配算法主要有两种:1.Brute-Force算法

    2. KMP算法 D.E.Knuth  J.H.Morris  V.R.Pratt以及其改进的形式

     

    实现代码如下:

     

    #include <cstring>

    #include <iostream>

     

    #define MaxSize 100

     

    typedef std::string SqString;

     

    //Brute-Force算法

    int index(SqString s,SqString t)

    {

             unsigned int i=0,j,k;

             while(i<(int)s.length())

             {

                       for(j=i,k=0;j<s.length() && k<t.length() && s.at(j)==t.at(k);j++,k++);

                       if(k==t.length())

                                return i;

                       i++;

             }

             return -1;

    }

     

    //Brute-Force算法另一种实现

    int index1(SqString s,SqString t)

    {

             int i=0,j=0,k;

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(s[i]==t[j])

                       {

                                i++;

                                j++;

                       }

                       else

                       {

                                i=i-j+1;

                                j=0;

                       }

             }

     

             if(j>=(int)t.length())

                       k=i-t.length();

             else

                       k=-1;

             return k;

    }

     

    //KMP算法 D.E.Knuth J.H.Morris V.R.Pratt

    void GetNext(SqString t,int next[])

    {

             int j,k;

             j=0;k=-1;next[0]=-1;

             while(j<(int)t.length()-1)

             {

                       if(k==-1 || t[j]==t[k])

                       {

                                j++;k++;

                           next[j]=k;

                       }

                       else k=next[k];

             }

    }

     

    int KMPIndex(SqString s,SqString t)

    {

             int next[MaxSize],i=0,j=0,v;

             GetNext(t,next);

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(j==-1 || s[i]==t[j])

                       { i++;j++;}

                       else j=next[j];                   //i不变,j后退

             }

             if (j>=t.length())

                       v=i-t.length();

             else

                       v=-1;

             return v;

    }

     

     

    //改进的KMP算法

    void GetNextval(SqString t,int nextval[])

    {

             int j=0,k=-1;

             nextval[0]=-1;

             while(j<(int)t.length())

             {

                       if(k==-1 || t[j]==t[k])

                       {

                                j++;k++;

                                if(t[j]!=t[k]) nextval[j]=k;

                                else nextval[j]=nextval[k];

                       }

                       else k=nextval[k];

             }

    }

     

    int KMPIndex1(SqString s,SqString t)

    {

             int nextval[MaxSize],i=0,j=0,v;

             GetNextval(t,nextval);

             while(i<(int)s.length() && j<(int)t.length())

             {

                       if(j==-1 || s[i]==t[j])

                       {

                                i++;j++;

                       }

                       else j=nextval[j];

             }

             if(j>=t.length()) v=i-t.length();

             else v=-1;

             return v;

    }

     

    //在串str中查找子串substr最后一次出现的位置(不适用任何字符串标准函数)

    int indexpos(SqString str,SqString substr)

    {

             int i,j,k,idx=-1;

             for(i=0;i<(int)str.length();i++)

             {

                       for(j=i,k=0;str[j]==substr[k];j++,k++);//Brute-Force算法部分

                       if(k==(int)substr.length())

                                idx=i;

             }

             return idx;

    }

     

    //设计一个算法int like(SqString str,SqString pattern),实现以下四种匹配

    int like(SqString str,SqString pattern)

    {

             int i,j,k;

             for(i=0;i<str.length();i++)

             {

                       j=i;k=0;

                       while(1)

                       {

                                if(str[j]==pattern[k] || pattern[k]=='?'

                                         || pattern[k]=='_')

                                {

                                         j++;k++;

                                }

                                else if(pattern[k]=='//' && str[j]==pattern[k+1])  // '/'非法的字符

                                {

                                         j++;k=k+2;

                                }

                                else break;

                                if(k==pattern.length())

                                         return i;

                       }

             }

             return -1;

    }

     

    //求串s中出现的第一个最长重复子串的下标和长度

    SqString smaxsubstr(SqString s)

    {

             int index=0,length=0,length1,i=0,j,k;

             SqString t;

             while(i<(int)s.length())

             {

                       j=i+1;

                       while(j<(int)s.length())

                       {

                                if(s[i]==s[j])

                                {

                                         length1=1;

                                         for(k=1;s[i+k]==s[j+k];k++)

                                                   length1++;

                                         if(length1>length)

                                         {

                                                   index=i;

                                                   length=length1;

                                         }

                                         //j+=length1;                    //没有考虑到几个字符相同的情况

                                         j++;

                                }

                                else

                                         j++;

                       }

                       i++;

             }

     

             //测试部分

             std::cout<<"position: "<<index<<" length: "<<length<<std::endl;

             t.resize(length);

             for(i=0;i<length;i++)

                       t[i]=s[index+i];

             return t;

    }

     

    //我自己题意理解错了,认为是具有相同字符才算是重复子串,得到的算法如下:

    void maxsubstr(SqString s,int& lastpos,int& lastlen)

    {

             int pos=0;

             int len=1;

             lastpos=pos;lastlen=len;

            

             for(int i=1; i<(int)s.length();i++)

             {

                       if(s[i]==s[i-1] && i<(int)s.length())

                                len++;

                       else

                       {

                                if(len>lastlen)

                                {

                                         lastlen=len;

                                         lastpos=pos;

                                }

                                pos=i;

                                len=1;

                       }

             }

    }

     

    //两个字符串st的长度分别为m,n,求这两个字符串最大公共子串

    //算法的时间复杂度T(m,n)=O(m*n)

    //估算最优的T(m,n)=O(m+n),采用KMP进行模式匹配

    SqString maxcomstr(SqString s,SqString t)

    {

             SqString str;

             int midx=0,mlen=0,tlen,i=0,j,k;

             while(i<(int)s.length())

             {

                       j=0;

                       while(j<(int)t.length())

                       {

                                if(s[i]==t[j])

                                {

                                         tlen=1;

                                         for(k=1;(i+k)<(int)s.length() && (j+k)<(int)t.length()

                                                   && (s[i+k]==t[j+k]);k++)

                                                   tlen++;

                                         if(tlen>mlen)

                                         {

                                                   midx=i;mlen=tlen;

                                         }

                                         //j+=tlen;

                                         j++;

                                }

                                else j++;

                       }

                       i++;

             }

     

             for(i=0;i<mlen;i++)

                       //str[i]=s[midx+i];

                       std::cout<<s[midx+i];

             std::cout<<std::endl;

             str.resize(mlen);

             return str;

    }

     

    int main()

    {

             //SqString main="aababddfabhenidfhdshfjshfjkhsdkj";

             //SqString pattern="abdd";

     

             //SqString main="abcaabbcaaabababaabca";

             //SqString pattern="babab";

     

             //SqString main="a nice string";

             //SqString pattern="?nice?str_ng";

     

             SqString main="aaaabaaaaab";

     

             SqString main1="hellaaabaaabss";

             SqString qq;

     

             int pos,len;

       

             qq=maxcomstr(main,main1);

     

             //maxsubstr(main,pos,len);

             //std::cout<<"position: "<<pos<<" length: "<<len<<std::endl;

     

             //int match;

             //int intenger=-1;

             //unsigned int unintenger=9;

             //std::cout<<(intenger<unintenger)<<std::endl;            注意int unsigned int的比较

             //pos=index(main,pattern);

             //pos=index1(main,pattern);

             //pos=KMPIndex(main,pattern);

             //pos=KMPIndex1(main,pattern);

             //std::cout<<pos<<std::endl;

     

             //pos=indexpos(main,pattern);

     

        //qq=smaxsubstr(main);

             /*match=like(main,pattern);

             if(match==-1)

                       printf("fail!/n");

             else

                       printf("successful!/n");*/

     

             /*

             if(pos>0)

                       printf("the position is: %d/n",pos);

             else

                       printf("fail!/n");*/

             return 1;

    }

     

    PS:数据结构那本书上的例子代码有的有问题,我都改过来了,并且注明了。

     


    最新回复(0)