string

string
posts - 27, comments - 177, trackbacks - 0, articles - 0
  C++博客 :: 首页 :: 新随笔 :: 联系 :: 聚合  :: 管理

基于sse2的strstr函数

Posted on 2008-10-28 21:47 djx_zh 阅读(2847) 评论(7)  编辑 收藏 引用
 download the code
昨天实现了基于 int 类型的 strstr 函数，可以获得 1~2X 左右的加速。今天按 lstrstr 的流程实现了基于 SSE2 的 strstr 函数，可以得到 2~4X 左右的加速。
/*
 * Helper for lstrstrsse: reports whether `pattern` occurs at `s`.
 *
 * The caller must already have established that s[0] == pattern[0] and that
 * this byte is not the terminator; that guarantees s[1] is readable.  The
 * comparison stops at the first mismatch, and the text terminator always
 * mismatches a non-zero pattern byte, so it never reads beyond the end of
 * the text string.
 *
 * Returns non-zero when the whole pattern matched, 0 otherwise.
 */
static int lstrstrsse_match_at(const char *s, const char *pattern)
{
    const char *t = s + 1;
    const char *p = pattern + 1;

    while (*p != 0 && *t == *p) {
        t++;
        p++;
    }
    return *p == 0;
}

/*
 * SSE2-accelerated substring search.
 *
 * Parameters:
 *   text    - NUL-terminated string to search in.
 *   pattern - NUL-terminated string to search for.
 *
 * Returns a pointer to the first occurrence of `pattern` inside `text`, or
 * NULL when there is none.  NULL is also returned when either argument is
 * NULL and when `pattern` is empty (the empty-pattern result is kept from
 * the original implementation; note that the C library strstr returns
 * `text` in that case).
 *
 * Method:
 *   1. Bytes before the first 16-byte boundary are checked one at a time,
 *      so that every vector load below is aligned.
 *   2. Each aligned 16-byte block is compared against the first pattern
 *      byte, the second pattern byte and zero.  A position is a candidate
 *      when it holds the first byte and the following position holds the
 *      second byte.  Position 15 cannot see its successor inside the block,
 *      so it is always kept as a candidate and decided by the scalar check.
 *   3. Candidates at or after the first NUL of the block are discarded, the
 *      remaining ones are verified with a scalar comparison.
 *
 * NOTE: an aligned 16-byte load may read bytes located after the string
 * terminator.  Such a load stays inside one 16-byte-aligned chunk and thus
 * inside one memory page, so it cannot fault, but memory checkers may
 * report it.
 */
char* lstrstrsse(char* text, char* pattern)
{
    char chara;
    char charb;
    char *cur;
    __m128i byte16a;
    __m128i byte16b;
    __m128i sseiZero;

    /* Validate before touching pattern[0] / pattern[1]. */
    if (text == NULL || pattern == NULL) return NULL;
    if (pattern[0] == 0) return NULL;

    chara = pattern[0];
    charb = pattern[1];   /* 0 for a single-character pattern */
    cur = text;

    /* Unaligned head: plain byte-wise search up to the next 16-byte boundary. */
    while (((size_t)cur & 15u) != 0) {
        if (*cur == 0) return NULL;
        if (*cur == chara && lstrstrsse_match_at(cur, pattern)) return cur;
        cur++;
    }

    byte16a = _mm_set1_epi8(chara);
    byte16b = _mm_set1_epi8(charb);
    sseiZero = _mm_set1_epi8(0);

    /* Aligned body: one 16-byte block per iteration. */
    for (;;) {
        __m128i block = _mm_load_si128((const __m128i *)cur);
        unsigned int maskZ =
            (unsigned int)_mm_movemask_epi8(_mm_cmpeq_epi8(block, sseiZero));
        unsigned int cand =
            (unsigned int)_mm_movemask_epi8(_mm_cmpeq_epi8(block, byte16a));
        int offset;

        if (charb != 0) {
            unsigned int maskB =
                (unsigned int)_mm_movemask_epi8(_mm_cmpeq_epi8(block, byte16b));
            /* Keep 'a' positions followed by 'b'; bit 15 has no visible
               successor in this block and is left to the scalar check. */
            cand &= (maskB >> 1) | 0x8000u;
        }

        if (maskZ != 0) {
            /* Isolate the lowest set bit (first NUL) and keep only the
               candidates strictly before it. */
            cand &= (maskZ & (0u - maskZ)) - 1u;
        }

        for (offset = 0; cand != 0; offset++, cand >>= 1) {
            if ((cand & 1u) != 0 && lstrstrsse_match_at(cur + offset, pattern)) {
                return cur + offset;
            }
        }

        if (maskZ != 0) return NULL;   /* the text ends inside this block */
        cur += 16;
    }
}
140 

Feedback

# re: 基于sse2的strstr函数  回复  更多评论   

2008-10-30 00:53 by 肥仔
超过了C的strstr?

# re: 基于sse2的strstr函数  回复  更多评论   

2008-10-30 09:33 by djxzh
@肥仔
就目前的测试结果,是这样。还没有测试最坏情况下会是什么结果。

# re: 基于sse2的strstr函数[未登录]  回复  更多评论   

2008-10-30 10:35 by megax
做一个从后面开始查找的试试?

# re: 基于sse2的strstr函数  回复  更多评论   

2008-10-30 10:41 by djxzh
@megax
你是说BM之类的算法吗?那些算法需要对模式串预处理。

# re: 基于sse2的strstr函数  回复  更多评论   

2008-10-30 10:54 by vczh
用了SSE的指令集就可以同时计算一小部分内容了。

# re: 基于sse2的strstr函数[未登录]  回复  更多评论   

2008-10-31 12:47 by megax
不是,我说的是从一个字符串后面开始查找想要查找的内容。不是说具体的算法

# re: 基于sse2的strstr函数  回复  更多评论   

2008-11-01 11:02 by 金山词霸2008
没想到strstr函数的设计还这么复杂

只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理