首页 > 技术文章 > 实践中更高效、实现起来相对简单的基于末尾坏字符原则的BM算法实现

codeape 2013-08-20 20:07 原文

之前网上看的若干算法,无非两个原则:坏字符原则、好后缀原则。按照算法所述实现了一个版本,但发现其效率还不如本文所述的实现方式。个人分析效率较低的原因可能是因为不断地向前找坏字符或者好后缀来确定跳跃距离导致的,不断的比对操作应该是影响效率的根源。

下面贴一段实现较简单的方法,感谢之前的领导磊哥,实现参照了他的代码。

PS:大概看了下ClamAV的BM实现,感觉很复杂。

 1 #define BM_TAB_LEN  (256)
 2 
 3 uint64_t *InitBMTab(const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
 4 {
 5     uint64_t    *pui64RetVal    = NULL;
 6 
 7     if (In_ui8Pattern == NULL || In_ui64PattLen == 0)
 8     {
 9         goto fun_ret;
10     }
11 
12     pui64RetVal = (uint64_t *)malloc(sizeof(uint64_t) * BM_TAB_LEN);
13     if (pui64RetVal == NULL)
14     {
15         goto fun_ret;
16     }
17 
18     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
19     {
20         pui64RetVal[i] = In_ui64PattLen;
21     }
22 
23     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
24     {
25         pui64RetVal[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
26     }
27 
28 fun_ret:
29     return pui64RetVal;
30 }
31 
32 int8_t ReBuildBMTab(uint64_t *Out_pui64BMJmpTab, const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
33 {
34     int8_t  i8RetVal    = 0;
35 
36     if (Out_pui64BMJmpTab == NULL || In_ui8Pattern == NULL || In_ui64PattLen == 0)
37     {
38         i8RetVal = -1;
39         goto fun_ret;
40     }
41 
42     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
43     {
44         Out_pui64BMJmpTab[i] = In_ui64PattLen;
45     }
46 
47     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
48     {
49         Out_pui64BMJmpTab[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
50     }
51 
52 fun_ret:
53     return i8RetVal;
54 }
55 
56 void ReleaseBMTab(uint64_t *Out_pui64BMJmpTab)
57 {
58     if (Out_pui64BMJmpTab != NULL)
59     {
60         free(Out_pui64BMJmpTab);
61     }
62 }
63 
64 uint64_t BMSearch(const uint64_t *In_pui64BMJmpTab, const uint8_t *In_pui8Pattern, uint64_t In_ui64PattLen,
65     const uint8_t *In_pui8Buf, uint64_t In_ui64BufLen)
66 {
67     uint64_t    ui64RetVal  = -1;
68     uint64_t    ui64EndIdx  = 0;
69 
70     if (In_pui64BMJmpTab == NULL || In_pui8Pattern == NULL
71         || In_ui64PattLen == 0 || In_pui8Buf == NULL || In_ui64BufLen == 0
72         || In_ui64BufLen < In_ui64PattLen)
73     {
74         goto fun_ret;
75     }
76 
77     ui64EndIdx = In_ui64PattLen - 1;
78     do 
79     {
80         if (In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]] != 0)
81         {
82             ui64EndIdx += In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]];
83             continue;
84         }
85         if (memcmp(In_pui8Pattern, In_pui8Buf + ui64EndIdx - In_ui64PattLen + 1, In_ui64PattLen) == 0)
86         {
87             ui64RetVal = ui64EndIdx - In_ui64PattLen + 1;
88             goto fun_ret;
89         }
90         ui64EndIdx ++;
91     } while (ui64EndIdx < In_ui64BufLen);
92 
93 fun_ret:
94     return ui64RetVal;
95 }

 

推荐阅读