|
35 | 35 | * receive a different case-normalization mapping. |
36 | 36 | */ |
37 | 37 | int |
38 | | -ScanKeywordLookup(constchar*text, |
| 38 | +ScanKeywordLookup(constchar*str, |
39 | 39 | constScanKeywordList*keywords) |
40 | 40 | { |
41 | | -intlen, |
42 | | -i; |
43 | | -charword[NAMEDATALEN]; |
44 | | -constchar*kw_string; |
45 | | -constuint16*kw_offsets; |
46 | | -constuint16*low; |
47 | | -constuint16*high; |
48 | | - |
49 | | -len=strlen(text); |
| 41 | +size_tlen; |
| 42 | +inth; |
| 43 | +constchar*kw; |
50 | 44 |
|
| 45 | +/* |
| 46 | + * Reject immediately if too long to be any keyword. This saves useless |
| 47 | + * hashing and downcasing work on long strings. |
| 48 | + */ |
| 49 | +len=strlen(str); |
51 | 50 | if (len>keywords->max_kw_len) |
52 | | -return-1;/* too long to be any keyword */ |
53 | | - |
54 | | -/* We assume all keywords are shorter than NAMEDATALEN. */ |
55 | | -Assert(len<NAMEDATALEN); |
| 51 | +return-1; |
56 | 52 |
|
57 | 53 | /* |
58 | | - * Apply an ASCII-only downcasing. We must not use tolower() since it may |
59 | | - * produce the wrong translation in some locales (eg, Turkish). |
| 54 | + * Compute the hash function. We assume it was generated to produce |
| 55 | + * case-insensitive results. Since it's a perfect hash, we need only |
| 56 | + * match to the specific keyword it identifies. |
60 | 57 | */ |
61 | | -for (i=0;i<len;i++) |
62 | | -{ |
63 | | -charch=text[i]; |
| 58 | +h=keywords->hash(str,len); |
64 | 59 |
|
65 | | -if (ch >='A'&&ch <='Z') |
66 | | -ch+='a'-'A'; |
67 | | -word[i]=ch; |
68 | | -} |
69 | | -word[len]='\0'; |
| 60 | +/* An out-of-range result implies no match */ |
| 61 | +if (h<0||h >=keywords->num_keywords) |
| 62 | +return-1; |
70 | 63 |
|
71 | 64 | /* |
72 | | - * Now do a binary search using plain strcmp() comparison. |
| 65 | + * Compare character-by-character to see if we have a match, applying an |
| 66 | + * ASCII-only downcasing to the input characters. We must not use |
| 67 | + * tolower() since it may produce the wrong translation in some locales |
| 68 | + * (eg, Turkish). |
73 | 69 | */ |
74 | | -kw_string=keywords->kw_string; |
75 | | -kw_offsets=keywords->kw_offsets; |
76 | | -low=kw_offsets; |
77 | | -high=kw_offsets+ (keywords->num_keywords-1); |
78 | | -while (low <=high) |
| 70 | +kw=GetScanKeyword(h,keywords); |
| 71 | +while (*str!='\0') |
79 | 72 | { |
80 | | -constuint16*middle; |
81 | | -intdifference; |
| 73 | +charch=*str++; |
82 | 74 |
|
83 | | -middle=low+ (high-low) /2; |
84 | | -difference=strcmp(kw_string+*middle,word); |
85 | | -if (difference==0) |
86 | | -returnmiddle-kw_offsets; |
87 | | -elseif (difference<0) |
88 | | -low=middle+1; |
89 | | -else |
90 | | -high=middle-1; |
| 75 | +if (ch >='A'&&ch <='Z') |
| 76 | +ch+='a'-'A'; |
| 77 | +if (ch!=*kw++) |
| 78 | +return-1; |
91 | 79 | } |
| 80 | +if (*kw!='\0') |
| 81 | +return-1; |
92 | 82 |
|
93 | | -return-1; |
| 83 | +/* Success! */ |
| 84 | +returnh; |
94 | 85 | } |