|
35 | 35 | * receive a different case-normalization mapping.
|
36 | 36 | */
|
37 | 37 | int
|
38 |
| -ScanKeywordLookup(constchar*text, |
| 38 | +ScanKeywordLookup(constchar*str, |
39 | 39 | constScanKeywordList*keywords)
|
40 | 40 | {
|
41 |
| -intlen, |
42 |
| -i; |
43 |
| -charword[NAMEDATALEN]; |
44 |
| -constchar*kw_string; |
45 |
| -constuint16*kw_offsets; |
46 |
| -constuint16*low; |
47 |
| -constuint16*high; |
48 |
| - |
49 |
| -len=strlen(text); |
| 41 | +size_tlen; |
| 42 | +inth; |
| 43 | +constchar*kw; |
50 | 44 |
|
| 45 | +/* |
| 46 | + * Reject immediately if too long to be any keyword. This saves useless |
| 47 | + * hashing and downcasing work on long strings. |
| 48 | + */ |
| 49 | +len=strlen(str); |
51 | 50 | if (len>keywords->max_kw_len)
|
52 |
| -return-1;/* too long to be any keyword */ |
53 |
| - |
54 |
| -/* We assume all keywords are shorter than NAMEDATALEN. */ |
55 |
| -Assert(len<NAMEDATALEN); |
| 51 | +return-1; |
56 | 52 |
|
57 | 53 | /*
|
58 |
| - * Apply an ASCII-only downcasing. We must not use tolower() since it may |
59 |
| - * produce the wrong translation in some locales (eg, Turkish). |
| 54 | + * Compute the hash function. We assume it was generated to produce |
| 55 | + * case-insensitive results. Since it's a perfect hash, we need only |
| 56 | + * match to the specific keyword it identifies. |
60 | 57 | */
|
61 |
| -for (i=0;i<len;i++) |
62 |
| -{ |
63 |
| -charch=text[i]; |
| 58 | +h=keywords->hash(str,len); |
64 | 59 |
|
65 |
| -if (ch >='A'&&ch <='Z') |
66 |
| -ch+='a'-'A'; |
67 |
| -word[i]=ch; |
68 |
| -} |
69 |
| -word[len]='\0'; |
| 60 | +/* An out-of-range result implies no match */ |
| 61 | +if (h<0||h >=keywords->num_keywords) |
| 62 | +return-1; |
70 | 63 |
|
71 | 64 | /*
|
72 |
| - * Now do a binary search using plain strcmp() comparison. |
| 65 | + * Compare character-by-character to see if we have a match, applying an |
| 66 | + * ASCII-only downcasing to the input characters. We must not use |
| 67 | + * tolower() since it may produce the wrong translation in some locales |
| 68 | + * (eg, Turkish). |
73 | 69 | */
|
74 |
| -kw_string=keywords->kw_string; |
75 |
| -kw_offsets=keywords->kw_offsets; |
76 |
| -low=kw_offsets; |
77 |
| -high=kw_offsets+ (keywords->num_keywords-1); |
78 |
| -while (low <=high) |
| 70 | +kw=GetScanKeyword(h,keywords); |
| 71 | +while (*str!='\0') |
79 | 72 | {
|
80 |
| -constuint16*middle; |
81 |
| -intdifference; |
| 73 | +charch=*str++; |
82 | 74 |
|
83 |
| -middle=low+ (high-low) /2; |
84 |
| -difference=strcmp(kw_string+*middle,word); |
85 |
| -if (difference==0) |
86 |
| -returnmiddle-kw_offsets; |
87 |
| -elseif (difference<0) |
88 |
| -low=middle+1; |
89 |
| -else |
90 |
| -high=middle-1; |
| 75 | +if (ch >='A'&&ch <='Z') |
| 76 | +ch+='a'-'A'; |
| 77 | +if (ch!=*kw++) |
| 78 | +return-1; |
91 | 79 | }
|
| 80 | +if (*kw!='\0') |
| 81 | +return-1; |
92 | 82 |
|
93 |
| -return-1; |
| 83 | +/* Success! */ |
| 84 | +returnh; |
94 | 85 | }
|