14
14
* NextChar
15
15
* MatchText - to name of function wanted
16
16
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17
- * MATCH_LOWER - define iff using to_lower on text chars
17
+ * MATCH_LOWER - define for case (4), using to_lower on single-byte chars
18
18
*
19
19
* Copyright (c) 1996-2010, PostgreSQL Global Development Group
20
20
*
21
21
* IDENTIFICATION
22
- * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.27 2010/01/02 16:57:54 momjian Exp $
22
+ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.28 2010/05/28 17:35:23 tgl Exp $
23
23
*
24
24
*-------------------------------------------------------------------------
25
25
*/
70
70
*/
71
71
72
72
#ifdef MATCH_LOWER
73
- #define TCHAR (t ) ((char) tolower((unsigned char) (t)))
73
+ #define GETCHAR (t ) ((char) tolower((unsigned char) (t)))
74
74
#else
75
- #define TCHAR (t ) (t)
75
+ #define GETCHAR (t ) (t)
76
76
#endif
77
77
78
78
static int
@@ -101,85 +101,80 @@ MatchText(char *t, int tlen, char *p, int plen)
101
101
ereport (ERROR ,
102
102
(errcode (ERRCODE_INVALID_ESCAPE_SEQUENCE ),
103
103
errmsg ("LIKE pattern must not end with escape character" )));
104
- if (TCHAR (* p )!= TCHAR (* t ))
104
+ if (GETCHAR (* p )!= GETCHAR (* t ))
105
105
return LIKE_FALSE ;
106
106
}
107
107
else if (* p == '%' )
108
108
{
109
+ char firstpat ;
110
+
109
111
/*
110
- * % processing is essentially a search for a match for what
111
- * follows the %, plus a recursive match of the remainder. We
112
- * succeed if and only if both conditions are met.
112
+ * % processing is essentially a search for a text position at
113
+ * which the remainder of the text matches the remainder of the
114
+ * pattern, using a recursive call to check each potential match.
115
+ *
116
+ * If there are wildcards immediately following the %, we can skip
117
+ * over them first, using the idea that any sequence of N _'s and
118
+ * one or more %'s is equivalent to N _'s and one % (ie, it will
119
+ * match any sequence of at least N text characters). In this
120
+ * way we will always run the recursive search loop using a
121
+ * pattern fragment that begins with a literal character-to-match,
122
+ * thereby not recursing more than we have to.
113
123
*/
124
+ NextByte (p ,plen );
114
125
115
- /* %% is the same as % according to the SQL standard */
116
- /* Advance past all %'s */
117
- while (plen > 0 && * p == '%' )
118
- NextByte (p ,plen );
119
- /* Trailing percent matches everything. */
126
+ while (plen > 0 )
127
+ {
128
+ if (* p == '%' )
129
+ NextByte (p ,plen );
130
+ else if (* p == '_' )
131
+ {
132
+ /* If not enough text left to match the pattern, ABORT */
133
+ if (tlen <=0 )
134
+ return LIKE_ABORT ;
135
+ NextChar (t ,tlen );
136
+ NextByte (p ,plen );
137
+ }
138
+ else
139
+ break ;/* Reached a non-wildcard pattern char */
140
+ }
141
+
142
+ /*
143
+ * If we're at end of pattern, match: we have a trailing % which
144
+ * matches any remaining text string.
145
+ */
120
146
if (plen <=0 )
121
147
return LIKE_TRUE ;
122
148
123
149
/*
124
150
* Otherwise, scan for a text position at which we can match the
125
- * rest of the pattern.
151
+ * rest of the pattern. The first remaining pattern char is known
152
+ * to be a regular or escaped literal character, so we can compare
153
+ * the first pattern byte to each text byte to avoid recursing
154
+ * more than we have to. This fact also guarantees that we don't
155
+ * have to consider a match to the zero-length substring at the
156
+ * end of the text.
126
157
*/
127
- if (* p == '_ ' )
158
+ if (* p == '\\ ' )
128
159
{
129
- /* %_ is the same as _% - avoid matching _ repeatedly */
160
+ if (plen < 2 )
161
+ return LIKE_FALSE ;/* XXX should throw error */
162
+ firstpat = GETCHAR (p [1 ]);
163
+ }
164
+ else
165
+ firstpat = GETCHAR (* p );
130
166
131
- do
132
- {
133
- NextChar (t ,tlen );
134
- NextByte (p ,plen );
135
- }while (tlen > 0 && plen > 0 && * p == '_' );
136
-
137
- /*
138
- * If we are at the end of the pattern, succeed: % followed by
139
- * n _'s matches any string of at least n characters, and we
140
- * have now found there are at least n characters.
141
- */
142
- if (plen <=0 )
143
- return LIKE_TRUE ;
144
-
145
- /* Look for a place that matches the rest of the pattern */
146
- while (tlen > 0 )
167
+ while (tlen > 0 )
168
+ {
169
+ if (GETCHAR (* t )== firstpat )
147
170
{
148
171
int matched = MatchText (t ,tlen ,p ,plen );
149
172
150
173
if (matched != LIKE_FALSE )
151
- return matched ;/* TRUE or ABORT */
152
-
153
- NextChar (t ,tlen );
174
+ return matched ;/* TRUE or ABORT */
154
175
}
155
- }
156
- else
157
- {
158
- char firstpat = TCHAR (* p );
159
176
160
- if (* p == '\\' )
161
- {
162
- if (plen < 2 )
163
- return LIKE_FALSE ;
164
- firstpat = TCHAR (p [1 ]);
165
- }
166
-
167
- while (tlen > 0 )
168
- {
169
- /*
170
- * Optimization to prevent most recursion: don't recurse
171
- * unless first pattern byte matches first text byte.
172
- */
173
- if (TCHAR (* t )== firstpat )
174
- {
175
- int matched = MatchText (t ,tlen ,p ,plen );
176
-
177
- if (matched != LIKE_FALSE )
178
- return matched ;/* TRUE or ABORT */
179
- }
180
-
181
- NextChar (t ,tlen );
182
- }
177
+ NextChar (t ,tlen );
183
178
}
184
179
185
180
/*
@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
195
190
NextByte (p ,plen );
196
191
continue ;
197
192
}
198
- else if (TCHAR (* p )!= TCHAR (* t ))
193
+ else if (GETCHAR (* p )!= GETCHAR (* t ))
199
194
{
200
195
/* non-wildcard pattern char fails to match text char */
201
196
return LIKE_FALSE ;
@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
220
215
if (tlen > 0 )
221
216
return LIKE_FALSE ;/* end of pattern, but not of text */
222
217
223
- /* End of text string. Do we have matching pattern remaining? */
224
- while (plen > 0 && * p == '%' )/* allow multiple %'s at end of
225
- * pattern */
218
+ /*
219
+ * End of text, but perhaps not of pattern. Match iff the remaining
220
+ * pattern can match a zero-length string, ie, it's zero or more %'s.
221
+ */
222
+ while (plen > 0 && * p == '%' )
226
223
NextByte (p ,plen );
227
-
228
224
if (plen <=0 )
229
225
return LIKE_TRUE ;
230
226
@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
348
344
#undef do_like_escape
349
345
#endif
350
346
351
- #undef TCHAR
347
+ #undef GETCHAR
352
348
353
349
#ifdef MATCH_LOWER
354
350
#undef MATCH_LOWER