1414 * NextChar
1515 * MatchText - to name of function wanted
1616 * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17- * MATCH_LOWER - defineiff using to_lower ontext chars
17+ * MATCH_LOWER - definefor case (4), using to_lower onsingle-byte chars
1818 *
1919 * Copyright (c) 1996-2010, PostgreSQL Global Development Group
2020 *
2121 * IDENTIFICATION
22- *$PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.27 2010/01/02 16:57:54 momjian Exp $
22+ *$PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.28 2010/05/28 17:35:23 tgl Exp $
2323 *
2424 *-------------------------------------------------------------------------
2525 */
7070 */
7171
7272#ifdef MATCH_LOWER
73- #define TCHAR (t ) ((char) tolower((unsigned char) (t)))
73+ #define GETCHAR (t ) ((char) tolower((unsigned char) (t)))
7474#else
75- #define TCHAR (t ) (t)
75+ #define GETCHAR (t ) (t)
7676#endif
7777
7878static int
@@ -101,85 +101,80 @@ MatchText(char *t, int tlen, char *p, int plen)
101101ereport (ERROR ,
102102(errcode (ERRCODE_INVALID_ESCAPE_SEQUENCE ),
103103errmsg ("LIKE pattern must not end with escape character" )));
104- if (TCHAR (* p )!= TCHAR (* t ))
104+ if (GETCHAR (* p )!= GETCHAR (* t ))
105105return LIKE_FALSE ;
106106}
107107else if (* p == '%' )
108108{
109+ char firstpat ;
110+
109111/*
110- * % processing is essentially a search for a match for what
111- * follows the %, plus a recursive match of the remainder. We
112- * succeed if and only if both conditions are met.
112+ * % processing is essentially a search for a text position at
113+ * which the remainder of the text matches the remainder of the
114+ * pattern, using a recursive call to check each potential match.
115+ *
116+ * If there are wildcards immediately following the %, we can skip
117+ * over them first, using the idea that any sequence of N _'s and
118+ * one or more %'s is equivalent to N _'s and one % (ie, it will
119+ * match any sequence of at least N text characters). In this
120+ * way we will always run the recursive search loop using a
121+ * pattern fragment that begins with a literal character-to-match,
122+ * thereby not recursing more than we have to.
113123 */
124+ NextByte (p ,plen );
114125
115- /* %% is the same as % according to the SQL standard */
116- /* Advance past all %'s */
117- while (plen > 0 && * p == '%' )
118- NextByte (p ,plen );
119- /* Trailing percent matches everything. */
126+ while (plen > 0 )
127+ {
128+ if (* p == '%' )
129+ NextByte (p ,plen );
130+ else if (* p == '_' )
131+ {
132+ /* If not enough text left to match the pattern, ABORT */
133+ if (tlen <=0 )
134+ return LIKE_ABORT ;
135+ NextChar (t ,tlen );
136+ NextByte (p ,plen );
137+ }
138+ else
139+ break ;/* Reached a non-wildcard pattern char */
140+ }
141+
142+ /*
143+ * If we're at end of pattern, match: we have a trailing % which
144+ * matches any remaining text string.
145+ */
120146if (plen <=0 )
121147return LIKE_TRUE ;
122148
123149/*
124150 * Otherwise, scan for a text position at which we can match the
125- * rest of the pattern.
151+ * rest of the pattern. The first remaining pattern char is known
152+ * to be a regular or escaped literal character, so we can compare
153+ * the first pattern byte to each text byte to avoid recursing
154+ * more than we have to. This fact also guarantees that we don't
155+ * have to consider a match to the zero-length substring at the
156+ * end of the text.
126157 */
127- if (* p == '_ ' )
158+ if (* p == '\\ ' )
128159{
129- /* %_ is the same as _% - avoid matching _ repeatedly */
160+ if (plen < 2 )
161+ return LIKE_FALSE ;/* XXX should throw error */
162+ firstpat = GETCHAR (p [1 ]);
163+ }
164+ else
165+ firstpat = GETCHAR (* p );
130166
131- do
132- {
133- NextChar (t ,tlen );
134- NextByte (p ,plen );
135- }while (tlen > 0 && plen > 0 && * p == '_' );
136-
137- /*
138- * If we are at the end of the pattern, succeed: % followed by
139- * n _'s matches any string of at least n characters, and we
140- * have now found there are at least n characters.
141- */
142- if (plen <=0 )
143- return LIKE_TRUE ;
144-
145- /* Look for a place that matches the rest of the pattern */
146- while (tlen > 0 )
167+ while (tlen > 0 )
168+ {
169+ if (GETCHAR (* t )== firstpat )
147170{
148171int matched = MatchText (t ,tlen ,p ,plen );
149172
150173if (matched != LIKE_FALSE )
151- return matched ;/* TRUE or ABORT */
152-
153- NextChar (t ,tlen );
174+ return matched ;/* TRUE or ABORT */
154175}
155- }
156- else
157- {
158- char firstpat = TCHAR (* p );
159176
160- if (* p == '\\' )
161- {
162- if (plen < 2 )
163- return LIKE_FALSE ;
164- firstpat = TCHAR (p [1 ]);
165- }
166-
167- while (tlen > 0 )
168- {
169- /*
170- * Optimization to prevent most recursion: don't recurse
171- * unless first pattern byte matches first text byte.
172- */
173- if (TCHAR (* t )== firstpat )
174- {
175- int matched = MatchText (t ,tlen ,p ,plen );
176-
177- if (matched != LIKE_FALSE )
178- return matched ;/* TRUE or ABORT */
179- }
180-
181- NextChar (t ,tlen );
182- }
177+ NextChar (t ,tlen );
183178}
184179
185180/*
@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
195190NextByte (p ,plen );
196191continue ;
197192}
198- else if (TCHAR (* p )!= TCHAR (* t ))
193+ else if (GETCHAR (* p )!= GETCHAR (* t ))
199194{
200195/* non-wildcard pattern char fails to match text char */
201196return LIKE_FALSE ;
@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
220215if (tlen > 0 )
221216return LIKE_FALSE ;/* end of pattern, but not of text */
222217
223- /* End of text string.Do we have matching pattern remaining? */
224- while (plen > 0 && * p == '%' )/* allow multiple %'s at end of
225- * pattern */
218+ /*
219+ * End of text, but perhaps not of pattern. Match iff the remaining
220+ * pattern can match a zero-length string, ie, it's zero or more %'s.
221+ */
222+ while (plen > 0 && * p == '%' )
226223NextByte (p ,plen );
227-
228224if (plen <=0 )
229225return LIKE_TRUE ;
230226
@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
348344#undef do_like_escape
349345#endif
350346
351- #undef TCHAR
347+ #undef GETCHAR
352348
353349#ifdef MATCH_LOWER
354350#undef MATCH_LOWER