1919
2020#include <ctype.h>
2121
22+ #include "catalog/pg_collation.h"
2223#include "mb/pg_wchar.h"
2324#include "utils/builtins.h"
25+ #include "utils/pg_locale.h"
2426
2527
/*
 * Result codes for the pattern matchers.  Callers visible in this file
 * compare a matcher's return value against LIKE_TRUE to get a boolean.
 * NOTE(review): the exact meaning of LIKE_ABORT is defined by the matcher
 * implementation (like_match.c), which is not visible here -- confirm there.
 */
2628#define LIKE_TRUE 1
2729#define LIKE_FALSE 0
2830#define LIKE_ABORT (-1)
2931
3032
31- static int SB_MatchText (char * t ,int tlen ,char * p ,int plen );
33+ static int SB_MatchText (char * t ,int tlen ,char * p ,int plen ,
34+ pg_locale_t locale ,bool locale_is_c );
3235static text * SB_do_like_escape (text * ,text * );
3336
34- static int MB_MatchText (char * t ,int tlen ,char * p ,int plen );
37+ static int MB_MatchText (char * t ,int tlen ,char * p ,int plen ,
38+ pg_locale_t locale ,bool locale_is_c );
3539static text * MB_do_like_escape (text * ,text * );
3640
37- static int UTF8_MatchText (char * t ,int tlen ,char * p ,int plen );
41+ static int UTF8_MatchText (char * t ,int tlen ,char * p ,int plen ,
42+ pg_locale_t locale ,bool locale_is_c );
3843
39- static int SB_IMatchText (char * t ,int tlen ,char * p ,int plen );
44+ static int SB_IMatchText (char * t ,int tlen ,char * p ,int plen ,
45+ pg_locale_t locale ,bool locale_is_c );
4046
4147static int GenericMatchText (char * s ,int slen ,char * p ,int plen );
4248static int Generic_Text_IC_like (text * str ,text * pat ,Oid collation );
@@ -78,6 +84,24 @@ wchareq(char *p1, char *p2)
7884 * comparison. This should be revisited when we install better locale support.
7985 */
8086
87+ /*
88+ * We do handle case-insensitive matching for single-byte encodings using
89+ * fold-on-the-fly processing, however.
90+ */
91+ static char
92+ SB_lower_char (unsignedchar c ,pg_locale_t locale ,bool locale_is_c )
93+ {
94+ if (locale_is_c )
95+ return pg_ascii_tolower (c );
96+ #ifdef HAVE_LOCALE_T
97+ else if (locale )
98+ return tolower_l (c ,locale );
99+ #endif
100+ else
101+ return pg_tolower (c );
102+ }
103+
104+
/*
 * Advance pointer p by one byte and decrement the remaining length plen.
 * Both arguments are modified in place, so they must be lvalues.  There must
 * be no whitespace between the macro name and '(' or this becomes an
 * object-like macro.
 */
#define NextByte(p, plen)	((p)++, (plen)--)
82106
83107/* Set up to compile like_match.c for multibyte characters */
@@ -107,7 +131,7 @@ wchareq(char *p1, char *p2)
107131#include "like_match.c"
108132
109133/* setup to compile like_match.c for single byte case insensitive matches */
/*
 * MATCH_LOWER(t) expands to a call to SB_lower_char on the byte t.  The
 * expansion refers to the identifiers `locale` and `locale_is_c`, so both
 * must be in scope wherever the macro is used.  It must be defined as a
 * function-like macro: no whitespace between the name and '('.
 */
#define MATCH_LOWER(t)	SB_lower_char((unsigned char) (t), locale, locale_is_c)
111135#define NextChar (p ,plen ) NextByte((p), (plen))
112136#define MatchText SB_IMatchText
113137
@@ -121,15 +145,16 @@ wchareq(char *p1, char *p2)
121145
122146#include "like_match.c"
123147
148+ /* Generic for all cases not requiring inline case-folding */
124149static inline int
125150GenericMatchText (char * s ,int slen ,char * p ,int plen )
126151{
127152if (pg_database_encoding_max_length ()== 1 )
128- return SB_MatchText (s ,slen ,p ,plen );
153+ return SB_MatchText (s ,slen ,p ,plen , 0 , true );
129154else if (GetDatabaseEncoding ()== PG_UTF8 )
130- return UTF8_MatchText (s ,slen ,p ,plen );
155+ return UTF8_MatchText (s ,slen ,p ,plen , 0 , true );
131156else
132- return MB_MatchText (s ,slen ,p ,plen );
157+ return MB_MatchText (s ,slen ,p ,plen , 0 , true );
133158}
134159
135160static inline int
@@ -142,8 +167,8 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
142167
143168/*
144169 * For efficiency reasons, in the single byte case we don't call lower()
145- * on the pattern and text, but instead call to_lower on each character.
146- * In the multi-byte case we don't have much choice :-(
170+ * on the pattern and text, but instead call SB_lower_char on each
171+ * character. In the multi-byte case we don't have much choice :-(
147172 */
148173
149174if (pg_database_encoding_max_length ()> 1 )
@@ -156,17 +181,42 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
156181s = VARDATA (str );
157182slen = (VARSIZE (str )- VARHDRSZ );
158183if (GetDatabaseEncoding ()== PG_UTF8 )
159- return UTF8_MatchText (s ,slen ,p ,plen );
184+ return UTF8_MatchText (s ,slen ,p ,plen , 0 , true );
160185else
161- return MB_MatchText (s ,slen ,p ,plen );
186+ return MB_MatchText (s ,slen ,p ,plen , 0 , true );
162187}
163188else
164189{
190+ /*
191+ * Here we need to prepare locale information for SB_lower_char.
192+ * This should match the methods used in str_tolower().
193+ */
194+ pg_locale_t locale = 0 ;
195+ bool locale_is_c = false;
196+
197+ if (lc_ctype_is_c (collation ))
198+ locale_is_c = true;
199+ else if (collation != DEFAULT_COLLATION_OID )
200+ {
201+ if (!OidIsValid (collation ))
202+ {
203+ /*
204+ * This typically means that the parser could not resolve a
205+ * conflict of implicit collations, so report it that way.
206+ */
207+ ereport (ERROR ,
208+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
209+ errmsg ("could not determine which collation to use for ILIKE" ),
210+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
211+ }
212+ locale = pg_newlocale_from_collation (collation );
213+ }
214+
165215p = VARDATA_ANY (pat );
166216plen = VARSIZE_ANY_EXHDR (pat );
167217s = VARDATA_ANY (str );
168218slen = VARSIZE_ANY_EXHDR (str );
169- return SB_IMatchText (s ,slen ,p ,plen );
219+ return SB_IMatchText (s ,slen ,p ,plen , locale , locale_is_c );
170220}
171221}
172222
@@ -274,7 +324,7 @@ bytealike(PG_FUNCTION_ARGS)
274324p = VARDATA_ANY (pat );
275325plen = VARSIZE_ANY_EXHDR (pat );
276326
277- result = (SB_MatchText (s ,slen ,p ,plen )== LIKE_TRUE );
327+ result = (SB_MatchText (s ,slen ,p ,plen , 0 , true )== LIKE_TRUE );
278328
279329PG_RETURN_BOOL (result );
280330}
@@ -295,7 +345,7 @@ byteanlike(PG_FUNCTION_ARGS)
295345p = VARDATA_ANY (pat );
296346plen = VARSIZE_ANY_EXHDR (pat );
297347
298- result = (SB_MatchText (s ,slen ,p ,plen )!= LIKE_TRUE );
348+ result = (SB_MatchText (s ,slen ,p ,plen , 0 , true )!= LIKE_TRUE );
299349
300350PG_RETURN_BOOL (result );
301351}