@@ -1181,9 +1181,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11811181return result ;
11821182}
11831183
1184- /* divide pattern into fixed prefix and remainder */
1184+ /*
1185+ * Divide pattern into fixed prefix and remainder. XXX we have to assume
1186+ * default collation here, because we don't have access to the actual
1187+ * input collation for the operator. FIXME ...
1188+ */
11851189patt = (Const * )other ;
1186- pstatus = pattern_fixed_prefix (patt ,ptype ,& prefix ,& rest );
1190+ pstatus = pattern_fixed_prefix (patt ,ptype ,DEFAULT_COLLATION_OID ,
1191+ & prefix ,& rest );
11871192
11881193/*
11891194 * If necessary, coerce the prefix constant to the right type. (The "rest"
@@ -4755,6 +4760,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47554760 *-------------------------------------------------------------------------
47564761 */
47574762
4763+ /*
4764+ * Check whether char is a letter (and, hence, subject to case-folding)
4765+ *
4766+ * In multibyte character sets, we can't use isalpha, and it does not seem
4767+ * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
4768+ * any multibyte char is potentially case-varying.
4769+ */
4770+ static int
4771+ pattern_char_isalpha (char c ,bool is_multibyte ,
4772+ pg_locale_t locale ,bool locale_is_c )
4773+ {
4774+ if (locale_is_c ) /* C locale: only ASCII A-Z and a-z count as letters */
4775+ return (c >='A' && c <='Z' )|| (c >='a' && c <='z' );
4776+ else if (is_multibyte && IS_HIGHBIT_SET (c )) /* part of a multibyte char: assume case-varying */
4777+ return true;
4778+ #ifdef HAVE_LOCALE_T
4779+ else if (locale ) /* a specific locale was supplied: ask it */
4780+ return isalpha_l ((unsigned char )c ,locale );
4781+ #endif
4782+ else /* otherwise use plain isalpha(); cast keeps the argument in unsigned char range */
4783+ return isalpha ((unsigned char )c );
4784+ }
4785+
47584786/*
47594787 * Extract the fixed prefix, if any, for a pattern.
47604788 *
@@ -4769,7 +4797,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47694797 */
47704798
47714799static Pattern_Prefix_Status
4772- like_fixed_prefix (Const * patt_const ,bool case_insensitive ,
4800+ like_fixed_prefix (Const * patt_const ,bool case_insensitive ,Oid collation ,
47734801Const * * prefix_const ,Const * * rest_const )
47744802{
47754803char * match ;
@@ -4780,15 +4808,39 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
47804808int pos ,
47814809match_pos ;
47824810bool is_multibyte = (pg_database_encoding_max_length ()> 1 );
4811+ pg_locale_t locale = 0 ;
4812+ bool locale_is_c = false;
47834813
47844814/* the right-hand const is type text or bytea */
47854815Assert (typeid == BYTEAOID || typeid == TEXTOID );
47864816
4787- if (typeid == BYTEAOID && case_insensitive )
4788- ereport (ERROR ,
4789- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
4817+ if (case_insensitive )
4818+ {
4819+ if (typeid == BYTEAOID )
4820+ ereport (ERROR ,
4821+ (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
47904822errmsg ("case insensitive matching not supported on type bytea" )));
47914823
4824+ /* If case-insensitive, we need locale info */
4825+ if (lc_ctype_is_c (collation ))
4826+ locale_is_c = true;
4827+ else if (collation != DEFAULT_COLLATION_OID )
4828+ {
4829+ if (!OidIsValid (collation ))
4830+ {
4831+ /*
4832+ * This typically means that the parser could not resolve a
4833+ * conflict of implicit collations, so report it that way.
4834+ */
4835+ ereport (ERROR ,
4836+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
4837+ errmsg ("could not determine which collation to use for ILIKE" ),
4838+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
4839+ }
4840+ locale = pg_newlocale_from_collation (collation );
4841+ }
4842+ }
4843+
47924844if (typeid != BYTEAOID )
47934845{
47944846patt = TextDatumGetCString (patt_const -> constvalue );
@@ -4822,23 +4874,11 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
48224874break ;
48234875}
48244876
4825- /*
4826- * XXX In multibyte character sets, we can't trust isalpha, so assume
4827- * any multibyte char is potentially case-varying.
4828- */
4829- if (case_insensitive )
4830- {
4831- if (is_multibyte && (unsignedchar )patt [pos ] >=0x80 )
4832- break ;
4833- if (isalpha ((unsignedchar )patt [pos ]))
4834- break ;
4835- }
4877+ /* Stop if case-varying character (it's sort of a wildcard) */
4878+ if (case_insensitive &&
4879+ pattern_char_isalpha (patt [pos ],is_multibyte ,locale ,locale_is_c ))
4880+ break ;
48364881
4837- /*
4838- * NOTE: this code used to think that %% meant a literal %, but
4839- * textlike() itself does not think that, and the SQL92 spec doesn't
4840- * say any such thing either.
4841- */
48424882match [match_pos ++ ]= patt [pos ];
48434883}
48444884
@@ -4870,7 +4910,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
48704910}
48714911
48724912static Pattern_Prefix_Status
4873- regex_fixed_prefix (Const * patt_const ,bool case_insensitive ,
4913+ regex_fixed_prefix (Const * patt_const ,bool case_insensitive ,Oid collation ,
48744914Const * * prefix_const ,Const * * rest_const )
48754915{
48764916char * match ;
@@ -4883,6 +4923,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
48834923char * rest ;
48844924Oid typeid = patt_const -> consttype ;
48854925bool is_multibyte = (pg_database_encoding_max_length ()> 1 );
4926+ pg_locale_t locale = 0 ;
4927+ bool locale_is_c = false;
48864928
48874929/*
48884930 * Should be unnecessary, there are no bytea regex operators defined. As
@@ -4894,6 +4936,28 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
48944936(errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
48954937errmsg ("regular-expression matching not supported on type bytea" )));
48964938
4939+ if (case_insensitive )
4940+ {
4941+ /* If case-insensitive, we need locale info */
4942+ if (lc_ctype_is_c (collation ))
4943+ locale_is_c = true;
4944+ else if (collation != DEFAULT_COLLATION_OID )
4945+ {
4946+ if (!OidIsValid (collation ))
4947+ {
4948+ /*
4949+ * This typically means that the parser could not resolve a
4950+ * conflict of implicit collations, so report it that way.
4951+ */
4952+ ereport (ERROR ,
4953+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
4954+ errmsg ("could not determine which collation to use for regular expression" ),
4955+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
4956+ }
4957+ locale = pg_newlocale_from_collation (collation );
4958+ }
4959+ }
4960+
48974961/* the right-hand const is type text for all of these */
48984962patt = TextDatumGetCString (patt_const -> constvalue );
48994963
@@ -4969,17 +5033,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
49695033patt [pos ]== '$' )
49705034break ;
49715035
4972- /*
4973- * XXX In multibyte character sets, we can't trust isalpha, so assume
4974- * any multibyte char is potentially case-varying.
4975- */
4976- if (case_insensitive )
4977- {
4978- if (is_multibyte && (unsignedchar )patt [pos ] >=0x80 )
4979- break ;
4980- if (isalpha ((unsignedchar )patt [pos ]))
4981- break ;
4982- }
5036+ /* Stop if case-varying character (it's sort of a wildcard) */
5037+ if (case_insensitive &&
5038+ pattern_char_isalpha (patt [pos ],is_multibyte ,locale ,locale_is_c ))
5039+ break ;
49835040
49845041/*
49855042 * Check for quantifiers. Except for +, this means the preceding
@@ -5004,7 +5061,7 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
50045061 * backslash followed by alphanumeric is an escape, not a quoted
50055062 * character. Must treat it as having multiple possible matches.
50065063 * Note: since only ASCII alphanumerics are escapes, we don't have to
5007- * be paranoid about multibyte here.
5064+ * be paranoid about multibyte or collations here.
50085065 */
50095066if (patt [pos ]== '\\' )
50105067{
@@ -5056,24 +5113,24 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
50565113}
50575114
50585115Pattern_Prefix_Status
5059- pattern_fixed_prefix (Const * patt ,Pattern_Type ptype ,
5116+ pattern_fixed_prefix (Const * patt ,Pattern_Type ptype ,Oid collation ,
50605117Const * * prefix ,Const * * rest )
50615118{
50625119Pattern_Prefix_Status result ;
50635120
50645121switch (ptype )
50655122{
50665123case Pattern_Type_Like :
5067- result = like_fixed_prefix (patt , false,prefix ,rest );
5124+ result = like_fixed_prefix (patt , false,collation , prefix ,rest );
50685125break ;
50695126case Pattern_Type_Like_IC :
5070- result = like_fixed_prefix (patt , true,prefix ,rest );
5127+ result = like_fixed_prefix (patt , true,collation , prefix ,rest );
50715128break ;
50725129case Pattern_Type_Regex :
5073- result = regex_fixed_prefix (patt , false,prefix ,rest );
5130+ result = regex_fixed_prefix (patt , false,collation , prefix ,rest );
50745131break ;
50755132case Pattern_Type_Regex_IC :
5076- result = regex_fixed_prefix (patt , true,prefix ,rest );
5133+ result = regex_fixed_prefix (patt , true,collation , prefix ,rest );
50775134break ;
50785135default :
50795136elog (ERROR ,"unrecognized ptype: %d" , (int )ptype );