Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3c381a5

Browse files
committed
Teach pattern_fixed_prefix() about collations.
This is necessary, not optional, now that ILIKE and regexes are collation-aware --- else we might derive a wrong comparison constant for index-optimized pattern matches.
1 parent dad1f46 commit 3c381a5

File tree

3 files changed

+117
-55
lines changed

3 files changed

+117
-55
lines changed

‎src/backend/optimizer/path/indxpath.c

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2446,6 +2446,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
24462446
boolisIndexable= false;
24472447
Node*rightop;
24482448
Oidexpr_op;
2449+
Oidexpr_coll;
24492450
Const*patt;
24502451
Const*prefix=NULL;
24512452
Const*rest=NULL;
@@ -2462,6 +2463,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
24622463
/* we know these will succeed */
24632464
rightop=get_rightop(clause);
24642465
expr_op= ((OpExpr*)clause)->opno;
2466+
expr_coll= ((OpExpr*)clause)->inputcollid;
24652467

24662468
/* again, required for all current special ops: */
24672469
if (!IsA(rightop,Const)||
@@ -2475,13 +2477,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
24752477
caseOID_BPCHAR_LIKE_OP:
24762478
caseOID_NAME_LIKE_OP:
24772479
/* the right-hand const is type text for all of these */
2478-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,
2480+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,expr_coll,
24792481
&prefix,&rest);
24802482
isIndexable= (pstatus!=Pattern_Prefix_None);
24812483
break;
24822484

24832485
caseOID_BYTEA_LIKE_OP:
2484-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,
2486+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,expr_coll,
24852487
&prefix,&rest);
24862488
isIndexable= (pstatus!=Pattern_Prefix_None);
24872489
break;
@@ -2490,7 +2492,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
24902492
caseOID_BPCHAR_ICLIKE_OP:
24912493
caseOID_NAME_ICLIKE_OP:
24922494
/* the right-hand const is type text for all of these */
2493-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like_IC,
2495+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like_IC,expr_coll,
24942496
&prefix,&rest);
24952497
isIndexable= (pstatus!=Pattern_Prefix_None);
24962498
break;
@@ -2499,7 +2501,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
24992501
caseOID_BPCHAR_REGEXEQ_OP:
25002502
caseOID_NAME_REGEXEQ_OP:
25012503
/* the right-hand const is type text for all of these */
2502-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex,
2504+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex,expr_coll,
25032505
&prefix,&rest);
25042506
isIndexable= (pstatus!=Pattern_Prefix_None);
25052507
break;
@@ -2508,7 +2510,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
25082510
caseOID_BPCHAR_ICREGEXEQ_OP:
25092511
caseOID_NAME_ICREGEXEQ_OP:
25102512
/* the right-hand const is type text for all of these */
2511-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex_IC,
2513+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex_IC,expr_coll,
25122514
&prefix,&rest);
25132515
isIndexable= (pstatus!=Pattern_Prefix_None);
25142516
break;
@@ -2544,10 +2546,9 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
25442546
*
25452547
* The non-pattern opclasses will not sort the way we need in most non-C
25462548
* locales. We can use such an index anyway for an exact match (simple
2547-
* equality), but not for prefix-match cases. Note that we are looking at
2548-
* the index's collation, not the expression's collation -- this test is
2549-
* not dependent on the LIKE/regex operator's collation (which would only
2550-
* affect case folding behavior of ILIKE, anyway).
2549+
* equality), but not for prefix-match cases. Note that here we are
2550+
* looking at the index's collation, not the expression's collation --
2551+
* this test is *not* dependent on the LIKE/regex operator's collation.
25512552
*/
25522553
switch (expr_op)
25532554
{
@@ -2558,7 +2559,8 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
25582559
isIndexable=
25592560
(opfamily==TEXT_PATTERN_BTREE_FAM_OID)||
25602561
(opfamily==TEXT_BTREE_FAM_OID&&
2561-
(pstatus==Pattern_Prefix_Exact||lc_collate_is_c(idxcollation)));
2562+
(pstatus==Pattern_Prefix_Exact||
2563+
lc_collate_is_c(idxcollation)));
25622564
break;
25632565

25642566
caseOID_BPCHAR_LIKE_OP:
@@ -2568,7 +2570,8 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
25682570
isIndexable=
25692571
(opfamily==BPCHAR_PATTERN_BTREE_FAM_OID)||
25702572
(opfamily==BPCHAR_BTREE_FAM_OID&&
2571-
(pstatus==Pattern_Prefix_Exact||lc_collate_is_c(idxcollation)));
2573+
(pstatus==Pattern_Prefix_Exact||
2574+
lc_collate_is_c(idxcollation)));
25722575
break;
25732576

25742577
caseOID_NAME_LIKE_OP:
@@ -2770,6 +2773,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
27702773
Node*leftop=get_leftop(clause);
27712774
Node*rightop=get_rightop(clause);
27722775
Oidexpr_op= ((OpExpr*)clause)->opno;
2776+
Oidexpr_coll= ((OpExpr*)clause)->inputcollid;
27732777
Const*patt= (Const*)rightop;
27742778
Const*prefix=NULL;
27752779
Const*rest=NULL;
@@ -2791,7 +2795,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
27912795
caseOID_BYTEA_LIKE_OP:
27922796
if (!op_in_opfamily(expr_op,opfamily))
27932797
{
2794-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,
2798+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like,expr_coll,
27952799
&prefix,&rest);
27962800
returnprefix_quals(leftop,opfamily,idxcollation,prefix,pstatus);
27972801
}
@@ -2803,7 +2807,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
28032807
if (!op_in_opfamily(expr_op,opfamily))
28042808
{
28052809
/* the right-hand const is type text for all of these */
2806-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like_IC,
2810+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Like_IC,expr_coll,
28072811
&prefix,&rest);
28082812
returnprefix_quals(leftop,opfamily,idxcollation,prefix,pstatus);
28092813
}
@@ -2815,7 +2819,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
28152819
if (!op_in_opfamily(expr_op,opfamily))
28162820
{
28172821
/* the right-hand const is type text for all of these */
2818-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex,
2822+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex,expr_coll,
28192823
&prefix,&rest);
28202824
returnprefix_quals(leftop,opfamily,idxcollation,prefix,pstatus);
28212825
}
@@ -2827,7 +2831,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
28272831
if (!op_in_opfamily(expr_op,opfamily))
28282832
{
28292833
/* the right-hand const is type text for all of these */
2830-
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex_IC,
2834+
pstatus=pattern_fixed_prefix(patt,Pattern_Type_Regex_IC,expr_coll,
28312835
&prefix,&rest);
28322836
returnprefix_quals(leftop,opfamily,idxcollation,prefix,pstatus);
28332837
}

‎src/backend/utils/adt/selfuncs.c

Lines changed: 97 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,9 +1181,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11811181
returnresult;
11821182
}
11831183

1184-
/* divide pattern into fixed prefix and remainder */
1184+
/*
1185+
* Divide pattern into fixed prefix and remainder. XXX we have to assume
1186+
* default collation here, because we don't have access to the actual
1187+
* input collation for the operator. FIXME ...
1188+
*/
11851189
patt= (Const*)other;
1186-
pstatus=pattern_fixed_prefix(patt,ptype,&prefix,&rest);
1190+
pstatus=pattern_fixed_prefix(patt,ptype,DEFAULT_COLLATION_OID,
1191+
&prefix,&rest);
11871192

11881193
/*
11891194
* If necessary, coerce the prefix constant to the right type. (The "rest"
@@ -4755,6 +4760,29 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47554760
*-------------------------------------------------------------------------
47564761
*/
47574762

4763+
/*
4764+
* Check whether char is a letter (and, hence, subject to case-folding)
4765+
*
4766+
* In multibyte character sets, we can't use isalpha, and it does not seem
4767+
* worth trying to convert to wchar_t to use iswalpha. Instead, just assume
4768+
* any multibyte char is potentially case-varying.
4769+
*/
4770+
staticint
4771+
pattern_char_isalpha(charc,boolis_multibyte,
4772+
pg_locale_tlocale,boollocale_is_c)
4773+
{
4774+
if (locale_is_c)
4775+
return (c >='A'&&c <='Z')|| (c >='a'&&c <='z');
4776+
elseif (is_multibyte&&IS_HIGHBIT_SET(c))
4777+
return true;
4778+
#ifdefHAVE_LOCALE_T
4779+
elseif (locale)
4780+
returnisalpha_l((unsignedchar)c,locale);
4781+
#endif
4782+
else
4783+
returnisalpha((unsignedchar)c);
4784+
}
4785+
47584786
/*
47594787
* Extract the fixed prefix, if any, for a pattern.
47604788
*
@@ -4769,7 +4797,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47694797
*/
47704798

47714799
staticPattern_Prefix_Status
4772-
like_fixed_prefix(Const*patt_const,boolcase_insensitive,
4800+
like_fixed_prefix(Const*patt_const,boolcase_insensitive,Oidcollation,
47734801
Const**prefix_const,Const**rest_const)
47744802
{
47754803
char*match;
@@ -4780,15 +4808,39 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
47804808
intpos,
47814809
match_pos;
47824810
boolis_multibyte= (pg_database_encoding_max_length()>1);
4811+
pg_locale_tlocale=0;
4812+
boollocale_is_c= false;
47834813

47844814
/* the right-hand const is type text or bytea */
47854815
Assert(typeid==BYTEAOID||typeid==TEXTOID);
47864816

4787-
if (typeid==BYTEAOID&&case_insensitive)
4788-
ereport(ERROR,
4789-
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4817+
if (case_insensitive)
4818+
{
4819+
if (typeid==BYTEAOID)
4820+
ereport(ERROR,
4821+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
47904822
errmsg("case insensitive matching not supported on type bytea")));
47914823

4824+
/* If case-insensitive, we need locale info */
4825+
if (lc_ctype_is_c(collation))
4826+
locale_is_c= true;
4827+
elseif (collation!=DEFAULT_COLLATION_OID)
4828+
{
4829+
if (!OidIsValid(collation))
4830+
{
4831+
/*
4832+
* This typically means that the parser could not resolve a
4833+
* conflict of implicit collations, so report it that way.
4834+
*/
4835+
ereport(ERROR,
4836+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
4837+
errmsg("could not determine which collation to use for ILIKE"),
4838+
errhint("Use the COLLATE clause to set the collation explicitly.")));
4839+
}
4840+
locale=pg_newlocale_from_collation(collation);
4841+
}
4842+
}
4843+
47924844
if (typeid!=BYTEAOID)
47934845
{
47944846
patt=TextDatumGetCString(patt_const->constvalue);
@@ -4822,23 +4874,11 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
48224874
break;
48234875
}
48244876

4825-
/*
4826-
* XXX In multibyte character sets, we can't trust isalpha, so assume
4827-
* any multibyte char is potentially case-varying.
4828-
*/
4829-
if (case_insensitive)
4830-
{
4831-
if (is_multibyte&& (unsignedchar)patt[pos] >=0x80)
4832-
break;
4833-
if (isalpha((unsignedchar)patt[pos]))
4834-
break;
4835-
}
4877+
/* Stop if case-varying character (it's sort of a wildcard) */
4878+
if (case_insensitive&&
4879+
pattern_char_isalpha(patt[pos],is_multibyte,locale,locale_is_c))
4880+
break;
48364881

4837-
/*
4838-
* NOTE: this code used to think that %% meant a literal %, but
4839-
* textlike() itself does not think that, and the SQL92 spec doesn't
4840-
* say any such thing either.
4841-
*/
48424882
match[match_pos++]=patt[pos];
48434883
}
48444884

@@ -4870,7 +4910,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
48704910
}
48714911

48724912
staticPattern_Prefix_Status
4873-
regex_fixed_prefix(Const*patt_const,boolcase_insensitive,
4913+
regex_fixed_prefix(Const*patt_const,boolcase_insensitive,Oidcollation,
48744914
Const**prefix_const,Const**rest_const)
48754915
{
48764916
char*match;
@@ -4883,6 +4923,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
48834923
char*rest;
48844924
Oidtypeid=patt_const->consttype;
48854925
boolis_multibyte= (pg_database_encoding_max_length()>1);
4926+
pg_locale_tlocale=0;
4927+
boollocale_is_c= false;
48864928

48874929
/*
48884930
* Should be unnecessary, there are no bytea regex operators defined. As
@@ -4894,6 +4936,28 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
48944936
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
48954937
errmsg("regular-expression matching not supported on type bytea")));
48964938

4939+
if (case_insensitive)
4940+
{
4941+
/* If case-insensitive, we need locale info */
4942+
if (lc_ctype_is_c(collation))
4943+
locale_is_c= true;
4944+
elseif (collation!=DEFAULT_COLLATION_OID)
4945+
{
4946+
if (!OidIsValid(collation))
4947+
{
4948+
/*
4949+
* This typically means that the parser could not resolve a
4950+
* conflict of implicit collations, so report it that way.
4951+
*/
4952+
ereport(ERROR,
4953+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
4954+
errmsg("could not determine which collation to use for regular expression"),
4955+
errhint("Use the COLLATE clause to set the collation explicitly.")));
4956+
}
4957+
locale=pg_newlocale_from_collation(collation);
4958+
}
4959+
}
4960+
48974961
/* the right-hand const is type text for all of these */
48984962
patt=TextDatumGetCString(patt_const->constvalue);
48994963

@@ -4969,17 +5033,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
49695033
patt[pos]=='$')
49705034
break;
49715035

4972-
/*
4973-
* XXX In multibyte character sets, we can't trust isalpha, so assume
4974-
* any multibyte char is potentially case-varying.
4975-
*/
4976-
if (case_insensitive)
4977-
{
4978-
if (is_multibyte&& (unsignedchar)patt[pos] >=0x80)
4979-
break;
4980-
if (isalpha((unsignedchar)patt[pos]))
4981-
break;
4982-
}
5036+
/* Stop if case-varying character (it's sort of a wildcard) */
5037+
if (case_insensitive&&
5038+
pattern_char_isalpha(patt[pos],is_multibyte,locale,locale_is_c))
5039+
break;
49835040

49845041
/*
49855042
* Check for quantifiers. Except for +, this means the preceding
@@ -5004,7 +5061,7 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
50045061
* backslash followed by alphanumeric is an escape, not a quoted
50055062
* character. Must treat it as having multiple possible matches.
50065063
* Note: since only ASCII alphanumerics are escapes, we don't have to
5007-
* be paranoid about multibyte here.
5064+
* be paranoid about multibyte or collations here.
50085065
*/
50095066
if (patt[pos]=='\\')
50105067
{
@@ -5056,24 +5113,24 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
50565113
}
50575114

50585115
Pattern_Prefix_Status
5059-
pattern_fixed_prefix(Const*patt,Pattern_Typeptype,
5116+
pattern_fixed_prefix(Const*patt,Pattern_Typeptype,Oidcollation,
50605117
Const**prefix,Const**rest)
50615118
{
50625119
Pattern_Prefix_Statusresult;
50635120

50645121
switch (ptype)
50655122
{
50665123
casePattern_Type_Like:
5067-
result=like_fixed_prefix(patt, false,prefix,rest);
5124+
result=like_fixed_prefix(patt, false,collation,prefix,rest);
50685125
break;
50695126
casePattern_Type_Like_IC:
5070-
result=like_fixed_prefix(patt, true,prefix,rest);
5127+
result=like_fixed_prefix(patt, true,collation,prefix,rest);
50715128
break;
50725129
casePattern_Type_Regex:
5073-
result=regex_fixed_prefix(patt, false,prefix,rest);
5130+
result=regex_fixed_prefix(patt, false,collation,prefix,rest);
50745131
break;
50755132
casePattern_Type_Regex_IC:
5076-
result=regex_fixed_prefix(patt, true,prefix,rest);
5133+
result=regex_fixed_prefix(patt, true,collation,prefix,rest);
50775134
break;
50785135
default:
50795136
elog(ERROR,"unrecognized ptype: %d", (int)ptype);

‎src/include/utils/selfuncs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
132132

133133
externPattern_Prefix_Statuspattern_fixed_prefix(Const*patt,
134134
Pattern_Typeptype,
135+
Oidcollation,
135136
Const**prefix,
136137
Const**rest);
137138
externConst*make_greater_string(constConst*str_const,FmgrInfo*ltproc);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp