Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 00f11f4

Browse files
committed
Fix ILIKE to honor collation when working in single-byte encodings.
The original collation patch only fixed the multi-byte code path. This change also ensures that ILIKE's idea of the case-folding rules is exactly the same as str_tolower's.
1 parent f89e4df, commit 00f11f4

File tree

2 files changed

+72
-20
lines changed

2 files changed

+72
-20
lines changed

‎src/backend/utils/adt/like.c

Lines changed: 65 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -19,24 +19,30 @@
1919

2020
#include<ctype.h>
2121

22+
#include"catalog/pg_collation.h"
2223
#include"mb/pg_wchar.h"
2324
#include"utils/builtins.h"
25+
#include"utils/pg_locale.h"
2426

2527

2628
#defineLIKE_TRUE1
2729
#defineLIKE_FALSE0
2830
#defineLIKE_ABORT(-1)
2931

3032

31-
staticintSB_MatchText(char*t,inttlen,char*p,intplen);
33+
staticintSB_MatchText(char*t,inttlen,char*p,intplen,
34+
pg_locale_tlocale,boollocale_is_c);
3235
statictext*SB_do_like_escape(text*,text*);
3336

34-
staticintMB_MatchText(char*t,inttlen,char*p,intplen);
37+
staticintMB_MatchText(char*t,inttlen,char*p,intplen,
38+
pg_locale_tlocale,boollocale_is_c);
3539
statictext*MB_do_like_escape(text*,text*);
3640

37-
staticintUTF8_MatchText(char*t,inttlen,char*p,intplen);
41+
staticintUTF8_MatchText(char*t,inttlen,char*p,intplen,
42+
pg_locale_tlocale,boollocale_is_c);
3843

39-
staticintSB_IMatchText(char*t,inttlen,char*p,intplen);
44+
staticintSB_IMatchText(char*t,inttlen,char*p,intplen,
45+
pg_locale_tlocale,boollocale_is_c);
4046

4147
staticintGenericMatchText(char*s,intslen,char*p,intplen);
4248
staticintGeneric_Text_IC_like(text*str,text*pat,Oidcollation);
@@ -78,6 +84,24 @@ wchareq(char *p1, char *p2)
7884
* comparison.This should be revisited when we install better locale support.
7985
*/
8086

87+
/*
88+
* We do handle case-insensitive matching for single-byte encodings using
89+
* fold-on-the-fly processing, however.
90+
*/
91+
staticchar
92+
SB_lower_char(unsignedcharc,pg_locale_tlocale,boollocale_is_c)
93+
{
94+
if (locale_is_c)
95+
returnpg_ascii_tolower(c);
96+
#ifdefHAVE_LOCALE_T
97+
elseif (locale)
98+
returntolower_l(c,locale);
99+
#endif
100+
else
101+
returnpg_tolower(c);
102+
}
103+
104+
81105
#defineNextByte(p,plen)((p)++, (plen)--)
82106

83107
/* Set up to compile like_match.c for multibyte characters */
@@ -107,7 +131,7 @@ wchareq(char *p1, char *p2)
107131
#include"like_match.c"
108132

109133
/* setup to compile like_match.c for single byte case insensitive matches */
110-
#defineMATCH_LOWER
134+
#defineMATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
111135
#defineNextChar(p,plen) NextByte((p), (plen))
112136
#defineMatchText SB_IMatchText
113137

@@ -121,15 +145,16 @@ wchareq(char *p1, char *p2)
121145

122146
#include"like_match.c"
123147

148+
/* Generic for all cases not requiring inline case-folding */
124149
staticinlineint
125150
GenericMatchText(char*s,intslen,char*p,intplen)
126151
{
127152
if (pg_database_encoding_max_length()==1)
128-
returnSB_MatchText(s,slen,p,plen);
153+
returnSB_MatchText(s,slen,p,plen,0, true);
129154
elseif (GetDatabaseEncoding()==PG_UTF8)
130-
returnUTF8_MatchText(s,slen,p,plen);
155+
returnUTF8_MatchText(s,slen,p,plen,0, true);
131156
else
132-
returnMB_MatchText(s,slen,p,plen);
157+
returnMB_MatchText(s,slen,p,plen,0, true);
133158
}
134159

135160
staticinlineint
@@ -142,8 +167,8 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
142167

143168
/*
144169
* For efficiency reasons, in the single byte case we don't call lower()
145-
* on the pattern and text, but instead callto_lower on each character.
146-
* In the multi-byte case we don't have much choice :-(
170+
* on the pattern and text, but instead callSB_lower_char on each
171+
*character.In the multi-byte case we don't have much choice :-(
147172
*/
148173

149174
if (pg_database_encoding_max_length()>1)
@@ -156,17 +181,42 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
156181
s=VARDATA(str);
157182
slen= (VARSIZE(str)-VARHDRSZ);
158183
if (GetDatabaseEncoding()==PG_UTF8)
159-
returnUTF8_MatchText(s,slen,p,plen);
184+
returnUTF8_MatchText(s,slen,p,plen,0, true);
160185
else
161-
returnMB_MatchText(s,slen,p,plen);
186+
returnMB_MatchText(s,slen,p,plen,0, true);
162187
}
163188
else
164189
{
190+
/*
191+
* Here we need to prepare locale information for SB_lower_char.
192+
* This should match the methods used in str_tolower().
193+
*/
194+
pg_locale_tlocale=0;
195+
boollocale_is_c= false;
196+
197+
if (lc_ctype_is_c(collation))
198+
locale_is_c= true;
199+
elseif (collation!=DEFAULT_COLLATION_OID)
200+
{
201+
if (!OidIsValid(collation))
202+
{
203+
/*
204+
* This typically means that the parser could not resolve a
205+
* conflict of implicit collations, so report it that way.
206+
*/
207+
ereport(ERROR,
208+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
209+
errmsg("could not determine which collation to use for ILIKE"),
210+
errhint("Use the COLLATE clause to set the collation explicitly.")));
211+
}
212+
locale=pg_newlocale_from_collation(collation);
213+
}
214+
165215
p=VARDATA_ANY(pat);
166216
plen=VARSIZE_ANY_EXHDR(pat);
167217
s=VARDATA_ANY(str);
168218
slen=VARSIZE_ANY_EXHDR(str);
169-
returnSB_IMatchText(s,slen,p,plen);
219+
returnSB_IMatchText(s,slen,p,plen,locale,locale_is_c);
170220
}
171221
}
172222

@@ -274,7 +324,7 @@ bytealike(PG_FUNCTION_ARGS)
274324
p=VARDATA_ANY(pat);
275325
plen=VARSIZE_ANY_EXHDR(pat);
276326

277-
result= (SB_MatchText(s,slen,p,plen)==LIKE_TRUE);
327+
result= (SB_MatchText(s,slen,p,plen,0, true)==LIKE_TRUE);
278328

279329
PG_RETURN_BOOL(result);
280330
}
@@ -295,7 +345,7 @@ byteanlike(PG_FUNCTION_ARGS)
295345
p=VARDATA_ANY(pat);
296346
plen=VARSIZE_ANY_EXHDR(pat);
297347

298-
result= (SB_MatchText(s,slen,p,plen)!=LIKE_TRUE);
348+
result= (SB_MatchText(s,slen,p,plen,0, true)!=LIKE_TRUE);
299349

300350
PG_RETURN_BOOL(result);
301351
}

‎src/backend/utils/adt/like_match.c

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
*
66
* This file is included by like.c four times, to provide matching code for
77
* (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
8-
* and (4) case insensitive matches in singlebyte encodings.
8+
* and (4) case insensitive matches in single-byte encodings.
99
* (UTF8 is a special case because we can use a much more efficient version
1010
* of NextChar than can be used for general multi-byte encodings.)
1111
*
@@ -14,7 +14,7 @@
1414
* NextChar
1515
* MatchText - to name of function wanted
1616
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17-
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars
17+
* MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
1818
*
1919
* Copyright (c) 1996-2011, PostgreSQL Global Development Group
2020
*
@@ -70,13 +70,14 @@
7070
*/
7171

7272
#ifdefMATCH_LOWER
73-
#defineGETCHAR(t)((char) tolower((unsigned char) (t)))
73+
#defineGETCHAR(t)MATCH_LOWER(t)
7474
#else
7575
#defineGETCHAR(t) (t)
7676
#endif
7777

7878
staticint
79-
MatchText(char*t,inttlen,char*p,intplen)
79+
MatchText(char*t,inttlen,char*p,intplen,
80+
pg_locale_tlocale,boollocale_is_c)
8081
{
8182
/* Fast path for match-everything pattern */
8283
if (plen==1&&*p=='%')
@@ -170,7 +171,8 @@ MatchText(char *t, int tlen, char *p, int plen)
170171
{
171172
if (GETCHAR(*t)==firstpat)
172173
{
173-
intmatched=MatchText(t,tlen,p,plen);
174+
intmatched=MatchText(t,tlen,p,plen,
175+
locale,locale_is_c);
174176

175177
if (matched!=LIKE_FALSE)
176178
returnmatched;/* TRUE or ABORT */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp