Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 600b953

Browse files
committed
Clean up formatting.c's logic for matching constant strings.
seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. 
This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
1 parent d76652e commit 600b953

File tree

2 files changed

+85
-91
lines changed

2 files changed

+85
-91
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 84 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484

8585
#include"catalog/pg_collation.h"
8686
#include"mb/pg_wchar.h"
87+
#include"parser/scansup.h"
8788
#include"utils/builtins.h"
8889
#include"utils/date.h"
8990
#include"utils/datetime.h"
@@ -280,18 +281,6 @@ static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
280281
* Flags & Options:
281282
* ----------
282283
*/
283-
#defineONE_UPPER1/* Name */
284-
#defineALL_UPPER2/* NAME */
285-
#defineALL_LOWER3/* name */
286-
287-
#defineFULL_SIZ0
288-
289-
#defineMAX_MONTH_LEN9
290-
#defineMAX_MON_LEN3
291-
#defineMAX_DAY_LEN9
292-
#defineMAX_DY_LEN3
293-
#defineMAX_RM_LEN4
294-
295284
#defineTH_UPPER1
296285
#defineTH_LOWER2
297286

@@ -953,7 +942,7 @@ static void parse_format(FormatNode *node, char *str, const KeyWord *kw,
953942

954943
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
955944
TmToChar*in,char*out,Oidcollid);
956-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out);
945+
staticvoidDCH_from_char(FormatNode*node,constchar*in,TmFromChar*out);
957946

958947
#ifdefDEBUG_TO_FROM_CHAR
959948
staticvoiddump_index(constKeyWord*k,constint*index);
@@ -963,13 +952,15 @@ static void dump_node(FormatNode *node, int max);
963952
staticconstchar*get_th(char*num,inttype);
964953
staticchar*str_numth(char*dest,char*num,inttype);
965954
staticintadjust_partial_year_to_2020(intyear);
966-
staticintstrspace_len(char*str);
955+
staticintstrspace_len(constchar*str);
967956
staticvoidfrom_char_set_mode(TmFromChar*tmfc,constFromCharDateModemode);
968957
staticvoidfrom_char_set_int(int*dest,constintvalue,constFormatNode*node);
969-
staticintfrom_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node);
970-
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node);
971-
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
972-
staticintfrom_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,FormatNode*node);
958+
staticintfrom_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node);
959+
staticintfrom_char_parse_int(int*dest,constchar**src,FormatNode*node);
960+
staticintseq_search(constchar*name,constchar*const*array,int*len);
961+
staticintfrom_char_seq_search(int*dest,constchar**src,
962+
constchar*const*array,
963+
FormatNode*node);
973964
staticvoiddo_to_timestamp(text*date_txt,text*fmt,
974965
structpg_tm*tm,fsec_t*fsec);
975966
staticchar*fill_str(char*str,intc,intmax);
@@ -2083,7 +2074,7 @@ adjust_partial_year_to_2020(int year)
20832074

20842075

20852076
staticint
2086-
strspace_len(char*str)
2077+
strspace_len(constchar*str)
20872078
{
20882079
intlen=0;
20892080

@@ -2157,11 +2148,11 @@ from_char_set_int(int *dest, const int value, const FormatNode *node)
21572148
* with DD and MI).
21582149
*/
21592150
staticint
2160-
from_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node)
2151+
from_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node)
21612152
{
21622153
longresult;
21632154
charcopy[DCH_MAX_ITEM_SIZ+1];
2164-
char*init=*src;
2155+
constchar*init=*src;
21652156
intused;
21662157

21672158
/*
@@ -2178,8 +2169,11 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
21782169
* This node is in Fill Mode, or the next node is known to be a
21792170
* non-digit value, so we just slurp as many characters as we can get.
21802171
*/
2172+
char*endptr;
2173+
21812174
errno=0;
2182-
result=strtol(init,src,10);
2175+
result=strtol(init,&endptr,10);
2176+
*src=endptr;
21832177
}
21842178
else
21852179
{
@@ -2247,76 +2241,61 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22472241
* required length explicitly.
22482242
*/
22492243
staticint
2250-
from_char_parse_int(int*dest,char**src,FormatNode*node)
2244+
from_char_parse_int(int*dest,constchar**src,FormatNode*node)
22512245
{
22522246
returnfrom_char_parse_int_len(dest,src,node->key->len,node);
22532247
}
22542248

2255-
/* ----------
2256-
* Sequential search with to upper/lower conversion
2257-
* ----------
2249+
/*
2250+
* Sequentially search null-terminated "array" for a case-insensitive match
2251+
* to the initial character(s) of "name".
2252+
*
2253+
* Returns array index of match, or -1 for no match.
2254+
*
2255+
* *len is set to the length of the match, or 0 for no match.
2256+
*
2257+
* Case-insensitivity is defined per pg_tolower, so this is only
2258+
* suitable for comparisons to ASCII strings.
22582259
*/
22592260
staticint
2260-
seq_search(char*name,constchar*const*array,inttype,intmax,int*len)
2261+
seq_search(constchar*name,constchar*const*array,int*len)
22612262
{
2262-
constchar*p;
2263-
constchar*const*a;
2264-
char*n;
2265-
intlast,
2266-
i;
2263+
unsignedcharfirstc;
2264+
constchar*const*a;
22672265

22682266
*len=0;
22692267

2268+
/* empty string can't match anything */
22702269
if (!*name)
22712270
return-1;
22722271

2273-
/* set first char */
2274-
if (type==ONE_UPPER||type==ALL_UPPER)
2275-
*name=pg_toupper((unsignedchar)*name);
2276-
elseif (type==ALL_LOWER)
2277-
*name=pg_tolower((unsignedchar)*name);
2272+
/* we handle first char specially to gain some speed */
2273+
firstc=pg_tolower((unsignedchar)*name);
22782274

2279-
for (last=0,a=array;*a!=NULL;a++)
2275+
for (a=array;*a!=NULL;a++)
22802276
{
2277+
constchar*p;
2278+
constchar*n;
2279+
22812280
/* compare first chars */
2282-
if (*name!=**a)
2281+
if (pg_tolower((unsignedchar)**a)!=firstc)
22832282
continue;
22842283

2285-
for (i=1,p=*a+1,n=name+1;;n++,p++,i++)
2284+
/* compare rest of string */
2285+
for (p=*a+1,n=name+1;;p++,n++)
22862286
{
2287-
/* search fragment (max) only */
2288-
if (max&&i==max)
2289-
{
2290-
*len=i;
2291-
returna-array;
2292-
}
2293-
/* full size */
2287+
/* return success if we matched whole array entry */
22942288
if (*p=='\0')
22952289
{
2296-
*len=i;
2290+
*len=n-name;
22972291
returna-array;
22982292
}
2299-
/*Not foundinarray 'a' */
2293+
/*else, must have another characterin"name" ... */
23002294
if (*n=='\0')
23012295
break;
2302-
2303-
/*
2304-
* Convert (but convert new chars only)
2305-
*/
2306-
if (i>last)
2307-
{
2308-
if (type==ONE_UPPER||type==ALL_LOWER)
2309-
*n=pg_tolower((unsignedchar)*n);
2310-
elseif (type==ALL_UPPER)
2311-
*n=pg_toupper((unsignedchar)*n);
2312-
last=i;
2313-
}
2314-
2315-
#ifdefDEBUG_TO_FROM_CHAR
2316-
elog(DEBUG_elog_output,"N: %c, P: %c, A: %s (%s)",
2317-
*n,*p,*a,name);
2318-
#endif
2319-
if (*n!=*p)
2296+
/* ... and it must match */
2297+
if (pg_tolower((unsignedchar)*p)!=
2298+
pg_tolower((unsignedchar)*n))
23202299
break;
23212300
}
23222301
}
@@ -2325,28 +2304,43 @@ seq_search(char *name, const char *const * array, int type, int max, int *len)
23252304
}
23262305

23272306
/*
2328-
* Perform a sequential search in 'array' fortextmatching the first 'max'
2329-
*characters of thesource string.
2307+
* Perform a sequential search in 'array' foran entrymatching the first
2308+
*character(s) of the'src' string case-insensitively.
23302309
*
23312310
* If a match is found, copy the array index of the match into the integer
23322311
* pointed to by 'dest', advance 'src' to the end of the part of the string
23332312
* which matched, and return the number of characters consumed.
23342313
*
23352314
* If the string doesn't match, throw an error.
2315+
*
2316+
* 'node' is used only for error reports: node->key->name identifies the
2317+
* field type we were searching for.
23362318
*/
23372319
staticint
2338-
from_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,
2320+
from_char_seq_search(int*dest,constchar**src,constchar*const*array,
23392321
FormatNode*node)
23402322
{
23412323
intlen;
23422324

2343-
*dest=seq_search(*src,array,type,max,&len);
2325+
*dest=seq_search(*src,array,&len);
2326+
23442327
if (len <=0)
23452328
{
2346-
charcopy[DCH_MAX_ITEM_SIZ+1];
2329+
/*
2330+
* In the error report, truncate the string at the next whitespace (if
2331+
* any) to avoid including irrelevant data.
2332+
*/
2333+
char*copy=pstrdup(*src);
2334+
char*c;
23472335

2348-
Assert(max <=DCH_MAX_ITEM_SIZ);
2349-
strlcpy(copy,*src,max+1);
2336+
for (c=copy;*c;c++)
2337+
{
2338+
if (scanner_isspace(*c))
2339+
{
2340+
*c='\0';
2341+
break;
2342+
}
2343+
}
23502344

23512345
ereport(ERROR,
23522346
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
@@ -2935,10 +2929,10 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
29352929
* ----------
29362930
*/
29372931
staticvoid
2938-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out)
2932+
DCH_from_char(FormatNode*node,constchar*in,TmFromChar*out)
29392933
{
29402934
FormatNode*n;
2941-
char*s;
2935+
constchar*s;
29422936
intlen,
29432937
value;
29442938
boolfx_mode= false;
@@ -2975,7 +2969,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
29752969
caseDCH_a_m:
29762970
caseDCH_p_m:
29772971
from_char_seq_search(&value,&s,ampm_strings_long,
2978-
ALL_UPPER,n->key->len,n);
2972+
n);
29792973
from_char_set_int(&out->pm,value %2,n);
29802974
out->clock=CLOCK_12_HOUR;
29812975
break;
@@ -2984,7 +2978,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
29842978
caseDCH_am:
29852979
caseDCH_pm:
29862980
from_char_seq_search(&value,&s,ampm_strings,
2987-
ALL_UPPER,n->key->len,n);
2981+
n);
29882982
from_char_set_int(&out->pm,value %2,n);
29892983
out->clock=CLOCK_12_HOUR;
29902984
break;
@@ -3043,29 +3037,29 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30433037
caseDCH_a_d:
30443038
caseDCH_b_c:
30453039
from_char_seq_search(&value,&s,adbc_strings_long,
3046-
ALL_UPPER,n->key->len,n);
3040+
n);
30473041
from_char_set_int(&out->bc,value %2,n);
30483042
break;
30493043
caseDCH_AD:
30503044
caseDCH_BC:
30513045
caseDCH_ad:
30523046
caseDCH_bc:
30533047
from_char_seq_search(&value,&s,adbc_strings,
3054-
ALL_UPPER,n->key->len,n);
3048+
n);
30553049
from_char_set_int(&out->bc,value %2,n);
30563050
break;
30573051
caseDCH_MONTH:
30583052
caseDCH_Month:
30593053
caseDCH_month:
3060-
from_char_seq_search(&value,&s,months_full,ONE_UPPER,
3061-
MAX_MONTH_LEN,n);
3054+
from_char_seq_search(&value,&s,months_full,
3055+
n);
30623056
from_char_set_int(&out->mm,value+1,n);
30633057
break;
30643058
caseDCH_MON:
30653059
caseDCH_Mon:
30663060
caseDCH_mon:
3067-
from_char_seq_search(&value,&s,months,ONE_UPPER,
3068-
MAX_MON_LEN,n);
3061+
from_char_seq_search(&value,&s,months,
3062+
n);
30693063
from_char_set_int(&out->mm,value+1,n);
30703064
break;
30713065
caseDCH_MM:
@@ -3075,16 +3069,16 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30753069
caseDCH_DAY:
30763070
caseDCH_Day:
30773071
caseDCH_day:
3078-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3079-
MAX_DAY_LEN,n);
3072+
from_char_seq_search(&value,&s,days,
3073+
n);
30803074
from_char_set_int(&out->d,value,n);
30813075
out->d++;
30823076
break;
30833077
caseDCH_DY:
30843078
caseDCH_Dy:
30853079
caseDCH_dy:
3086-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3087-
MAX_DY_LEN,n);
3080+
from_char_seq_search(&value,&s,days,
3081+
n);
30883082
from_char_set_int(&out->d,value,n);
30893083
out->d++;
30903084
break;
@@ -3183,12 +3177,12 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31833177
break;
31843178
caseDCH_RM:
31853179
from_char_seq_search(&value,&s,rm_months_upper,
3186-
ALL_UPPER,MAX_RM_LEN,n);
3180+
n);
31873181
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
31883182
break;
31893183
caseDCH_rm:
31903184
from_char_seq_search(&value,&s,rm_months_lower,
3191-
ALL_LOWER,MAX_RM_LEN,n);
3185+
n);
31923186
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
31933187
break;
31943188
caseDCH_W:

‎src/test/regress/expected/horology.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2808,7 +2808,7 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
28082808
(1 row)
28092809

28102810
SELECT to_timestamp('97/Feb/16', 'YYMonDD');
2811-
ERROR: invalid value "/Fe" for "Mon"
2811+
ERROR: invalid value "/Feb/16" for "Mon"
28122812
DETAIL: The given value did not match any of the allowed values for this field.
28132813
SELECT to_timestamp('19971116', 'YYYYMMDD');
28142814
to_timestamp

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp