Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4c70098

Browse files
committed
Clean up formatting.c's logic for matching constant strings.
seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular:
* seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches.
* seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.)
* seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too.
* from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera).
In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems.
The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches.
Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
1 parent cd69ec6 commit 4c70098

File tree

2 files changed

+85
-91
lines changed

2 files changed

+85
-91
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 83 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@
8888
#include"catalog/pg_collation.h"
8989
#include"catalog/pg_type.h"
9090
#include"mb/pg_wchar.h"
91+
#include"parser/scansup.h"
9192
#include"utils/builtins.h"
9293
#include"utils/date.h"
9394
#include"utils/datetime.h"
@@ -317,16 +318,6 @@ static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
317318
* Flags & Options:
318319
* ----------
319320
*/
320-
#defineONE_UPPER1/* Name */
321-
#defineALL_UPPER2/* NAME */
322-
#defineALL_LOWER3/* name */
323-
324-
#defineMAX_MONTH_LEN9
325-
#defineMAX_MON_LEN3
326-
#defineMAX_DAY_LEN9
327-
#defineMAX_DY_LEN3
328-
#defineMAX_RM_LEN4
329-
330321
#defineTH_UPPER1
331322
#defineTH_LOWER2
332323

@@ -1048,7 +1039,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
10481039

10491040
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
10501041
TmToChar*in,char*out,Oidcollid);
1051-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out,
1042+
staticvoidDCH_from_char(FormatNode*node,constchar*in,TmFromChar*out,
10521043
boolstd,bool*have_error);
10531044

10541045
#ifdefDEBUG_TO_FROM_CHAR
@@ -1059,18 +1050,18 @@ static void dump_node(FormatNode *node, int max);
10591050
staticconstchar*get_th(char*num,inttype);
10601051
staticchar*str_numth(char*dest,char*num,inttype);
10611052
staticintadjust_partial_year_to_2020(intyear);
1062-
staticintstrspace_len(char*str);
1053+
staticintstrspace_len(constchar*str);
10631054
staticvoidfrom_char_set_mode(TmFromChar*tmfc,constFromCharDateModemode,
10641055
bool*have_error);
10651056
staticvoidfrom_char_set_int(int*dest,constintvalue,constFormatNode*node,
10661057
bool*have_error);
1067-
staticintfrom_char_parse_int_len(int*dest,char**src,constintlen,
1058+
staticintfrom_char_parse_int_len(int*dest,constchar**src,constintlen,
10681059
FormatNode*node,bool*have_error);
1069-
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node,
1060+
staticintfrom_char_parse_int(int*dest,constchar**src,FormatNode*node,
10701061
bool*have_error);
1071-
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
1072-
staticintfrom_char_seq_search(int*dest,char**src,
1073-
constchar*const*array,inttype,intmax,
1062+
staticintseq_search(constchar*name,constchar*const*array,int*len);
1063+
staticintfrom_char_seq_search(int*dest,constchar**src,
1064+
constchar*const*array,
10741065
FormatNode*node,bool*have_error);
10751066
staticvoiddo_to_timestamp(text*date_txt,text*fmt,boolstd,
10761067
structpg_tm*tm,fsec_t*fsec,int*fprec,
@@ -2259,7 +2250,7 @@ adjust_partial_year_to_2020(int year)
22592250

22602251

22612252
staticint
2262-
strspace_len(char*str)
2253+
strspace_len(constchar*str)
22632254
{
22642255
intlen=0;
22652256

@@ -2348,12 +2339,12 @@ from_char_set_int(int *dest, const int value, const FormatNode *node,
23482339
* and -1 is returned.
23492340
*/
23502341
staticint
2351-
from_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node,
2342+
from_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node,
23522343
bool*have_error)
23532344
{
23542345
longresult;
23552346
charcopy[DCH_MAX_ITEM_SIZ+1];
2356-
char*init=*src;
2347+
constchar*init=*src;
23572348
intused;
23582349

23592350
/*
@@ -2370,8 +2361,11 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node,
23702361
* This node is in Fill Mode, or the next node is known to be a
23712362
* non-digit value, so we just slurp as many characters as we can get.
23722363
*/
2364+
char*endptr;
2365+
23732366
errno=0;
2374-
result=strtol(init,src,10);
2367+
result=strtol(init,&endptr,10);
2368+
*src=endptr;
23752369
}
23762370
else
23772371
{
@@ -2448,76 +2442,61 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node,
24482442
* required length explicitly.
24492443
*/
24502444
staticint
2451-
from_char_parse_int(int*dest,char**src,FormatNode*node,bool*have_error)
2445+
from_char_parse_int(int*dest,constchar**src,FormatNode*node,bool*have_error)
24522446
{
24532447
returnfrom_char_parse_int_len(dest,src,node->key->len,node,have_error);
24542448
}
24552449

2456-
/* ----------
2457-
* Sequential search with to upper/lower conversion
2458-
* ----------
2450+
/*
2451+
* Sequentially search null-terminated "array" for a case-insensitive match
2452+
* to the initial character(s) of "name".
2453+
*
2454+
* Returns array index of match, or -1 for no match.
2455+
*
2456+
* *len is set to the length of the match, or 0 for no match.
2457+
*
2458+
* Case-insensitivity is defined per pg_ascii_tolower, so this is only
2459+
* suitable for comparisons to ASCII strings.
24592460
*/
24602461
staticint
2461-
seq_search(char*name,constchar*const*array,inttype,intmax,int*len)
2462+
seq_search(constchar*name,constchar*const*array,int*len)
24622463
{
2463-
constchar*p;
2464+
unsignedcharfirstc;
24642465
constchar*const*a;
2465-
char*n;
2466-
intlast,
2467-
i;
24682466

24692467
*len=0;
24702468

2469+
/* empty string can't match anything */
24712470
if (!*name)
24722471
return-1;
24732472

2474-
/* set first char */
2475-
if (type==ONE_UPPER||type==ALL_UPPER)
2476-
*name=pg_toupper((unsignedchar)*name);
2477-
elseif (type==ALL_LOWER)
2478-
*name=pg_tolower((unsignedchar)*name);
2473+
/* we handle first char specially to gain some speed */
2474+
firstc=pg_ascii_tolower((unsignedchar)*name);
24792475

2480-
for (last=0,a=array;*a!=NULL;a++)
2476+
for (a=array;*a!=NULL;a++)
24812477
{
2478+
constchar*p;
2479+
constchar*n;
2480+
24822481
/* compare first chars */
2483-
if (*name!=**a)
2482+
if (pg_ascii_tolower((unsignedchar)**a)!=firstc)
24842483
continue;
24852484

2486-
for (i=1,p=*a+1,n=name+1;;n++,p++,i++)
2485+
/* compare rest of string */
2486+
for (p=*a+1,n=name+1;;p++,n++)
24872487
{
2488-
/* search fragment (max) only */
2489-
if (max&&i==max)
2490-
{
2491-
*len=i;
2492-
returna-array;
2493-
}
2494-
/* full size */
2488+
/* return success if we matched whole array entry */
24952489
if (*p=='\0')
24962490
{
2497-
*len=i;
2491+
*len=n-name;
24982492
returna-array;
24992493
}
2500-
/*Not foundinarray 'a' */
2494+
/*else, must have another characterin"name" ... */
25012495
if (*n=='\0')
25022496
break;
2503-
2504-
/*
2505-
* Convert (but convert new chars only)
2506-
*/
2507-
if (i>last)
2508-
{
2509-
if (type==ONE_UPPER||type==ALL_LOWER)
2510-
*n=pg_tolower((unsignedchar)*n);
2511-
elseif (type==ALL_UPPER)
2512-
*n=pg_toupper((unsignedchar)*n);
2513-
last=i;
2514-
}
2515-
2516-
#ifdefDEBUG_TO_FROM_CHAR
2517-
elog(DEBUG_elog_output,"N: %c, P: %c, A: %s (%s)",
2518-
*n,*p,*a,name);
2519-
#endif
2520-
if (*n!=*p)
2497+
/* ... and it must match */
2498+
if (pg_ascii_tolower((unsignedchar)*p)!=
2499+
pg_ascii_tolower((unsignedchar)*n))
25212500
break;
25222501
}
25232502
}
@@ -2526,29 +2505,44 @@ seq_search(char *name, const char *const *array, int type, int max, int *len)
25262505
}
25272506

25282507
/*
2529-
* Perform a sequential search in 'array' fortextmatching the first 'max'
2530-
*characters of thesource string.
2508+
* Perform a sequential search in 'array' foran entrymatching the first
2509+
*character(s) of the'src' string case-insensitively.
25312510
*
25322511
* If a match is found, copy the array index of the match into the integer
25332512
* pointed to by 'dest', advance 'src' to the end of the part of the string
25342513
* which matched, and return the number of characters consumed.
25352514
*
25362515
* If the string doesn't match, throw an error if 'have_error' is NULL,
25372516
* otherwise set '*have_error' and return -1.
2517+
*
2518+
* 'node' is used only for error reports: node->key->name identifies the
2519+
* field type we were searching for.
25382520
*/
25392521
staticint
2540-
from_char_seq_search(int*dest,char**src,constchar*const*array,inttype,
2541-
intmax,FormatNode*node,bool*have_error)
2522+
from_char_seq_search(int*dest,constchar**src,constchar*const*array,
2523+
FormatNode*node,bool*have_error)
25422524
{
25432525
intlen;
25442526

2545-
*dest=seq_search(*src,array,type,max,&len);
2527+
*dest=seq_search(*src,array,&len);
2528+
25462529
if (len <=0)
25472530
{
2548-
charcopy[DCH_MAX_ITEM_SIZ+1];
2531+
/*
2532+
* In the error report, truncate the string at the next whitespace (if
2533+
* any) to avoid including irrelevant data.
2534+
*/
2535+
char*copy=pstrdup(*src);
2536+
char*c;
25492537

2550-
Assert(max <=DCH_MAX_ITEM_SIZ);
2551-
strlcpy(copy,*src,max+1);
2538+
for (c=copy;*c;c++)
2539+
{
2540+
if (scanner_isspace(*c))
2541+
{
2542+
*c='\0';
2543+
break;
2544+
}
2545+
}
25522546

25532547
RETURN_ERROR(ereport(ERROR,
25542548
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
@@ -3166,11 +3160,11 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
31663160
* ----------
31673161
*/
31683162
staticvoid
3169-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out,boolstd,
3163+
DCH_from_char(FormatNode*node,constchar*in,TmFromChar*out,boolstd,
31703164
bool*have_error)
31713165
{
31723166
FormatNode*n;
3173-
char*s;
3167+
constchar*s;
31743168
intlen,
31753169
value;
31763170
boolfx_mode=std;
@@ -3279,7 +3273,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
32793273
caseDCH_a_m:
32803274
caseDCH_p_m:
32813275
from_char_seq_search(&value,&s,ampm_strings_long,
3282-
ALL_UPPER,n->key->len,n,have_error);
3276+
n,have_error);
32833277
CHECK_ERROR;
32843278
from_char_set_int(&out->pm,value %2,n,have_error);
32853279
CHECK_ERROR;
@@ -3290,7 +3284,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
32903284
caseDCH_am:
32913285
caseDCH_pm:
32923286
from_char_seq_search(&value,&s,ampm_strings,
3293-
ALL_UPPER,n->key->len,n,have_error);
3287+
n,have_error);
32943288
CHECK_ERROR;
32953289
from_char_set_int(&out->pm,value %2,n,have_error);
32963290
CHECK_ERROR;
@@ -3403,7 +3397,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
34033397
caseDCH_a_d:
34043398
caseDCH_b_c:
34053399
from_char_seq_search(&value,&s,adbc_strings_long,
3406-
ALL_UPPER,n->key->len,n,have_error);
3400+
n,have_error);
34073401
CHECK_ERROR;
34083402
from_char_set_int(&out->bc,value %2,n,have_error);
34093403
CHECK_ERROR;
@@ -3413,25 +3407,25 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
34133407
caseDCH_ad:
34143408
caseDCH_bc:
34153409
from_char_seq_search(&value,&s,adbc_strings,
3416-
ALL_UPPER,n->key->len,n,have_error);
3410+
n,have_error);
34173411
CHECK_ERROR;
34183412
from_char_set_int(&out->bc,value %2,n,have_error);
34193413
CHECK_ERROR;
34203414
break;
34213415
caseDCH_MONTH:
34223416
caseDCH_Month:
34233417
caseDCH_month:
3424-
from_char_seq_search(&value,&s,months_full,ONE_UPPER,
3425-
MAX_MONTH_LEN,n,have_error);
3418+
from_char_seq_search(&value,&s,months_full,
3419+
n,have_error);
34263420
CHECK_ERROR;
34273421
from_char_set_int(&out->mm,value+1,n,have_error);
34283422
CHECK_ERROR;
34293423
break;
34303424
caseDCH_MON:
34313425
caseDCH_Mon:
34323426
caseDCH_mon:
3433-
from_char_seq_search(&value,&s,months,ONE_UPPER,
3434-
MAX_MON_LEN,n,have_error);
3427+
from_char_seq_search(&value,&s,months,
3428+
n,have_error);
34353429
CHECK_ERROR;
34363430
from_char_set_int(&out->mm,value+1,n,have_error);
34373431
CHECK_ERROR;
@@ -3444,8 +3438,8 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
34443438
caseDCH_DAY:
34453439
caseDCH_Day:
34463440
caseDCH_day:
3447-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3448-
MAX_DAY_LEN,n,have_error);
3441+
from_char_seq_search(&value,&s,days,
3442+
n,have_error);
34493443
CHECK_ERROR;
34503444
from_char_set_int(&out->d,value,n,have_error);
34513445
CHECK_ERROR;
@@ -3454,8 +3448,8 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
34543448
caseDCH_DY:
34553449
caseDCH_Dy:
34563450
caseDCH_dy:
3457-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3458-
MAX_DY_LEN,n,have_error);
3451+
from_char_seq_search(&value,&s,days,
3452+
n,have_error);
34593453
CHECK_ERROR;
34603454
from_char_set_int(&out->d,value,n,have_error);
34613455
CHECK_ERROR;
@@ -3572,15 +3566,15 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
35723566
break;
35733567
caseDCH_RM:
35743568
from_char_seq_search(&value,&s,rm_months_upper,
3575-
ALL_UPPER,MAX_RM_LEN,n,have_error);
3569+
n,have_error);
35763570
CHECK_ERROR;
35773571
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,
35783572
n,have_error);
35793573
CHECK_ERROR;
35803574
break;
35813575
caseDCH_rm:
35823576
from_char_seq_search(&value,&s,rm_months_lower,
3583-
ALL_LOWER,MAX_RM_LEN,n,have_error);
3577+
n,have_error);
35843578
CHECK_ERROR;
35853579
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,
35863580
n,have_error);

‎src/test/regress/expected/horology.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,7 +2616,7 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
26162616
(1 row)
26172617

26182618
SELECT to_timestamp('97/Feb/16', 'YYMonDD');
2619-
ERROR: invalid value "/Fe" for "Mon"
2619+
ERROR: invalid value "/Feb/16" for "Mon"
26202620
DETAIL: The given value did not match any of the allowed values for this field.
26212621
SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
26222622
to_timestamp
@@ -2941,7 +2941,7 @@ SELECT to_timestamp('2000 ++ JUN', 'YYYY MON');
29412941
(1 row)
29422942

29432943
SELECT to_timestamp('2000 + + JUN', 'YYYY MON');
2944-
ERROR: invalid value "+ J" for "MON"
2944+
ERROR: invalid value "+" for "MON"
29452945
DETAIL: The given value did not match any of the allowed values for this field.
29462946
SELECT to_timestamp('2000 + + JUN', 'YYYY MON');
29472947
to_timestamp

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp