Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit be13f22

Browse files
committed
Clean up formatting.c's logic for matching constant strings.
seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular:

* seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches.

* seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.)

* seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that.
This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too.

* from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera).

In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems.

The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches.

Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
1 parent 817a1b8 commit be13f22

File tree

2 files changed

+85
-91
lines changed

2 files changed

+85
-91
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 83 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787

8888
#include"catalog/pg_collation.h"
8989
#include"mb/pg_wchar.h"
90+
#include"parser/scansup.h"
9091
#include"utils/builtins.h"
9192
#include"utils/date.h"
9293
#include"utils/datetime.h"
@@ -278,18 +279,6 @@ static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
278279
* Flags & Options:
279280
* ----------
280281
*/
281-
#defineONE_UPPER1/* Name */
282-
#defineALL_UPPER2/* NAME */
283-
#defineALL_LOWER3/* name */
284-
285-
#defineFULL_SIZ0
286-
287-
#defineMAX_MONTH_LEN9
288-
#defineMAX_MON_LEN3
289-
#defineMAX_DAY_LEN9
290-
#defineMAX_DY_LEN3
291-
#defineMAX_RM_LEN4
292-
293282
#defineTH_UPPER1
294283
#defineTH_LOWER2
295284

@@ -977,7 +966,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
977966

978967
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
979968
TmToChar*in,char*out,Oidcollid);
980-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out);
969+
staticvoidDCH_from_char(FormatNode*node,constchar*in,TmFromChar*out);
981970

982971
#ifdefDEBUG_TO_FROM_CHAR
983972
staticvoiddump_index(constKeyWord*k,constint*index);
@@ -987,13 +976,15 @@ static void dump_node(FormatNode *node, int max);
987976
staticconstchar*get_th(char*num,inttype);
988977
staticchar*str_numth(char*dest,char*num,inttype);
989978
staticintadjust_partial_year_to_2020(intyear);
990-
staticintstrspace_len(char*str);
979+
staticintstrspace_len(constchar*str);
991980
staticvoidfrom_char_set_mode(TmFromChar*tmfc,constFromCharDateModemode);
992981
staticvoidfrom_char_set_int(int*dest,constintvalue,constFormatNode*node);
993-
staticintfrom_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node);
994-
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node);
995-
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
996-
staticintfrom_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,FormatNode*node);
982+
staticintfrom_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node);
983+
staticintfrom_char_parse_int(int*dest,constchar**src,FormatNode*node);
984+
staticintseq_search(constchar*name,constchar*const*array,int*len);
985+
staticintfrom_char_seq_search(int*dest,constchar**src,
986+
constchar*const*array,
987+
FormatNode*node);
997988
staticvoiddo_to_timestamp(text*date_txt,text*fmt,
998989
structpg_tm*tm,fsec_t*fsec);
999990
staticchar*fill_str(char*str,intc,intmax);
@@ -2157,7 +2148,7 @@ adjust_partial_year_to_2020(int year)
21572148

21582149

21592150
staticint
2160-
strspace_len(char*str)
2151+
strspace_len(constchar*str)
21612152
{
21622153
intlen=0;
21632154

@@ -2231,11 +2222,11 @@ from_char_set_int(int *dest, const int value, const FormatNode *node)
22312222
* with DD and MI).
22322223
*/
22332224
staticint
2234-
from_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node)
2225+
from_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node)
22352226
{
22362227
longresult;
22372228
charcopy[DCH_MAX_ITEM_SIZ+1];
2238-
char*init=*src;
2229+
constchar*init=*src;
22392230
intused;
22402231

22412232
/*
@@ -2252,8 +2243,11 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22522243
* This node is in Fill Mode, or the next node is known to be a
22532244
* non-digit value, so we just slurp as many characters as we can get.
22542245
*/
2246+
char*endptr;
2247+
22552248
errno=0;
2256-
result=strtol(init,src,10);
2249+
result=strtol(init,&endptr,10);
2250+
*src=endptr;
22572251
}
22582252
else
22592253
{
@@ -2321,76 +2315,61 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
23212315
* required length explicitly.
23222316
*/
23232317
staticint
2324-
from_char_parse_int(int*dest,char**src,FormatNode*node)
2318+
from_char_parse_int(int*dest,constchar**src,FormatNode*node)
23252319
{
23262320
returnfrom_char_parse_int_len(dest,src,node->key->len,node);
23272321
}
23282322

2329-
/* ----------
2330-
* Sequential search with to upper/lower conversion
2331-
* ----------
2323+
/*
2324+
* Sequentially search null-terminated "array" for a case-insensitive match
2325+
* to the initial character(s) of "name".
2326+
*
2327+
* Returns array index of match, or -1 for no match.
2328+
*
2329+
* *len is set to the length of the match, or 0 for no match.
2330+
*
2331+
* Case-insensitivity is defined per pg_tolower, so this is only
2332+
* suitable for comparisons to ASCII strings.
23322333
*/
23332334
staticint
2334-
seq_search(char*name,constchar*const*array,inttype,intmax,int*len)
2335+
seq_search(constchar*name,constchar*const*array,int*len)
23352336
{
2336-
constchar*p;
2337+
unsignedcharfirstc;
23372338
constchar*const*a;
2338-
char*n;
2339-
intlast,
2340-
i;
23412339

23422340
*len=0;
23432341

2342+
/* empty string can't match anything */
23442343
if (!*name)
23452344
return-1;
23462345

2347-
/* set first char */
2348-
if (type==ONE_UPPER||type==ALL_UPPER)
2349-
*name=pg_toupper((unsignedchar)*name);
2350-
elseif (type==ALL_LOWER)
2351-
*name=pg_tolower((unsignedchar)*name);
2346+
/* we handle first char specially to gain some speed */
2347+
firstc=pg_tolower((unsignedchar)*name);
23522348

2353-
for (last=0,a=array;*a!=NULL;a++)
2349+
for (a=array;*a!=NULL;a++)
23542350
{
2351+
constchar*p;
2352+
constchar*n;
2353+
23552354
/* compare first chars */
2356-
if (*name!=**a)
2355+
if (pg_tolower((unsignedchar)**a)!=firstc)
23572356
continue;
23582357

2359-
for (i=1,p=*a+1,n=name+1;;n++,p++,i++)
2358+
/* compare rest of string */
2359+
for (p=*a+1,n=name+1;;p++,n++)
23602360
{
2361-
/* search fragment (max) only */
2362-
if (max&&i==max)
2363-
{
2364-
*len=i;
2365-
returna-array;
2366-
}
2367-
/* full size */
2361+
/* return success if we matched whole array entry */
23682362
if (*p=='\0')
23692363
{
2370-
*len=i;
2364+
*len=n-name;
23712365
returna-array;
23722366
}
2373-
/*Not foundinarray 'a' */
2367+
/*else, must have another characterin"name" ... */
23742368
if (*n=='\0')
23752369
break;
2376-
2377-
/*
2378-
* Convert (but convert new chars only)
2379-
*/
2380-
if (i>last)
2381-
{
2382-
if (type==ONE_UPPER||type==ALL_LOWER)
2383-
*n=pg_tolower((unsignedchar)*n);
2384-
elseif (type==ALL_UPPER)
2385-
*n=pg_toupper((unsignedchar)*n);
2386-
last=i;
2387-
}
2388-
2389-
#ifdefDEBUG_TO_FROM_CHAR
2390-
elog(DEBUG_elog_output,"N: %c, P: %c, A: %s (%s)",
2391-
*n,*p,*a,name);
2392-
#endif
2393-
if (*n!=*p)
2370+
/* ... and it must match */
2371+
if (pg_tolower((unsignedchar)*p)!=
2372+
pg_tolower((unsignedchar)*n))
23942373
break;
23952374
}
23962375
}
@@ -2399,28 +2378,43 @@ seq_search(char *name, const char *const *array, int type, int max, int *len)
23992378
}
24002379

24012380
/*
2402-
* Perform a sequential search in 'array' fortextmatching the first 'max'
2403-
*characters of thesource string.
2381+
* Perform a sequential search in 'array' foran entrymatching the first
2382+
*character(s) of the'src' string case-insensitively.
24042383
*
24052384
* If a match is found, copy the array index of the match into the integer
24062385
* pointed to by 'dest', advance 'src' to the end of the part of the string
24072386
* which matched, and return the number of characters consumed.
24082387
*
24092388
* If the string doesn't match, throw an error.
2389+
*
2390+
* 'node' is used only for error reports: node->key->name identifies the
2391+
* field type we were searching for.
24102392
*/
24112393
staticint
2412-
from_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,
2394+
from_char_seq_search(int*dest,constchar**src,constchar*const*array,
24132395
FormatNode*node)
24142396
{
24152397
intlen;
24162398

2417-
*dest=seq_search(*src,array,type,max,&len);
2399+
*dest=seq_search(*src,array,&len);
2400+
24182401
if (len <=0)
24192402
{
2420-
charcopy[DCH_MAX_ITEM_SIZ+1];
2403+
/*
2404+
* In the error report, truncate the string at the next whitespace (if
2405+
* any) to avoid including irrelevant data.
2406+
*/
2407+
char*copy=pstrdup(*src);
2408+
char*c;
24212409

2422-
Assert(max <=DCH_MAX_ITEM_SIZ);
2423-
strlcpy(copy,*src,max+1);
2410+
for (c=copy;*c;c++)
2411+
{
2412+
if (scanner_isspace(*c))
2413+
{
2414+
*c='\0';
2415+
break;
2416+
}
2417+
}
24242418

24252419
ereport(ERROR,
24262420
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
@@ -3019,10 +3013,10 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
30193013
* ----------
30203014
*/
30213015
staticvoid
3022-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out)
3016+
DCH_from_char(FormatNode*node,constchar*in,TmFromChar*out)
30233017
{
30243018
FormatNode*n;
3025-
char*s;
3019+
constchar*s;
30263020
intlen,
30273021
value;
30283022
boolfx_mode= false;
@@ -3114,7 +3108,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31143108
caseDCH_a_m:
31153109
caseDCH_p_m:
31163110
from_char_seq_search(&value,&s,ampm_strings_long,
3117-
ALL_UPPER,n->key->len,n);
3111+
n);
31183112
from_char_set_int(&out->pm,value %2,n);
31193113
out->clock=CLOCK_12_HOUR;
31203114
break;
@@ -3123,7 +3117,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31233117
caseDCH_am:
31243118
caseDCH_pm:
31253119
from_char_seq_search(&value,&s,ampm_strings,
3126-
ALL_UPPER,n->key->len,n);
3120+
n);
31273121
from_char_set_int(&out->pm,value %2,n);
31283122
out->clock=CLOCK_12_HOUR;
31293123
break;
@@ -3214,29 +3208,29 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
32143208
caseDCH_a_d:
32153209
caseDCH_b_c:
32163210
from_char_seq_search(&value,&s,adbc_strings_long,
3217-
ALL_UPPER,n->key->len,n);
3211+
n);
32183212
from_char_set_int(&out->bc,value %2,n);
32193213
break;
32203214
caseDCH_AD:
32213215
caseDCH_BC:
32223216
caseDCH_ad:
32233217
caseDCH_bc:
32243218
from_char_seq_search(&value,&s,adbc_strings,
3225-
ALL_UPPER,n->key->len,n);
3219+
n);
32263220
from_char_set_int(&out->bc,value %2,n);
32273221
break;
32283222
caseDCH_MONTH:
32293223
caseDCH_Month:
32303224
caseDCH_month:
3231-
from_char_seq_search(&value,&s,months_full,ONE_UPPER,
3232-
MAX_MONTH_LEN,n);
3225+
from_char_seq_search(&value,&s,months_full,
3226+
n);
32333227
from_char_set_int(&out->mm,value+1,n);
32343228
break;
32353229
caseDCH_MON:
32363230
caseDCH_Mon:
32373231
caseDCH_mon:
3238-
from_char_seq_search(&value,&s,months,ONE_UPPER,
3239-
MAX_MON_LEN,n);
3232+
from_char_seq_search(&value,&s,months,
3233+
n);
32403234
from_char_set_int(&out->mm,value+1,n);
32413235
break;
32423236
caseDCH_MM:
@@ -3246,16 +3240,16 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
32463240
caseDCH_DAY:
32473241
caseDCH_Day:
32483242
caseDCH_day:
3249-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3250-
MAX_DAY_LEN,n);
3243+
from_char_seq_search(&value,&s,days,
3244+
n);
32513245
from_char_set_int(&out->d,value,n);
32523246
out->d++;
32533247
break;
32543248
caseDCH_DY:
32553249
caseDCH_Dy:
32563250
caseDCH_dy:
3257-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3258-
MAX_DY_LEN,n);
3251+
from_char_seq_search(&value,&s,days,
3252+
n);
32593253
from_char_set_int(&out->d,value,n);
32603254
out->d++;
32613255
break;
@@ -3354,12 +3348,12 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
33543348
break;
33553349
caseDCH_RM:
33563350
from_char_seq_search(&value,&s,rm_months_upper,
3357-
ALL_UPPER,MAX_RM_LEN,n);
3351+
n);
33583352
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
33593353
break;
33603354
caseDCH_rm:
33613355
from_char_seq_search(&value,&s,rm_months_lower,
3362-
ALL_LOWER,MAX_RM_LEN,n);
3356+
n);
33633357
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
33643358
break;
33653359
caseDCH_W:

‎src/test/regress/expected/horology.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,7 +2616,7 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
26162616
(1 row)
26172617

26182618
SELECT to_timestamp('97/Feb/16', 'YYMonDD');
2619-
ERROR: invalid value "/Fe" for "Mon"
2619+
ERROR: invalid value "/Feb/16" for "Mon"
26202620
DETAIL: The given value did not match any of the allowed values for this field.
26212621
SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
26222622
to_timestamp
@@ -2862,7 +2862,7 @@ SELECT to_timestamp('2000 ++ JUN', 'YYYY MON');
28622862
(1 row)
28632863

28642864
SELECT to_timestamp('2000 + + JUN', 'YYYY MON');
2865-
ERROR: invalid value "+ J" for "MON"
2865+
ERROR: invalid value "+" for "MON"
28662866
DETAIL: The given value did not match any of the allowed values for this field.
28672867
SELECT to_timestamp('2000 + + JUN', 'YYYY MON');
28682868
to_timestamp

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp