Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fb12aef

Browse files
committed
Clean up formatting.c's logic for matching constant strings.
seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular:

* seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches.

* seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.)

* seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that.
This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too.

* from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera).

In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems.

The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches.

Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
1 parent 5b4b07f commit fb12aef

File tree

2 files changed

+84
-90
lines changed

2 files changed

+84
-90
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 83 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888

8989
#include"catalog/pg_collation.h"
9090
#include"mb/pg_wchar.h"
91+
#include"parser/scansup.h"
9192
#include"utils/builtins.h"
9293
#include"utils/date.h"
9394
#include"utils/datetime.h"
@@ -275,18 +276,6 @@ static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
275276
* Flags & Options:
276277
* ----------
277278
*/
278-
#defineONE_UPPER1/* Name */
279-
#defineALL_UPPER2/* NAME */
280-
#defineALL_LOWER3/* name */
281-
282-
#defineFULL_SIZ0
283-
284-
#defineMAX_MONTH_LEN9
285-
#defineMAX_MON_LEN3
286-
#defineMAX_DAY_LEN9
287-
#defineMAX_DY_LEN3
288-
#defineMAX_RM_LEN4
289-
290279
#defineTH_UPPER1
291280
#defineTH_LOWER2
292281

@@ -960,7 +949,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
960949

961950
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
962951
TmToChar*in,char*out,Oidcollid);
963-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out);
952+
staticvoidDCH_from_char(FormatNode*node,constchar*in,TmFromChar*out);
964953

965954
#ifdefDEBUG_TO_FROM_CHAR
966955
staticvoiddump_index(constKeyWord*k,constint*index);
@@ -970,13 +959,15 @@ static void dump_node(FormatNode *node, int max);
970959
staticconstchar*get_th(char*num,inttype);
971960
staticchar*str_numth(char*dest,char*num,inttype);
972961
staticintadjust_partial_year_to_2020(intyear);
973-
staticintstrspace_len(char*str);
962+
staticintstrspace_len(constchar*str);
974963
staticvoidfrom_char_set_mode(TmFromChar*tmfc,constFromCharDateModemode);
975964
staticvoidfrom_char_set_int(int*dest,constintvalue,constFormatNode*node);
976-
staticintfrom_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node);
977-
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node);
978-
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
979-
staticintfrom_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,FormatNode*node);
965+
staticintfrom_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node);
966+
staticintfrom_char_parse_int(int*dest,constchar**src,FormatNode*node);
967+
staticintseq_search(constchar*name,constchar*const*array,int*len);
968+
staticintfrom_char_seq_search(int*dest,constchar**src,
969+
constchar*const*array,
970+
FormatNode*node);
980971
staticvoiddo_to_timestamp(text*date_txt,text*fmt,
981972
structpg_tm*tm,fsec_t*fsec);
982973
staticchar*fill_str(char*str,intc,intmax);
@@ -2120,7 +2111,7 @@ adjust_partial_year_to_2020(int year)
21202111

21212112

21222113
staticint
2123-
strspace_len(char*str)
2114+
strspace_len(constchar*str)
21242115
{
21252116
intlen=0;
21262117

@@ -2194,11 +2185,11 @@ from_char_set_int(int *dest, const int value, const FormatNode *node)
21942185
* with DD and MI).
21952186
*/
21962187
staticint
2197-
from_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node)
2188+
from_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node)
21982189
{
21992190
longresult;
22002191
charcopy[DCH_MAX_ITEM_SIZ+1];
2201-
char*init=*src;
2192+
constchar*init=*src;
22022193
intused;
22032194

22042195
/*
@@ -2215,8 +2206,11 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22152206
* This node is in Fill Mode, or the next node is known to be a
22162207
* non-digit value, so we just slurp as many characters as we can get.
22172208
*/
2209+
char*endptr;
2210+
22182211
errno=0;
2219-
result=strtol(init,src,10);
2212+
result=strtol(init,&endptr,10);
2213+
*src=endptr;
22202214
}
22212215
else
22222216
{
@@ -2284,76 +2278,61 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22842278
* required length explicitly.
22852279
*/
22862280
staticint
2287-
from_char_parse_int(int*dest,char**src,FormatNode*node)
2281+
from_char_parse_int(int*dest,constchar**src,FormatNode*node)
22882282
{
22892283
returnfrom_char_parse_int_len(dest,src,node->key->len,node);
22902284
}
22912285

2292-
/* ----------
2293-
* Sequential search with to upper/lower conversion
2294-
* ----------
2286+
/*
2287+
* Sequentially search null-terminated "array" for a case-insensitive match
2288+
* to the initial character(s) of "name".
2289+
*
2290+
* Returns array index of match, or -1 for no match.
2291+
*
2292+
* *len is set to the length of the match, or 0 for no match.
2293+
*
2294+
* Case-insensitivity is defined per pg_tolower, so this is only
2295+
* suitable for comparisons to ASCII strings.
22952296
*/
22962297
staticint
2297-
seq_search(char*name,constchar*const*array,inttype,intmax,int*len)
2298+
seq_search(constchar*name,constchar*const*array,int*len)
22982299
{
2299-
constchar*p;
2300+
unsignedcharfirstc;
23002301
constchar*const*a;
2301-
char*n;
2302-
intlast,
2303-
i;
23042302

23052303
*len=0;
23062304

2305+
/* empty string can't match anything */
23072306
if (!*name)
23082307
return-1;
23092308

2310-
/* set first char */
2311-
if (type==ONE_UPPER||type==ALL_UPPER)
2312-
*name=pg_toupper((unsignedchar)*name);
2313-
elseif (type==ALL_LOWER)
2314-
*name=pg_tolower((unsignedchar)*name);
2309+
/* we handle first char specially to gain some speed */
2310+
firstc=pg_tolower((unsignedchar)*name);
23152311

2316-
for (last=0,a=array;*a!=NULL;a++)
2312+
for (a=array;*a!=NULL;a++)
23172313
{
2314+
constchar*p;
2315+
constchar*n;
2316+
23182317
/* compare first chars */
2319-
if (*name!=**a)
2318+
if (pg_tolower((unsignedchar)**a)!=firstc)
23202319
continue;
23212320

2322-
for (i=1,p=*a+1,n=name+1;;n++,p++,i++)
2321+
/* compare rest of string */
2322+
for (p=*a+1,n=name+1;;p++,n++)
23232323
{
2324-
/* search fragment (max) only */
2325-
if (max&&i==max)
2326-
{
2327-
*len=i;
2328-
returna-array;
2329-
}
2330-
/* full size */
2324+
/* return success if we matched whole array entry */
23312325
if (*p=='\0')
23322326
{
2333-
*len=i;
2327+
*len=n-name;
23342328
returna-array;
23352329
}
2336-
/*Not foundinarray 'a' */
2330+
/*else, must have another characterin"name" ... */
23372331
if (*n=='\0')
23382332
break;
2339-
2340-
/*
2341-
* Convert (but convert new chars only)
2342-
*/
2343-
if (i>last)
2344-
{
2345-
if (type==ONE_UPPER||type==ALL_LOWER)
2346-
*n=pg_tolower((unsignedchar)*n);
2347-
elseif (type==ALL_UPPER)
2348-
*n=pg_toupper((unsignedchar)*n);
2349-
last=i;
2350-
}
2351-
2352-
#ifdefDEBUG_TO_FROM_CHAR
2353-
elog(DEBUG_elog_output,"N: %c, P: %c, A: %s (%s)",
2354-
*n,*p,*a,name);
2355-
#endif
2356-
if (*n!=*p)
2333+
/* ... and it must match */
2334+
if (pg_tolower((unsignedchar)*p)!=
2335+
pg_tolower((unsignedchar)*n))
23572336
break;
23582337
}
23592338
}
@@ -2362,28 +2341,43 @@ seq_search(char *name, const char *const *array, int type, int max, int *len)
23622341
}
23632342

23642343
/*
2365-
* Perform a sequential search in 'array' fortextmatching the first 'max'
2366-
*characters of thesource string.
2344+
* Perform a sequential search in 'array' foran entrymatching the first
2345+
*character(s) of the'src' string case-insensitively.
23672346
*
23682347
* If a match is found, copy the array index of the match into the integer
23692348
* pointed to by 'dest', advance 'src' to the end of the part of the string
23702349
* which matched, and return the number of characters consumed.
23712350
*
23722351
* If the string doesn't match, throw an error.
2352+
*
2353+
* 'node' is used only for error reports: node->key->name identifies the
2354+
* field type we were searching for.
23732355
*/
23742356
staticint
2375-
from_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,
2357+
from_char_seq_search(int*dest,constchar**src,constchar*const*array,
23762358
FormatNode*node)
23772359
{
23782360
intlen;
23792361

2380-
*dest=seq_search(*src,array,type,max,&len);
2362+
*dest=seq_search(*src,array,&len);
2363+
23812364
if (len <=0)
23822365
{
2383-
charcopy[DCH_MAX_ITEM_SIZ+1];
2366+
/*
2367+
* In the error report, truncate the string at the next whitespace (if
2368+
* any) to avoid including irrelevant data.
2369+
*/
2370+
char*copy=pstrdup(*src);
2371+
char*c;
23842372

2385-
Assert(max <=DCH_MAX_ITEM_SIZ);
2386-
strlcpy(copy,*src,max+1);
2373+
for (c=copy;*c;c++)
2374+
{
2375+
if (scanner_isspace(*c))
2376+
{
2377+
*c='\0';
2378+
break;
2379+
}
2380+
}
23872381

23882382
ereport(ERROR,
23892383
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
@@ -2982,10 +2976,10 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
29822976
* ----------
29832977
*/
29842978
staticvoid
2985-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out)
2979+
DCH_from_char(FormatNode*node,constchar*in,TmFromChar*out)
29862980
{
29872981
FormatNode*n;
2988-
char*s;
2982+
constchar*s;
29892983
intlen,
29902984
value;
29912985
boolfx_mode= false;
@@ -3022,7 +3016,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30223016
caseDCH_a_m:
30233017
caseDCH_p_m:
30243018
from_char_seq_search(&value,&s,ampm_strings_long,
3025-
ALL_UPPER,n->key->len,n);
3019+
n);
30263020
from_char_set_int(&out->pm,value %2,n);
30273021
out->clock=CLOCK_12_HOUR;
30283022
break;
@@ -3031,7 +3025,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30313025
caseDCH_am:
30323026
caseDCH_pm:
30333027
from_char_seq_search(&value,&s,ampm_strings,
3034-
ALL_UPPER,n->key->len,n);
3028+
n);
30353029
from_char_set_int(&out->pm,value %2,n);
30363030
out->clock=CLOCK_12_HOUR;
30373031
break;
@@ -3106,29 +3100,29 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31063100
caseDCH_a_d:
31073101
caseDCH_b_c:
31083102
from_char_seq_search(&value,&s,adbc_strings_long,
3109-
ALL_UPPER,n->key->len,n);
3103+
n);
31103104
from_char_set_int(&out->bc,value %2,n);
31113105
break;
31123106
caseDCH_AD:
31133107
caseDCH_BC:
31143108
caseDCH_ad:
31153109
caseDCH_bc:
31163110
from_char_seq_search(&value,&s,adbc_strings,
3117-
ALL_UPPER,n->key->len,n);
3111+
n);
31183112
from_char_set_int(&out->bc,value %2,n);
31193113
break;
31203114
caseDCH_MONTH:
31213115
caseDCH_Month:
31223116
caseDCH_month:
3123-
from_char_seq_search(&value,&s,months_full,ONE_UPPER,
3124-
MAX_MONTH_LEN,n);
3117+
from_char_seq_search(&value,&s,months_full,
3118+
n);
31253119
from_char_set_int(&out->mm,value+1,n);
31263120
break;
31273121
caseDCH_MON:
31283122
caseDCH_Mon:
31293123
caseDCH_mon:
3130-
from_char_seq_search(&value,&s,months,ONE_UPPER,
3131-
MAX_MON_LEN,n);
3124+
from_char_seq_search(&value,&s,months,
3125+
n);
31323126
from_char_set_int(&out->mm,value+1,n);
31333127
break;
31343128
caseDCH_MM:
@@ -3138,16 +3132,16 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31383132
caseDCH_DAY:
31393133
caseDCH_Day:
31403134
caseDCH_day:
3141-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3142-
MAX_DAY_LEN,n);
3135+
from_char_seq_search(&value,&s,days,
3136+
n);
31433137
from_char_set_int(&out->d,value,n);
31443138
out->d++;
31453139
break;
31463140
caseDCH_DY:
31473141
caseDCH_Dy:
31483142
caseDCH_dy:
3149-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3150-
MAX_DY_LEN,n);
3143+
from_char_seq_search(&value,&s,days,
3144+
n);
31513145
from_char_set_int(&out->d,value,n);
31523146
out->d++;
31533147
break;
@@ -3246,12 +3240,12 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
32463240
break;
32473241
caseDCH_RM:
32483242
from_char_seq_search(&value,&s,rm_months_upper,
3249-
ALL_UPPER,MAX_RM_LEN,n);
3243+
n);
32503244
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
32513245
break;
32523246
caseDCH_rm:
32533247
from_char_seq_search(&value,&s,rm_months_lower,
3254-
ALL_LOWER,MAX_RM_LEN,n);
3248+
n);
32553249
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
32563250
break;
32573251
caseDCH_W:

‎src/test/regress/expected/horology.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2808,7 +2808,7 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
28082808
(1 row)
28092809

28102810
SELECT to_timestamp('97/Feb/16', 'YYMonDD');
2811-
ERROR: invalid value "/Fe" for "Mon"
2811+
ERROR: invalid value "/Feb/16" for "Mon"
28122812
DETAIL: The given value did not match any of the allowed values for this field.
28132813
SELECT to_timestamp('19971116', 'YYYYMMDD');
28142814
to_timestamp

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp