Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 212b870

Browse files
committed
Clean up formatting.c's logic for matching constant strings.
seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular:
* seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches.
* seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.)
* seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that.
This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too.
* from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera).
In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems.
The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches.
Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
1 parent 9055344 commit 212b870

File tree

2 files changed

+84
-90
lines changed

2 files changed

+84
-90
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 83 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888

8989
#include"catalog/pg_collation.h"
9090
#include"mb/pg_wchar.h"
91+
#include"parser/scansup.h"
9192
#include"utils/builtins.h"
9293
#include"utils/date.h"
9394
#include"utils/datetime.h"
@@ -284,18 +285,6 @@ static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
284285
* Flags & Options:
285286
* ----------
286287
*/
287-
#defineONE_UPPER1/* Name */
288-
#defineALL_UPPER2/* NAME */
289-
#defineALL_LOWER3/* name */
290-
291-
#defineFULL_SIZ0
292-
293-
#defineMAX_MONTH_LEN9
294-
#defineMAX_MON_LEN3
295-
#defineMAX_DAY_LEN9
296-
#defineMAX_DY_LEN3
297-
#defineMAX_RM_LEN4
298-
299288
#defineTH_UPPER1
300289
#defineTH_LOWER2
301290

@@ -961,7 +950,7 @@ static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
961950

962951
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
963952
TmToChar*in,char*out,Oidcollid);
964-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out);
953+
staticvoidDCH_from_char(FormatNode*node,constchar*in,TmFromChar*out);
965954

966955
#ifdefDEBUG_TO_FROM_CHAR
967956
staticvoiddump_index(constKeyWord*k,constint*index);
@@ -971,13 +960,15 @@ static void dump_node(FormatNode *node, int max);
971960
staticconstchar*get_th(char*num,inttype);
972961
staticchar*str_numth(char*dest,char*num,inttype);
973962
staticintadjust_partial_year_to_2020(intyear);
974-
staticintstrspace_len(char*str);
963+
staticintstrspace_len(constchar*str);
975964
staticvoidfrom_char_set_mode(TmFromChar*tmfc,constFromCharDateModemode);
976965
staticvoidfrom_char_set_int(int*dest,constintvalue,constFormatNode*node);
977-
staticintfrom_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node);
978-
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node);
979-
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
980-
staticintfrom_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,FormatNode*node);
966+
staticintfrom_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node);
967+
staticintfrom_char_parse_int(int*dest,constchar**src,FormatNode*node);
968+
staticintseq_search(constchar*name,constchar*const*array,int*len);
969+
staticintfrom_char_seq_search(int*dest,constchar**src,
970+
constchar*const*array,
971+
FormatNode*node);
981972
staticvoiddo_to_timestamp(text*date_txt,text*fmt,
982973
structpg_tm*tm,fsec_t*fsec);
983974
staticchar*fill_str(char*str,intc,intmax);
@@ -2135,7 +2126,7 @@ adjust_partial_year_to_2020(int year)
21352126

21362127

21372128
staticint
2138-
strspace_len(char*str)
2129+
strspace_len(constchar*str)
21392130
{
21402131
intlen=0;
21412132

@@ -2209,11 +2200,11 @@ from_char_set_int(int *dest, const int value, const FormatNode *node)
22092200
* with DD and MI).
22102201
*/
22112202
staticint
2212-
from_char_parse_int_len(int*dest,char**src,constintlen,FormatNode*node)
2203+
from_char_parse_int_len(int*dest,constchar**src,constintlen,FormatNode*node)
22132204
{
22142205
longresult;
22152206
charcopy[DCH_MAX_ITEM_SIZ+1];
2216-
char*init=*src;
2207+
constchar*init=*src;
22172208
intused;
22182209

22192210
/*
@@ -2230,8 +2221,11 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22302221
* This node is in Fill Mode, or the next node is known to be a
22312222
* non-digit value, so we just slurp as many characters as we can get.
22322223
*/
2224+
char*endptr;
2225+
22332226
errno=0;
2234-
result=strtol(init,src,10);
2227+
result=strtol(init,&endptr,10);
2228+
*src=endptr;
22352229
}
22362230
else
22372231
{
@@ -2299,76 +2293,61 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
22992293
* required length explicitly.
23002294
*/
23012295
staticint
2302-
from_char_parse_int(int*dest,char**src,FormatNode*node)
2296+
from_char_parse_int(int*dest,constchar**src,FormatNode*node)
23032297
{
23042298
returnfrom_char_parse_int_len(dest,src,node->key->len,node);
23052299
}
23062300

2307-
/* ----------
2308-
* Sequential search with to upper/lower conversion
2309-
* ----------
2301+
/*
2302+
* Sequentially search null-terminated "array" for a case-insensitive match
2303+
* to the initial character(s) of "name".
2304+
*
2305+
* Returns array index of match, or -1 for no match.
2306+
*
2307+
* *len is set to the length of the match, or 0 for no match.
2308+
*
2309+
* Case-insensitivity is defined per pg_tolower, so this is only
2310+
* suitable for comparisons to ASCII strings.
23102311
*/
23112312
staticint
2312-
seq_search(char*name,constchar*const*array,inttype,intmax,int*len)
2313+
seq_search(constchar*name,constchar*const*array,int*len)
23132314
{
2314-
constchar*p;
2315+
unsignedcharfirstc;
23152316
constchar*const*a;
2316-
char*n;
2317-
intlast,
2318-
i;
23192317

23202318
*len=0;
23212319

2320+
/* empty string can't match anything */
23222321
if (!*name)
23232322
return-1;
23242323

2325-
/* set first char */
2326-
if (type==ONE_UPPER||type==ALL_UPPER)
2327-
*name=pg_toupper((unsignedchar)*name);
2328-
elseif (type==ALL_LOWER)
2329-
*name=pg_tolower((unsignedchar)*name);
2324+
/* we handle first char specially to gain some speed */
2325+
firstc=pg_tolower((unsignedchar)*name);
23302326

2331-
for (last=0,a=array;*a!=NULL;a++)
2327+
for (a=array;*a!=NULL;a++)
23322328
{
2329+
constchar*p;
2330+
constchar*n;
2331+
23332332
/* compare first chars */
2334-
if (*name!=**a)
2333+
if (pg_tolower((unsignedchar)**a)!=firstc)
23352334
continue;
23362335

2337-
for (i=1,p=*a+1,n=name+1;;n++,p++,i++)
2336+
/* compare rest of string */
2337+
for (p=*a+1,n=name+1;;p++,n++)
23382338
{
2339-
/* search fragment (max) only */
2340-
if (max&&i==max)
2341-
{
2342-
*len=i;
2343-
returna-array;
2344-
}
2345-
/* full size */
2339+
/* return success if we matched whole array entry */
23462340
if (*p=='\0')
23472341
{
2348-
*len=i;
2342+
*len=n-name;
23492343
returna-array;
23502344
}
2351-
/*Not foundinarray 'a' */
2345+
/*else, must have another characterin"name" ... */
23522346
if (*n=='\0')
23532347
break;
2354-
2355-
/*
2356-
* Convert (but convert new chars only)
2357-
*/
2358-
if (i>last)
2359-
{
2360-
if (type==ONE_UPPER||type==ALL_LOWER)
2361-
*n=pg_tolower((unsignedchar)*n);
2362-
elseif (type==ALL_UPPER)
2363-
*n=pg_toupper((unsignedchar)*n);
2364-
last=i;
2365-
}
2366-
2367-
#ifdefDEBUG_TO_FROM_CHAR
2368-
elog(DEBUG_elog_output,"N: %c, P: %c, A: %s (%s)",
2369-
*n,*p,*a,name);
2370-
#endif
2371-
if (*n!=*p)
2348+
/* ... and it must match */
2349+
if (pg_tolower((unsignedchar)*p)!=
2350+
pg_tolower((unsignedchar)*n))
23722351
break;
23732352
}
23742353
}
@@ -2377,28 +2356,43 @@ seq_search(char *name, const char *const *array, int type, int max, int *len)
23772356
}
23782357

23792358
/*
2380-
* Perform a sequential search in 'array' fortextmatching the first 'max'
2381-
*characters of thesource string.
2359+
* Perform a sequential search in 'array' foran entrymatching the first
2360+
*character(s) of the'src' string case-insensitively.
23822361
*
23832362
* If a match is found, copy the array index of the match into the integer
23842363
* pointed to by 'dest', advance 'src' to the end of the part of the string
23852364
* which matched, and return the number of characters consumed.
23862365
*
23872366
* If the string doesn't match, throw an error.
2367+
*
2368+
* 'node' is used only for error reports: node->key->name identifies the
2369+
* field type we were searching for.
23882370
*/
23892371
staticint
2390-
from_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,
2372+
from_char_seq_search(int*dest,constchar**src,constchar*const*array,
23912373
FormatNode*node)
23922374
{
23932375
intlen;
23942376

2395-
*dest=seq_search(*src,array,type,max,&len);
2377+
*dest=seq_search(*src,array,&len);
2378+
23962379
if (len <=0)
23972380
{
2398-
charcopy[DCH_MAX_ITEM_SIZ+1];
2381+
/*
2382+
* In the error report, truncate the string at the next whitespace (if
2383+
* any) to avoid including irrelevant data.
2384+
*/
2385+
char*copy=pstrdup(*src);
2386+
char*c;
23992387

2400-
Assert(max <=DCH_MAX_ITEM_SIZ);
2401-
strlcpy(copy,*src,max+1);
2388+
for (c=copy;*c;c++)
2389+
{
2390+
if (scanner_isspace(*c))
2391+
{
2392+
*c='\0';
2393+
break;
2394+
}
2395+
}
24022396

24032397
ereport(ERROR,
24042398
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
@@ -2984,10 +2978,10 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
29842978
* ----------
29852979
*/
29862980
staticvoid
2987-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out)
2981+
DCH_from_char(FormatNode*node,constchar*in,TmFromChar*out)
29882982
{
29892983
FormatNode*n;
2990-
char*s;
2984+
constchar*s;
29912985
intlen,
29922986
value;
29932987
boolfx_mode= false;
@@ -3024,7 +3018,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30243018
caseDCH_a_m:
30253019
caseDCH_p_m:
30263020
from_char_seq_search(&value,&s,ampm_strings_long,
3027-
ALL_UPPER,n->key->len,n);
3021+
n);
30283022
from_char_set_int(&out->pm,value %2,n);
30293023
out->clock=CLOCK_12_HOUR;
30303024
break;
@@ -3033,7 +3027,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30333027
caseDCH_am:
30343028
caseDCH_pm:
30353029
from_char_seq_search(&value,&s,ampm_strings,
3036-
ALL_UPPER,n->key->len,n);
3030+
n);
30373031
from_char_set_int(&out->pm,value %2,n);
30383032
out->clock=CLOCK_12_HOUR;
30393033
break;
@@ -3094,29 +3088,29 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30943088
caseDCH_a_d:
30953089
caseDCH_b_c:
30963090
from_char_seq_search(&value,&s,adbc_strings_long,
3097-
ALL_UPPER,n->key->len,n);
3091+
n);
30983092
from_char_set_int(&out->bc,value %2,n);
30993093
break;
31003094
caseDCH_AD:
31013095
caseDCH_BC:
31023096
caseDCH_ad:
31033097
caseDCH_bc:
31043098
from_char_seq_search(&value,&s,adbc_strings,
3105-
ALL_UPPER,n->key->len,n);
3099+
n);
31063100
from_char_set_int(&out->bc,value %2,n);
31073101
break;
31083102
caseDCH_MONTH:
31093103
caseDCH_Month:
31103104
caseDCH_month:
3111-
from_char_seq_search(&value,&s,months_full,ONE_UPPER,
3112-
MAX_MONTH_LEN,n);
3105+
from_char_seq_search(&value,&s,months_full,
3106+
n);
31133107
from_char_set_int(&out->mm,value+1,n);
31143108
break;
31153109
caseDCH_MON:
31163110
caseDCH_Mon:
31173111
caseDCH_mon:
3118-
from_char_seq_search(&value,&s,months,ONE_UPPER,
3119-
MAX_MON_LEN,n);
3112+
from_char_seq_search(&value,&s,months,
3113+
n);
31203114
from_char_set_int(&out->mm,value+1,n);
31213115
break;
31223116
caseDCH_MM:
@@ -3126,16 +3120,16 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
31263120
caseDCH_DAY:
31273121
caseDCH_Day:
31283122
caseDCH_day:
3129-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3130-
MAX_DAY_LEN,n);
3123+
from_char_seq_search(&value,&s,days,
3124+
n);
31313125
from_char_set_int(&out->d,value,n);
31323126
out->d++;
31333127
break;
31343128
caseDCH_DY:
31353129
caseDCH_Dy:
31363130
caseDCH_dy:
3137-
from_char_seq_search(&value,&s,days,ONE_UPPER,
3138-
MAX_DY_LEN,n);
3131+
from_char_seq_search(&value,&s,days,
3132+
n);
31393133
from_char_set_int(&out->d,value,n);
31403134
out->d++;
31413135
break;
@@ -3234,12 +3228,12 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
32343228
break;
32353229
caseDCH_RM:
32363230
from_char_seq_search(&value,&s,rm_months_upper,
3237-
ALL_UPPER,MAX_RM_LEN,n);
3231+
n);
32383232
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
32393233
break;
32403234
caseDCH_rm:
32413235
from_char_seq_search(&value,&s,rm_months_lower,
3242-
ALL_LOWER,MAX_RM_LEN,n);
3236+
n);
32433237
from_char_set_int(&out->mm,MONTHS_PER_YEAR-value,n);
32443238
break;
32453239
caseDCH_W:

‎src/test/regress/expected/horology.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2808,7 +2808,7 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
28082808
(1 row)
28092809

28102810
SELECT to_timestamp('97/Feb/16', 'YYMonDD');
2811-
ERROR: invalid value "/Fe" for "Mon"
2811+
ERROR: invalid value "/Feb/16" for "Mon"
28122812
DETAIL: The given value did not match any of the allowed values for this field.
28132813
SELECT to_timestamp('19971116', 'YYYYMMDD');
28142814
to_timestamp

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp