Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1a950f3

Browse files
committed
Implement standard datetime parsing mode
SQL Standard 2016 defines rules for handling separators in datetime template strings, which are different to to_date()/to_timestamp() rules. Standard allows only small set of separators and requires strict matching for them. Standard applies to jsonpath .datetime() method and CAST (... FORMAT ...) SQL clause. We're not going to change handling of separators in existing to_date()/to_timestamp() functions, because their current behavior is familiar for users. Standard behavior now available by special flag, which will be used in upcoming .datetime() jsonpath method. Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com Author: Alexander Korotkov
1 parent bd29cc1 commit 1a950f3

File tree

1 file changed

+104
-40
lines changed

1 file changed

+104
-40
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 104 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,12 @@
9999
#include"utils/pg_locale.h"
100100

101101
/* ----------
102-
* Routinestype
102+
* Routinesflags
103103
* ----------
104104
*/
105-
#defineDCH_TYPE1/* DATE-TIME version*/
106-
#defineNUM_TYPE2/* NUMBER version*/
105+
#defineDCH_FLAG0x1/* DATE-TIME flag*/
106+
#defineNUM_FLAG0x2/* NUMBER flag*/
107+
#defineSTD_FLAG0x4/* STANDARD flag*/
107108

108109
/* ----------
109110
* KeyWord Index (ascii from position 32 (' ') to 126 (~))
@@ -384,6 +385,7 @@ typedef struct
384385
{
385386
FormatNodeformat[DCH_CACHE_SIZE+1];
386387
charstr[DCH_CACHE_SIZE+1];
388+
boolstd;
387389
boolvalid;
388390
intage;
389391
}DCHCacheEntry;
@@ -1000,11 +1002,12 @@ static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int t
10001002
staticboolis_separator_char(constchar*str);
10011003
staticvoidNUMDesc_prepare(NUMDesc*num,FormatNode*n);
10021004
staticvoidparse_format(FormatNode*node,constchar*str,constKeyWord*kw,
1003-
constKeySuffix*suf,constint*index,intver,NUMDesc*Num);
1005+
constKeySuffix*suf,constint*index,uint32flags,NUMDesc*Num);
10041006

10051007
staticvoidDCH_to_char(FormatNode*node,boolis_interval,
10061008
TmToChar*in,char*out,Oidcollid);
1007-
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out);
1009+
staticvoidDCH_from_char(FormatNode*node,char*in,TmFromChar*out,
1010+
boolstd);
10081011

10091012
#ifdefDEBUG_TO_FROM_CHAR
10101013
staticvoiddump_index(constKeyWord*k,constint*index);
@@ -1021,7 +1024,7 @@ static intfrom_char_parse_int_len(int *dest, char **src, const int len, FormatN
10211024
staticintfrom_char_parse_int(int*dest,char**src,FormatNode*node);
10221025
staticintseq_search(char*name,constchar*const*array,inttype,intmax,int*len);
10231026
staticintfrom_char_seq_search(int*dest,char**src,constchar*const*array,inttype,intmax,FormatNode*node);
1024-
staticvoiddo_to_timestamp(text*date_txt,text*fmt,
1027+
staticvoiddo_to_timestamp(text*date_txt,text*fmt,boolstd,
10251028
structpg_tm*tm,fsec_t*fsec,int*fprec);
10261029
staticchar*fill_str(char*str,intc,intmax);
10271030
staticFormatNode*NUM_cache(intlen,NUMDesc*Num,text*pars_str,bool*shouldFree);
@@ -1033,9 +1036,9 @@ static void NUM_numpart_to_char(NUMProc *Np, int id);
10331036
staticchar*NUM_processor(FormatNode*node,NUMDesc*Num,char*inout,
10341037
char*number,intinput_len,intto_char_out_pre_spaces,
10351038
intsign,boolis_to_char,Oidcollid);
1036-
staticDCHCacheEntry*DCH_cache_getnew(constchar*str);
1037-
staticDCHCacheEntry*DCH_cache_search(constchar*str);
1038-
staticDCHCacheEntry*DCH_cache_fetch(constchar*str);
1039+
staticDCHCacheEntry*DCH_cache_getnew(constchar*str,boolstd);
1040+
staticDCHCacheEntry*DCH_cache_search(constchar*str,boolstd);
1041+
staticDCHCacheEntry*DCH_cache_fetch(constchar*str,boolstd);
10391042
staticNUMCacheEntry*NUM_cache_getnew(constchar*str);
10401043
staticNUMCacheEntry*NUM_cache_search(constchar*str);
10411044
staticNUMCacheEntry*NUM_cache_fetch(constchar*str);
@@ -1278,7 +1281,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
12781281
*/
12791282
staticvoid
12801283
parse_format(FormatNode*node,constchar*str,constKeyWord*kw,
1281-
constKeySuffix*suf,constint*index,intver,NUMDesc*Num)
1284+
constKeySuffix*suf,constint*index,uint32flags,NUMDesc*Num)
12821285
{
12831286
FormatNode*n;
12841287

@@ -1296,7 +1299,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
12961299
/*
12971300
* Prefix
12981301
*/
1299-
if (ver==DCH_TYPE&&
1302+
if ((flags&DCH_FLAG)&&
13001303
(s=suff_search(str,suf,SUFFTYPE_PREFIX))!=NULL)
13011304
{
13021305
suffix |=s->id;
@@ -1317,13 +1320,13 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13171320
/*
13181321
* NUM version: Prepare global NUMDesc struct
13191322
*/
1320-
if (ver==NUM_TYPE)
1323+
if (flags&NUM_FLAG)
13211324
NUMDesc_prepare(Num,n);
13221325

13231326
/*
13241327
* Postfix
13251328
*/
1326-
if (ver==DCH_TYPE&&*str&&
1329+
if ((flags&DCH_FLAG)&&*str&&
13271330
(s=suff_search(str,suf,SUFFTYPE_POSTFIX))!=NULL)
13281331
{
13291332
n->suffix |=s->id;
@@ -1337,11 +1340,34 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13371340
{
13381341
intchlen;
13391342

1340-
/*
1341-
* Process double-quoted literal string, if any
1342-
*/
1343-
if (*str=='"')
1343+
if (flags&STD_FLAG)
1344+
{
1345+
/*
1346+
* Standard mode, allow only following separators: "-./,':; "
1347+
*/
1348+
if (strchr("-./,':; ",*str)==NULL)
1349+
ereport(ERROR,
1350+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1351+
errmsg("invalid datetime format separator: \"%s\"",
1352+
pnstrdup(str,pg_mblen(str)))));
1353+
1354+
if (*str==' ')
1355+
n->type=NODE_TYPE_SPACE;
1356+
else
1357+
n->type=NODE_TYPE_SEPARATOR;
1358+
1359+
n->character[0]=*str;
1360+
n->character[1]='\0';
1361+
n->key=NULL;
1362+
n->suffix=0;
1363+
n++;
1364+
str++;
1365+
}
1366+
elseif (*str=='"')
13441367
{
1368+
/*
1369+
* Process double-quoted literal string, if any
1370+
*/
13451371
str++;
13461372
while (*str)
13471373
{
@@ -1373,7 +1399,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13731399
str++;
13741400
chlen=pg_mblen(str);
13751401

1376-
if (ver==DCH_TYPE&&is_separator_char(str))
1402+
if ((flags&DCH_FLAG)&&is_separator_char(str))
13771403
n->type=NODE_TYPE_SEPARATOR;
13781404
elseif (isspace((unsignedchar)*str))
13791405
n->type=NODE_TYPE_SPACE;
@@ -3060,13 +3086,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
30603086
* ----------
30613087
*/
30623088
staticvoid
3063-
DCH_from_char(FormatNode*node,char*in,TmFromChar*out)
3089+
DCH_from_char(FormatNode*node,char*in,TmFromChar*out,boolstd)
30643090
{
30653091
FormatNode*n;
30663092
char*s;
30673093
intlen,
30683094
value;
3069-
boolfx_mode=false;
3095+
boolfx_mode=std;
30703096

30713097
/* number of extra skipped characters (more than given in format string) */
30723098
intextra_skip=0;
@@ -3089,7 +3115,23 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
30893115

30903116
if (n->type==NODE_TYPE_SPACE||n->type==NODE_TYPE_SEPARATOR)
30913117
{
3092-
if (!fx_mode)
3118+
if (std)
3119+
{
3120+
/*
3121+
* Standard mode requires strict matching between format
3122+
* string separators/spaces and input string.
3123+
*/
3124+
Assert(n->character[0]&& !n->character[1]);
3125+
3126+
if (*s==n->character[0])
3127+
s++;
3128+
else
3129+
ereport(ERROR,
3130+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3131+
errmsg("unmatched format separator \"%c\"",
3132+
n->character[0])));
3133+
}
3134+
elseif (!fx_mode)
30933135
{
30943136
/*
30953137
* In non FX (fixed format) mode one format string space or
@@ -3434,6 +3476,27 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
34343476
}
34353477
}
34363478
}
3479+
3480+
/*
3481+
* Standard parsing mode doesn't allow unmatched format patterns or
3482+
* trailing characters in the input string.
3483+
*/
3484+
if (std)
3485+
{
3486+
if (n->type!=NODE_TYPE_END)
3487+
ereport(ERROR,
3488+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3489+
errmsg("input string is too short for datetime format")));
3490+
3491+
while (*s!='\0'&&isspace((unsignedchar)*s))
3492+
s++;
3493+
3494+
if (*s!='\0')
3495+
ereport(ERROR,
3496+
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3497+
errmsg("trailing characters remain in input string after "
3498+
"datetime format")));
3499+
}
34373500
}
34383501

34393502
/*
@@ -3456,7 +3519,7 @@ DCH_prevent_counter_overflow(void)
34563519

34573520
/* select a DCHCacheEntry to hold the given format picture */
34583521
staticDCHCacheEntry*
3459-
DCH_cache_getnew(constchar*str)
3522+
DCH_cache_getnew(constchar*str,boolstd)
34603523
{
34613524
DCHCacheEntry*ent;
34623525

@@ -3506,6 +3569,7 @@ DCH_cache_getnew(const char *str)
35063569
MemoryContextAllocZero(TopMemoryContext,sizeof(DCHCacheEntry));
35073570
ent->valid= false;
35083571
StrNCpy(ent->str,str,DCH_CACHE_SIZE+1);
3572+
ent->std=std;
35093573
ent->age= (++DCHCounter);
35103574
/* caller is expected to fill format, then set valid */
35113575
++n_DCHCache;
@@ -3515,7 +3579,7 @@ DCH_cache_getnew(const char *str)
35153579

35163580
/* look for an existing DCHCacheEntry matching the given format picture */
35173581
staticDCHCacheEntry*
3518-
DCH_cache_search(constchar*str)
3582+
DCH_cache_search(constchar*str,boolstd)
35193583
{
35203584
/* Ensure we can advance DCHCounter below */
35213585
DCH_prevent_counter_overflow();
@@ -3524,7 +3588,7 @@ DCH_cache_search(const char *str)
35243588
{
35253589
DCHCacheEntry*ent=DCHCache[i];
35263590

3527-
if (ent->valid&&strcmp(ent->str,str)==0)
3591+
if (ent->valid&&strcmp(ent->str,str)==0&&ent->std==std)
35283592
{
35293593
ent->age= (++DCHCounter);
35303594
returnent;
@@ -3536,21 +3600,21 @@ DCH_cache_search(const char *str)
35363600

35373601
/* Find or create a DCHCacheEntry for the given format picture */
35383602
staticDCHCacheEntry*
3539-
DCH_cache_fetch(constchar*str)
3603+
DCH_cache_fetch(constchar*str,boolstd)
35403604
{
35413605
DCHCacheEntry*ent;
35423606

3543-
if ((ent=DCH_cache_search(str))==NULL)
3607+
if ((ent=DCH_cache_search(str,std))==NULL)
35443608
{
35453609
/*
35463610
* Not in the cache, must run parser and save a new format-picture to
35473611
* the cache. Do not mark the cache entry valid until parsing
35483612
* succeeds.
35493613
*/
3550-
ent=DCH_cache_getnew(str);
3614+
ent=DCH_cache_getnew(str,std);
35513615

3552-
parse_format(ent->format,str,DCH_keywords,
3553-
DCH_suff,DCH_index,DCH_TYPE,NULL);
3616+
parse_format(ent->format,str,DCH_keywords,DCH_suff,DCH_index,
3617+
DCH_FLAG | (std ?STD_FLAG :0),NULL);
35543618

35553619
ent->valid= true;
35563620
}
@@ -3595,14 +3659,14 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
35953659
format= (FormatNode*)palloc((fmt_len+1)*sizeof(FormatNode));
35963660

35973661
parse_format(format,fmt_str,DCH_keywords,
3598-
DCH_suff,DCH_index,DCH_TYPE,NULL);
3662+
DCH_suff,DCH_index,DCH_FLAG,NULL);
35993663
}
36003664
else
36013665
{
36023666
/*
36033667
* Use cache buffers
36043668
*/
3605-
DCHCacheEntry*ent=DCH_cache_fetch(fmt_str);
3669+
DCHCacheEntry*ent=DCH_cache_fetch(fmt_str, false);
36063670

36073671
incache= true;
36083672
format=ent->format;
@@ -3744,7 +3808,7 @@ to_timestamp(PG_FUNCTION_ARGS)
37443808
fsec_tfsec;
37453809
intfprec;
37463810

3747-
do_to_timestamp(date_txt,fmt,&tm,&fsec,&fprec);
3811+
do_to_timestamp(date_txt,fmt,false,&tm,&fsec,&fprec);
37483812

37493813
/* Use the specified time zone, if any. */
37503814
if (tm.tm_zone)
@@ -3783,7 +3847,7 @@ to_date(PG_FUNCTION_ARGS)
37833847
structpg_tmtm;
37843848
fsec_tfsec;
37853849

3786-
do_to_timestamp(date_txt,fmt,&tm,&fsec,NULL);
3850+
do_to_timestamp(date_txt,fmt,false,&tm,&fsec,NULL);
37873851

37883852
/* Prevent overflow in Julian-day routines */
37893853
if (!IS_VALID_JULIAN(tm.tm_year,tm.tm_mon,tm.tm_mday))
@@ -3818,7 +3882,7 @@ to_date(PG_FUNCTION_ARGS)
38183882
* struct 'tm' and 'fsec'.
38193883
*/
38203884
staticvoid
3821-
do_to_timestamp(text*date_txt,text*fmt,
3885+
do_to_timestamp(text*date_txt,text*fmt,boolstd,
38223886
structpg_tm*tm,fsec_t*fsec,int*fprec)
38233887
{
38243888
FormatNode*format;
@@ -3853,15 +3917,15 @@ do_to_timestamp(text *date_txt, text *fmt,
38533917

38543918
format= (FormatNode*)palloc((fmt_len+1)*sizeof(FormatNode));
38553919

3856-
parse_format(format,fmt_str,DCH_keywords,
3857-
DCH_suff,DCH_index,DCH_TYPE,NULL);
3920+
parse_format(format,fmt_str,DCH_keywords,DCH_suff,DCH_index,
3921+
DCH_FLAG | (std ?STD_FLAG :0),NULL);
38583922
}
38593923
else
38603924
{
38613925
/*
38623926
* Use cache buffers
38633927
*/
3864-
DCHCacheEntry*ent=DCH_cache_fetch(fmt_str);
3928+
DCHCacheEntry*ent=DCH_cache_fetch(fmt_str,std);
38653929

38663930
incache= true;
38673931
format=ent->format;
@@ -3872,7 +3936,7 @@ do_to_timestamp(text *date_txt, text *fmt,
38723936
/* dump_index(DCH_keywords, DCH_index); */
38733937
#endif
38743938

3875-
DCH_from_char(format,date_str,&tmfc);
3939+
DCH_from_char(format,date_str,&tmfc,std);
38763940

38773941
pfree(fmt_str);
38783942

@@ -4241,7 +4305,7 @@ NUM_cache_fetch(const char *str)
42414305
zeroize_NUM(&ent->Num);
42424306

42434307
parse_format(ent->format,str,NUM_keywords,
4244-
NULL,NUM_index,NUM_TYPE,&ent->Num);
4308+
NULL,NUM_index,NUM_FLAG,&ent->Num);
42454309

42464310
ent->valid= true;
42474311
}
@@ -4273,7 +4337,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
42734337
zeroize_NUM(Num);
42744338

42754339
parse_format(format,str,NUM_keywords,
4276-
NULL,NUM_index,NUM_TYPE,Num);
4340+
NULL,NUM_index,NUM_FLAG,Num);
42774341
}
42784342
else
42794343
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp