Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 976a1a4

Browse files
committed
Improve to_date/to_number/to_timestamp behavior with multibyte characters.
The documentation says that these functions skip one input character per literal (non-pattern) format character. Actually, though, they skipped one input *byte* per literal *byte*, which could be hugely confusing if either data or format contained multibyte characters. To fix, adjust the FormatNode representation and parse_format() so that multibyte format characters are stored as one FormatNode not several, and adjust the data-skipping bits to advance by pg_mblen() not necessarily one byte. There's no user-visible behavior change on the to_char() side, although the internal representation changes. Commit e87d496 had already fixed most places where we skip characters on the basis of non-literal format patterns to advance by characters not bytes, but this gets one more place, the SKIP_THth macro. I think everything in formatting.c gets that right now. It'd be nice to have some regression test cases covering this behavior; but of course there's no way to do so in an encoding-agnostic way, and many of the interesting aspects would also require unportable locale selections. So I've not bothered here. Discussion: https://postgr.es/m/28186.1510957703@sss.pgh.pa.us
1 parent 63ca863 commit 976a1a4

File tree

1 file changed

+41
-27
lines changed

1 file changed

+41
-27
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 41 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -151,8 +151,6 @@ typedef enum
151151
FROM_CHAR_DATE_ISOWEEK/* ISO 8601 week date */
152152
}FromCharDateMode;
153153

154-
typedefstructFormatNodeFormatNode;
155-
156154
typedefstruct
157155
{
158156
constchar*name;
@@ -162,13 +160,13 @@ typedef struct
162160
FromCharDateModedate_mode;
163161
}KeyWord;
164162

165-
structFormatNode
163+
typedefstruct
166164
{
167-
inttype;/*node type*/
168-
constKeyWord*key;/* ifnodetype isKEYWORD*/
169-
charcharacter;/* ifnodetype is CHAR*/
170-
intsuffix;/* keyword suffix*/
171-
};
165+
inttype;/*NODE_TYPE_XXX, see below*/
166+
constKeyWord*key;/* if type isACTION*/
167+
charcharacter[MAX_MULTIBYTE_CHAR_LEN+1];/* if type is CHAR*/
168+
intsuffix;/* keywordprefix/suffix code, if any*/
169+
}FormatNode;
172170

173171
#defineNODE_TYPE_END1
174172
#defineNODE_TYPE_ACTION2
@@ -1282,12 +1280,15 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
12821280
}
12831281
elseif (*str)
12841282
{
1283+
intchlen;
1284+
12851285
/*
12861286
* Process double-quoted literal string, if any
12871287
*/
12881288
if (*str=='"')
12891289
{
1290-
while (*(++str))
1290+
str++;
1291+
while (*str)
12911292
{
12921293
if (*str=='"')
12931294
{
@@ -1297,11 +1298,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
12971298
/* backslash quotes the next character, if any */
12981299
if (*str=='\\'&&*(str+1))
12991300
str++;
1301+
chlen=pg_mblen(str);
13001302
n->type=NODE_TYPE_CHAR;
1301-
n->character=*str;
1303+
memcpy(n->character,str,chlen);
1304+
n->character[chlen]='\0';
13021305
n->key=NULL;
13031306
n->suffix=0;
13041307
n++;
1308+
str+=chlen;
13051309
}
13061310
}
13071311
else
@@ -1312,12 +1316,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
13121316
*/
13131317
if (*str=='\\'&&*(str+1)=='"')
13141318
str++;
1319+
chlen=pg_mblen(str);
13151320
n->type=NODE_TYPE_CHAR;
1316-
n->character=*str;
1321+
memcpy(n->character,str,chlen);
1322+
n->character[chlen]='\0';
13171323
n->key=NULL;
13181324
n->suffix=0;
13191325
n++;
1320-
str++;
1326+
str+=chlen;
13211327
}
13221328
}
13231329
}
@@ -1349,7 +1355,8 @@ dump_node(FormatNode *node, int max)
13491355
elog(DEBUG_elog_output,"%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
13501356
a,n->key->name,DUMP_THth(n->suffix),DUMP_FM(n->suffix));
13511357
elseif (n->type==NODE_TYPE_CHAR)
1352-
elog(DEBUG_elog_output,"%d:\t NODE_TYPE_CHAR '%c'",a,n->character);
1358+
elog(DEBUG_elog_output,"%d:\t NODE_TYPE_CHAR '%s'",
1359+
a,n->character);
13531360
elseif (n->type==NODE_TYPE_END)
13541361
{
13551362
elog(DEBUG_elog_output,"%d:\t NODE_TYPE_END",a);
@@ -2008,8 +2015,8 @@ asc_toupper_z(const char *buff)
20082015
do { \
20092016
if (S_THth(_suf)) \
20102017
{ \
2011-
if (*(ptr)) (ptr)++; \
2012-
if (*(ptr)) (ptr)++; \
2018+
if (*(ptr)) (ptr) += pg_mblen(ptr); \
2019+
if (*(ptr)) (ptr) += pg_mblen(ptr); \
20132020
} \
20142021
} while (0)
20152022

@@ -2076,7 +2083,8 @@ is_next_separator(FormatNode *n)
20762083

20772084
return true;
20782085
}
2079-
elseif (isdigit((unsignedchar)n->character))
2086+
elseif (n->character[1]=='\0'&&
2087+
isdigit((unsignedchar)n->character[0]))
20802088
return false;
20812089

20822090
return true;/* some non-digit input (separator) */
@@ -2405,8 +2413,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24052413
{
24062414
if (n->type!=NODE_TYPE_ACTION)
24072415
{
2408-
*s=n->character;
2409-
s++;
2416+
strcpy(s,n->character);
2417+
s+=strlen(s);
24102418
continue;
24112419
}
24122420

@@ -2974,7 +2982,7 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
29742982
* we don't insist that the consumed character match the format's
29752983
* character.
29762984
*/
2977-
s++;
2985+
s+=pg_mblen(s);
29782986
continue;
29792987
}
29802988

@@ -4217,7 +4225,7 @@ get_last_relevant_decnum(char *num)
42174225
/*
42184226
* These macros are used in NUM_processor() and its subsidiary routines.
42194227
* OVERLOAD_TEST: true if we've reached end of input string
4220-
* AMOUNT_TEST(s): true if at least scharacters remain in string
4228+
* AMOUNT_TEST(s): true if at least sbytes remain in string
42214229
*/
42224230
#defineOVERLOAD_TEST(Np->inout_p >= Np->inout + input_len)
42234231
#defineAMOUNT_TEST(s)(Np->inout_p <= Np->inout + (input_len - (s)))
@@ -4821,9 +4829,9 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
48214829
if (!Np->is_to_char)
48224830
{
48234831
/*
4824-
* Check at least onecharacter remains to be scanned. (In
4825-
*actionsbelow, must use AMOUNT_TEST if we want to read more
4826-
*characters thanthat.)
4832+
* Check at least onebyte remains to be scanned. (In actions
4833+
* below, must use AMOUNT_TEST if we want to read more bytes than
4834+
* that.)
48274835
*/
48284836
if (OVERLOAD_TEST)
48294837
break;
@@ -5081,12 +5089,18 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
50815089
* In TO_CHAR, non-pattern characters in the format are copied to
50825090
* the output. In TO_NUMBER, we skip one input character for each
50835091
* non-pattern format character, whether or not it matches the
5084-
* format character. (Currently, that's actually implemented as
5085-
* skipping one input byte per non-pattern format byte, which is
5086-
* wrong...)
5092+
* format character.
50875093
*/
50885094
if (Np->is_to_char)
5089-
*Np->inout_p=n->character;
5095+
{
5096+
strcpy(Np->inout_p,n->character);
5097+
Np->inout_p+=strlen(Np->inout_p);
5098+
}
5099+
else
5100+
{
5101+
Np->inout_p+=pg_mblen(Np->inout_p);
5102+
}
5103+
continue;
50905104
}
50915105
Np->inout_p++;
50925106
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp