Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 80edfd7

Browse files
committed
Revisit error message details for JSON input parsing.
Instead of identifying error locations only by line number (which could be entirely unhelpful with long input lines), provide a fragment of the input text too, placing this info in a new CONTEXT entry. Make the error detail messages conform more closely to style guidelines, fix failure to expose some of them for translation, ensure compiler can check formats against supplied parameters.
1 parent 0f0fba1, commit 80edfd7

File tree

2 files changed

+224
-86
lines changed

2 files changed

+224
-86
lines changed

‎src/backend/utils/adt/json.c

Lines changed: 161 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,6 @@ typedef struct/* state of JSON lexer */
4343
char*token_start;/* start of current token within input */
4444
char*token_terminator;/* end of previous or current token */
4545
JsonValueTypetoken_type;/* type of current token, once it's known */
46-
intline_number;/* current line number (counting from 1) */
47-
char*line_start;/* start of current line within input (BROKEN!!) */
4846
}JsonLexContext;
4947

5048
typedefenum/* states of JSON parser */
@@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex);
7876
staticvoidjson_lex_number(JsonLexContext*lex,char*s);
7977
staticvoidreport_parse_error(JsonParseStack*stack,JsonLexContext*lex);
8078
staticvoidreport_invalid_token(JsonLexContext*lex);
79+
staticintreport_json_context(JsonLexContext*lex);
8180
staticchar*extract_mb_char(char*s);
8281
staticvoidcomposite_to_json(Datumcomposite,StringInforesult,
8382
booluse_line_feeds);
@@ -185,8 +184,6 @@ json_validate_cstring(char *input)
185184
/* Set up lexing context. */
186185
lex.input=input;
187186
lex.token_terminator=lex.input;
188-
lex.line_number=1;
189-
lex.line_start=input;
190187

191188
/* Set up parse stack. */
192189
stacksize=32;
@@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex)
335332
/* Skip leading whitespace. */
336333
s=lex->token_terminator;
337334
while (*s==' '||*s=='\t'||*s=='\n'||*s=='\r')
338-
{
339-
if (*s=='\n')
340-
lex->line_number++;
341335
s++;
342-
}
343336
lex->token_start=s;
344337

345338
/* Determine token type. */
@@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex)
350343
{
351344
/* End of string. */
352345
lex->token_start=NULL;
353-
lex->token_terminator=NULL;
346+
lex->token_terminator=s;
354347
}
355348
else
356349
{
@@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex)
397390
/*
398391
* We got some sort of unexpected punctuation or an otherwise
399392
* unexpected character, so just complain about that one
400-
* character.
393+
* character. (It can't be multibyte because the above loop
394+
* will advance over any multibyte characters.)
401395
*/
402396
lex->token_terminator=s+1;
403397
report_invalid_token(lex);
@@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex)
443437
lex->token_terminator=s;
444438
report_invalid_token(lex);
445439
}
440+
/* Since *s isn't printable, exclude it from the context string */
441+
lex->token_terminator=s;
446442
ereport(ERROR,
447443
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
448444
errmsg("invalid input syntax for type json"),
449-
errdetail("line %d: Character with value \"0x%02x\" must be escaped.",
450-
lex->line_number, (unsignedchar)*s)));
445+
errdetail("Character with value 0x%02x must be escaped.",
446+
(unsignedchar)*s),
447+
report_json_context(lex)));
451448
}
452449
elseif (*s=='\\')
453450
{
@@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex)
465462

466463
for (i=1;i <=4;i++)
467464
{
468-
if (s[i]=='\0')
465+
s++;
466+
if (*s=='\0')
469467
{
470-
lex->token_terminator=s+i;
468+
lex->token_terminator=s;
471469
report_invalid_token(lex);
472470
}
473-
elseif (s[i] >='0'&&s[i] <='9')
474-
ch= (ch*16)+ (s[i]-'0');
475-
elseif (s[i] >='a'&&s[i] <='f')
476-
ch= (ch*16)+ (s[i]-'a')+10;
477-
elseif (s[i] >='A'&&s[i] <='F')
478-
ch= (ch*16)+ (s[i]-'A')+10;
471+
elseif (*s >='0'&&*s <='9')
472+
ch= (ch*16)+ (*s-'0');
473+
elseif (*s >='a'&&*s <='f')
474+
ch= (ch*16)+ (*s-'a')+10;
475+
elseif (*s >='A'&&*s <='F')
476+
ch= (ch*16)+ (*s-'A')+10;
479477
else
480478
{
479+
lex->token_terminator=s+pg_mblen(s);
481480
ereport(ERROR,
482481
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
483482
errmsg("invalid input syntax for type json"),
484-
errdetail("line %d:\"\\u\" must be followed by four hexadecimal digits.",
485-
lex->line_number)));
483+
errdetail("\"\\u\" must be followed by four hexadecimal digits."),
484+
report_json_context(lex)));
486485
}
487486
}
488-
489-
/* Account for the four additional bytes we just parsed. */
490-
s+=4;
491487
}
492488
elseif (strchr("\"\\/bfnrt",*s)==NULL)
493489
{
494490
/* Not a valid string escape, so error out. */
491+
lex->token_terminator=s+pg_mblen(s);
495492
ereport(ERROR,
496493
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
497494
errmsg("invalid input syntax for type json"),
498-
errdetail("line %d: Invalid escape \"\\%s\".",
499-
lex->line_number,extract_mb_char(s))));
495+
errdetail("Escape sequence \"\\%s\" is invalid.",
496+
extract_mb_char(s)),
497+
report_json_context(lex)));
500498
}
501499
}
502500
}
@@ -599,75 +597,116 @@ json_lex_number(JsonLexContext *lex, char *s)
599597

600598
/*
601599
* Report a parse error.
600+
*
601+
* lex->token_start and lex->token_terminator must identify the current token.
602602
*/
603603
staticvoid
604604
report_parse_error(JsonParseStack*stack,JsonLexContext*lex)
605605
{
606-
char*detail=NULL;
607-
char*token=NULL;
606+
char*token;
608607
inttoklen;
609608

610609
/* Handle case where the input ended prematurely. */
611610
if (lex->token_start==NULL)
612611
ereport(ERROR,
613612
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
614-
errmsg("invalid input syntax for type json: \"%s\"",
615-
lex->input),
616-
errdetail("The input string ended unexpectedly.")));
613+
errmsg("invalid input syntax for type json"),
614+
errdetail("Theinput string ended unexpectedly."),
615+
report_json_context(lex)));
617616

618-
/* Separate out theoffending token. */
617+
/* Separate out thecurrent token. */
619618
toklen=lex->token_terminator-lex->token_start;
620619
token=palloc(toklen+1);
621620
memcpy(token,lex->token_start,toklen);
622621
token[toklen]='\0';
623622

624-
/*Select correct detail message. */
623+
/*Complain, with the appropriate detail message. */
625624
if (stack==NULL)
626-
detail="line %d: Expected end of input, but found \"%s\".";
625+
ereport(ERROR,
626+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
627+
errmsg("invalid input syntax for type json"),
628+
errdetail("Expected end of input, but found \"%s\".",
629+
token),
630+
report_json_context(lex)));
627631
else
628632
{
629633
switch (stack->state)
630634
{
631635
caseJSON_PARSE_VALUE:
632-
detail="line %d: Expected string, number, object, array, true, false, or null, but found \"%s\".";
636+
ereport(ERROR,
637+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
638+
errmsg("invalid input syntax for type json"),
639+
errdetail("Expected JSON value, but found \"%s\".",
640+
token),
641+
report_json_context(lex)));
633642
break;
634643
caseJSON_PARSE_ARRAY_START:
635-
detail="line %d: Expected array element or \"]\", but found \"%s\".";
644+
ereport(ERROR,
645+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
646+
errmsg("invalid input syntax for type json"),
647+
errdetail("Expected array element or \"]\", but found \"%s\".",
648+
token),
649+
report_json_context(lex)));
636650
break;
637651
caseJSON_PARSE_ARRAY_NEXT:
638-
detail="line %d: Expected \",\" or \"]\", but found \"%s\".";
652+
ereport(ERROR,
653+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
654+
errmsg("invalid input syntax for type json"),
655+
errdetail("Expected \",\" or \"]\", but found \"%s\".",
656+
token),
657+
report_json_context(lex)));
639658
break;
640659
caseJSON_PARSE_OBJECT_START:
641-
detail="line %d: Expected string or \"}\", but found \"%s\".";
660+
ereport(ERROR,
661+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
662+
errmsg("invalid input syntax for type json"),
663+
errdetail("Expected string or \"}\", but found \"%s\".",
664+
token),
665+
report_json_context(lex)));
642666
break;
643667
caseJSON_PARSE_OBJECT_LABEL:
644-
detail="line %d: Expected \":\", but found \"%s\".";
668+
ereport(ERROR,
669+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
670+
errmsg("invalid input syntax for type json"),
671+
errdetail("Expected \":\", but found \"%s\".",
672+
token),
673+
report_json_context(lex)));
645674
break;
646675
caseJSON_PARSE_OBJECT_NEXT:
647-
detail="line %d: Expected \",\" or \"}\", but found \"%s\".";
676+
ereport(ERROR,
677+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
678+
errmsg("invalid input syntax for type json"),
679+
errdetail("Expected \",\" or \"}\", but found \"%s\".",
680+
token),
681+
report_json_context(lex)));
648682
break;
649683
caseJSON_PARSE_OBJECT_COMMA:
650-
detail="line %d: Expected string, but found \"%s\".";
684+
ereport(ERROR,
685+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
686+
errmsg("invalid input syntax for type json"),
687+
errdetail("Expected string, but found \"%s\".",
688+
token),
689+
report_json_context(lex)));
651690
break;
691+
default:
692+
elog(ERROR,"unexpected json parse state: %d",
693+
(int)stack->state);
652694
}
653695
}
654-
655-
ereport(ERROR,
656-
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
657-
errmsg("invalid input syntax for type json: \"%s\"",
658-
lex->input),
659-
detail ?errdetail(detail,lex->line_number,token) :0));
660696
}
661697

662698
/*
663699
* Report an invalid input token.
700+
*
701+
* lex->token_start and lex->token_terminator must identify the token.
664702
*/
665703
staticvoid
666704
report_invalid_token(JsonLexContext*lex)
667705
{
668706
char*token;
669707
inttoklen;
670708

709+
/* Separate out the offending token. */
671710
toklen=lex->token_terminator-lex->token_start;
672711
token=palloc(toklen+1);
673712
memcpy(token,lex->token_start,toklen);
@@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex)
676715
ereport(ERROR,
677716
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
678717
errmsg("invalid input syntax for type json"),
679-
errdetail("line %d: Token \"%s\" is invalid.",
680-
lex->line_number,token)));
718+
errdetail("Token \"%s\" is invalid.",token),
719+
report_json_context(lex)));
720+
}
721+
722+
/*
723+
* Report a CONTEXT line for bogus JSON input.
724+
*
725+
* lex->token_terminator must be set to identify the spot where we detected
726+
* the error. Note that lex->token_start might be NULL, in case we recognized
727+
* error at EOF.
728+
*
729+
* The return value isn't meaningful, but we make it non-void so that this
730+
* can be invoked inside ereport().
731+
*/
732+
staticint
733+
report_json_context(JsonLexContext*lex)
734+
{
735+
constchar*context_start;
736+
constchar*context_end;
737+
constchar*line_start;
738+
intline_number;
739+
char*ctxt;
740+
intctxtlen;
741+
constchar*prefix;
742+
constchar*suffix;
743+
744+
/* Choose boundaries for the part of the input we will display */
745+
context_start=lex->input;
746+
context_end=lex->token_terminator;
747+
line_start=context_start;
748+
line_number=1;
749+
for (;;)
750+
{
751+
/* Always advance over newlines (context_end test is just paranoia) */
752+
if (*context_start=='\n'&&context_start<context_end)
753+
{
754+
context_start++;
755+
line_start=context_start;
756+
line_number++;
757+
continue;
758+
}
759+
/* Otherwise, done as soon as we are close enough to context_end */
760+
if (context_end-context_start<50)
761+
break;
762+
/* Advance to next multibyte character */
763+
if (IS_HIGHBIT_SET(*context_start))
764+
context_start+=pg_mblen(context_start);
765+
else
766+
context_start++;
767+
}
768+
769+
/*
770+
* We add "..." to indicate that the excerpt doesn't start at the
771+
* beginning of the line ... but if we're within 3 characters of the
772+
* beginning of the line, we might as well just show the whole line.
773+
*/
774+
if (context_start-line_start <=3)
775+
context_start=line_start;
776+
777+
/* Get a null-terminated copy of the data to present */
778+
ctxtlen=context_end-context_start;
779+
ctxt=palloc(ctxtlen+1);
780+
memcpy(ctxt,context_start,ctxtlen);
781+
ctxt[ctxtlen]='\0';
782+
783+
/*
784+
* Show the context, prefixing "..." if not starting at start of line, and
785+
* suffixing "..." if not ending at end of line.
786+
*/
787+
prefix= (context_start>line_start) ?"..." :"";
788+
suffix= (*context_end!='\0'&&*context_end!='\n'&&*context_end!='\r') ?"..." :"";
789+
790+
returnerrcontext("JSON data, line %d: %s%s%s",
791+
line_number,prefix,ctxt,suffix);
681792
}
682793

683794
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp