NotificationsYou must be signed in to change notification settings
Fork5
Star27

Commit80edfd7

committed

Revisit error message details for JSON input parsing.

Instead of identifying error locations only by line number (which could be entirely unhelpful with long input lines), provide a fragment of the input text too, placing this info in a new CONTEXT entry. Make the error detail messages conform more closely to style guidelines, fix failure to expose some of them for translation, ensure compiler can check formats against supplied parameters.

1 parent0f0fba1 commit80edfd7Copy full SHA for 80edfd7

File tree

2 files changed

+224

-86

lines changed

src
- backend/utils/adt
  - json.c
- test/regress/expected
  - json.out

2 files changed

+224

-86

lines changed

`‎src/backend/utils/adt/json.c‎`

Lines changed: 161 additions & 50 deletions

Original file line number	Diff line number	Diff line change
`@@ -43,8 +43,6 @@ typedef struct/* state of JSON lexer */`
`43`	`43`	`char *token_start; /* start of current token within input */`
`44`	`44`	`char *token_terminator; /* end of previous or current token */`
`45`	`45`	`JsonValueType token_type; /* type of current token, once it's known */`
`46`		`-int line_number; /* current line number (counting from 1) */`
`47`		`-char *line_start; /* start of current line within input (BROKEN!!) */`
`48`	`46`	`}JsonLexContext;`
`49`	`47`
`50`	`48`	`typedefenum/* states of JSON parser */`
`@@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex);`
`78`	`76`	`static void json_lex_number(JsonLexContext *lex, char *s);`
`79`	`77`	`static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);`
`80`	`78`	`static void report_invalid_token(JsonLexContext *lex);`
	`79`	`+static int report_json_context(JsonLexContext *lex);`
`81`	`80`	`static char *extract_mb_char(char *s);`
`82`	`81`	`staticvoidcomposite_to_json(Datumcomposite,StringInforesult,`
`83`	`82`	`booluse_line_feeds);`
`@@ -185,8 +184,6 @@ json_validate_cstring(char *input)`
`185`	`184`	`/* Set up lexing context. */`
`186`	`185`	`lex.input=input;`
`187`	`186`	`lex.token_terminator=lex.input;`
`188`		`-lex.line_number=1;`
`189`		`-lex.line_start=input;`
`190`	`187`
`191`	`188`	`/* Set up parse stack. */`
`192`	`189`	`stacksize=32;`
`@@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex)`
`335`	`332`	`/* Skip leading whitespace. */`
`336`	`333`	`s=lex->token_terminator;`
`337`	`334`	`while (*s==' '\|\|*s=='\t'\|\|*s=='\n'\|\|*s=='\r')`
`338`		`-{`
`339`		`-if (*s=='\n')`
`340`		`-lex->line_number++;`
`341`	`335`	`s++;`
`342`		`-}`
`343`	`336`	`lex->token_start=s;`
`344`	`337`
`345`	`338`	`/* Determine token type. */`
`@@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex)`
`350`	`343`	`{`
`351`	`344`	`/* End of string. */`
`352`	`345`	`lex->token_start=NULL;`
`353`		`-lex->token_terminator=NULL;`
	`346`	`+lex->token_terminator=s;`
`354`	`347`	`}`
`355`	`348`	`else`
`356`	`349`	`{`
`@@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex)`
`397`	`390`	`/*`
`398`	`391`	`* We got some sort of unexpected punctuation or an otherwise`
`399`	`392`	`* unexpected character, so just complain about that one`
`400`		`- * character.`
	`393`	`+ * character. (It can't be multibyte because the above loop`
	`394`	`+ * will advance over any multibyte characters.)`
`401`	`395`	`*/`
`402`	`396`	`lex->token_terminator=s+1;`
`403`	`397`	`report_invalid_token(lex);`
`@@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex)`
`443`	`437`	`lex->token_terminator=s;`
`444`	`438`	`report_invalid_token(lex);`
`445`	`439`	`}`
	`440`	`+/* Since *s isn't printable, exclude it from the context string */`
	`441`	`+lex->token_terminator=s;`
`446`	`442`	`ereport(ERROR,`
`447`	`443`	`(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`448`	`444`	`errmsg("invalid input syntax for type json"),`
`449`		`-errdetail("line %d: Character with value \"0x%02x\" must be escaped.",`
`450`		`-lex->line_number, (unsignedchar)*s)));`
	`445`	`+errdetail("Character with value 0x%02x must be escaped.",`
	`446`	`+ (unsignedchar)*s),`
	`447`	`+report_json_context(lex)));`
`451`	`448`	`}`
`452`	`449`	`elseif (*s=='\\')`
`453`	`450`	`{`
`@@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex)`
`465`	`462`
`466`	`463`	`for (i=1;i <=4;i++)`
`467`	`464`	`{`
`468`		`-if (s[i]=='\0')`
	`465`	`+s++;`
	`466`	`+if (*s=='\0')`
`469`	`467`	`{`
`470`		`-lex->token_terminator=s+i;`
	`468`	`+lex->token_terminator=s;`
`471`	`469`	`report_invalid_token(lex);`
`472`	`470`	`}`
`473`		`-elseif (s[i] >='0'&&s[i] <='9')`
`474`		`-ch= (ch*16)+ (s[i]-'0');`
`475`		`-elseif (s[i] >='a'&&s[i] <='f')`
`476`		`-ch= (ch*16)+ (s[i]-'a')+10;`
`477`		`-elseif (s[i] >='A'&&s[i] <='F')`
`478`		`-ch= (ch*16)+ (s[i]-'A')+10;`
	`471`	`+else if (*s >='0'&&*s <='9')`
	`472`	`+ch= (ch*16)+ (*s-'0');`
	`473`	`+else if (*s >='a'&&*s <='f')`
	`474`	`+ch= (ch*16)+ (*s-'a')+10;`
	`475`	`+else if (*s >='A'&&*s <='F')`
	`476`	`+ch= (ch*16)+ (*s-'A')+10;`
`479`	`477`	`else`
`480`	`478`	`{`
	`479`	`+lex->token_terminator=s+pg_mblen(s);`
`481`	`480`	`ereport(ERROR,`
`482`	`481`	`(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`483`	`482`	`errmsg("invalid input syntax for type json"),`
`484`		`-errdetail("line %d:\"\\u\" must be followed by four hexadecimal digits.",`
`485`		`-lex->line_number)));`
	`483`	`+errdetail("\"\\u\" must be followed by four hexadecimal digits."),`
	`484`	`+report_json_context(lex)));`
`486`	`485`	`}`
`487`	`486`	`}`
`488`		`-`
`489`		`-/* Account for the four additional bytes we just parsed. */`
`490`		`-s+=4;`
`491`	`487`	`}`
`492`	`488`	`elseif (strchr("\"\\/bfnrt",*s)==NULL)`
`493`	`489`	`{`
`494`	`490`	`/* Not a valid string escape, so error out. */`
	`491`	`+lex->token_terminator=s+pg_mblen(s);`
`495`	`492`	`ereport(ERROR,`
`496`	`493`	`(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`497`	`494`	`errmsg("invalid input syntax for type json"),`
`498`		`-errdetail("line %d: Invalid escape \"\\%s\".",`
`499`		`-lex->line_number,extract_mb_char(s))));`
	`495`	`+errdetail("Escape sequence \"\\%s\" is invalid.",`
	`496`	`+extract_mb_char(s)),`
	`497`	`+report_json_context(lex)));`
`500`	`498`	`}`
`501`	`499`	`}`
`502`	`500`	`}`
`@@ -599,75 +597,116 @@ json_lex_number(JsonLexContext lex, char s)`
`599`	`597`
`600`	`598`	`/*`
`601`	`599`	`* Report a parse error.`
	`600`	`+ *`
	`601`	`+ * lex->token_start and lex->token_terminator must identify the current token.`
`602`	`602`	`*/`
`603`	`603`	`staticvoid`
`604`	`604`	`report_parse_error(JsonParseStackstack,JsonLexContextlex)`
`605`	`605`	`{`
`606`		`-char*detail=NULL;`
`607`		`-char*token=NULL;`
	`606`	`+char*token;`
`608`	`607`	`inttoklen;`
`609`	`608`
`610`	`609`	`/* Handle case where the input ended prematurely. */`
`611`	`610`	`if (lex->token_start==NULL)`
`612`	`611`	`ereport(ERROR,`
`613`	`612`	`(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`614`		`-errmsg("invalid input syntax for type json: \"%s\"",`
`615`		`-lex->input),`
`616`		`-errdetail("The input string ended unexpectedly.")));`
	`613`	`+errmsg("invalid input syntax for type json"),`
	`614`	`+errdetail("The input string ended unexpectedly."),`
	`615`	`+report_json_context(lex)));`
`617`	`616`
`618`		`-/* Separate out the offending token. */`
	`617`	`+/* Separate out the current token. */`
`619`	`618`	`toklen=lex->token_terminator-lex->token_start;`
`620`	`619`	`token=palloc(toklen+1);`
`621`	`620`	`memcpy(token,lex->token_start,toklen);`
`622`	`621`	`token[toklen]='\0';`
`623`	`622`
`624`		`-/* Select correct detail message. */`
	`623`	`+/* Complain, with the appropriate detail message. */`
`625`	`624`	`if (stack==NULL)`
`626`		`-detail="line %d: Expected end of input, but found \"%s\".";`
	`625`	`+ereport(ERROR,`
	`626`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`627`	`+errmsg("invalid input syntax for type json"),`
	`628`	`+errdetail("Expected end of input, but found \"%s\".",`
	`629`	`+token),`
	`630`	`+report_json_context(lex)));`
`627`	`631`	`else`
`628`	`632`	`{`
`629`	`633`	`switch (stack->state)`
`630`	`634`	`{`
`631`	`635`	`caseJSON_PARSE_VALUE:`
`632`		`-detail="line %d: Expected string, number, object, array, true, false, or null, but found \"%s\".";`
	`636`	`+ereport(ERROR,`
	`637`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`638`	`+errmsg("invalid input syntax for type json"),`
	`639`	`+errdetail("Expected JSON value, but found \"%s\".",`
	`640`	`+token),`
	`641`	`+report_json_context(lex)));`
`633`	`642`	`break;`
`634`	`643`	`caseJSON_PARSE_ARRAY_START:`
`635`		`-detail="line %d: Expected array element or \"]\", but found \"%s\".";`
	`644`	`+ereport(ERROR,`
	`645`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`646`	`+errmsg("invalid input syntax for type json"),`
	`647`	`+errdetail("Expected array element or \"]\", but found \"%s\".",`
	`648`	`+token),`
	`649`	`+report_json_context(lex)));`
`636`	`650`	`break;`
`637`	`651`	`caseJSON_PARSE_ARRAY_NEXT:`
`638`		`-detail="line %d: Expected \",\" or \"]\", but found \"%s\".";`
	`652`	`+ereport(ERROR,`
	`653`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`654`	`+errmsg("invalid input syntax for type json"),`
	`655`	`+errdetail("Expected \",\" or \"]\", but found \"%s\".",`
	`656`	`+token),`
	`657`	`+report_json_context(lex)));`
`639`	`658`	`break;`
`640`	`659`	`caseJSON_PARSE_OBJECT_START:`
`641`		`-detail="line %d: Expected string or \"}\", but found \"%s\".";`
	`660`	`+ereport(ERROR,`
	`661`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`662`	`+errmsg("invalid input syntax for type json"),`
	`663`	`+errdetail("Expected string or \"}\", but found \"%s\".",`
	`664`	`+token),`
	`665`	`+report_json_context(lex)));`
`642`	`666`	`break;`
`643`	`667`	`caseJSON_PARSE_OBJECT_LABEL:`
`644`		`-detail="line %d: Expected \":\", but found \"%s\".";`
	`668`	`+ereport(ERROR,`
	`669`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`670`	`+errmsg("invalid input syntax for type json"),`
	`671`	`+errdetail("Expected \":\", but found \"%s\".",`
	`672`	`+token),`
	`673`	`+report_json_context(lex)));`
`645`	`674`	`break;`
`646`	`675`	`caseJSON_PARSE_OBJECT_NEXT:`
`647`		`-detail="line %d: Expected \",\" or \"}\", but found \"%s\".";`
	`676`	`+ereport(ERROR,`
	`677`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`678`	`+errmsg("invalid input syntax for type json"),`
	`679`	`+errdetail("Expected \",\" or \"}\", but found \"%s\".",`
	`680`	`+token),`
	`681`	`+report_json_context(lex)));`
`648`	`682`	`break;`
`649`	`683`	`caseJSON_PARSE_OBJECT_COMMA:`
`650`		`-detail="line %d: Expected string, but found \"%s\".";`
	`684`	`+ereport(ERROR,`
	`685`	`+(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
	`686`	`+errmsg("invalid input syntax for type json"),`
	`687`	`+errdetail("Expected string, but found \"%s\".",`
	`688`	`+token),`
	`689`	`+report_json_context(lex)));`
`651`	`690`	`break;`
	`691`	`+default:`
	`692`	`+elog(ERROR,"unexpected json parse state: %d",`
	`693`	`+ (int)stack->state);`
`652`	`694`	`}`
`653`	`695`	`}`
`654`		`-`
`655`		`-ereport(ERROR,`
`656`		`-(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`657`		`-errmsg("invalid input syntax for type json: \"%s\"",`
`658`		`-lex->input),`
`659`		`-detail ?errdetail(detail,lex->line_number,token) :0));`
`660`	`696`	`}`
`661`	`697`
`662`	`698`	`/*`
`663`	`699`	`* Report an invalid input token.`
	`700`	`+ *`
	`701`	`+ * lex->token_start and lex->token_terminator must identify the token.`
`664`	`702`	`*/`
`665`	`703`	`staticvoid`
`666`	`704`	`report_invalid_token(JsonLexContext*lex)`
`667`	`705`	`{`
`668`	`706`	`char*token;`
`669`	`707`	`inttoklen;`
`670`	`708`
	`709`	`+/* Separate out the offending token. */`
`671`	`710`	`toklen=lex->token_terminator-lex->token_start;`
`672`	`711`	`token=palloc(toklen+1);`
`673`	`712`	`memcpy(token,lex->token_start,toklen);`
`@@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex)`
`676`	`715`	`ereport(ERROR,`
`677`	`716`	`(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),`
`678`	`717`	`errmsg("invalid input syntax for type json"),`
`679`		`-errdetail("line %d: Token \"%s\" is invalid.",`
`680`		`-lex->line_number,token)));`
	`718`	`+errdetail("Token \"%s\" is invalid.",token),`
	`719`	`+report_json_context(lex)));`
	`720`	`+}`
	`721`	`+`
	`722`	`+/*`
	`723`	`+ * Report a CONTEXT line for bogus JSON input.`
	`724`	`+ *`
	`725`	`+ * lex->token_terminator must be set to identify the spot where we detected`
	`726`	`+ * the error. Note that lex->token_start might be NULL, in case we recognized`
	`727`	`+ * error at EOF.`
	`728`	`+ *`
	`729`	`+ * The return value isn't meaningful, but we make it non-void so that this`
	`730`	`+ * can be invoked inside ereport().`
	`731`	`+ */`
	`732`	`+staticint`
	`733`	`+report_json_context(JsonLexContext*lex)`
	`734`	`+{`
	`735`	`+constchar*context_start;`
	`736`	`+constchar*context_end;`
	`737`	`+constchar*line_start;`
	`738`	`+intline_number;`
	`739`	`+char*ctxt;`
	`740`	`+intctxtlen;`
	`741`	`+constchar*prefix;`
	`742`	`+constchar*suffix;`
	`743`	`+`
	`744`	`+/* Choose boundaries for the part of the input we will display */`
	`745`	`+context_start=lex->input;`
	`746`	`+context_end=lex->token_terminator;`
	`747`	`+line_start=context_start;`
	`748`	`+line_number=1;`
	`749`	`+for (;;)`
	`750`	`+{`
	`751`	`+/* Always advance over newlines (context_end test is just paranoia) */`
	`752`	`+if (*context_start=='\n'&&context_start<context_end)`
	`753`	`+{`
	`754`	`+context_start++;`
	`755`	`+line_start=context_start;`
	`756`	`+line_number++;`
	`757`	`+continue;`
	`758`	`+}`
	`759`	`+/* Otherwise, done as soon as we are close enough to context_end */`
	`760`	`+if (context_end-context_start<50)`
	`761`	`+break;`
	`762`	`+/* Advance to next multibyte character */`
	`763`	`+if (IS_HIGHBIT_SET(*context_start))`
	`764`	`+context_start+=pg_mblen(context_start);`
	`765`	`+else`
	`766`	`+context_start++;`
	`767`	`+}`
	`768`	`+`
	`769`	`+/*`
	`770`	`+ * We add "..." to indicate that the excerpt doesn't start at the`
	`771`	`+ * beginning of the line ... but if we're within 3 characters of the`
	`772`	`+ * beginning of the line, we might as well just show the whole line.`
	`773`	`+ */`
	`774`	`+if (context_start-line_start <=3)`
	`775`	`+context_start=line_start;`
	`776`	`+`
	`777`	`+/* Get a null-terminated copy of the data to present */`
	`778`	`+ctxtlen=context_end-context_start;`
	`779`	`+ctxt=palloc(ctxtlen+1);`
	`780`	`+memcpy(ctxt,context_start,ctxtlen);`
	`781`	`+ctxt[ctxtlen]='\0';`
	`782`	`+`
	`783`	`+/*`
	`784`	`+ * Show the context, prefixing "..." if not starting at start of line, and`
	`785`	`+ * suffixing "..." if not ending at end of line.`
	`786`	`+ */`
	`787`	`+prefix= (context_start>line_start) ?"..." :"";`
	`788`	`+suffix= (*context_end!='\0'&&*context_end!='\n'&&*context_end!='\r') ?"..." :"";`
	`789`	`+`
	`790`	`+returnerrcontext("JSON data, line %d: %s%s%s",`
	`791`	`+line_number,prefix,ctxt,suffix);`
`681`	`792`	`}`
`682`	`793`
`683`	`794`	`/*`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit80edfd7

File tree

2 files changed

2 files changed

`‎src/backend/utils/adt/json.c‎`

0 commit comments