@@ -43,8 +43,6 @@ typedef struct/* state of JSON lexer */
4343char * token_start ;/* start of current token within input */
4444char * token_terminator ;/* end of previous or current token */
4545JsonValueType token_type ;/* type of current token, once it's known */
46- int line_number ;/* current line number (counting from 1) */
47- char * line_start ;/* start of current line within input (BROKEN!!) */
4846}JsonLexContext ;
4947
5048typedef enum /* states of JSON parser */
@@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex);
7876static void json_lex_number (JsonLexContext * lex ,char * s );
7977static void report_parse_error (JsonParseStack * stack ,JsonLexContext * lex );
8078static void report_invalid_token (JsonLexContext * lex );
79+ static int report_json_context (JsonLexContext * lex );
8180static char * extract_mb_char (char * s );
8281static void composite_to_json (Datum composite ,StringInfo result ,
8382bool use_line_feeds );
@@ -185,8 +184,6 @@ json_validate_cstring(char *input)
185184/* Set up lexing context. */
186185lex .input = input ;
187186lex .token_terminator = lex .input ;
188- lex .line_number = 1 ;
189- lex .line_start = input ;
190187
191188/* Set up parse stack. */
192189stacksize = 32 ;
@@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex)
335332/* Skip leading whitespace. */
336333s = lex -> token_terminator ;
337334while (* s == ' ' || * s == '\t' || * s == '\n' || * s == '\r' )
338- {
339- if (* s == '\n' )
340- lex -> line_number ++ ;
341335s ++ ;
342- }
343336lex -> token_start = s ;
344337
345338/* Determine token type. */
@@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex)
350343{
351344/* End of string. */
352345lex -> token_start = NULL ;
353- lex -> token_terminator = NULL ;
346+ lex -> token_terminator = s ;
354347}
355348else
356349{
@@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex)
397390/*
398391 * We got some sort of unexpected punctuation or an otherwise
399392 * unexpected character, so just complain about that one
400- * character.
393+ * character. (It can't be multibyte because the above loop
394+ * will advance over any multibyte characters.)
401395 */
402396lex -> token_terminator = s + 1 ;
403397report_invalid_token (lex );
@@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex)
443437lex -> token_terminator = s ;
444438report_invalid_token (lex );
445439}
440+ /* Since *s isn't printable, exclude it from the context string */
441+ lex -> token_terminator = s ;
446442ereport (ERROR ,
447443(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
448444errmsg ("invalid input syntax for type json" ),
449- errdetail ("line %d: Character with value \"0x%02x\" must be escaped." ,
450- lex -> line_number , (unsignedchar )* s )));
445+ errdetail ("Character with value 0x%02x must be escaped." ,
446+ (unsignedchar )* s ),
447+ report_json_context (lex )));
451448}
452449else if (* s == '\\' )
453450{
@@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex)
465462
466463for (i = 1 ;i <=4 ;i ++ )
467464{
468- if (s [i ]== '\0' )
465+ s ++ ;
466+ if (* s == '\0' )
469467{
470- lex -> token_terminator = s + i ;
468+ lex -> token_terminator = s ;
471469report_invalid_token (lex );
472470}
473- else if (s [ i ] >='0' && s [ i ] <='9' )
474- ch = (ch * 16 )+ (s [ i ] - '0' );
475- else if (s [ i ] >='a' && s [ i ] <='f' )
476- ch = (ch * 16 )+ (s [ i ] - 'a' )+ 10 ;
477- else if (s [ i ] >='A' && s [ i ] <='F' )
478- ch = (ch * 16 )+ (s [ i ] - 'A' )+ 10 ;
471+ else if (* s >='0' && * s <='9' )
472+ ch = (ch * 16 )+ (* s - '0' );
473+ else if (* s >='a' && * s <='f' )
474+ ch = (ch * 16 )+ (* s - 'a' )+ 10 ;
475+ else if (* s >='A' && * s <='F' )
476+ ch = (ch * 16 )+ (* s - 'A' )+ 10 ;
479477else
480478{
479+ lex -> token_terminator = s + pg_mblen (s );
481480ereport (ERROR ,
482481(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
483482errmsg ("invalid input syntax for type json" ),
484- errdetail ("line %d: \"\\u\" must be followed by four hexadecimal digits." ,
485- lex -> line_number )));
483+ errdetail ("\"\\u\" must be followed by four hexadecimal digits." ) ,
484+ report_json_context ( lex )));
486485}
487486}
488-
489- /* Account for the four additional bytes we just parsed. */
490- s += 4 ;
491487}
492488else if (strchr ("\"\\/bfnrt" ,* s )== NULL )
493489{
494490/* Not a valid string escape, so error out. */
491+ lex -> token_terminator = s + pg_mblen (s );
495492ereport (ERROR ,
496493(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
497494errmsg ("invalid input syntax for type json" ),
498- errdetail ("line %d: Invalid escape \"\\%s\"." ,
499- lex -> line_number ,extract_mb_char (s ))));
495+ errdetail ("Escape sequence \"\\%s\" is invalid." ,
496+ extract_mb_char (s )),
497+ report_json_context (lex )));
500498}
501499}
502500}
@@ -599,75 +597,116 @@ json_lex_number(JsonLexContext *lex, char *s)
599597
600598/*
601599 * Report a parse error.
600+ *
601+ * lex->token_start and lex->token_terminator must identify the current token.
602602 */
603603static void
604604report_parse_error (JsonParseStack * stack ,JsonLexContext * lex )
605605{
606- char * detail = NULL ;
607- char * token = NULL ;
606+ char * token ;
608607int toklen ;
609608
610609/* Handle case where the input ended prematurely. */
611610if (lex -> token_start == NULL )
612611ereport (ERROR ,
613612(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
614- errmsg ("invalid input syntax for type json: \"%s\"" ,
615- lex -> input ),
616- errdetail ( "The input string ended unexpectedly." )));
613+ errmsg ("invalid input syntax for type json" ) ,
614+ errdetail ( "The input string ended unexpectedly." ),
615+ report_json_context ( lex )));
617616
618- /* Separate out theoffending token. */
617+ /* Separate out thecurrent token. */
619618toklen = lex -> token_terminator - lex -> token_start ;
620619token = palloc (toklen + 1 );
621620memcpy (token ,lex -> token_start ,toklen );
622621token [toklen ]= '\0' ;
623622
624- /*Select correct detail message. */
623+ /*Complain, with the appropriate detail message. */
625624if (stack == NULL )
626- detail = "line %d: Expected end of input, but found \"%s\"." ;
625+ ereport (ERROR ,
626+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
627+ errmsg ("invalid input syntax for type json" ),
628+ errdetail ("Expected end of input, but found \"%s\"." ,
629+ token ),
630+ report_json_context (lex )));
627631else
628632{
629633switch (stack -> state )
630634{
631635case JSON_PARSE_VALUE :
632- detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\"." ;
636+ ereport (ERROR ,
637+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
638+ errmsg ("invalid input syntax for type json" ),
639+ errdetail ("Expected JSON value, but found \"%s\"." ,
640+ token ),
641+ report_json_context (lex )));
633642break ;
634643case JSON_PARSE_ARRAY_START :
635- detail = "line %d: Expected array element or \"]\", but found \"%s\"." ;
644+ ereport (ERROR ,
645+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
646+ errmsg ("invalid input syntax for type json" ),
647+ errdetail ("Expected array element or \"]\", but found \"%s\"." ,
648+ token ),
649+ report_json_context (lex )));
636650break ;
637651case JSON_PARSE_ARRAY_NEXT :
638- detail = "line %d: Expected \",\" or \"]\", but found \"%s\"." ;
652+ ereport (ERROR ,
653+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
654+ errmsg ("invalid input syntax for type json" ),
655+ errdetail ("Expected \",\" or \"]\", but found \"%s\"." ,
656+ token ),
657+ report_json_context (lex )));
639658break ;
640659case JSON_PARSE_OBJECT_START :
641- detail = "line %d: Expected string or \"}\", but found \"%s\"." ;
660+ ereport (ERROR ,
661+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
662+ errmsg ("invalid input syntax for type json" ),
663+ errdetail ("Expected string or \"}\", but found \"%s\"." ,
664+ token ),
665+ report_json_context (lex )));
642666break ;
643667case JSON_PARSE_OBJECT_LABEL :
644- detail = "line %d: Expected \":\", but found \"%s\"." ;
668+ ereport (ERROR ,
669+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
670+ errmsg ("invalid input syntax for type json" ),
671+ errdetail ("Expected \":\", but found \"%s\"." ,
672+ token ),
673+ report_json_context (lex )));
645674break ;
646675case JSON_PARSE_OBJECT_NEXT :
647- detail = "line %d: Expected \",\" or \"}\", but found \"%s\"." ;
676+ ereport (ERROR ,
677+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
678+ errmsg ("invalid input syntax for type json" ),
679+ errdetail ("Expected \",\" or \"}\", but found \"%s\"." ,
680+ token ),
681+ report_json_context (lex )));
648682break ;
649683case JSON_PARSE_OBJECT_COMMA :
650- detail = "line %d: Expected string, but found \"%s\"." ;
684+ ereport (ERROR ,
685+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
686+ errmsg ("invalid input syntax for type json" ),
687+ errdetail ("Expected string, but found \"%s\"." ,
688+ token ),
689+ report_json_context (lex )));
651690break ;
691+ default :
692+ elog (ERROR ,"unexpected json parse state: %d" ,
693+ (int )stack -> state );
652694}
653695}
654-
655- ereport (ERROR ,
656- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
657- errmsg ("invalid input syntax for type json: \"%s\"" ,
658- lex -> input ),
659- detail ?errdetail (detail ,lex -> line_number ,token ) :0 ));
660696}
661697
662698/*
663699 * Report an invalid input token.
700+ *
701+ * lex->token_start and lex->token_terminator must identify the token.
664702 */
665703static void
666704report_invalid_token (JsonLexContext * lex )
667705{
668706char * token ;
669707int toklen ;
670708
709+ /* Separate out the offending token. */
671710toklen = lex -> token_terminator - lex -> token_start ;
672711token = palloc (toklen + 1 );
673712memcpy (token ,lex -> token_start ,toklen );
@@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex)
676715ereport (ERROR ,
677716(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
678717errmsg ("invalid input syntax for type json" ),
679- errdetail ("line %d: Token \"%s\" is invalid." ,
680- lex -> line_number ,token )));
718+ errdetail ("Token \"%s\" is invalid." ,token ),
719+ report_json_context (lex )));
720+ }
721+
722+ /*
723+ * Report a CONTEXT line for bogus JSON input.
724+ *
725+ * lex->token_terminator must be set to identify the spot where we detected
726+ * the error. Note that lex->token_start might be NULL, in case we recognized
727+ * error at EOF.
728+ *
729+ * The return value isn't meaningful, but we make it non-void so that this
730+ * can be invoked inside ereport().
731+ */
732+ static int
733+ report_json_context (JsonLexContext * lex )
734+ {
735+ const char * context_start ;
736+ const char * context_end ;
737+ const char * line_start ;
738+ int line_number ;
739+ char * ctxt ;
740+ int ctxtlen ;
741+ const char * prefix ;
742+ const char * suffix ;
743+
744+ /* Choose boundaries for the part of the input we will display */
745+ context_start = lex -> input ;
746+ context_end = lex -> token_terminator ;
747+ line_start = context_start ;
748+ line_number = 1 ;
749+ for (;;)
750+ {
751+ /* Always advance over newlines (context_end test is just paranoia) */
752+ if (* context_start == '\n' && context_start < context_end )
753+ {
754+ context_start ++ ;
755+ line_start = context_start ;
756+ line_number ++ ;
757+ continue ;
758+ }
759+ /* Otherwise, done as soon as we are close enough to context_end */
760+ if (context_end - context_start < 50 )
761+ break ;
762+ /* Advance to next multibyte character */
763+ if (IS_HIGHBIT_SET (* context_start ))
764+ context_start += pg_mblen (context_start );
765+ else
766+ context_start ++ ;
767+ }
768+
769+ /*
770+ * We add "..." to indicate that the excerpt doesn't start at the
771+ * beginning of the line ... but if we're within 3 characters of the
772+ * beginning of the line, we might as well just show the whole line.
773+ */
774+ if (context_start - line_start <=3 )
775+ context_start = line_start ;
776+
777+ /* Get a null-terminated copy of the data to present */
778+ ctxtlen = context_end - context_start ;
779+ ctxt = palloc (ctxtlen + 1 );
780+ memcpy (ctxt ,context_start ,ctxtlen );
781+ ctxt [ctxtlen ]= '\0' ;
782+
783+ /*
784+ * Show the context, prefixing "..." if not starting at start of line, and
785+ * suffixing "..." if not ending at end of line.
786+ */
787+ prefix = (context_start > line_start ) ?"..." :"" ;
788+ suffix = (* context_end != '\0' && * context_end != '\n' && * context_end != '\r' ) ?"..." :"" ;
789+
790+ return errcontext ("JSON data, line %d: %s%s%s" ,
791+ line_number ,prefix ,ctxt ,suffix );
681792}
682793
683794/*