Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 25a7812

Browse files
committed
Fix JSON error reporting for many cases of erroneous string values.
The majority of error exit cases in json_lex_string() failed to set lex->token_terminator, causing problems for the error context reporting code: it would see token_terminator less than token_start and do something more or less nuts. In v14 and up the end result could be as bad as a crash in report_json_context(). Older versions accidentally avoided that fate; but all versions produce error context lines that are far less useful than intended, because they'd stop at the end of the prior token instead of continuing to where the actually-bad input is.

To fix, invent some macros that make it less notationally painful to do the right thing. Also add documentation about what the function is actually required to do; and in >= v14, add an assertion in report_json_context about token_terminator being sufficiently far advanced.

Per report from Nikolay Shaplov. Back-patch to all supported versions.

Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro
1 parent 30dbdbe commit 25a7812

File tree

4 files changed

+78
-59
lines changed

4 files changed

+78
-59
lines changed

‎src/backend/utils/adt/jsonfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,7 @@ report_json_context(JsonLexContext *lex)
675675
line_start=lex->line_start;
676676
context_start=line_start;
677677
context_end=lex->token_terminator;
678+
Assert(context_end >=context_start);
678679

679680
/* Advance until we are close enough to context_end */
680681
while (context_end-context_start >=50)

‎src/common/jsonapi.c

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,14 @@ json_lex(JsonLexContext *lex)
697697

698698
/*
699699
* The next token in the input stream is known to be a string; lex it.
700+
*
701+
* If lex->strval isn't NULL, fill it with the decoded string.
702+
* Set lex->token_terminator to the end of the decoded input, and in
703+
* success cases, transfer its previous value to lex->prev_token_terminator.
704+
* Return JSON_SUCCESS or an error code.
705+
*
706+
* Note: be careful that all error exits advance lex->token_terminator
707+
* to the point after the character we detected the error on.
700708
*/
701709
staticinlineJsonParseErrorType
702710
json_lex_string(JsonLexContext*lex)
@@ -705,6 +713,19 @@ json_lex_string(JsonLexContext *lex)
705713
char*constend=lex->input+lex->input_length;
706714
inthi_surrogate=-1;
707715

716+
/* Convenience macros for error exits */
717+
#defineFAIL_AT_CHAR_START(code) \
718+
do { \
719+
lex->token_terminator = s; \
720+
return code; \
721+
} while (0)
722+
#defineFAIL_AT_CHAR_END(code) \
723+
do { \
724+
lex->token_terminator = \
725+
s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
726+
return code; \
727+
} while (0)
728+
708729
if (lex->strval!=NULL)
709730
resetStringInfo(lex->strval);
710731

@@ -715,21 +736,15 @@ json_lex_string(JsonLexContext *lex)
715736
s++;
716737
/* Premature end of the string. */
717738
if (s >=end)
718-
{
719-
lex->token_terminator=s;
720-
returnJSON_INVALID_TOKEN;
721-
}
739+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
722740
elseif (*s=='"')
723741
break;
724742
elseif (*s=='\\')
725743
{
726744
/* OK, we have an escape character. */
727745
s++;
728746
if (s >=end)
729-
{
730-
lex->token_terminator=s;
731-
returnJSON_INVALID_TOKEN;
732-
}
747+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
733748
elseif (*s=='u')
734749
{
735750
inti;
@@ -739,21 +754,15 @@ json_lex_string(JsonLexContext *lex)
739754
{
740755
s++;
741756
if (s >=end)
742-
{
743-
lex->token_terminator=s;
744-
returnJSON_INVALID_TOKEN;
745-
}
757+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
746758
elseif (*s >='0'&&*s <='9')
747759
ch= (ch*16)+ (*s-'0');
748760
elseif (*s >='a'&&*s <='f')
749761
ch= (ch*16)+ (*s-'a')+10;
750762
elseif (*s >='A'&&*s <='F')
751763
ch= (ch*16)+ (*s-'A')+10;
752764
else
753-
{
754-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
755-
returnJSON_UNICODE_ESCAPE_FORMAT;
756-
}
765+
FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
757766
}
758767
if (lex->strval!=NULL)
759768
{
@@ -763,20 +772,20 @@ json_lex_string(JsonLexContext *lex)
763772
if (is_utf16_surrogate_first(ch))
764773
{
765774
if (hi_surrogate!=-1)
766-
returnJSON_UNICODE_HIGH_SURROGATE;
775+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
767776
hi_surrogate=ch;
768777
continue;
769778
}
770779
elseif (is_utf16_surrogate_second(ch))
771780
{
772781
if (hi_surrogate==-1)
773-
returnJSON_UNICODE_LOW_SURROGATE;
782+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
774783
ch=surrogate_pair_to_codepoint(hi_surrogate,ch);
775784
hi_surrogate=-1;
776785
}
777786

778787
if (hi_surrogate!=-1)
779-
returnJSON_UNICODE_LOW_SURROGATE;
788+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
780789

781790
/*
782791
* Reject invalid cases. We can't have a value above
@@ -786,7 +795,7 @@ json_lex_string(JsonLexContext *lex)
786795
if (ch==0)
787796
{
788797
/* We can't allow this, since our TEXT type doesn't */
789-
returnJSON_UNICODE_CODE_POINT_ZERO;
798+
FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
790799
}
791800

792801
/*
@@ -800,7 +809,7 @@ json_lex_string(JsonLexContext *lex)
800809
charcbuf[MAX_UNICODE_EQUIVALENT_STRING+1];
801810

802811
if (!pg_unicode_to_server_noerror(ch, (unsignedchar*)cbuf))
803-
returnJSON_UNICODE_UNTRANSLATABLE;
812+
FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
804813
appendStringInfoString(lex->strval,cbuf);
805814
}
806815
#else
@@ -820,14 +829,14 @@ json_lex_string(JsonLexContext *lex)
820829
appendStringInfoChar(lex->strval, (char)ch);
821830
}
822831
else
823-
returnJSON_UNICODE_HIGH_ESCAPE;
832+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
824833
#endif/* FRONTEND */
825834
}
826835
}
827836
elseif (lex->strval!=NULL)
828837
{
829838
if (hi_surrogate!=-1)
830-
returnJSON_UNICODE_LOW_SURROGATE;
839+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
831840

832841
switch (*s)
833842
{
@@ -852,10 +861,14 @@ json_lex_string(JsonLexContext *lex)
852861
appendStringInfoChar(lex->strval,'\t');
853862
break;
854863
default:
855-
/* Not a valid string escape, so signal error. */
864+
865+
/*
866+
* Not a valid string escape, so signal error. We
867+
* adjust token_start so that just the escape sequence
868+
* is reported, not the whole string.
869+
*/
856870
lex->token_start=s;
857-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
858-
returnJSON_ESCAPING_INVALID;
871+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
859872
}
860873
}
861874
elseif (strchr("\"\\/bfnrt",*s)==NULL)
@@ -868,16 +881,15 @@ json_lex_string(JsonLexContext *lex)
868881
* shown it's not a performance win.
869882
*/
870883
lex->token_start=s;
871-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
872-
returnJSON_ESCAPING_INVALID;
884+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
873885
}
874886
}
875887
else
876888
{
877889
char*p=s;
878890

879891
if (hi_surrogate!=-1)
880-
returnJSON_UNICODE_LOW_SURROGATE;
892+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
881893

882894
/*
883895
* Skip to the first byte that requires special handling, so we
@@ -917,12 +929,18 @@ json_lex_string(JsonLexContext *lex)
917929
}
918930

919931
if (hi_surrogate!=-1)
932+
{
933+
lex->token_terminator=s+1;
920934
returnJSON_UNICODE_LOW_SURROGATE;
935+
}
921936

922937
/* Hooray, we found the end of the string! */
923938
lex->prev_token_terminator=lex->token_terminator;
924939
lex->token_terminator=s+1;
925940
returnJSON_SUCCESS;
941+
942+
#undef FAIL_AT_CHAR_START
943+
#undef FAIL_AT_CHAR_END
926944
}
927945

928946
/*

‎src/test/regress/expected/json_encoding.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
5656
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
5757
ERROR: invalid input syntax for type json
5858
DETAIL: Unicode high surrogate must not follow a high surrogate.
59-
CONTEXT: JSON data, line 1: { "a":...
59+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
6060
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
6161
ERROR: invalid input syntax for type json
6262
DETAIL: Unicode low surrogate must follow a high surrogate.
63-
CONTEXT: JSON data, line 1: { "a":...
63+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6464
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
6565
ERROR: invalid input syntax for type json
6666
DETAIL: Unicode low surrogate must follow a high surrogate.
67-
CONTEXT: JSON data, line 1: { "a":...
67+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
6868
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
6969
ERROR: invalid input syntax for type json
7070
DETAIL: Unicode low surrogate must follow a high surrogate.
71-
CONTEXT: JSON data, line 1: { "a":...
71+
CONTEXT: JSON data, line 1: { "a": "\ude04...
7272
--handling of simple unicode escapes
7373
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
7474
correct_in_utf8
@@ -121,7 +121,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
121121
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
122122
ERROR: unsupported Unicode escape sequence
123123
DETAIL: \u0000 cannot be converted to text.
124-
CONTEXT: JSON data, line 1: { "a":...
124+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
125125
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
126126
not_an_escape
127127
--------------------
@@ -159,7 +159,7 @@ ERROR: unsupported Unicode escape sequence
159159
LINE 1: SELECT '"\u0000"'::jsonb;
160160
^
161161
DETAIL: \u0000 cannot be converted to text.
162-
CONTEXT: JSON data, line 1: ...
162+
CONTEXT: JSON data, line 1: "\u0000...
163163
-- use octet_length here so we don't get an odd unicode char in the
164164
-- output
165165
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -180,25 +180,25 @@ ERROR: invalid input syntax for type json
180180
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
181181
^
182182
DETAIL: Unicode high surrogate must not follow a high surrogate.
183-
CONTEXT: JSON data, line 1: { "a":...
183+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
184184
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
185185
ERROR: invalid input syntax for type json
186186
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
187187
^
188188
DETAIL: Unicode low surrogate must follow a high surrogate.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "\ude04...
190190
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
191191
ERROR: invalid input syntax for type json
192192
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
193193
^
194194
DETAIL: Unicode low surrogate must follow a high surrogate.
195-
CONTEXT: JSON data, line 1: { "a":...
195+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
196196
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
197197
ERROR: invalid input syntax for type json
198198
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
199199
^
200200
DETAIL: Unicode low surrogate must follow a high surrogate.
201-
CONTEXT: JSON data, line 1: { "a":...
201+
CONTEXT: JSON data, line 1: { "a": "\ude04...
202202
-- handling of simple unicode escapes
203203
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
204204
correct_in_utf8
@@ -223,7 +223,7 @@ ERROR: unsupported Unicode escape sequence
223223
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
224224
^
225225
DETAIL: \u0000 cannot be converted to text.
226-
CONTEXT: JSON data, line 1: { "a":...
226+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
227227
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
228228
not_an_escape
229229
------------------------------
@@ -253,7 +253,7 @@ ERROR: unsupported Unicode escape sequence
253253
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
254254
^
255255
DETAIL: \u0000 cannot be converted to text.
256-
CONTEXT: JSON data, line 1: { "a":...
256+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
257257
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
258258
not_an_escape
259259
--------------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp