Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 74a1a36

Browse files
committed
Fix JSON error reporting for many cases of erroneous string values.
The majority of error exit cases in json_lex_string() failed to set lex->token_terminator, causing problems for the error context reporting code: it would see token_terminator less than token_start and do something more or less nuts. In v14 and up the end result could be as bad as a crash in report_json_context(). Older versions accidentally avoided that fate; but all versions produce error context lines that are far less useful than intended, because they'd stop at the end of the prior token instead of continuing to where the actually-bad input is. To fix, invent some macros that make it less notationally painful to do the right thing. Also add documentation about what the function is actually required to do; and in >= v14, add an assertion in report_json_context about token_terminator being sufficiently far advanced. Per report from Nikolay Shaplov. Back-patch to all supported versions. Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro
1 parent 5fd61bd, commit 74a1a36

File tree

4 files changed

+72
-54
lines changed

4 files changed

+72
-54
lines changed

‎src/backend/utils/adt/jsonfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,7 @@ report_json_context(JsonLexContext *lex)
656656
line_start=lex->line_start;
657657
context_start=line_start;
658658
context_end=lex->token_terminator;
659+
Assert(context_end >=context_start);
659660

660661
/* Advance until we are close enough to context_end */
661662
while (context_end-context_start >=50)

‎src/common/jsonapi.c

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,14 @@ json_lex(JsonLexContext *lex)
667667

668668
/*
669669
* The next token in the input stream is known to be a string; lex it.
670+
*
671+
* If lex->strval isn't NULL, fill it with the decoded string.
672+
* Set lex->token_terminator to the end of the decoded input, and in
673+
* success cases, transfer its previous value to lex->prev_token_terminator.
674+
* Return JSON_SUCCESS or an error code.
675+
*
676+
* Note: be careful that all error exits advance lex->token_terminator
677+
* to the point after the character we detected the error on.
670678
*/
671679
staticinlineJsonParseErrorType
672680
json_lex_string(JsonLexContext*lex)
@@ -675,6 +683,19 @@ json_lex_string(JsonLexContext *lex)
675683
intlen;
676684
inthi_surrogate=-1;
677685

686+
/* Convenience macros for error exits */
687+
#defineFAIL_AT_CHAR_START(code) \
688+
do { \
689+
lex->token_terminator = s; \
690+
return code; \
691+
} while (0)
692+
#defineFAIL_AT_CHAR_END(code) \
693+
do { \
694+
lex->token_terminator = \
695+
s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
696+
return code; \
697+
} while (0)
698+
678699
if (lex->strval!=NULL)
679700
resetStringInfo(lex->strval);
680701

@@ -687,29 +708,22 @@ json_lex_string(JsonLexContext *lex)
687708
len++;
688709
/* Premature end of the string. */
689710
if (len >=lex->input_length)
690-
{
691-
lex->token_terminator=s;
692-
returnJSON_INVALID_TOKEN;
693-
}
711+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
694712
elseif (*s=='"')
695713
break;
696714
elseif ((unsignedchar)*s<32)
697715
{
698716
/* Per RFC4627, these characters MUST be escaped. */
699717
/* Since *s isn't printable, exclude it from the context string */
700-
lex->token_terminator=s;
701-
returnJSON_ESCAPING_REQUIRED;
718+
FAIL_AT_CHAR_START(JSON_ESCAPING_REQUIRED);
702719
}
703720
elseif (*s=='\\')
704721
{
705722
/* OK, we have an escape character. */
706723
s++;
707724
len++;
708725
if (len >=lex->input_length)
709-
{
710-
lex->token_terminator=s;
711-
returnJSON_INVALID_TOKEN;
712-
}
726+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
713727
elseif (*s=='u')
714728
{
715729
inti;
@@ -720,21 +734,15 @@ json_lex_string(JsonLexContext *lex)
720734
s++;
721735
len++;
722736
if (len >=lex->input_length)
723-
{
724-
lex->token_terminator=s;
725-
returnJSON_INVALID_TOKEN;
726-
}
737+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
727738
elseif (*s >='0'&&*s <='9')
728739
ch= (ch*16)+ (*s-'0');
729740
elseif (*s >='a'&&*s <='f')
730741
ch= (ch*16)+ (*s-'a')+10;
731742
elseif (*s >='A'&&*s <='F')
732743
ch= (ch*16)+ (*s-'A')+10;
733744
else
734-
{
735-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
736-
returnJSON_UNICODE_ESCAPE_FORMAT;
737-
}
745+
FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
738746
}
739747
if (lex->strval!=NULL)
740748
{
@@ -744,20 +752,20 @@ json_lex_string(JsonLexContext *lex)
744752
if (is_utf16_surrogate_first(ch))
745753
{
746754
if (hi_surrogate!=-1)
747-
returnJSON_UNICODE_HIGH_SURROGATE;
755+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
748756
hi_surrogate=ch;
749757
continue;
750758
}
751759
elseif (is_utf16_surrogate_second(ch))
752760
{
753761
if (hi_surrogate==-1)
754-
returnJSON_UNICODE_LOW_SURROGATE;
762+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
755763
ch=surrogate_pair_to_codepoint(hi_surrogate,ch);
756764
hi_surrogate=-1;
757765
}
758766

759767
if (hi_surrogate!=-1)
760-
returnJSON_UNICODE_LOW_SURROGATE;
768+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
761769

762770
/*
763771
* Reject invalid cases. We can't have a value above
@@ -767,7 +775,7 @@ json_lex_string(JsonLexContext *lex)
767775
if (ch==0)
768776
{
769777
/* We can't allow this, since our TEXT type doesn't */
770-
returnJSON_UNICODE_CODE_POINT_ZERO;
778+
FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
771779
}
772780

773781
/*
@@ -804,14 +812,14 @@ json_lex_string(JsonLexContext *lex)
804812
appendStringInfoChar(lex->strval, (char)ch);
805813
}
806814
else
807-
returnJSON_UNICODE_HIGH_ESCAPE;
815+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
808816
#endif/* FRONTEND */
809817
}
810818
}
811819
elseif (lex->strval!=NULL)
812820
{
813821
if (hi_surrogate!=-1)
814-
returnJSON_UNICODE_LOW_SURROGATE;
822+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
815823

816824
switch (*s)
817825
{
@@ -836,10 +844,14 @@ json_lex_string(JsonLexContext *lex)
836844
appendStringInfoChar(lex->strval,'\t');
837845
break;
838846
default:
839-
/* Not a valid string escape, so signal error. */
847+
848+
/*
849+
* Not a valid string escape, so signal error. We
850+
* adjust token_start so that just the escape sequence
851+
* is reported, not the whole string.
852+
*/
840853
lex->token_start=s;
841-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
842-
returnJSON_ESCAPING_INVALID;
854+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
843855
}
844856
}
845857
elseif (strchr("\"\\/bfnrt",*s)==NULL)
@@ -852,26 +864,31 @@ json_lex_string(JsonLexContext *lex)
852864
* shown it's not a performance win.
853865
*/
854866
lex->token_start=s;
855-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
856-
returnJSON_ESCAPING_INVALID;
867+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
857868
}
858869
}
859870
elseif (lex->strval!=NULL)
860871
{
861872
if (hi_surrogate!=-1)
862-
returnJSON_UNICODE_LOW_SURROGATE;
873+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
863874

864875
appendStringInfoChar(lex->strval,*s);
865876
}
866877
}
867878

868879
if (hi_surrogate!=-1)
880+
{
881+
lex->token_terminator=s+1;
869882
returnJSON_UNICODE_LOW_SURROGATE;
883+
}
870884

871885
/* Hooray, we found the end of the string! */
872886
lex->prev_token_terminator=lex->token_terminator;
873887
lex->token_terminator=s+1;
874888
returnJSON_SUCCESS;
889+
890+
#undef FAIL_AT_CHAR_START
891+
#undef FAIL_AT_CHAR_END
875892
}
876893

877894
/*

‎src/test/regress/expected/json_encoding.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
5656
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
5757
ERROR: invalid input syntax for type json
5858
DETAIL: Unicode high surrogate must not follow a high surrogate.
59-
CONTEXT: JSON data, line 1: { "a":...
59+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
6060
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
6161
ERROR: invalid input syntax for type json
6262
DETAIL: Unicode low surrogate must follow a high surrogate.
63-
CONTEXT: JSON data, line 1: { "a":...
63+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6464
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
6565
ERROR: invalid input syntax for type json
6666
DETAIL: Unicode low surrogate must follow a high surrogate.
67-
CONTEXT: JSON data, line 1: { "a":...
67+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
6868
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
6969
ERROR: invalid input syntax for type json
7070
DETAIL: Unicode low surrogate must follow a high surrogate.
71-
CONTEXT: JSON data, line 1: { "a":...
71+
CONTEXT: JSON data, line 1: { "a": "\ude04...
7272
--handling of simple unicode escapes
7373
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
7474
correct_in_utf8
@@ -121,7 +121,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
121121
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
122122
ERROR: unsupported Unicode escape sequence
123123
DETAIL: \u0000 cannot be converted to text.
124-
CONTEXT: JSON data, line 1: { "a":...
124+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
125125
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
126126
not_an_escape
127127
--------------------
@@ -159,7 +159,7 @@ ERROR: unsupported Unicode escape sequence
159159
LINE 1: SELECT '"\u0000"'::jsonb;
160160
^
161161
DETAIL: \u0000 cannot be converted to text.
162-
CONTEXT: JSON data, line 1: ...
162+
CONTEXT: JSON data, line 1: "\u0000...
163163
-- use octet_length here so we don't get an odd unicode char in the
164164
-- output
165165
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -180,25 +180,25 @@ ERROR: invalid input syntax for type json
180180
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
181181
^
182182
DETAIL: Unicode high surrogate must not follow a high surrogate.
183-
CONTEXT: JSON data, line 1: { "a":...
183+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
184184
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
185185
ERROR: invalid input syntax for type json
186186
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
187187
^
188188
DETAIL: Unicode low surrogate must follow a high surrogate.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "\ude04...
190190
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
191191
ERROR: invalid input syntax for type json
192192
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
193193
^
194194
DETAIL: Unicode low surrogate must follow a high surrogate.
195-
CONTEXT: JSON data, line 1: { "a":...
195+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
196196
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
197197
ERROR: invalid input syntax for type json
198198
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
199199
^
200200
DETAIL: Unicode low surrogate must follow a high surrogate.
201-
CONTEXT: JSON data, line 1: { "a":...
201+
CONTEXT: JSON data, line 1: { "a": "\ude04...
202202
-- handling of simple unicode escapes
203203
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
204204
correct_in_utf8
@@ -223,7 +223,7 @@ ERROR: unsupported Unicode escape sequence
223223
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
224224
^
225225
DETAIL: \u0000 cannot be converted to text.
226-
CONTEXT: JSON data, line 1: { "a":...
226+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
227227
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
228228
not_an_escape
229229
------------------------------
@@ -253,7 +253,7 @@ ERROR: unsupported Unicode escape sequence
253253
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
254254
^
255255
DETAIL: \u0000 cannot be converted to text.
256-
CONTEXT: JSON data, line 1: { "a":...
256+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
257257
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
258258
not_an_escape
259259
--------------------

‎src/test/regress/expected/json_encoding_1.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ ERROR: conversion between UTF8 and SQL_ASCII is not supported
5252
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
5353
ERROR: invalid input syntax for type json
5454
DETAIL: Unicode high surrogate must not follow a high surrogate.
55-
CONTEXT: JSON data, line 1: { "a":...
55+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
5656
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
5757
ERROR: invalid input syntax for type json
5858
DETAIL: Unicode low surrogate must follow a high surrogate.
59-
CONTEXT: JSON data, line 1: { "a":...
59+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6060
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
6161
ERROR: invalid input syntax for type json
6262
DETAIL: Unicode low surrogate must follow a high surrogate.
63-
CONTEXT: JSON data, line 1: { "a":...
63+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
6464
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
6565
ERROR: invalid input syntax for type json
6666
DETAIL: Unicode low surrogate must follow a high surrogate.
67-
CONTEXT: JSON data, line 1: { "a":...
67+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6868
--handling of simple unicode escapes
6969
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
7070
correct_in_utf8
@@ -113,7 +113,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
113113
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
114114
ERROR: unsupported Unicode escape sequence
115115
DETAIL: \u0000 cannot be converted to text.
116-
CONTEXT: JSON data, line 1: { "a":...
116+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
117117
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
118118
not_an_escape
119119
--------------------
@@ -151,7 +151,7 @@ ERROR: unsupported Unicode escape sequence
151151
LINE 1: SELECT '"\u0000"'::jsonb;
152152
^
153153
DETAIL: \u0000 cannot be converted to text.
154-
CONTEXT: JSON data, line 1: ...
154+
CONTEXT: JSON data, line 1: "\u0000...
155155
-- use octet_length here so we don't get an odd unicode char in the
156156
-- output
157157
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -168,25 +168,25 @@ ERROR: invalid input syntax for type json
168168
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
169169
^
170170
DETAIL: Unicode high surrogate must not follow a high surrogate.
171-
CONTEXT: JSON data, line 1: { "a":...
171+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
172172
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
173173
ERROR: invalid input syntax for type json
174174
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
175175
^
176176
DETAIL: Unicode low surrogate must follow a high surrogate.
177-
CONTEXT: JSON data, line 1: { "a":...
177+
CONTEXT: JSON data, line 1: { "a": "\ude04...
178178
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
179179
ERROR: invalid input syntax for type json
180180
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
181181
^
182182
DETAIL: Unicode low surrogate must follow a high surrogate.
183-
CONTEXT: JSON data, line 1: { "a":...
183+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
184184
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
185185
ERROR: invalid input syntax for type json
186186
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
187187
^
188188
DETAIL: Unicode low surrogate must follow a high surrogate.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "\ude04...
190190
-- handling of simple unicode escapes
191191
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
192192
ERROR: conversion between UTF8 and SQL_ASCII is not supported
@@ -209,7 +209,7 @@ ERROR: unsupported Unicode escape sequence
209209
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
210210
^
211211
DETAIL: \u0000 cannot be converted to text.
212-
CONTEXT: JSON data, line 1: { "a":...
212+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
213213
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
214214
not_an_escape
215215
------------------------------
@@ -237,7 +237,7 @@ ERROR: unsupported Unicode escape sequence
237237
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
238238
^
239239
DETAIL: \u0000 cannot be converted to text.
240-
CONTEXT: JSON data, line 1: { "a":...
240+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
241241
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
242242
not_an_escape
243243
--------------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp