Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 52e9a78

Browse files
committed
Fix JSON error reporting for many cases of erroneous string values.
The majority of error exit cases in json_lex_string() failed to set lex->token_terminator, causing problems for the error context reporting code: it would see token_terminator less than token_start and do something more or less nuts. In v14 and up the end result could be as bad as a crash in report_json_context(). Older versions accidentally avoided that fate; but all versions produce error context lines that are far less useful than intended, because they'd stop at the end of the prior token instead of continuing to where the actually-bad input is.

To fix, invent some macros that make it less notationally painful to do the right thing. Also add documentation about what the function is actually required to do; and in >= v14, add an assertion in report_json_context about token_terminator being sufficiently far advanced.

Per report from Nikolay Shaplov. Back-patch to all supported versions.

Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro
1 parent bc0bcce commit 52e9a78

File tree

3 files changed

+71
-54
lines changed

3 files changed

+71
-54
lines changed

‎src/common/jsonapi.c

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,14 @@ json_lex(JsonLexContext *lex)
673673

674674
/*
675675
* The next token in the input stream is known to be a string; lex it.
676+
*
677+
* If lex->strval isn't NULL, fill it with the decoded string.
678+
* Set lex->token_terminator to the end of the decoded input, and in
679+
* success cases, transfer its previous value to lex->prev_token_terminator.
680+
* Return JSON_SUCCESS or an error code.
681+
*
682+
* Note: be careful that all error exits advance lex->token_terminator
683+
* to the point after the character we detected the error on.
676684
*/
677685
staticinlineJsonParseErrorType
678686
json_lex_string(JsonLexContext*lex)
@@ -681,6 +689,19 @@ json_lex_string(JsonLexContext *lex)
681689
intlen;
682690
inthi_surrogate=-1;
683691

692+
/* Convenience macros for error exits */
693+
#defineFAIL_AT_CHAR_START(code) \
694+
do { \
695+
lex->token_terminator = s; \
696+
return code; \
697+
} while (0)
698+
#defineFAIL_AT_CHAR_END(code) \
699+
do { \
700+
lex->token_terminator = \
701+
s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
702+
return code; \
703+
} while (0)
704+
684705
if (lex->strval!=NULL)
685706
resetStringInfo(lex->strval);
686707

@@ -693,29 +714,22 @@ json_lex_string(JsonLexContext *lex)
693714
len++;
694715
/* Premature end of the string. */
695716
if (len >=lex->input_length)
696-
{
697-
lex->token_terminator=s;
698-
returnJSON_INVALID_TOKEN;
699-
}
717+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
700718
elseif (*s=='"')
701719
break;
702720
elseif ((unsignedchar)*s<32)
703721
{
704722
/* Per RFC4627, these characters MUST be escaped. */
705723
/* Since *s isn't printable, exclude it from the context string */
706-
lex->token_terminator=s;
707-
returnJSON_ESCAPING_REQUIRED;
724+
FAIL_AT_CHAR_START(JSON_ESCAPING_REQUIRED);
708725
}
709726
elseif (*s=='\\')
710727
{
711728
/* OK, we have an escape character. */
712729
s++;
713730
len++;
714731
if (len >=lex->input_length)
715-
{
716-
lex->token_terminator=s;
717-
returnJSON_INVALID_TOKEN;
718-
}
732+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
719733
elseif (*s=='u')
720734
{
721735
inti;
@@ -726,21 +740,15 @@ json_lex_string(JsonLexContext *lex)
726740
s++;
727741
len++;
728742
if (len >=lex->input_length)
729-
{
730-
lex->token_terminator=s;
731-
returnJSON_INVALID_TOKEN;
732-
}
743+
FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
733744
elseif (*s >='0'&&*s <='9')
734745
ch= (ch*16)+ (*s-'0');
735746
elseif (*s >='a'&&*s <='f')
736747
ch= (ch*16)+ (*s-'a')+10;
737748
elseif (*s >='A'&&*s <='F')
738749
ch= (ch*16)+ (*s-'A')+10;
739750
else
740-
{
741-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
742-
returnJSON_UNICODE_ESCAPE_FORMAT;
743-
}
751+
FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
744752
}
745753
if (lex->strval!=NULL)
746754
{
@@ -750,20 +758,20 @@ json_lex_string(JsonLexContext *lex)
750758
if (is_utf16_surrogate_first(ch))
751759
{
752760
if (hi_surrogate!=-1)
753-
returnJSON_UNICODE_HIGH_SURROGATE;
761+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
754762
hi_surrogate=ch;
755763
continue;
756764
}
757765
elseif (is_utf16_surrogate_second(ch))
758766
{
759767
if (hi_surrogate==-1)
760-
returnJSON_UNICODE_LOW_SURROGATE;
768+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
761769
ch=surrogate_pair_to_codepoint(hi_surrogate,ch);
762770
hi_surrogate=-1;
763771
}
764772

765773
if (hi_surrogate!=-1)
766-
returnJSON_UNICODE_LOW_SURROGATE;
774+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
767775

768776
/*
769777
* Reject invalid cases. We can't have a value above
@@ -773,7 +781,7 @@ json_lex_string(JsonLexContext *lex)
773781
if (ch==0)
774782
{
775783
/* We can't allow this, since our TEXT type doesn't */
776-
returnJSON_UNICODE_CODE_POINT_ZERO;
784+
FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
777785
}
778786

779787
/*
@@ -810,14 +818,14 @@ json_lex_string(JsonLexContext *lex)
810818
appendStringInfoChar(lex->strval, (char)ch);
811819
}
812820
else
813-
returnJSON_UNICODE_HIGH_ESCAPE;
821+
FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
814822
#endif/* FRONTEND */
815823
}
816824
}
817825
elseif (lex->strval!=NULL)
818826
{
819827
if (hi_surrogate!=-1)
820-
returnJSON_UNICODE_LOW_SURROGATE;
828+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
821829

822830
switch (*s)
823831
{
@@ -842,10 +850,14 @@ json_lex_string(JsonLexContext *lex)
842850
appendStringInfoChar(lex->strval,'\t');
843851
break;
844852
default:
845-
/* Not a valid string escape, so signal error. */
853+
854+
/*
855+
* Not a valid string escape, so signal error. We
856+
* adjust token_start so that just the escape sequence
857+
* is reported, not the whole string.
858+
*/
846859
lex->token_start=s;
847-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
848-
returnJSON_ESCAPING_INVALID;
860+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
849861
}
850862
}
851863
elseif (strchr("\"\\/bfnrt",*s)==NULL)
@@ -858,28 +870,33 @@ json_lex_string(JsonLexContext *lex)
858870
* shown it's not a performance win.
859871
*/
860872
lex->token_start=s;
861-
lex->token_terminator=s+pg_encoding_mblen_bounded(lex->input_encoding,s);
862-
returnJSON_ESCAPING_INVALID;
873+
FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
863874
}
864875

865876
}
866877
elseif (lex->strval!=NULL)
867878
{
868879
if (hi_surrogate!=-1)
869-
returnJSON_UNICODE_LOW_SURROGATE;
880+
FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
870881

871882
appendStringInfoChar(lex->strval,*s);
872883
}
873884

874885
}
875886

876887
if (hi_surrogate!=-1)
888+
{
889+
lex->token_terminator=s+1;
877890
returnJSON_UNICODE_LOW_SURROGATE;
891+
}
878892

879893
/* Hooray, we found the end of the string! */
880894
lex->prev_token_terminator=lex->token_terminator;
881895
lex->token_terminator=s+1;
882896
returnJSON_SUCCESS;
897+
898+
#undef FAIL_AT_CHAR_START
899+
#undef FAIL_AT_CHAR_END
883900
}
884901

885902
/*

‎src/test/regress/expected/json_encoding.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
5656
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
5757
ERROR: invalid input syntax for type json
5858
DETAIL: Unicode high surrogate must not follow a high surrogate.
59-
CONTEXT: JSON data, line 1: { "a":...
59+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
6060
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
6161
ERROR: invalid input syntax for type json
6262
DETAIL: Unicode low surrogate must follow a high surrogate.
63-
CONTEXT: JSON data, line 1: { "a":...
63+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6464
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
6565
ERROR: invalid input syntax for type json
6666
DETAIL: Unicode low surrogate must follow a high surrogate.
67-
CONTEXT: JSON data, line 1: { "a":...
67+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
6868
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
6969
ERROR: invalid input syntax for type json
7070
DETAIL: Unicode low surrogate must follow a high surrogate.
71-
CONTEXT: JSON data, line 1: { "a":...
71+
CONTEXT: JSON data, line 1: { "a": "\ude04...
7272
--handling of simple unicode escapes
7373
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
7474
correct_in_utf8
@@ -121,7 +121,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
121121
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
122122
ERROR: unsupported Unicode escape sequence
123123
DETAIL: \u0000 cannot be converted to text.
124-
CONTEXT: JSON data, line 1: { "a":...
124+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
125125
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
126126
not_an_escape
127127
--------------------
@@ -159,7 +159,7 @@ ERROR: unsupported Unicode escape sequence
159159
LINE 1: SELECT '"\u0000"'::jsonb;
160160
^
161161
DETAIL: \u0000 cannot be converted to text.
162-
CONTEXT: JSON data, line 1: ...
162+
CONTEXT: JSON data, line 1:"\u0000...
163163
-- use octet_length here so we don't get an odd unicode char in the
164164
-- output
165165
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -180,25 +180,25 @@ ERROR: invalid input syntax for type json
180180
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
181181
^
182182
DETAIL: Unicode high surrogate must not follow a high surrogate.
183-
CONTEXT: JSON data, line 1: { "a":...
183+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
184184
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
185185
ERROR: invalid input syntax for type json
186186
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
187187
^
188188
DETAIL: Unicode low surrogate must follow a high surrogate.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "\ude04...
190190
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
191191
ERROR: invalid input syntax for type json
192192
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
193193
^
194194
DETAIL: Unicode low surrogate must follow a high surrogate.
195-
CONTEXT: JSON data, line 1: { "a":...
195+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
196196
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
197197
ERROR: invalid input syntax for type json
198198
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
199199
^
200200
DETAIL: Unicode low surrogate must follow a high surrogate.
201-
CONTEXT: JSON data, line 1: { "a":...
201+
CONTEXT: JSON data, line 1: { "a": "\ude04...
202202
-- handling of simple unicode escapes
203203
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
204204
correct_in_utf8
@@ -223,7 +223,7 @@ ERROR: unsupported Unicode escape sequence
223223
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
224224
^
225225
DETAIL: \u0000 cannot be converted to text.
226-
CONTEXT: JSON data, line 1: { "a":...
226+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
227227
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
228228
not_an_escape
229229
------------------------------
@@ -253,7 +253,7 @@ ERROR: unsupported Unicode escape sequence
253253
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
254254
^
255255
DETAIL: \u0000 cannot be converted to text.
256-
CONTEXT: JSON data, line 1: { "a":...
256+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
257257
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
258258
not_an_escape
259259
--------------------

‎src/test/regress/expected/json_encoding_1.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ ERROR: conversion between UTF8 and SQL_ASCII is not supported
5252
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
5353
ERROR: invalid input syntax for type json
5454
DETAIL: Unicode high surrogate must not follow a high surrogate.
55-
CONTEXT: JSON data, line 1: { "a":...
55+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
5656
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
5757
ERROR: invalid input syntax for type json
5858
DETAIL: Unicode low surrogate must follow a high surrogate.
59-
CONTEXT: JSON data, line 1: { "a":...
59+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6060
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
6161
ERROR: invalid input syntax for type json
6262
DETAIL: Unicode low surrogate must follow a high surrogate.
63-
CONTEXT: JSON data, line 1: { "a":...
63+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
6464
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
6565
ERROR: invalid input syntax for type json
6666
DETAIL: Unicode low surrogate must follow a high surrogate.
67-
CONTEXT: JSON data, line 1: { "a":...
67+
CONTEXT: JSON data, line 1: { "a": "\ude04...
6868
--handling of simple unicode escapes
6969
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
7070
correct_in_utf8
@@ -113,7 +113,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
113113
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
114114
ERROR: unsupported Unicode escape sequence
115115
DETAIL: \u0000 cannot be converted to text.
116-
CONTEXT: JSON data, line 1: { "a":...
116+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
117117
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
118118
not_an_escape
119119
--------------------
@@ -151,7 +151,7 @@ ERROR: unsupported Unicode escape sequence
151151
LINE 1: SELECT '"\u0000"'::jsonb;
152152
^
153153
DETAIL: \u0000 cannot be converted to text.
154-
CONTEXT: JSON data, line 1: ...
154+
CONTEXT: JSON data, line 1:"\u0000...
155155
-- use octet_length here so we don't get an odd unicode char in the
156156
-- output
157157
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -168,25 +168,25 @@ ERROR: invalid input syntax for type json
168168
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
169169
^
170170
DETAIL: Unicode high surrogate must not follow a high surrogate.
171-
CONTEXT: JSON data, line 1: { "a":...
171+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
172172
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
173173
ERROR: invalid input syntax for type json
174174
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
175175
^
176176
DETAIL: Unicode low surrogate must follow a high surrogate.
177-
CONTEXT: JSON data, line 1: { "a":...
177+
CONTEXT: JSON data, line 1: { "a": "\ude04...
178178
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
179179
ERROR: invalid input syntax for type json
180180
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
181181
^
182182
DETAIL: Unicode low surrogate must follow a high surrogate.
183-
CONTEXT: JSON data, line 1: { "a":...
183+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
184184
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
185185
ERROR: invalid input syntax for type json
186186
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
187187
^
188188
DETAIL: Unicode low surrogate must follow a high surrogate.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "\ude04...
190190
-- handling of simple unicode escapes
191191
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
192192
ERROR: conversion between UTF8 and SQL_ASCII is not supported
@@ -209,7 +209,7 @@ ERROR: unsupported Unicode escape sequence
209209
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
210210
^
211211
DETAIL: \u0000 cannot be converted to text.
212-
CONTEXT: JSON data, line 1: { "a":...
212+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
213213
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
214214
not_an_escape
215215
------------------------------
@@ -237,7 +237,7 @@ ERROR: unsupported Unicode escape sequence
237237
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
238238
^
239239
DETAIL: \u0000 cannot be converted to text.
240-
CONTEXT: JSON data, line 1: { "a":...
240+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
241241
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
242242
not_an_escape
243243
--------------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp