Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c25a929

Browse files
committed
Fix JSON error reporting for many cases of erroneous string values.
The majority of error exit cases in json_lex_string() failed to set lex->token_terminator, causing problems for the error context reporting code: it would see token_terminator less than token_start and do something more or less nuts. In v14 and up the end result could be as bad as a crash in report_json_context(). Older versions accidentally avoided that fate; but all versions produce error context lines that are far less useful than intended, because they'd stop at the end of the prior token instead of continuing to where the actually-bad input is.

To fix, invent some macros that make it less notationally painful to do the right thing. Also add documentation about what the function is actually required to do; and in >= v14, add an assertion in report_json_context about token_terminator being sufficiently far advanced.

Per report from Nikolay Shaplov. Back-patch to all supported versions.

Discussion: https://postgr.es/m/7332649.x5DLKWyVIX@thinkpad-pgpro
1 parent 62a91a1 commit c25a929

File tree

3 files changed

+57
-32
lines changed

3 files changed

+57
-32
lines changed

‎src/backend/utils/adt/json.c

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -750,6 +750,13 @@ json_lex(JsonLexContext *lex)
750750

751751
/*
752752
* The next token in the input stream is known to be a string; lex it.
753+
*
754+
* If lex->strval isn't NULL, fill it with the decoded string.
755+
* Set lex->token_terminator to the end of the decoded input, and in
756+
* success cases, transfer its previous value to lex->prev_token_terminator.
757+
*
758+
* Note: be careful that all error cases advance lex->token_terminator
759+
* to the point after the character we detected the error on.
753760
*/
754761
staticinlinevoid
755762
json_lex_string(JsonLexContext*lex)
@@ -837,33 +844,42 @@ json_lex_string(JsonLexContext *lex)
837844
if (ch >=0xd800&&ch <=0xdbff)
838845
{
839846
if (hi_surrogate!=-1)
847+
{
848+
lex->token_terminator=s+pg_mblen(s);
840849
ereport(ERROR,
841850
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
842851
errmsg("invalid input syntax for type %s",
843852
"json"),
844853
errdetail("Unicode high surrogate must not follow a high surrogate."),
845854
report_json_context(lex)));
855+
}
846856
hi_surrogate= (ch&0x3ff) <<10;
847857
continue;
848858
}
849859
elseif (ch >=0xdc00&&ch <=0xdfff)
850860
{
851861
if (hi_surrogate==-1)
862+
{
863+
lex->token_terminator=s+pg_mblen(s);
852864
ereport(ERROR,
853865
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
854866
errmsg("invalid input syntax for type %s","json"),
855867
errdetail("Unicode low surrogate must follow a high surrogate."),
856868
report_json_context(lex)));
869+
}
857870
ch=0x10000+hi_surrogate+ (ch&0x3ff);
858871
hi_surrogate=-1;
859872
}
860873

861874
if (hi_surrogate!=-1)
875+
{
876+
lex->token_terminator=s+pg_mblen(s);
862877
ereport(ERROR,
863878
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
864879
errmsg("invalid input syntax for type %s","json"),
865880
errdetail("Unicode low surrogate must follow a high surrogate."),
866881
report_json_context(lex)));
882+
}
867883

868884
/*
869885
* For UTF8, replace the escape sequence by the actual
@@ -875,6 +891,7 @@ json_lex_string(JsonLexContext *lex)
875891
if (ch==0)
876892
{
877893
/* We can't allow this, since our TEXT type doesn't */
894+
lex->token_terminator=s+pg_mblen(s);
878895
ereport(ERROR,
879896
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
880897
errmsg("unsupported Unicode escape sequence"),
@@ -898,24 +915,27 @@ json_lex_string(JsonLexContext *lex)
898915
}
899916
else
900917
{
918+
lex->token_terminator=s+pg_mblen(s);
901919
ereport(ERROR,
902920
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
903921
errmsg("unsupported Unicode escape sequence"),
904922
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
905923
report_json_context(lex)));
906924
}
907-
908925
}
909926
}
910927
elseif (lex->strval!=NULL)
911928
{
912929
if (hi_surrogate!=-1)
930+
{
931+
lex->token_terminator=s+pg_mblen(s);
913932
ereport(ERROR,
914933
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
915934
errmsg("invalid input syntax for type %s",
916935
"json"),
917936
errdetail("Unicode low surrogate must follow a high surrogate."),
918937
report_json_context(lex)));
938+
}
919939

920940
switch (*s)
921941
{
@@ -968,28 +988,33 @@ json_lex_string(JsonLexContext *lex)
968988
extract_mb_char(s)),
969989
report_json_context(lex)));
970990
}
971-
972991
}
973992
elseif (lex->strval!=NULL)
974993
{
975994
if (hi_surrogate!=-1)
995+
{
996+
lex->token_terminator=s+pg_mblen(s);
976997
ereport(ERROR,
977998
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
978999
errmsg("invalid input syntax for type %s","json"),
9791000
errdetail("Unicode low surrogate must follow a high surrogate."),
9801001
report_json_context(lex)));
1002+
}
9811003

9821004
appendStringInfoChar(lex->strval,*s);
9831005
}
9841006

9851007
}
9861008

9871009
if (hi_surrogate!=-1)
1010+
{
1011+
lex->token_terminator=s+pg_mblen(s);
9881012
ereport(ERROR,
9891013
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
9901014
errmsg("invalid input syntax for type %s","json"),
9911015
errdetail("Unicode low surrogate must follow a high surrogate."),
9921016
report_json_context(lex)));
1017+
}
9931018

9941019
/* Hooray, we found the end of the string! */
9951020
lex->prev_token_terminator=lex->token_terminator;

‎src/test/regress/expected/json_encoding.out

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -41,19 +41,19 @@ select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
4141
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
4242
ERROR: invalid input syntax for type json
4343
DETAIL: Unicode high surrogate must not follow a high surrogate.
44-
CONTEXT: JSON data, line 1: { "a":...
44+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
4545
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
4646
ERROR: invalid input syntax for type json
4747
DETAIL: Unicode low surrogate must follow a high surrogate.
48-
CONTEXT: JSON data, line 1: { "a":...
48+
CONTEXT: JSON data, line 1: { "a": "\ude04...
4949
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
5050
ERROR: invalid input syntax for type json
5151
DETAIL: Unicode low surrogate must follow a high surrogate.
52-
CONTEXT: JSON data, line 1: { "a":...
52+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
5353
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
5454
ERROR: invalid input syntax for type json
5555
DETAIL: Unicode low surrogate must follow a high surrogate.
56-
CONTEXT: JSON data, line 1: { "a":...
56+
CONTEXT: JSON data, line 1: { "a": "\ude04...
5757
--handling of simple unicode escapes
5858
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
5959
correct_in_utf8
@@ -106,7 +106,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
106106
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
107107
ERROR: unsupported Unicode escape sequence
108108
DETAIL: \u0000 cannot be converted to text.
109-
CONTEXT: JSON data, line 1: { "a":...
109+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
110110
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
111111
not_an_escape
112112
--------------------
@@ -144,7 +144,7 @@ ERROR: unsupported Unicode escape sequence
144144
LINE 1: SELECT '"\u0000"'::jsonb;
145145
^
146146
DETAIL: \u0000 cannot be converted to text.
147-
CONTEXT: JSON data, line 1: ...
147+
CONTEXT: JSON data, line 1:"\u0000...
148148
-- use octet_length here so we don't get an odd unicode char in the
149149
-- output
150150
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -165,25 +165,25 @@ ERROR: invalid input syntax for type json
165165
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
166166
^
167167
DETAIL: Unicode high surrogate must not follow a high surrogate.
168-
CONTEXT: JSON data, line 1: { "a":...
168+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
169169
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
170170
ERROR: invalid input syntax for type json
171171
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
172172
^
173173
DETAIL: Unicode low surrogate must follow a high surrogate.
174-
CONTEXT: JSON data, line 1: { "a":...
174+
CONTEXT: JSON data, line 1: { "a": "\ude04...
175175
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
176176
ERROR: invalid input syntax for type json
177177
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
178178
^
179179
DETAIL: Unicode low surrogate must follow a high surrogate.
180-
CONTEXT: JSON data, line 1: { "a":...
180+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
181181
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
182182
ERROR: invalid input syntax for type json
183183
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
184184
^
185185
DETAIL: Unicode low surrogate must follow a high surrogate.
186-
CONTEXT: JSON data, line 1: { "a":...
186+
CONTEXT: JSON data, line 1: { "a": "\ude04...
187187
-- handling of simple unicode escapes
188188
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
189189
correct_in_utf8
@@ -208,7 +208,7 @@ ERROR: unsupported Unicode escape sequence
208208
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
209209
^
210210
DETAIL: \u0000 cannot be converted to text.
211-
CONTEXT: JSON data, line 1: { "a":...
211+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
212212
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
213213
not_an_escape
214214
------------------------------
@@ -238,7 +238,7 @@ ERROR: unsupported Unicode escape sequence
238238
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
239239
^
240240
DETAIL: \u0000 cannot be converted to text.
241-
CONTEXT: JSON data, line 1: { "a":...
241+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
242242
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
243243
not_an_escape
244244
--------------------

‎src/test/regress/expected/json_encoding_1.out

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -35,23 +35,23 @@ SELECT '"\uaBcD"'::json;-- OK, uppercase and lower case both OK
3535
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
3636
ERROR: unsupported Unicode escape sequence
3737
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
38-
CONTEXT: JSON data, line 1: { "a":...
38+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
3939
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
4040
ERROR: invalid input syntax for type json
4141
DETAIL: Unicode high surrogate must not follow a high surrogate.
42-
CONTEXT: JSON data, line 1: { "a":...
42+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
4343
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
4444
ERROR: invalid input syntax for type json
4545
DETAIL: Unicode low surrogate must follow a high surrogate.
46-
CONTEXT: JSON data, line 1: { "a":...
46+
CONTEXT: JSON data, line 1: { "a": "\ude04...
4747
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
4848
ERROR: invalid input syntax for type json
4949
DETAIL: Unicode low surrogate must follow a high surrogate.
50-
CONTEXT: JSON data, line 1: { "a":...
50+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
5151
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
5252
ERROR: invalid input syntax for type json
5353
DETAIL: Unicode low surrogate must follow a high surrogate.
54-
CONTEXT: JSON data, line 1: { "a":...
54+
CONTEXT: JSON data, line 1: { "a": "\ude04...
5555
--handling of simple unicode escapes
5656
select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
5757
correct_in_utf8
@@ -86,7 +86,7 @@ select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
8686
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
8787
ERROR: unsupported Unicode escape sequence
8888
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
89-
CONTEXT: JSON data, line 1: { "a":...
89+
CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
9090
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
9191
correct_everywhere
9292
--------------------
@@ -102,7 +102,7 @@ select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
102102
select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
103103
ERROR: unsupported Unicode escape sequence
104104
DETAIL: \u0000 cannot be converted to text.
105-
CONTEXT: JSON data, line 1: { "a":...
105+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
106106
select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
107107
not_an_escape
108108
--------------------
@@ -140,53 +140,53 @@ ERROR: unsupported Unicode escape sequence
140140
LINE 1: SELECT '"\u0000"'::jsonb;
141141
^
142142
DETAIL: \u0000 cannot be converted to text.
143-
CONTEXT: JSON data, line 1: ...
143+
CONTEXT: JSON data, line 1:"\u0000...
144144
-- use octet_length here so we don't get an odd unicode char in the
145145
-- output
146146
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
147147
ERROR: unsupported Unicode escape sequence
148148
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
149149
^
150150
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
151-
CONTEXT: JSON data, line 1: ...
151+
CONTEXT: JSON data, line 1:"\uaBcD...
152152
-- handling of unicode surrogate pairs
153153
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
154154
ERROR: unsupported Unicode escape sequence
155155
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
156156
^
157157
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
158-
CONTEXT: JSON data, line 1: { "a":...
158+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ude04...
159159
SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
160160
ERROR: invalid input syntax for type json
161161
LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a';
162162
^
163163
DETAIL: Unicode high surrogate must not follow a high surrogate.
164-
CONTEXT: JSON data, line 1: { "a":...
164+
CONTEXT: JSON data, line 1: { "a": "\ud83d\ud83d...
165165
SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
166166
ERROR: invalid input syntax for type json
167167
LINE 1: SELECT jsonb '{ "a": "\ude04\ud83d" }' -> 'a';
168168
^
169169
DETAIL: Unicode low surrogate must follow a high surrogate.
170-
CONTEXT: JSON data, line 1: { "a":...
170+
CONTEXT: JSON data, line 1: { "a": "\ude04...
171171
SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
172172
ERROR: invalid input syntax for type json
173173
LINE 1: SELECT jsonb '{ "a": "\ud83dX" }' -> 'a';
174174
^
175175
DETAIL: Unicode low surrogate must follow a high surrogate.
176-
CONTEXT: JSON data, line 1: { "a":...
176+
CONTEXT: JSON data, line 1: { "a": "\ud83dX...
177177
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
178178
ERROR: invalid input syntax for type json
179179
LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
180180
^
181181
DETAIL: Unicode low surrogate must follow a high surrogate.
182-
CONTEXT: JSON data, line 1: { "a":...
182+
CONTEXT: JSON data, line 1: { "a": "\ude04...
183183
-- handling of simple unicode escapes
184184
SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
185185
ERROR: unsupported Unicode escape sequence
186186
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
187187
^
188188
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
189-
CONTEXT: JSON data, line 1: { "a":...
189+
CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
190190
SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
191191
correct_everywhere
192192
-----------------------------
@@ -204,7 +204,7 @@ ERROR: unsupported Unicode escape sequence
204204
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
205205
^
206206
DETAIL: \u0000 cannot be converted to text.
207-
CONTEXT: JSON data, line 1: { "a":...
207+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
208208
SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
209209
not_an_escape
210210
------------------------------
@@ -216,7 +216,7 @@ ERROR: unsupported Unicode escape sequence
216216
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
217217
^
218218
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
219-
CONTEXT: JSON data, line 1: { "a":...
219+
CONTEXT: JSON data, line 1: { "a": "the Copyright \u00a9...
220220
SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
221221
correct_everywhere
222222
--------------------
@@ -234,7 +234,7 @@ ERROR: unsupported Unicode escape sequence
234234
LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
235235
^
236236
DETAIL: \u0000 cannot be converted to text.
237-
CONTEXT: JSON data, line 1: { "a":...
237+
CONTEXT: JSON data, line 1: { "a": "null \u0000...
238238
SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
239239
not_an_escape
240240
--------------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp