Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit94e3311

Browse files
committed
Handle Unicode surrogate pairs correctly when processing JSON.
In 9.2, Unicode escape sequences are not analysed at all other than to make sure that they are in the form \uXXXX. But in 9.3 many of the new operators and functions try to turn JSON text values into text in the server encoding, and this includes de-escaping Unicode escape sequences. This processing had not taken into account the possibility that this might contain a surrogate pair to designate a character outside the BMP. That is now handled correctly. This also enforces correct use of surrogate pairs, something that is not done by the type's input routines. This fact is noted in the docs.
1 parent c99d5d1 commit 94e3311

File tree

4 files changed

+92
-0
lines changed

4 files changed

+92
-0
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10150,6 +10150,15 @@ table2-mapping
1015010150
</tgroup>
1015110151
</table>
1015210152

10153+
<note>
10154+
<para>
10155+
The <type>json</type> functions and operators can impose stricter validity requirements
10156+
than the type's input functions. In particular, they check much more closely that any use
10157+
of Unicode surrogate pairs to designate characters outside the Unicode Basic Multilingual
10158+
Plane is correct.
10159+
</para>
10160+
</note>
10161+
1015310162
<note>
1015410163
<para>
1015510164
The <xref linkend="hstore"> extension has a cast from <type>hstore</type> to

‎src/backend/utils/adt/json.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,7 @@ json_lex_string(JsonLexContext *lex)
646646
{
647647
char*s;
648648
intlen;
649+
inthi_surrogate=-1;
649650

650651
if (lex->strval!=NULL)
651652
resetStringInfo(lex->strval);
@@ -718,6 +719,36 @@ json_lex_string(JsonLexContext *lex)
718719
intutf8len;
719720
char*converted;
720721

722+
if (ch >=0xd800&&ch <=0xdbff)
723+
{
724+
if (hi_surrogate!=-1)
725+
ereport(ERROR,
726+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
727+
errmsg("invalid input syntax for type json"),
728+
errdetail("high order surrogate must not follow a high order surrogate."),
729+
report_json_context(lex)));
730+
hi_surrogate= (ch&0x3ff) <<10;
731+
continue;
732+
}
733+
elseif (ch >=0xdc00&&ch <=0xdfff)
734+
{
735+
if (hi_surrogate==-1)
736+
ereport(ERROR,
737+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
738+
errmsg("invalid input syntax for type json"),
739+
errdetail("low order surrogate must follow a high order surrogate."),
740+
report_json_context(lex)));
741+
ch=0x10000+hi_surrogate+ (ch&0x3ff);
742+
hi_surrogate=-1;
743+
}
744+
745+
if (hi_surrogate!=-1)
746+
ereport(ERROR,
747+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
748+
errmsg("invalid input syntax for type json"),
749+
errdetail("low order surrogate must follow a high order surrogate."),
750+
report_json_context(lex)));
751+
721752
unicode_to_utf8(ch, (unsignedchar*)utf8str);
722753
utf8len=pg_utf_mblen((unsignedchar*)utf8str);
723754
utf8str[utf8len]='\0';
@@ -730,6 +761,13 @@ json_lex_string(JsonLexContext *lex)
730761
}
731762
elseif (lex->strval!=NULL)
732763
{
764+
if (hi_surrogate!=-1)
765+
ereport(ERROR,
766+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
767+
errmsg("invalid input syntax for type json"),
768+
errdetail("low order surrogate must follow a high order surrogate."),
769+
report_json_context(lex)));
770+
733771
switch (*s)
734772
{
735773
case'"':
@@ -784,11 +822,25 @@ json_lex_string(JsonLexContext *lex)
784822
}
785823
elseif (lex->strval!=NULL)
786824
{
825+
if (hi_surrogate!=-1)
826+
ereport(ERROR,
827+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
828+
errmsg("invalid input syntax for type json"),
829+
errdetail("low order surrogate must follow a high order surrogate."),
830+
report_json_context(lex)));
831+
787832
appendStringInfoChar(lex->strval,*s);
788833
}
789834

790835
}
791836

837+
if (hi_surrogate!=-1)
838+
ereport(ERROR,
839+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
840+
errmsg("invalid input syntax for type json"),
841+
errdetail("low order surrogate must follow a high order surrogate."),
842+
report_json_context(lex)));
843+
792844
/* Hooray, we found the end of the string! */
793845
lex->prev_token_terminator=lex->token_terminator;
794846
lex->token_terminator=s+1;

‎src/test/regress/expected/json.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,3 +920,26 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
920920
ERROR: cannot call json_populate_recordset on a nested object
921921
select * from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
922922
ERROR: cannot call json_populate_recordset on a nested object
923+
-- handling of unicode surrogate pairs
924+
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct;
925+
correct
926+
----------------------------
927+
"\ud83d\ude04\ud83d\udc36"
928+
(1 row)
929+
930+
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
931+
ERROR: invalid input syntax for type json
932+
DETAIL: high order surrogate must not follow a high order surrogate.
933+
CONTEXT: JSON data, line 1: { "a":...
934+
select json '{ "a": "\ude04\ud83d" }' -> 'a'; -- surrogates in wrong order
935+
ERROR: invalid input syntax for type json
936+
DETAIL: low order surrogate must follow a high order surrogate.
937+
CONTEXT: JSON data, line 1: { "a":...
938+
select json '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
939+
ERROR: invalid input syntax for type json
940+
DETAIL: low order surrogate must follow a high order surrogate.
941+
CONTEXT: JSON data, line 1: { "a":...
942+
select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
943+
ERROR: invalid input syntax for type json
944+
DETAIL: low order surrogate must follow a high order surrogate.
945+
CONTEXT: JSON data, line 1: { "a":...

‎src/test/regress/sql/json.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,11 @@ select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":
296296
select*from json_populate_recordset(row('def',99,null)::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q;
297297
select*from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
298298
select*from json_populate_recordset(row('def',99,null)::jpop,'[{"c":[100,200,300],"x":43.2},{"a":{"z":true},"b":3,"c":"2012-01-20 10:42:53"}]') q;
299+
300+
-- handling of unicode surrogate pairs
301+
302+
select json'{ "a": "\ud83d\ude04\ud83d\udc36" }'->'a'as correct;
303+
select json'{ "a": "\ud83d\ud83d" }'->'a';-- 2 high surrogates in a row
304+
select json'{ "a": "\ude04\ud83d" }'->'a';-- surrogates in wrong order
305+
select json'{ "a": "\ud83dX" }'->'a';-- orphan high surrogate
306+
select json'{ "a": "\ude04X" }'->'a';-- orphan low surrogate

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp