Notifications: You must be signed in to change notification settings
Fork5
Star27

Commit 02faeb4

committed

Surrogate pair support for U& string and identifier syntax

This is mainly to make the functionality consistent with the proposed \uescape syntax.

1 parent c6bc0fe, commit 02faeb4 (Copy full SHA for 02faeb4)

File tree

2 files changed

+81

-6

lines changed

doc/src/sgml
- syntax.sgml
src/backend/parser
- scan.l

2 files changed

+81

-6

lines changed

`‎doc/src/sgml/syntax.sgml‎`

Lines changed: 9 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.134 2009/08/27 20:08:02 tgl Exp $ -->`
	`1`	`+<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.135 2009/09/21 22:22:07 petere Exp $ -->`
`2`	`2`
`3`	`3`	`<chapter id="sql-syntax">`
`4`	`4`	`<title>SQL Syntax</title>`
`@@ -238,6 +238,10 @@ U&"d!0061t!+000061" UESCAPE '!'`
`238`	`238`	`The Unicode escape syntax works only when the server encoding is`
`239`	`239`	`UTF8. When other server encodings are used, only code points in`
`240`	`240`	`the ASCII range (up to <literal>\007F</literal>) can be specified.`
	`241`	`+ Both the 4-digit and the 6-digit form can be used to specify`
	`242`	`+ UTF-16 surrogate pairs to compose characters with code points`
	`243`	`+ larger than <literal>\FFFF</literal> (although the availability of`
	`244`	`+ the 6-digit form technically makes this unnecessary).`
`241`	`245`	`</para>`
`242`	`246`
`243`	`247`	`<para>`
`@@ -497,6 +501,10 @@ U&'d!0061t!+000061' UESCAPE '!'`
`497`	`501`	`UTF8. When other server encodings are used, only code points in`
`498`	`502`	`the ASCII range (up to <literal>\007F</literal>) can be`
`499`	`503`	`specified.`
	`504`	`+ Both the 4-digit and the 6-digit form can be used to specify`
	`505`	`+ UTF-16 surrogate pairs to compose characters with code points`
	`506`	`+ larger than <literal>\FFFF</literal> (although the availability`
	`507`	`+ of the 6-digit form technically makes this unnecessary).`
`500`	`508`	`</para>`
`501`	`509`
`502`	`510`	`<para>`

`‎src/backend/parser/scan.l‎`

Lines changed: 72 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@`
`24`	`24`	`* Portions Copyright (c) 1994, Regents of the University of California`
`25`	`25`	`*`
`26`	`26`	`* IDENTIFICATION`
`27`		`- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.157 2009/07/14 20:24:10 tgl Exp $`
	`27`	`+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.158 2009/09/21 22:22:07 petere Exp $`
`28`	`28`	`*`
`29`	`29`	`*-------------------------------------------------------------------------`
`30`	`30`	`*/`
`@@ -1097,11 +1097,30 @@ check_unicode_value(pg_wchar c, char *loc, base_yyscan_t yyscanner)`
`1097`	`1097`	`}`
`1098`	`1098`	`}`
`1099`	`1099`
	`1100`	`+static bool`
	`1101`	`+is_utf16_surrogate_first(pg_wchar c)`
	`1102`	`+{`
	`1103`	`+return (c >=0xD800 && c <=0xDBFF);`
	`1104`	`+}`
	`1105`	`+`
	`1106`	`+static bool`
	`1107`	`+is_utf16_surrogate_second(pg_wchar c)`
	`1108`	`+{`
	`1109`	`+return (c >=0xDC00 && c <=0xDFFF);`
	`1110`	`+}`
	`1111`	`+`
	`1112`	`+static pg_wchar`
	`1113`	`+surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)`
	`1114`	`+{`
	`1115`	`+return ((first &0x3FF) <<10) +0x10000 + (second &0x3FF);`
	`1116`	`+}`
	`1117`	`+`
`1100`	`1118`	`static char *`
`1101`	`1119`	`litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)`
`1102`	`1120`	`{`
`1103`	`1121`	`char *new;`
`1104`	`1122`	`char *litbuf, *in, *out;`
	`1123`	`+pg_wchar pair_first =0;`
`1105`	`1124`
`1106`	`1125`	`if (isxdigit(escape)`
`1107`	`1126`	`|| escape == '+'`
`@@ -1131,16 +1150,39 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)`
`1131`	`1150`	`{`
`1132`	`1151`	`if (in[1] == escape)`
`1133`	`1152`	`{`
	`1153`	`+if (pair_first)`
	`1154`	`+{`
	`1155`	`+ADVANCE_YYLLOC(in - litbuf +3);/* 3 for U&" */`
	`1156`	`+yyerror("invalid Unicode surrogate pair");`
	`1157`	`+}`
`1134`	`1158`	`*out++ = escape;`
`1135`	`1159`	`in +=2;`
`1136`	`1160`	`}`
`1137`	`1161`	`else if (isxdigit(in[1]) && isxdigit(in[2]) && isxdigit(in[3]) && isxdigit(in[4]))`
`1138`	`1162`	`{`
`1139`	`1163`	`pg_wchar unicode = hexval(in[1]) * 16*16*16 + hexval(in[2]) * 16*16 + hexval(in[3]) * 16 + hexval(in[4]);`
`1140`	`1164`	`check_unicode_value(unicode, in, yyscanner);`
`1141`		`-unicode_to_utf8(unicode, (unsigned char *) out);`
	`1165`	`+if (pair_first)`
	`1166`	`+{`
	`1167`	`+if (is_utf16_surrogate_second(unicode))`
	`1168`	`+{`
	`1169`	`+unicode =surrogate_pair_to_codepoint(pair_first, unicode);`
	`1170`	`+pair_first =0;`
	`1171`	`+}`
	`1172`	`+else`
	`1173`	`+{`
	`1174`	`+ADVANCE_YYLLOC(in - litbuf +3);/* 3 for U&" */`
	`1175`	`+yyerror("invalid Unicode surrogate pair");`
	`1176`	`+}`
	`1177`	`+}`
	`1178`	`+if (is_utf16_surrogate_first(unicode))`
	`1179`	`+pair_first = unicode;`
	`1180`	`+else`
	`1181`	`+{`
	`1182`	`+unicode_to_utf8(unicode, (unsigned char *) out);`
	`1183`	`+out +=pg_mblen(out);`
	`1184`	`+}`
`1142`	`1185`	`in +=5;`
`1143`		`-out +=pg_mblen(out);`
`1144`	`1186`	`}`
`1145`	`1187`	`else if (in[1] == '+'`
`1146`	`1188`	`&& isxdigit(in[2]) && isxdigit(in[3])`
`@@ -1150,9 +1192,27 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)`
`1150`	`1192`	`pg_wchar unicode = hexval(in[2]) * 16*16*16*16*16 + hexval(in[3]) * 16*16*16*16 + hexval(in[4]) * 16*16*16`
`1151`	`1193`	`+ hexval(in[5]) * 16*16 + hexval(in[6]) * 16 + hexval(in[7]);`
`1152`	`1194`	`check_unicode_value(unicode, in, yyscanner);`
`1153`		`-unicode_to_utf8(unicode, (unsigned char *) out);`
	`1195`	`+if (pair_first)`
	`1196`	`+{`
	`1197`	`+if (is_utf16_surrogate_second(unicode))`
	`1198`	`+{`
	`1199`	`+unicode =surrogate_pair_to_codepoint(pair_first, unicode);`
	`1200`	`+pair_first =0;`
	`1201`	`+}`
	`1202`	`+else`
	`1203`	`+{`
	`1204`	`+ADVANCE_YYLLOC(in - litbuf +3);/* 3 for U&" */`
	`1205`	`+yyerror("invalid Unicode surrogate pair");`
	`1206`	`+}`
	`1207`	`+}`
	`1208`	`+if (is_utf16_surrogate_first(unicode))`
	`1209`	`+pair_first = unicode;`
	`1210`	`+else`
	`1211`	`+{`
	`1212`	`+unicode_to_utf8(unicode, (unsigned char *) out);`
	`1213`	`+out +=pg_mblen(out);`
	`1214`	`+}`
`1154`	`1215`	`in +=8;`
`1155`		`-out +=pg_mblen(out);`
`1156`	`1216`	`}`
`1157`	`1217`	`else`
`1158`	`1218`	`{`
`@@ -1161,7 +1221,14 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner)`
`1161`	`1221`	`}`
`1162`	`1222`	`}`
`1163`	`1223`	`else`
	`1224`	`+{`
	`1225`	`+if (pair_first)`
	`1226`	`+{`
	`1227`	`+ADVANCE_YYLLOC(in - litbuf +3);/* 3 for U&" */`
	`1228`	`+yyerror("invalid Unicode surrogate pair");`
	`1229`	`+}`
`1164`	`1230`	`*out++ = *in++;`
	`1231`	`+}`
`1165`	`1232`	`}`
`1166`	`1233`
`1167`	`1234`	`*out ='\0';`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 02faeb4

File tree

2 files changed

2 files changed

`‎doc/src/sgml/syntax.sgml‎`

`‎src/backend/parser/scan.l‎`

0 commit comments