@@ -97,6 +97,7 @@ static bool is_utf16_surrogate_first(pg_wchar c);
9797static bool is_utf16_surrogate_second (pg_wchar c);
9898static pg_wcharsurrogate_pair_to_codepoint (pg_wchar first, pg_wchar second);
9999static void addunicode (pg_wchar c,yyscan_t yyscanner);
100+ static bool check_uescapechar (unsigned char escape);
100101
101102#define yyerror (msg ) scanner_yyerror(msg, yyscanner)
102103
@@ -150,7 +151,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
150151 * <xe> extended quoted strings (support backslash escape sequences)
151152 * <xdolq> $foo$ quoted strings
152153 * <xui> quoted identifier with Unicode escapes
154+ * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
153155 * <xus> quoted string with Unicode escapes
156+ * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
154157 * <xeu> Unicode surrogate pair in extended quoted string
155158 */
156159
@@ -162,7 +165,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
162165%x xq
163166%x xdolq
164167%x xui
168+ %x xuiend
165169%x xus
170+ %x xusend
166171%x xeu
167172
168173/*
@@ -279,17 +284,17 @@ xdinside[^"]+
279284/* Unicode escapes */
280285uescape [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]{quote }
281286/* error rule to avoid backup */
282- uescapefail ( " - " | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ])
287+ uescapefail [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ]
283288
284289/* Quoted identifier with Unicode escapes */
285290xuistart [uU ]&{dquote }
286- xuistop1 {dquote }{whitespace }* {uescapefail }?
287- xuistop2 {dquote }{whitespace }* {uescape }
288291
289292/* Quoted string with Unicode escapes */
290293xusstart [uU ]&{quote }
291- xusstop1 {quote }{whitespace }* {uescapefail }?
292- xusstop2 {quote }{whitespace }* {uescape }
294+
295+ /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
296+ xustop1 {uescapefail }?
297+ xustop2 {uescape }
293298
294299/* error rule to avoid backup */
295300xufailed [uU ]&
@@ -536,15 +541,31 @@ other.
536541yylval->str =litbufdup (yyscanner);
537542return SCONST;
538543}
539- <xus >{xusstop1 } {
<xus>{quotestop} |
<xus>{quotefail} {
					/*
					 * The literal body is finished.  Switch to xusend, which
					 * looks for an optional UESCAPE clause, and keep only the
					 * first matched character (the quote); everything after
					 * it is pushed back to be rescanned in that state.
					 */
					BEGIN(xusend);
					yyless(1);
				}
<xusend>{whitespace}	{ /* stay in xusend while skipping whitespace */ }
<xusend>{other} |
<xusend>{xustop1} |
<xusend><<EOF>>	{
					/*
					 * No UESCAPE clause follows the closing quote: either some
					 * other character, an incomplete UESCAPE prefix, or end of
					 * input was seen.  End of input must be handled here too;
					 * otherwise a literal that ends the input would never be
					 * returned.  Push back everything that was matched and
					 * de-escape the literal with the default escape character,
					 * backslash.
					 */
					yyless(0);
					BEGIN(INITIAL);
					yylval->str = litbuf_udeescape('\\', yyscanner);
					return SCONST;
				}
546- <xus >{xusstop2 } {
<xusend>{xustop2} {
					/*
					 * A complete UESCAPE clause follows the closing quote.
					 * The match ends with quote, escape character, quote, so
					 * the escape character is the second-to-last character.
					 */
					unsigned char escape = yytext[yyleng - 2];

					BEGIN(INITIAL);
					if (!check_uescapechar(escape))
					{
						/* point the error cursor at the offending character */
						SET_YYLLOC();
						ADVANCE_YYLLOC(yyleng - 2);
						yyerror("invalid Unicode escape character");
					}
					yylval->str = litbuf_udeescape(escape, yyscanner);
					return SCONST;
				}
@@ -702,26 +723,41 @@ other.
702723yylval->str = ident;
703724return IDENT;
704725}
705- <xui >{xuistop1 }{
<xui>{dquote} {
					/*
					 * The identifier body is finished.  Switch to xuiend,
					 * which looks for an optional UESCAPE clause; only the
					 * first matched character (the quote) is kept.
					 */
					BEGIN(xuiend);
					yyless(1);
				}
<xuiend>{whitespace}	{ /* stay in xuiend while skipping whitespace */ }
<xuiend>{other} |
<xuiend>{xustop1} |
<xuiend><<EOF>>	{
					/*
					 * No UESCAPE clause follows the closing quote: either some
					 * other character, an incomplete UESCAPE prefix, or end of
					 * input was seen.  End of input must be handled here too;
					 * otherwise an identifier that ends the input would never
					 * be returned.
					 */
					char	   *ident;

					/* push back everything that was matched */
					yyless(0);

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						yyerror("zero-length delimited identifier");
					/* de-escape with the default escape character, backslash */
					ident = litbuf_udeescape('\\', yyscanner);
					if (yyextra->literallen >= NAMEDATALEN)
						truncate_identifier(ident, yyextra->literallen, true);
					yylval->str = ident;
					return IDENT;
				}
719- <xui >{xuistop2 }{
748+ <xuiend >{xustop2 }{
749+ /* found UESCAPE after the end quote */
720750char *ident;
721751
722752BEGIN (INITIAL);
723753if (yyextra->literallen ==0 )
724754yyerror (" zero-length delimited identifier" );
755+ if (!check_uescapechar (yytext[yyleng-2 ]))
756+ {
757+ SET_YYLLOC ();
758+ ADVANCE_YYLLOC (yyleng-2 );
759+ yyerror (" invalid Unicode escape character" );
760+ }
725761ident =litbuf_udeescape (yytext[yyleng -2 ], yyscanner);
726762if (yyextra->literallen >= NAMEDATALEN)
727763truncate_identifier (ident, yyextra->literallen ,true );
@@ -1203,22 +1239,29 @@ addunicode(pg_wchar c, core_yyscan_t yyscanner)
12031239addlit (buf,pg_mblen (buf), yyscanner);
12041240}
12051241
1206- static char *
1207- litbuf_udeescape (unsigned char escape,core_yyscan_t yyscanner)
/*
 * check_uescapechar
 *		Report whether 'escape' is acceptable as the escape character named
 *		in a UESCAPE clause.
 *
 * Hexadecimal digits, '+', the single quote, the double quote, and any
 * character accepted by scanner_isspace() are rejected; everything else is
 * allowed.
 */
static bool
check_uescapechar(unsigned char escape)
{
	if (isxdigit(escape))
		return false;
	if (escape == '+' || escape == '\'' || escape == '"')
		return false;
	return !scanner_isspace(escape);
}
1257+
1258+ /* like litbufdup, but handle unicode escapes */
1259+ static char *
1260+ litbuf_udeescape (unsigned char escape,core_yyscan_t yyscanner)
1261+ {
1262+ char *new ;
1263+ char *litbuf, *in, *out;
1264+ pg_wchar pair_first =0 ;
12221265
12231266/* Make literalbuf null-terminated to simplify the scanning loop */
12241267litbuf = yyextra->literalbuf ;