Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 799ac99

Browse files
committed
Sync psql's scanner with recent changes in backend scanner's flex rules.
Marko Kreen, Tom Lane
1 parent 3686bcb, commit 799ac99

File tree

2 files changed

+47
-7
lines changed

2 files changed

+47
-7
lines changed

‎src/backend/parser/scan.l

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1994, Regents of the University of California
2525
*
2626
* IDENTIFICATION
27-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.161 2009/09/25 21:13:06 petere Exp $
27+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -571,18 +571,16 @@ other.
571571

572572
BEGIN(xe);
573573
}
574-
<xeu>.|
575-
<xeu>\n|
574+
<xeu>.{yyerror("invalid Unicode surrogate pair"); }
575+
<xeu>\n{yyerror("invalid Unicode surrogate pair"); }
576576
<xeu><<EOF>>{yyerror("invalid Unicode surrogate pair"); }
577-
578577
<xe,xeu>{xeunicodefail}{
579578
ereport(ERROR,
580579
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
581580
errmsg("invalid Unicode escape"),
582581
errhint("Unicode escapes must be\\uXXXX or\\UXXXXXXXX."),
583582
lexer_errposition()));
584-
}
585-
583+
}
586584
<xe>{xeescape} {
587585
if (yytext[1] =='\'')
588586
{

‎src/bin/psql/psqlscan.l

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
* Portions Copyright (c) 1994, Regents of the University of California
3434
*
3535
* IDENTIFICATION
36-
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.28 2009/01/01 17:23:55 momjian Exp $
36+
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
3737
*
3838
*-------------------------------------------------------------------------
3939
*/
@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
117117
static YY_BUFFER_STATEprepare_buffer(constchar *txt,int len,
118118
char **txtcopy);
119119
staticvoidemit(constchar *txt,int len);
120+
staticboolis_utf16_surrogate_first(uint32 c);
120121

121122
#defineECHOemit(yytext, yyleng)
122123

@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
158159
* <xdolq> $foo$ quoted strings
159160
* <xui> quoted identifier with Unicode escapes
160161
* <xus> quoted string with Unicode escapes
162+
* <xeu> Unicode surrogate pair in extended quoted string
161163
*/
162164

163165
%x xb
@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
169171
%x xdolq
170172
%x xui
171173
%x xus
174+
%x xeu
172175
/* Additional exclusive states for psql only: lex backslash commands*/
173176
%x xslashcmd
174177
%x xslasharg
@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
192195
* did not end with a newline.
193196
*
194197
* XXX perhaps \f (formfeed) should be treated as a newline as well?
198+
*
199+
* XXX if you change the set of whitespace characters, fix scanner_isspace()
200+
* to agree, and see also the plpgsql lexer.
195201
*/
196202

197203
space[ \t\n\r\f]
@@ -253,6 +259,8 @@ xeinside[^\\']+
253259
xeescape[\\][^0-7]
254260
xeoctesc[\\][0-7]{1,3}
255261
xehexesc[\\]x[0-9A-Fa-f]{1,2}
262+
xeunicode[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
263+
xeunicodefail[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
256264
257265
/* Extended quote
258266
* xqdouble implements embedded quote,''''
@@ -334,6 +342,10 @@ identifier{ident_start}{ident_cont}*
334342

335343
typecast"::"
336344

345+
/* these two token types are used by PL/pgsql, though not in core SQL*/
346+
dot_dot\.\.
347+
colon_equals":="
348+
337349
/*
338350
* "self" is the set of chars that should be returned as single-character
339351
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
@@ -511,6 +523,22 @@ other.
511523
<xe>{xeinside} {
512524
ECHO;
513525
}
526+
<xe>{xeunicode} {
527+
uint32 c =strtoul(yytext+2,NULL,16);
528+
529+
if (is_utf16_surrogate_first(c))
530+
BEGIN(xeu);
531+
ECHO;
532+
}
533+
<xeu>{xeunicode} {
534+
BEGIN(xe);
535+
ECHO;
536+
}
537+
<xeu>.{ ECHO; }
538+
<xeu>\n{ ECHO; }
539+
<xe,xeu>{xeunicodefail}{
540+
ECHO;
541+
}
514542
<xe>{xeescape} {
515543
ECHO;
516544
}
@@ -605,6 +633,14 @@ other.
605633
ECHO;
606634
}
607635

636+
{dot_dot}{
637+
ECHO;
638+
}
639+
640+
{colon_equals}{
641+
ECHO;
642+
}
643+
608644
/*
609645
* These rules are specific to psql --- they implement parenthesis
610646
* counting and detection of command-ending semicolon. These must
@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
16901726
}
16911727
}
16921728
}
1729+
1730+
staticbool
1731+
is_utf16_surrogate_first(uint32 c)
1732+
{
1733+
return (c >=0xD800 && c <=0xDBFF);
1734+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp