Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd809fd0

Browse files
committed
Improve parser's one-extra-token lookahead mechanism.
There are a couple of places in our grammar that fail to be strict LALR(1),by requiring more than a single token of lookahead to decide what to do.Up to now we've dealt with that by using a filter between the lexer andparser that merges adjacent tokens into one in the places where two tokensof lookahead are necessary. But that creates a number of user-visibleanomalies, for instance that you can't name a CTE "ordinality" because"WITH ordinality AS ..." triggers folding of WITH and ORDINALITY into onetoken. I realized that there's a better way.In this patch, we still do the lookahead basically as before, but we nevermerge the second token into the first; we replace just the first token bya special lookahead symbol when one of the lookahead pairs is seen.This requires a couple extra productions in the grammar, but it involvesfewer special tokens, so that the grammar tables come out a bit smallerthan before. The filter logic is no slower than before, perhaps a bitfaster.I also fixed the filter logic so that when backing up after a lookahead,the current token's terminator is correctly restored; this eliminates someweird behavior in error message issuance, as is shown by the one change inexisting regression test outputs.I believe that this patch entirely eliminates odd behaviors caused bylookahead for WITH. It doesn't really improve the situation for NULLSfollowed by FIRST/LAST unfortunately: those sequences still act like areserved word, even though there are cases where they should be seen as twoordinary identifiers, eg "SELECT nulls first FROM ...". I experimentedwith additional grammar hacks but couldn't find any simple solution forthat. Still, this is better than before, and it seems much more likelythat we *could* somehow solve the NULLS case on the basis of this filterbehavior than the previous one.
1 parent23a7835 commitd809fd0

File tree

8 files changed

+173
-113
lines changed

8 files changed

+173
-113
lines changed

‎src/backend/parser/gram.y

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -633,9 +633,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
633633
/*
634634
* The grammar thinks these are keywords, but they are not in the kwlist.h
635635
* list and so can never be entered directly. The filter in parser.c
636-
* creates these tokens when required.
636+
* creates these tokens when required (based on looking one token ahead).
637637
*/
638-
%tokenNULLS_FIRSTNULLS_LASTWITH_ORDINALITYWITH_TIME
638+
%tokenNULLS_LAWITH_LA
639639

640640

641641
/* Precedence: lowest to highest*/
@@ -873,6 +873,7 @@ CreateRoleStmt:
873873

874874

875875
opt_with:WITH{}
876+
|WITH_LA{}
876877
|/*EMPTY*/{}
877878
;
878879

@@ -6673,8 +6674,8 @@ opt_asc_desc: ASC{ $$ = SORTBY_ASC; }
66736674
|/*EMPTY*/{$$ = SORTBY_DEFAULT; }
66746675
;
66756676

6676-
opt_nulls_order:NULLS_FIRST{$$ = SORTBY_NULLS_FIRST; }
6677-
|NULLS_LAST{$$ = SORTBY_NULLS_LAST; }
6677+
opt_nulls_order:NULLS_LAFIRST_P{$$ = SORTBY_NULLS_FIRST; }
6678+
|NULLS_LALAST_P{$$ = SORTBY_NULLS_LAST; }
66786679
|/*EMPTY*/{$$ = SORTBY_NULLS_DEFAULT; }
66796680
;
66806681

@@ -8923,7 +8924,7 @@ AlterTSDictionaryStmt:
89238924
;
89248925

89258926
AlterTSConfigurationStmt:
8926-
ALTERTEXT_PSEARCHCONFIGURATIONany_nameADD_PMAPPINGFORname_listWITHany_name_list
8927+
ALTERTEXT_PSEARCHCONFIGURATIONany_nameADD_PMAPPINGFORname_listany_withany_name_list
89278928
{
89288929
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
89298930
n->cfgname =$5;
@@ -8933,7 +8934,7 @@ AlterTSConfigurationStmt:
89338934
n->replace =false;
89348935
$$ = (Node*)n;
89358936
}
8936-
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGFORname_listWITHany_name_list
8937+
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGFORname_listany_withany_name_list
89378938
{
89388939
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
89398940
n->cfgname =$5;
@@ -8943,7 +8944,7 @@ AlterTSConfigurationStmt:
89438944
n->replace =false;
89448945
$$ = (Node*)n;
89458946
}
8946-
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGREPLACEany_nameWITHany_name
8947+
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGREPLACEany_nameany_withany_name
89478948
{
89488949
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
89498950
n->cfgname =$5;
@@ -8953,7 +8954,7 @@ AlterTSConfigurationStmt:
89538954
n->replace =true;
89548955
$$ = (Node*)n;
89558956
}
8956-
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGFORname_listREPLACEany_nameWITHany_name
8957+
|ALTERTEXT_PSEARCHCONFIGURATIONany_nameALTERMAPPINGFORname_listREPLACEany_nameany_withany_name
89578958
{
89588959
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
89598960
n->cfgname =$5;
@@ -8981,6 +8982,11 @@ AlterTSConfigurationStmt:
89818982
}
89828983
;
89838984

8985+
/* Use this if TIME or ORDINALITY after WITH should be taken as an identifier*/
8986+
any_with:WITH{}
8987+
|WITH_LA{}
8988+
;
8989+
89848990

89858991
/*****************************************************************************
89868992
*
@@ -9891,6 +9897,8 @@ simple_select:
98919897
*AS (query) [ SEARCH or CYCLE clause ]
98929898
*
98939899
* We don't currently support the SEARCH or CYCLE clause.
9900+
*
9901+
* Recognizing WITH_LA here allows a CTE to be named TIME or ORDINALITY.
98949902
*/
98959903
with_clause:
98969904
WITHcte_list
@@ -9900,6 +9908,13 @@ with_clause:
99009908
$$->recursive =false;
99019909
$$->location =@1;
99029910
}
9911+
|WITH_LActe_list
9912+
{
9913+
$$ = makeNode(WithClause);
9914+
$$->ctes =$2;
9915+
$$->recursive =false;
9916+
$$->location =@1;
9917+
}
99039918
|WITHRECURSIVEcte_list
99049919
{
99059920
$$ = makeNode(WithClause);
@@ -10601,7 +10616,7 @@ opt_col_def_list: AS '(' TableFuncElementList ')'{ $$ = $3; }
1060110616
|/*EMPTY*/{$$ = NIL; }
1060210617
;
1060310618

10604-
opt_ordinality:WITH_ORDINALITY{$$ =true; }
10619+
opt_ordinality:WITH_LAORDINALITY{$$ =true; }
1060510620
|/*EMPTY*/{$$ =false; }
1060610621
;
1060710622

@@ -11057,7 +11072,7 @@ ConstInterval:
1105711072
;
1105811073

1105911074
opt_timezone:
11060-
WITH_TIMEZONE{$$ =TRUE; }
11075+
WITH_LATIMEZONE{$$ =TRUE; }
1106111076
|WITHOUTTIMEZONE{$$ =FALSE; }
1106211077
|/*EMPTY*/{$$ =FALSE; }
1106311078
;

‎src/backend/parser/parser.c

Lines changed: 58 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,13 @@ raw_parser(const char *str)
6464
/*
6565
* Intermediate filter between parser and core lexer (core_yylex in scan.l).
6666
*
67-
*The filter is needed because in some cases the standard SQL grammar
67+
*This filter is needed because in some cases the standard SQL grammar
6868
* requires more than one token lookahead. We reduce these cases to one-token
69-
* lookahead bycombining tokens here, in order to keep the grammar LALR(1).
69+
* lookahead byreplacing tokens here, in order to keep the grammar LALR(1).
7070
*
7171
* Using a filter is simpler than trying to recognize multiword tokens
7272
* directly in scan.l, because we'd have to allow for comments between the
73-
* words. Furthermore it's not clear how to doit without re-introducing
73+
* words. Furthermore it's not clear how to dothat without re-introducing
7474
* scanner backtrack, which would cost more performance than this filter
7575
* layer does.
7676
*
@@ -84,7 +84,7 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
8484
base_yy_extra_type*yyextra=pg_yyget_extra(yyscanner);
8585
intcur_token;
8686
intnext_token;
87-
core_YYSTYPEcur_yylval;
87+
intcur_token_length;
8888
YYLTYPEcur_yylloc;
8989

9090
/* Get next token --- we might already have it */
@@ -93,74 +93,85 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
9393
cur_token=yyextra->lookahead_token;
9494
lvalp->core_yystype=yyextra->lookahead_yylval;
9595
*llocp=yyextra->lookahead_yylloc;
96+
*(yyextra->lookahead_end)=yyextra->lookahead_hold_char;
9697
yyextra->have_lookahead= false;
9798
}
9899
else
99100
cur_token=core_yylex(&(lvalp->core_yystype),llocp,yyscanner);
100101

101-
/* Do we need to look ahead for a possible multiword token? */
102+
/*
103+
* If this token isn't one that requires lookahead, just return it. If it
104+
* does, determine the token length. (We could get that via strlen(), but
105+
* since we have such a small set of possibilities, hardwiring seems
106+
* feasible and more efficient.)
107+
*/
102108
switch (cur_token)
103109
{
104110
caseNULLS_P:
111+
cur_token_length=5;
112+
break;
113+
caseWITH:
114+
cur_token_length=4;
115+
break;
116+
default:
117+
returncur_token;
118+
}
105119

106-
/*
107-
* NULLS FIRST and NULLS LAST must be reduced to one token
108-
*/
109-
cur_yylval=lvalp->core_yystype;
110-
cur_yylloc=*llocp;
111-
next_token=core_yylex(&(lvalp->core_yystype),llocp,yyscanner);
120+
/*
121+
* Identify end+1 of current token. core_yylex() has temporarily stored a
122+
* '\0' here, and will undo that when we call it again. We need to redo
123+
* it to fully revert the lookahead call for error reporting purposes.
124+
*/
125+
yyextra->lookahead_end=yyextra->core_yy_extra.scanbuf+
126+
*llocp+cur_token_length;
127+
Assert(*(yyextra->lookahead_end)=='\0');
128+
129+
/*
130+
* Save and restore *llocp around the call. It might look like we could
131+
* avoid this by just passing &lookahead_yylloc to core_yylex(), but that
132+
* does not work because flex actually holds onto the last-passed pointer
133+
* internally, and will use that for error reporting. We need any error
134+
* reports to point to the current token, not the next one.
135+
*/
136+
cur_yylloc=*llocp;
137+
138+
/* Get next token, saving outputs into lookahead variables */
139+
next_token=core_yylex(&(yyextra->lookahead_yylval),llocp,yyscanner);
140+
yyextra->lookahead_token=next_token;
141+
yyextra->lookahead_yylloc=*llocp;
142+
143+
*llocp=cur_yylloc;
144+
145+
/* Now revert the un-truncation of the current token */
146+
yyextra->lookahead_hold_char=*(yyextra->lookahead_end);
147+
*(yyextra->lookahead_end)='\0';
148+
149+
yyextra->have_lookahead= true;
150+
151+
/* Replace cur_token if needed, based on lookahead */
152+
switch (cur_token)
153+
{
154+
caseNULLS_P:
155+
/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
112156
switch (next_token)
113157
{
114158
caseFIRST_P:
115-
cur_token=NULLS_FIRST;
116-
break;
117159
caseLAST_P:
118-
cur_token=NULLS_LAST;
119-
break;
120-
default:
121-
/* save the lookahead token for next time */
122-
yyextra->lookahead_token=next_token;
123-
yyextra->lookahead_yylval=lvalp->core_yystype;
124-
yyextra->lookahead_yylloc=*llocp;
125-
yyextra->have_lookahead= true;
126-
/* and back up the output info to cur_token */
127-
lvalp->core_yystype=cur_yylval;
128-
*llocp=cur_yylloc;
160+
cur_token=NULLS_LA;
129161
break;
130162
}
131163
break;
132164

133165
caseWITH:
134-
135-
/*
136-
* WITH TIME and WITH ORDINALITY must each be reduced to one token
137-
*/
138-
cur_yylval=lvalp->core_yystype;
139-
cur_yylloc=*llocp;
140-
next_token=core_yylex(&(lvalp->core_yystype),llocp,yyscanner);
166+
/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
141167
switch (next_token)
142168
{
143169
caseTIME:
144-
cur_token=WITH_TIME;
145-
break;
146170
caseORDINALITY:
147-
cur_token=WITH_ORDINALITY;
148-
break;
149-
default:
150-
/* save the lookahead token for next time */
151-
yyextra->lookahead_token=next_token;
152-
yyextra->lookahead_yylval=lvalp->core_yystype;
153-
yyextra->lookahead_yylloc=*llocp;
154-
yyextra->have_lookahead= true;
155-
/* and back up the output info to cur_token */
156-
lvalp->core_yystype=cur_yylval;
157-
*llocp=cur_yylloc;
171+
cur_token=WITH_LA;
158172
break;
159173
}
160174
break;
161-
162-
default:
163-
break;
164175
}
165176

166177
returncur_token;

‎src/include/parser/gramparse.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef struct base_yy_extra_type
4646
intlookahead_token;/* one-token lookahead */
4747
core_YYSTYPElookahead_yylval;/* yylval for lookahead token */
4848
YYLTYPElookahead_yylloc;/* yylloc for lookahead token */
49+
char*lookahead_end;/* end of current token */
50+
charlookahead_hold_char;/* to be put back at *lookahead_end */
4951

5052
/*
5153
* State variables that belong to the grammar.

‎src/interfaces/ecpg/preproc/parse.pl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,8 @@
4242

4343
# or in the block
4444
my%replace_string = (
45-
'WITH_TIME'=>'with time',
46-
'WITH_ORDINALITY'=>'with ordinality',
47-
'NULLS_FIRST'=>'nulls first',
48-
'NULLS_LAST'=>'nulls last',
45+
'NULLS_LA'=>'nulls',
46+
'WITH_LA'=>'with',
4947
'TYPECAST'=>'::',
5048
'DOT_DOT'=>'..',
5149
'COLON_EQUALS'=>':=',);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp