1212 *
1313 *
1414 * IDENTIFICATION
15- * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.136 2005/06/16 01:43:48 momjian Exp $
15+ * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.137 2005/10/05 14:58:36 meskes Exp $
1616 *
1717 *-------------------------------------------------------------------------
1818*/
@@ -29,6 +29,8 @@ extern YYSTYPE yylval;
2929
3030static int xcdepth =0 ;/* depth of nesting in slash-star comments*/
3131static char *dolqstart;/* current $foo$ quote start string*/
32+ bool escape_string_warning; /* read by check_escape_warning(); no assignment to it appears in the visible hunks, so it is presumably set by another file (TODO confirm) */
33+ static bool warn_on_first_escape; /* set true by the xqstart rule, set false by the xestart rule, and cleared by check_escape_warning() after the first escape in a literal */
3234
3335/*
3436 * literalbuf is used to accumulate literal values when multiple rules
@@ -44,6 +46,7 @@ static intliteralalloc;/* current allocated buffer size */
4446static void addlit (char *ytext,int yleng);
4547static void addlitchar (unsigned char );
4648static void parse_include (void );
49+ static void check_escape_warning (void ); /* called from the xq backslash-escape rules; defined further down in this file */
4750
4851char *token_start;
4952int state_before;
@@ -111,48 +114,44 @@ static struct _if_value
111114/* Bit string
112115 */
113116xbstart [bB ]{quote }
114- xbstop {quote }
115117xbinside [^ ' ]*
116- xbcat {quote }{whitespace_with_newline }{quote }
117118
118- /* Hexadecimal number
119- */
119+ /* Hexadecimal number */
120120xhstart [xX ]{quote }
121- xhstop {quote }
122121xhinside [^ ' ]*
123- xhcat {quote }{whitespace_with_newline }{quote }
124122
125- /* National character
126- */
123+ /* National character */
127124xnstart [nN ]{quote }
128125
129- /* C version of hex number
130- */
126+ /* Quoted string that allows backslash escapes */
127+ xestart [eE ]{quote }
128+
129+ /* C version of hex number */
131130xch 0[xX ][0 -9A -Fa -f ]*
132131
133132/* Extended quote
134- * xqdouble implements embedded quote
135- * xqcat allows strings to cross input lines
133+ * xqdouble implements embedded quote, ''''
136134 */
137- quote '
138135xqstart {quote }
139- xqstop {quote }
140136xqdouble {quote }{quote }
141137xqinside [^ \\ ' ]+
142138xqescape [\\ ][^ 0 -7 ]
143139xqoctesc [\\ ][0 -7 ]{1,3 }
144140xqhexesc [\\ ]x[0 -9A -Fa -f ]{1,2 }
145- xqcat {quote }{whitespace_with_newline }{quote }
146141
147142/* $foo$ style quotes ("dollar quoting")
148143 * The quoted string starts with $foo$ where "foo" is an optional string
149144 * in the form of an identifier, except that it may not contain "$",
150145 * and extends to the first occurrence of an identical string.
151146 * There is *no* processing of the quoted text.
147+ *
148+ * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
149+ * fails to match its trailing "$".
152150 */
153151dolq_start [A -Za -z \200 -\377 _ ]
154152dolq_cont [A -Za -z \200 -\377 _0 -9 ]
155153dolqdelim \$ ({dolq_start }{dolq_cont }* )? \$
154+ dolqfailed \$ {dolq_start }{dolq_cont }*
156155dolqinside [^ $ ]+
157156
158157/* Double quote
@@ -218,11 +217,16 @@ operator{op_chars}+
218217/* we no longer allow unary minus in numbers.
219218 * instead we pass it separately to parser. there it gets
220219 * coerced via doNegate() -- Leon aug 20 1999
220+ *
221+ * {realfail1} and {realfail2} are added to prevent the need for scanner
222+ * backup when the {real} rule fails to match completely.
221223 */
222224
223225integer {digit }+
224226decimal (({digit }* \. {digit }+ )| ({digit }+ \. {digit }* ))
225- real ((({digit }* \. {digit }+ )| ({digit }+ \. {digit }* )| ({digit }+ ))([Ee ][-+ ]? {digit }+ ))
227+ real ({integer }| {decimal })[Ee ][-+ ]? {digit }+
228+ realfail1 ({integer }| {decimal })[Ee ]
229+ realfail2 ({integer }| {decimal })[Ee ][-+ ]
226230
227231param \$ {integer }
228232
@@ -262,6 +266,11 @@ whitespace({space}+|{comment})
262266horiz_whitespace ({horiz_space }| {comment })
263267whitespace_with_newline ({horiz_whitespace }* {newline }{whitespace }* )
264268 /* Closing-quote patterns shared by the xb, xh and xq rules. quotestop: a quote plus any trailing whitespace. quotecontinue: quote, whitespace containing a newline, quote (the rules ignore it, so the literal continues). quotefail: quote, whitespace, then a dash. Every stop rule calls yyless(1), so only the quote itself is consumed. Presumably quotefail exists, like dolqfailed and realfail1/realfail2, to avoid scanner backup (TODO confirm). */
269+ quote '
270+ quotestop {quote }{whitespace }*
271+ quotecontinue {quote }{whitespace_with_newline }{quote }
272+ quotefail {quote }{whitespace }* " -"
273+
265274/* special characters for other dbms */
266275/* we have to react differently in compat mode */
267276informix_special [\$ ]
@@ -343,6 +352,7 @@ cppline{space}*#(.*\\{space})*.*{newline}
343352
344353<xc >{xcinside }{ ECHO; }
345354<xc >{op_chars }{ ECHO; }
355+ <xc >\* + { ECHO; }
346356
347357<xc ><<EOF>> {mmerror (PARSE_ERROR, ET_FATAL," Unterminated /* comment" ); }
348358
@@ -352,7 +362,9 @@ cppline{space}*#(.*\\{space})*.*{newline}
352362startlit ();
353363addlitchar (' b' );
354364}
355- <xb >{xbstop }{
365+ <xb >{quotestop } |
366+ <xb >{quotefail }{
367+ yyless (1 );
356368BEGIN (SQL);
357369if (literalbuf[strspn (literalbuf," 01" ) +1 ] !=' \0 ' )
358370mmerror (PARSE_ERROR, ET_ERROR," invalid bit string input." );
@@ -362,53 +374,80 @@ cppline{space}*#(.*\\{space})*.*{newline}
362374
363375<xh >{xhinside }|
364376<xb >{xbinside }{addlit (yytext, yyleng); }
365- <xh >{xhcat } |
366- <xb >{xbcat } {/* ignore */ }
377+ <xh >{quotecontinue } |
378+ <xb >{quotecontinue } {/* ignore */ }
367379<xb ><<EOF>> {mmerror (PARSE_ERROR, ET_FATAL," Unterminated bit string" ); }
368380
369381<SQL >{xhstart }{ /* start of a hexadecimal literal: x or X followed by a quote */
370382token_start = yytext;
371383BEGIN (xh);
372384startlit ();
373385addlitchar (' x' ); /* the literal buffer begins with the x prefix character */
374- }
375- <xh >{xhstop }{
376- yylval.str =mm_strdup (literalbuf);
377- return XCONST;
378- }
386+ }
387+ <xh >{quotestop }|
388+ <xh >{quotefail } {
389+ yyless (1 ); /* keep only the closing quote; whatever else the pattern matched is pushed back for rescanning */
390+ BEGIN (SQL); /* leave the xh start condition; the removed xhstop rule returned without any BEGIN */
391+ yylval.str =mm_strdup (literalbuf); /* token value is a copy of the accumulated literal buffer */
392+ return XCONST;
393+ }
379394
380395<xh ><<EOF>> {mmerror (PARSE_ERROR, ET_FATAL," Unterminated hexadecimal integer" ); }
381396<SQL >{xnstart } {
382397/* National character.
383- * Need to remember type info to flow it forward into the parser.
384- * Not yet implemented. - thomas 2002-06-17
398+ * Transfer it as-is to the backend.
385399 */
386400 token_start = yytext;
387401BEGIN (xq); /* NOTE(review): unlike the xqstart and xestart rules, this rule assigns neither state_before nor warn_on_first_escape before entering xq, so the xq stop rule does BEGIN(state_before) and check_escape_warning() tests the flag with whatever value was stored last -- confirm this is intended */
388402startlit (); /* NOTE(review): no prefix character is added to the literal buffer here, unlike the xb and xh start rules -- confirm against the as-is wording above */
389403}
390404<C ,SQL >{xqstart }{
391- token_start = yytext;
392- state_before = YYSTATE;
393- BEGIN (xq);
394- startlit ();
395- }
396- <xq >{xqstop }{
397- BEGIN (state_before);
398- yylval.str =mm_strdup (literalbuf);
399- return SCONST;
400- }
405+ warn_on_first_escape =true ; /* plain quoted literal: the first backslash escape may trigger the warning */
406+ token_start = yytext;
407+ state_before = YYSTATE; /* remember whether we came from C or SQL; the xq stop rule returns there */
408+ BEGIN (xq);
409+ startlit ();
410+ }
411+ <C ,SQL >{xestart }{
412+ warn_on_first_escape =false ; /* literal introduced by e or E: escapes are allowed, so the warning is disabled for this literal */
413+ token_start = yytext;
414+ state_before = YYSTATE; /* same bookkeeping as the xqstart rule; both share the xq start condition */
415+ BEGIN (xq);
416+ startlit ();
417+ }
418+ <xq >{quotestop } |
419+ <xq >{quotefail }{
420+ yyless (1 ); /* consume only the closing quote; trailing whitespace (and the dash in the quotefail case) is pushed back */
421+ BEGIN (state_before); /* return to the start condition saved when the literal was opened */
422+ yylval.str =mm_strdup (literalbuf); /* token value is a copy of the accumulated literal text */
423+ return SCONST;
424+ }
401425<xq >{xqdouble }{addlitchar (' \' ' ); }
402426<xq >{xqinside }{addlit (yytext, yyleng); }
403- <xq >{xqescape } {addlit (yytext, yyleng); }
404- <xq >{xqoctesc }{addlit (yytext, yyleng); }
405- <xq >{xqhexesc }{addlit (yytext, yyleng); }
406- <xq >{xqcat }{/* ignore */ }
427+ <xq >{xqescape } { /* backslash followed by a character outside 0-7 */
428+ check_escape_warning (); /* warns at most once per literal, and only if both warning flags are set */
429+ addlit (yytext, yyleng); /* the matched text, backslash included, is passed to addlit unchanged */
430+ }
431+ <xq >{xqoctesc }{ /* backslash followed by one to three octal digits */
432+ check_escape_warning ();
433+ addlit (yytext, yyleng);
434+ }
435+ <xq >{xqhexesc }{ /* backslash, x, then one or two hex digits */
436+ check_escape_warning ();
437+ addlit (yytext, yyleng);
438+ }
439+ <xq >{quotecontinue }{/* ignore */ }
407440<xq >. {
408441/* This is only needed for \ just before EOF */
409442addlitchar (yytext[0 ]);
410443 }
411444<xq ><<EOF>> {mmerror (PARSE_ERROR, ET_FATAL," Unterminated quoted string" ); }
445+ <SQL >{dolqfailed }{
446+ /* throw back all but the initial "$" */
447+ yyless (1 );
448+ /* and treat it as {other} */
449+ return yytext[0 ];
450+ }
412451<SQL >{dolqdelim } {
413452token_start = yytext;
414453dolqstart =mm_strdup (yytext);
@@ -434,9 +473,8 @@ cppline{space}*#(.*\\{space})*.*{newline}
434473yyless (yyleng-1 );
435474}
436475}
437- <xdolq >{dolqinside } {
438- addlit (yytext, yyleng);
439- }
476+ <xdolq >{dolqinside } {addlit (yytext, yyleng); }
477+ <xdolq >{dolqfailed }{addlit (yytext, yyleng); /* inside a dollar-quoted string, a $tag that lacks its closing $ is kept as ordinary literal text */ }
440478<xdolq >. {
441479/* This is only needed for $ inside the quoted text */
442480addlitchar (yytext[0 ]);
@@ -588,11 +626,21 @@ cppline{space}*#(.*\\{space})*.*{newline}
588626{decimal }{
589627yylval.str =mm_strdup (yytext);
590628return FCONST;
591- }
629+ }
592630<C ,SQL >{real }{
593631yylval.str =mm_strdup (yytext);
594632return FCONST;
595- }
633+ }
634+ <SQL >{realfail1 }{ /* number followed by e or E but no exponent digits; NOTE(review): the real rule is active in C and SQL, this fallback only in SQL -- confirm */
635+ yyless (yyleng-1 ); /* push back the trailing exponent letter */
636+ yylval.str =mm_strdup (yytext); /* after yyless, yytext holds only the retained numeric prefix */
637+ return FCONST; /* returned as FCONST even when the retained text matches the integer pattern */
638+ }
639+ <SQL >{realfail2 }{ /* number followed by e or E and a sign, but no exponent digits */
640+ yyless (yyleng-2 ); /* push back both the exponent letter and the sign */
641+ yylval.str =mm_strdup (yytext);
642+ return FCONST;
643+ }
596644<SQL >:{identifier }(((" ->" | \. ){identifier })| (\[ {array }\] ))* {
597645yylval.str =mm_strdup (yytext+1 );
598646return (CVARIABLE);
@@ -1189,3 +1237,11 @@ parse_include(void)
11891237
11901238 BEGIN C;
11911239}
1240+
1241+ static void
1242+ check_escape_warning (void )
1243+ { /* Invoked by the xq escape rules for every backslash escape matched inside a quoted literal. */
1244+ if (warn_on_first_escape && escape_string_warning) /* warn only when the current literal is eligible and the global switch is on */
1245+ mmerror (PARSE_ERROR, ET_WARNING," nonstandard use of escape in a string literal" );
1246+ warn_on_first_escape =false ;/* warn only once per string: cleared unconditionally, so later escapes in the same literal stay silent */
1247+ }