99 *
1010 *
1111 * IDENTIFICATION
12- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.65 2000/02/21 18:47:02 tgl Exp $
12+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.66 2000/03/11 05:14:06 tgl Exp $
1313 *
1414 *-------------------------------------------------------------------------
1515*/
@@ -133,22 +133,24 @@ xdstop{dquote}
133133xdinside [^ " ]+
134134
135135/* C-style comments
136- * Ignored by the scanner and parser.
137136 *
138137 * The "extended comment" syntax closely resembles allowable operator syntax.
139138 * The tricky part here is to get lex to recognize a string starting with
140139 * slash-star as a comment, when interpreting it as an operator would produce
141- * a longer match --- remember lex will prefer a longer match! So, we have
142- * to provide a special rule for xcline (a complete comment that could
143- * otherwise look like an operator), as well as append {op_and_self}* to
144- * xcstart so that it matches at least as much as {operator} would.
145- * Then the tie-breaker (first matching rule of same length) wins.
146- * There is still a problem if someone writes, eg, slash-star-star-slash-plus.
147- * It'll be taken as an xcstart, rather than xcline and an operator as one
148- * could wish. I don't see any way around that given lex's behavior;
149- * that someone will just have to write a space after the comment.
140+ * a longer match --- remember lex will prefer a longer match! Also, if we
141+ * have something like plus-slash-star, lex will think this is a 3-character
142+ * operator whereas we want to see it as a + operator and a comment start.
143+ * The solution is two-fold:
144+ * 1. append {op_and_self}* to xcstart so that it matches as much text as
145+ * {operator} would. Then the tie-breaker (first matching rule of same
146+ * length) ensures xcstart wins. We put back the extra stuff with yyless()
147+ * in case it contains a star-slash that should terminate the comment.
148+ * 2. In the operator rule, check for slash-star within the operator, and
149+ * if found throw it back with yyless(). This handles the plus-slash-star
150+ * problem.
151+ * SQL92-style comments, which start with dash-dash, have similar interactions
152+ * with the operator rule.
150153 */
151- xcline \/\* {op_and_self }* \*\/
152154xcstart \/\* {op_and_self }*
153155xcstop \* + \/
154156xcinside ([^ * ]+ )| (\* + [^ / ])
@@ -161,6 +163,7 @@ identifier{letter}{letter_or_digit}*
161163
162164typecast " ::"
163165
166+ /* NB: if you change "self", fix the copy in the operator rule too! */
164167self [,() \[\] .;$ \:\+\-\*\/\%\^\<\>\=\| ]
165168op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\= ]
166169operator {op_and_self }+
@@ -218,27 +221,30 @@ other.
218221 *
219222 * Quoted strings must allow some special characters such as single-quote
220223 * and newline.
221- * Embedded single-quotes are implemented both in the SQL/92-standard
224+ * Embedded single-quotes are implemented both in the SQL92-standard
222225 * style of two adjacent single quotes "''" and in the Postgres/Java style
223226 * of escaped-quote "\'".
224227 * Other embedded escaped characters are matched explicitly and the leading
225228 * backslash is dropped from the string. - thomas 1997-09-24
226- * Note that xcline must appear before xcstart, which must appear before
227- * operator, as explained above! Also whitespace (comment) must appear
228- * before operator.
229+ * Note that xcstart must appear before operator, as explained above!
230+ * Also whitespace (comment) must appear before operator.
229231 */
230232
231233%%
232234{whitespace }{/* ignore */ }
233235
234- {xcline }{/* ignore */ }
235-
236- {xcstart }{BEGIN (xc); }
236+ {xcstart }{
237+ BEGIN (xc);
238+ /* Put back everything past the leading slash-star, in case it contains a star-slash that should terminate the comment; see above */
239+ yyless (2 );
240+ }
237241
238242<xc >{xcstop }{BEGIN (INITIAL); }
239243
240244<xc >{xcinside }{/* ignore */ }
241245
246+ <xc ><<EOF>> {elog (ERROR," Unterminated /* comment" ); }
247+
242248{xbstart }{
243249BEGIN (xb);
244250startlit ();
@@ -262,6 +268,7 @@ other.
262268<xb >{xbcat }{
263269/* ignore */
264270}
271+ <xb ><<EOF>> {elog (ERROR," Unterminated binary integer" ); }
265272
266273{xhstart }{
267274BEGIN (xh);
@@ -278,6 +285,7 @@ other.
278285 literalbuf);
279286return ICONST;
280287}
288+ <xh ><<EOF>> {elog (ERROR," Unterminated hexadecimal integer" ); }
281289
282290{xqstart }{
283291BEGIN (xq);
@@ -296,6 +304,7 @@ other.
296304<xq >{xqcat }{
297305/* ignore */
298306}
307+ <xq ><<EOF>> {elog (ERROR," Unterminated quoted string" ); }
299308
300309
301310{xdstart }{
@@ -310,12 +319,39 @@ other.
310319<xd >{xdinside }{
311320addlit (yytext, yyleng);
312321}
322+ <xd ><<EOF>> {elog (ERROR," Unterminated quoted identifier" ); }
313323
314324{typecast }{return TYPECAST; }
315325
316326{self }{return yytext[0 ]; }
317327
318328{operator }{
329+ /* Check for embedded slash-star or dash-dash */
330+ char *slashstar =strstr ((char *)yytext," /*" );
331+ char *dashdash =strstr ((char *)yytext," --" );
332+
333+ if (slashstar && dashdash)
334+ {
335+ if (slashstar > dashdash)
336+ slashstar = dashdash;
337+ }
338+ else if (!slashstar)
339+ slashstar = dashdash;
340+
341+ if (slashstar)
342+ {
343+ int nchars = slashstar - ((char *)yytext);
344+ yyless (nchars);
345+ /* If what we have left is only one char, and it's
346+ * one of the characters matching "self", then
347+ * return it as a character token the same way
348+ * that the "self" rule would have.
349+ */
350+ if (nchars ==1 &&
351+ strchr (" ,()[].;$:+-*/%^<>=|" , yytext[0 ]))
352+ return yytext[0 ];
353+ }
354+
319355if (strcmp ((char *)yytext," !=" ) ==0 )
320356yylval.str =pstrdup (" <>" );/* compatibility */
321357else