44 * procedural language
55 *
66 * IDENTIFICATION
7- * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.31 2004/02/24 22:06:32 tgl Exp $
7+ * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.32 2004/02/25 18:10:51 tgl Exp $
88 *
99 * This software is copyrighted by Jan Wieck - Hamburg.
1010 *
@@ -57,6 +57,8 @@ static intlookahead_token;
5757static bool have_lookahead_token;
5858static const char *cur_line_start;
5959static int cur_line_num;
60+ static char *dolqstart;/* current $foo$ quote start string*/
61+ static int dolqlen;/* signal to plpgsql_get_string_value*/
6062
6163int plpgsql_SpaceScanned =0 ;
6264%}
@@ -70,7 +72,9 @@ intplpgsql_SpaceScanned = 0;
7072%option case-insensitive
7173
7274
73- %x IN_STRING IN_COMMENT
75+ %x IN_STRING
76+ %x IN_COMMENT
77+ %x IN_DOLLARQUOTE
7478
7579digit [0 -9 ]
7680ident_start [A -Za -z \200 -\377 _ ]
@@ -84,6 +88,14 @@ param\${digit}+
8488
8589space [\t\n\r\f ]
8690
91+ /* $foo$ style quotes ("dollar quoting")
92+ * copied straight from the backend SQL parser
93+ */
94+ dolq_start [A -Za -z \200 -\377 _ ]
95+ dolq_cont [A -Za -z \200 -\377 _0 -9 ]
96+ dolqdelim \$ ({dolq_start }{dolq_cont }* )? \$
97+ dolqinside [^ $ ]+
98+
8799%%
88100/* ----------
89101 * Local variables in scanner to remember where
@@ -97,7 +109,7 @@ space[ \t\n\r\f]
97109 * Reset the state when entering the scanner
98110 * ----------
99111 */
100- BEGIN INITIAL;
112+ BEGIN( INITIAL) ;
101113 plpgsql_SpaceScanned =0 ;
102114
103115/* ----------
@@ -247,9 +259,9 @@ dump{ return O_DUMP;}
247259--[^ \r\n ]* ;
248260
249261\/\* { start_lineno =plpgsql_scanner_lineno ();
250- BEGIN IN_COMMENT;
262+ BEGIN ( IN_COMMENT) ;
251263}
252- <IN_COMMENT >\*\/ { BEGIN INITIAL; plpgsql_SpaceScanned =1 ; }
264+ <IN_COMMENT >\*\/ {BEGIN ( INITIAL) ; plpgsql_SpaceScanned =1 ; }
253265<IN_COMMENT >\n ;
254266<IN_COMMENT >. ;
255267<IN_COMMENT ><<EOF>> {
@@ -260,7 +272,7 @@ dump{ return O_DUMP;}
260272}
261273
262274/* ----------
263- * Collect anything inside of ''s and return one STRING
275+ * Collect anything inside of ''s and return one STRING token
264276 *
265277 * Hacking yytext/yyleng here lets us avoid using yymore(), which is
266278 * a win for performance. It's safe because we know the underlying
@@ -270,15 +282,18 @@ dump{ return O_DUMP;}
270282'{
271283 start_lineno =plpgsql_scanner_lineno ();
272284 start_charpos = yytext;
273- BEGIN IN_STRING;
285+ BEGIN ( IN_STRING) ;
274286}
275287<IN_STRING >\\ . { }
276288<IN_STRING >\\ {/* can only happen with \ at EOF */ }
277289<IN_STRING >''{ }
278290<IN_STRING >'{
279- yyleng -= (yytext - start_charpos);
291+ /* tell plpgsql_get_string_value it's not a dollar quote */
292+ dolqlen =0 ;
293+ /* adjust yytext/yyleng to describe whole string token */
294+ yyleng += (yytext - start_charpos);
280295 yytext = start_charpos;
281- BEGIN INITIAL;
296+ BEGIN ( INITIAL) ;
282297return T_STRING;
283298}
284299<IN_STRING >[^ ' \\ ]+ { }
@@ -289,6 +304,43 @@ dump{ return O_DUMP;}
289304errmsg (" unterminated string" )));
290305}
291306
307+ {dolqdelim }{/* opening $tag$ delimiter: start collecting a dollar-quoted literal */
308+ start_lineno =plpgsql_scanner_lineno ();
309+ start_charpos = yytext;
310+ dolqstart =pstrdup (yytext);/* saved so that candidate closing delimiters can be compared against it */
311+ BEGIN (IN_DOLLARQUOTE);
312+ }
313+ <IN_DOLLARQUOTE >{dolqdelim } {
314+ if (strcmp (yytext, dolqstart) ==0 )
315+ {
316+ pfree (dolqstart);
317+ /* a nonzero dolqlen tells plpgsql_get_string_value this is a dollar quote; it is also the delimiter length stripped from each end */
318+ dolqlen = yyleng;
319+ /* adjust yytext/yyleng to describe the whole string token, from the opening delimiter through this closing one */
320+ yyleng += (yytext - start_charpos);
321+ yytext = start_charpos;
322+ BEGIN (INITIAL);
323+ return T_STRING;
324+ }
325+ else
326+ {
327+ /*
328+ * When we fail to match $...$ to dolqstart, transfer
329+ * the $... part to the output, but put back the final
330+ * $ for rescanning, since it may begin the real closing
331+ * delimiter. Consider $delim$...$junk$delim$
332+ */
333+ yyless (yyleng-1 );
334+ }
335+ }
336+ <IN_DOLLARQUOTE >{dolqinside } { }
337+ <IN_DOLLARQUOTE >. {/* dolqinside cannot match '$', so a $ inside the quoted text that is not part of a delimiter is consumed here */ }
338+ <IN_DOLLARQUOTE ><<EOF>> {
339+ plpgsql_error_lineno = start_lineno;/* report the line on which the literal was opened */
340+ ereport (ERROR,
341+ (errcode (ERRCODE_DATATYPE_MISMATCH),
342+ errmsg (" unterminated dollar-quoted string" )));
343+ }
343+
292344/* ----------
293345 * Any unmatched character is returned as is
294346 * ----------
@@ -429,7 +481,6 @@ plpgsql_scanner_init(const char *str, int functype)
429481BEGIN (INITIAL);
430482}
431483
432-
433484/*
434485 * Called after parsing is done to clean up after plpgsql_scanner_init()
435486 */
@@ -439,3 +490,54 @@ plpgsql_scanner_finish(void)
439490yy_delete_buffer (scanbufhandle);
440491pfree (scanbuf);
441492}
493+
494+ /*
495+ * Called after a T_STRING token is read to get the string literal's value
496+ * as a malloc'd string. (We make this a separate call because in many
497+ * scenarios there's no need to get the decoded value.)
498+ *
499+ * Note: we expect the literal to be the most recently lexed token. This
500+ * would not work well if we supported multiple-token pushback or if
501+ * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
502+ */
503+ char *
504+ plpgsql_get_string_value (void )
505+ {
506+ char *result;
507+ const char *cp;
508+ int len;
509+
510+ if (dolqlen >0 )
511+ {
512+ /* Token is a $foo$...$foo$ string */
513+ len = yyleng -2 * dolqlen;
514+ Assert (len >=0 );
515+ result = (char *)malloc (len +1 );
516+ memcpy (result, yytext + dolqlen, len);
517+ result[len] =' \0 ' ;
518+ }
519+ else
520+ {
521+ /* Token is a '...' string */
522+ result = (char *)malloc (yyleng +1 );/* more than enough room */
523+ len =0 ;
524+ for (cp = yytext; *cp; cp++)
525+ {
526+ if (*cp ==' \' ' )
527+ {
528+ if (cp[1 ] ==' \' ' )
529+ result[len++] = *cp++;
530+ /* else it must be string start or end quote */
531+ }
532+ else if (*cp ==' \\ ' )
533+ {
534+ if (cp[1 ] !=' \0 ' )/* just a paranoid check */
535+ result[len++] = *(++cp);
536+ }
537+ else
538+ result[len++] = *cp;
539+ }
540+ result[len] =' \0 ' ;
541+ }
542+ return result;
543+ }