4
4
* procedural language
5
5
*
6
6
* IDENTIFICATION
7
- * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.31 2004/02/24 22:06:32 tgl Exp $
7
+ * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.32 2004/02/25 18:10:51 tgl Exp $
8
8
*
9
9
* This software is copyrighted by Jan Wieck - Hamburg.
10
10
*
@@ -57,6 +57,8 @@ static intlookahead_token;
57
57
static bool have_lookahead_token;
58
58
static const char *cur_line_start;
59
59
static int cur_line_num;
60
+ static char *dolqstart;/* current $foo$ quote start string*/
61
+ static int dolqlen;/* signal to plpgsql_get_string_value*/
60
62
61
63
int plpgsql_SpaceScanned =0 ;
62
64
%}
@@ -70,7 +72,9 @@ intplpgsql_SpaceScanned = 0;
70
72
%option case-insensitive
71
73
72
74
73
- %x IN_STRING IN_COMMENT
75
+ %x IN_STRING
76
+ %x IN_COMMENT
77
+ %x IN_DOLLARQUOTE
74
78
75
79
digit [0 -9 ]
76
80
ident_start [A -Za -z \200 -\377 _ ]
@@ -84,6 +88,14 @@ param\${digit}+
84
88
85
89
space [\t\n\r\f ]
86
90
91
+ /* $foo$ style quotes ("dollar quoting")
92
+ * copied straight from the backend SQL parser
93
+ */
94
+ dolq_start [A -Za -z \200 -\377 _ ]
95
+ dolq_cont [A -Za -z \200 -\377 _0 -9 ]
96
+ dolqdelim \$ ({dolq_start }{dolq_cont }* )? \$
97
+ dolqinside [^ $ ]+
98
+
87
99
%%
88
100
/* ----------
89
101
* Local variables in scanner to remember where
@@ -97,7 +109,7 @@ space[ \t\n\r\f]
97
109
* Reset the state when entering the scanner
98
110
* ----------
99
111
*/
100
- BEGIN INITIAL;
112
+ BEGIN( INITIAL) ;
101
113
plpgsql_SpaceScanned =0 ;
102
114
103
115
/* ----------
@@ -247,9 +259,9 @@ dump{ return O_DUMP;}
247
259
--[^ \r\n ]* ;
248
260
249
261
\/\* { start_lineno =plpgsql_scanner_lineno ();
250
- BEGIN IN_COMMENT;
262
+ BEGIN ( IN_COMMENT) ;
251
263
}
252
- <IN_COMMENT >\*\/ { BEGIN INITIAL; plpgsql_SpaceScanned =1 ; }
264
+ <IN_COMMENT >\*\/ {BEGIN ( INITIAL) ; plpgsql_SpaceScanned =1 ; }
253
265
<IN_COMMENT >\n ;
254
266
<IN_COMMENT >. ;
255
267
<IN_COMMENT ><<EOF>> {
@@ -260,7 +272,7 @@ dump{ return O_DUMP;}
260
272
}
261
273
262
274
/* ----------
263
- * Collect anything inside of ''s and return one STRING
275
+ * Collect anything inside of ''s and return one STRING token
264
276
*
265
277
* Hacking yytext/yyleng here lets us avoid using yymore(), which is
266
278
* a win for performance. It's safe because we know the underlying
@@ -270,15 +282,18 @@ dump{ return O_DUMP;}
270
282
'{
271
283
start_lineno =plpgsql_scanner_lineno ();
272
284
start_charpos = yytext;
273
- BEGIN IN_STRING;
285
+ BEGIN ( IN_STRING) ;
274
286
}
275
287
<IN_STRING >\\ . { }
276
288
<IN_STRING >\\ {/* can only happen with \ at EOF */ }
277
289
<IN_STRING >''{ }
278
290
<IN_STRING >'{
279
- yyleng -= (yytext - start_charpos);
291
+ /* tell plpgsql_get_string_value it's not a dollar quote */
292
+ dolqlen =0 ;
293
+ /* adjust yytext/yyleng to describe whole string token */
294
+ yyleng += (yytext - start_charpos);
280
295
yytext = start_charpos;
281
- BEGIN INITIAL;
296
+ BEGIN ( INITIAL) ;
282
297
return T_STRING;
283
298
}
284
299
<IN_STRING >[^ ' \\ ]+ { }
@@ -289,6 +304,43 @@ dump{ return O_DUMP;}
289
304
errmsg (" unterminated string" )));
290
305
}
291
306
307
+ {dolqdelim }{
308
+ start_lineno =plpgsql_scanner_lineno ();
309
+ start_charpos = yytext;
310
+ dolqstart =pstrdup (yytext);
311
+ BEGIN (IN_DOLLARQUOTE);
312
+ }
313
+ <IN_DOLLARQUOTE >{dolqdelim } {
314
+ if (strcmp (yytext, dolqstart) ==0 )
315
+ {
316
+ pfree (dolqstart);
317
+ /* tell plpgsql_get_string_value it is a dollar quote */
318
+ dolqlen = yyleng;
319
+ /* adjust yytext/yyleng to describe whole string token */
320
+ yyleng += (yytext - start_charpos);
321
+ yytext = start_charpos;
322
+ BEGIN (INITIAL);
323
+ return T_STRING;
324
+ }
325
+ else
326
+ {
327
+ /*
328
+ * When we fail to match $...$ to dolqstart, transfer
329
+ * the $... part to the output, but put back the final
330
+ * $ for rescanning. Consider $delim$...$junk$delim$
331
+ */
332
+ yyless (yyleng-1 );
333
+ }
334
+ }
335
+ <IN_DOLLARQUOTE >{dolqinside } { }
336
+ <IN_DOLLARQUOTE >. {/* needed for $ inside the quoted text */ }
337
+ <IN_DOLLARQUOTE ><<EOF>> {
338
+ plpgsql_error_lineno = start_lineno;
339
+ ereport (ERROR,
340
+ (errcode (ERRCODE_DATATYPE_MISMATCH),
341
+ errmsg (" unterminated dollar-quoted string" )));
342
+ }
343
+
292
344
/* ----------
293
345
* Any unmatched character is returned as is
294
346
* ----------
@@ -429,7 +481,6 @@ plpgsql_scanner_init(const char *str, int functype)
429
481
BEGIN (INITIAL);
430
482
}
431
483
432
-
433
484
/*
434
485
* Called after parsing is done to clean up after plpgsql_scanner_init()
435
486
*/
@@ -439,3 +490,54 @@ plpgsql_scanner_finish(void)
439
490
yy_delete_buffer (scanbufhandle);
440
491
pfree (scanbuf);
441
492
}
493
+
494
+ /*
495
+ * Called after a T_STRING token is read to get the string literal's value
496
+ * as a malloc'd string. (We make this a separate call because in many
497
+ * scenarios there's no need to get the decoded value.)
498
+ *
499
+ * Note: we expect the literal to be the most recently lexed token. This
500
+ * would not work well if we supported multiple-token pushback or if
501
+ * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
502
+ */
503
+ char *
504
+ plpgsql_get_string_value (void )
505
+ {
506
+ char *result;
507
+ const char *cp;
508
+ int len;
509
+
510
+ if (dolqlen >0 )
511
+ {
512
+ /* Token is a $foo$...$foo$ string */
513
+ len = yyleng -2 * dolqlen;
514
+ Assert (len >=0 );
515
+ result = (char *)malloc (len +1 );
516
+ memcpy (result, yytext + dolqlen, len);
517
+ result[len] =' \0 ' ;
518
+ }
519
+ else
520
+ {
521
+ /* Token is a '...' string */
522
+ result = (char *)malloc (yyleng +1 );/* more than enough room */
523
+ len =0 ;
524
+ for (cp = yytext; *cp; cp++)
525
+ {
526
+ if (*cp ==' \' ' )
527
+ {
528
+ if (cp[1 ] ==' \' ' )
529
+ result[len++] = *cp++;
530
+ /* else it must be string start or end quote */
531
+ }
532
+ else if (*cp ==' \\ ' )
533
+ {
534
+ if (cp[1 ] !=' \0 ' )/* just a paranoid check */
535
+ result[len++] = *(++cp);
536
+ }
537
+ else
538
+ result[len++] = *cp;
539
+ }
540
+ result[len] =' \0 ' ;
541
+ }
542
+ return result;
543
+ }