9
9
*
10
10
*
11
11
* IDENTIFICATION
12
- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.92 2002/04/20 21:56:14 petere Exp $
12
+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.93 2002/05/01 17:12:07 tgl Exp $
13
13
*
14
14
*-------------------------------------------------------------------------
15
15
*/
@@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng);
56
56
static void addlitchar (unsigned char ychar);
57
57
static char *litbufdup (void );
58
58
59
+ /*
60
+ * When we parse a token that requires multiple lexer rules to process,
61
+ * we set token_start to point at the true start of the token, for use
62
+ * by yyerror(). yytext will point at just the text consumed by the last
63
+ * rule, so it's not very helpful (eg, it might contain just the last
64
+ * quote mark of a quoted identifier). But to avoid cluttering every rule
65
+ * with setting token_start, we allow token_start = NULL to denote that
66
+ * it's okay to use yytext.
67
+ */
68
+ static char *token_start;
69
+
59
70
/* Handles to the buffer that the lexer uses internally*/
60
71
static YY_BUFFER_STATE scanbufhandle;
61
72
static char *scanbuf;
@@ -208,7 +219,7 @@ non_newline[^\n\r]
208
219
209
220
comment (" --" {non_newline }* )
210
221
211
- whitespace ({space }| {comment })
222
+ whitespace ({space }+ | {comment })
212
223
213
224
/*
214
225
* SQL92 requires at least one newline in the whitespace separating
@@ -235,9 +246,16 @@ other.
235
246
*/
236
247
237
248
%%
249
+
250
+ %{
251
+ /* code to execute during start of each call of yylex()*/
252
+ token_start =NULL ;
253
+ %}
254
+
238
255
{whitespace }{/* ignore */ }
239
256
240
257
{xcstart }{
258
+ token_start = yytext;
241
259
xcdepth =0 ;
242
260
BEGIN (xc);
243
261
/* Put back any characters past slash-star; see above */
@@ -252,7 +270,11 @@ other.
252
270
253
271
<xc >{xcstop }{
254
272
if (xcdepth <=0 )
273
+ {
255
274
BEGIN (INITIAL);
275
+ /* reset token_start for next token */
276
+ token_start =NULL ;
277
+ }
256
278
else
257
279
xcdepth--;
258
280
}
@@ -261,18 +283,18 @@ other.
261
283
262
284
<xc >{op_chars }{/* ignore */ }
263
285
264
- <xc ><<EOF>> {elog (ERROR, " Unterminated /* comment" ); }
286
+ <xc ><<EOF>> {yyerror ( " unterminated /* comment" ); }
265
287
266
288
{xbitstart }{
289
+ token_start = yytext;
267
290
BEGIN (xbit);
268
291
startlit ();
269
292
addlitchar (' b' );
270
293
}
271
294
<xbit >{xbitstop }{
272
295
BEGIN (INITIAL);
273
296
if (literalbuf[strspn (literalbuf +1 ," 01" ) +1 ] !=' \0 ' )
274
- elog (ERROR," invalid bit string input: '%s'" ,
275
- literalbuf);
297
+ yyerror (" invalid bit string input" );
276
298
yylval.str =litbufdup ();
277
299
return BITCONST;
278
300
}
@@ -284,9 +306,10 @@ other.
284
306
<xbit >{xbitcat }{
285
307
/* ignore */
286
308
}
287
- <xbit ><<EOF>> {elog (ERROR, " unterminated bit string literal" ); }
309
+ <xbit ><<EOF>> {yyerror ( " unterminated bit string literal" ); }
288
310
289
311
{xhstart }{
312
+ token_start = yytext;
290
313
BEGIN (xh);
291
314
startlit ();
292
315
}
@@ -303,14 +326,14 @@ other.
303
326
|| val != (long ) ((int32) val)
304
327
#endif
305
328
)
306
- elog (ERROR," Bad hexadecimal integer input '%s'" ,
307
- literalbuf);
329
+ yyerror (" bad hexadecimal integer input" );
308
330
yylval.ival = val;
309
331
return ICONST;
310
332
}
311
- <xh ><<EOF>> {elog (ERROR, " Unterminated hexadecimal integer" ); }
333
+ <xh ><<EOF>> {yyerror ( " unterminated hexadecimal integer" ); }
312
334
313
335
{xqstart }{
336
+ token_start = yytext;
314
337
BEGIN (xq);
315
338
startlit ();
316
339
}
@@ -335,30 +358,31 @@ other.
335
358
<xq >{xqcat }{
336
359
/* ignore */
337
360
}
338
- <xq ><<EOF>> {elog (ERROR, " Unterminated quoted string" ); }
361
+ <xq ><<EOF>> {yyerror ( " unterminated quoted string" ); }
339
362
340
363
341
364
{xdstart }{
365
+ token_start = yytext;
342
366
BEGIN (xd);
343
367
startlit ();
344
368
}
345
369
<xd >{xdstop }{
346
370
BEGIN (INITIAL);
347
- if (strlen (literalbuf) ==0 )
348
- elog (ERROR, " zero-length delimited identifier" );
349
- if (strlen (literalbuf) >= NAMEDATALEN)
371
+ if (literallen ==0 )
372
+ yyerror ( " zero-length delimited identifier" );
373
+ if (literallen >= NAMEDATALEN)
350
374
{
351
- #ifdef MULTIBYTE
352
375
int len;
353
- len =pg_mbcliplen (literalbuf,strlen (literalbuf),NAMEDATALEN-1 );
354
- elog (WARNING," identifier\" %s\" will be truncated to\" %.*s\" " ,
355
- literalbuf, len, literalbuf);
356
- literalbuf[len] =' \0 ' ;
376
+ #ifdef MULTIBYTE
377
+ len =pg_mbcliplen (literalbuf, literallen,
378
+ NAMEDATALEN-1 );
357
379
#else
358
- elog (WARNING," identifier\" %s\" will be truncated to\" %.*s\" " ,
359
- literalbuf, NAMEDATALEN-1 , literalbuf);
360
- literalbuf[NAMEDATALEN-1 ] =' \0 ' ;
380
+ len = NAMEDATALEN-1 ;
361
381
#endif
382
+ elog (NOTICE," identifier\" %s\" will be truncated to\" %.*s\" " ,
383
+ literalbuf, len, literalbuf);
384
+ literalbuf[len] =' \0 ' ;
385
+ literallen = len;
362
386
}
363
387
yylval.str =litbufdup ();
364
388
return IDENT;
@@ -369,7 +393,7 @@ other.
369
393
<xd >{xdinside }{
370
394
addlit (yytext, yyleng);
371
395
}
372
- <xd ><<EOF>> {elog (ERROR, " Unterminated quoted identifier" ); }
396
+ <xd ><<EOF>> {yyerror ( " unterminated quoted identifier" ); }
373
397
374
398
{typecast }{return TYPECAST; }
375
399
@@ -383,8 +407,8 @@ other.
383
407
* character will match a prior rule, not this one.
384
408
*/
385
409
int nchars = yyleng;
386
- char *slashstar =strstr (( char *) yytext," /*" );
387
- char *dashdash =strstr (( char *) yytext," --" );
410
+ char *slashstar =strstr (yytext," /*" );
411
+ char *dashdash =strstr (yytext," --" );
388
412
389
413
if (slashstar && dashdash)
390
414
{
@@ -395,7 +419,7 @@ other.
395
419
else if (!slashstar)
396
420
slashstar = dashdash;
397
421
if (slashstar)
398
- nchars = slashstar -(( char *) yytext) ;
422
+ nchars = slashstar - yytext;
399
423
400
424
/*
401
425
* For SQL92 compatibility, '+' and '-' cannot be the
@@ -437,15 +461,15 @@ other.
437
461
}
438
462
439
463
/* Convert "!=" operator to "<>" for compatibility */
440
- if (strcmp (( char *) yytext," !=" ) ==0 )
464
+ if (strcmp (yytext," !=" ) ==0 )
441
465
yylval.str =pstrdup (" <>" );
442
466
else
443
- yylval.str =pstrdup (( char *) yytext);
467
+ yylval.str =pstrdup (yytext);
444
468
return Op;
445
469
}
446
470
447
471
{param }{
448
- yylval.ival =atol (( char *)& yytext[ 1 ] );
472
+ yylval.ival =atol (yytext + 1 );
449
473
return PARAM;
450
474
}
451
475
@@ -454,7 +478,7 @@ other.
454
478
char * endptr;
455
479
456
480
errno =0 ;
457
- val =strtol (( char *) yytext, &endptr,10 );
481
+ val =strtol (yytext, &endptr,10 );
458
482
if (*endptr !=' \0 ' || errno == ERANGE
459
483
#ifdef HAVE_LONG_INT_64
460
484
/* if long > 32 bits, check for overflow of int4 */
@@ -463,28 +487,29 @@ other.
463
487
)
464
488
{
465
489
/* integer too large, treat it as a float */
466
- yylval.str =pstrdup (( char *) yytext);
490
+ yylval.str =pstrdup (yytext);
467
491
return FCONST;
468
492
}
469
493
yylval.ival = val;
470
494
return ICONST;
471
495
}
472
496
{decimal }{
473
- yylval.str =pstrdup (( char *) yytext);
497
+ yylval.str =pstrdup (yytext);
474
498
return FCONST;
475
499
}
476
500
{real }{
477
- yylval.str =pstrdup (( char *) yytext);
501
+ yylval.str =pstrdup (yytext);
478
502
return FCONST;
479
503
}
480
504
481
505
482
506
{identifier }{
483
507
ScanKeyword *keyword;
508
+ char *ident;
484
509
int i;
485
510
486
511
/* Is it a keyword? */
487
- keyword =ScanKeywordLookup (( char *) yytext);
512
+ keyword =ScanKeywordLookup (yytext);
488
513
if (keyword !=NULL )
489
514
return keyword->value ;
490
515
@@ -496,26 +521,25 @@ other.
496
521
* which seems appropriate under SQL99 rules, whereas
497
522
* the keyword comparison was NOT locale-dependent.
498
523
*/
499
- for (i =0 ; yytext[i]; i++)
524
+ ident =pstrdup (yytext);
525
+ for (i =0 ; ident[i]; i++)
500
526
{
501
- if (isupper ((unsigned char )yytext [i]))
502
- yytext [i] =tolower ((unsigned char )yytext [i]);
527
+ if (isupper ((unsigned char )ident [i]))
528
+ ident [i] =tolower ((unsigned char )ident [i]);
503
529
}
504
530
if (i >= NAMEDATALEN)
505
531
{
506
- #ifdef MULTIBYTE
507
532
int len;
508
- len =pg_mbcliplen (yytext,i,NAMEDATALEN-1 );
509
- elog (WARNING," identifier\" %s\" will be truncated to\" %.*s\" " ,
510
- yytext, len, yytext);
511
- yytext[len] =' \0 ' ;
533
+ #ifdef MULTIBYTE
534
+ len =pg_mbcliplen (ident, i, NAMEDATALEN-1 );
512
535
#else
513
- elog (WARNING," identifier\" %s\" will be truncated to\" %.*s\" " ,
514
- yytext, NAMEDATALEN-1 , yytext);
515
- yytext[NAMEDATALEN-1 ] =' \0 ' ;
536
+ len = NAMEDATALEN-1 ;
516
537
#endif
538
+ elog (NOTICE," identifier\" %s\" will be truncated to\" %.*s\" " ,
539
+ ident, len, ident);
540
+ ident[len] =' \0 ' ;
517
541
}
518
- yylval.str =pstrdup (( char *) yytext) ;
542
+ yylval.str =ident ;
519
543
return IDENT;
520
544
}
521
545
@@ -526,7 +550,8 @@ other.
526
550
void
527
551
yyerror (const char *message)
528
552
{
529
- elog (ERROR," parser: %s at or near\" %s\" " , message, yytext);
553
+ elog (ERROR," parser: %s at or near\" %s\" " , message,
554
+ token_start ? token_start : yytext);
530
555
}
531
556
532
557