2424 * Portions Copyright (c) 1994, Regents of the University of California
2525 *
2626 * IDENTIFICATION
27- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.150 2009/04/14 22:18:47 tgl Exp $
27+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.151 2009/04/19 21:08:54 tgl Exp $
2828 *
2929 *-------------------------------------------------------------------------
3030*/
@@ -60,7 +60,7 @@ bool escape_string_warning = true;
6060bool standard_conforming_strings =false ;
6161
6262static bool warn_on_first_escape;
63- static bool saw_high_bit =false ;
63+ static bool saw_non_ascii =false ;
6464
6565/*
6666 * literalbuf is used to accumulate literal values when multiple rules
@@ -453,7 +453,7 @@ other.
453453
454454{xqstart }{
455455warn_on_first_escape =true ;
456- saw_high_bit =false ;
456+ saw_non_ascii =false ;
457457SET_YYLLOC ();
458458if (standard_conforming_strings)
459459BEGIN (xq);
@@ -463,7 +463,7 @@ other.
463463}
464464{xestart }{
465465warn_on_first_escape =false ;
466- saw_high_bit =false ;
466+ saw_non_ascii =false ;
467467SET_YYLLOC ();
468468BEGIN (xe);
469469startlit ();
@@ -477,10 +477,11 @@ other.
477477<xq ,xe >{quotefail } {
478478yyless (1 );
479479BEGIN (INITIAL);
480- /* check that the data remains valid if it might have been
480+ /*
481+ * check that the data remains valid if it might have been
481482 * made invalid by unescaping any chars.
482483 */
483- if (saw_high_bit )
484+ if (saw_non_ascii )
484485pg_verifymbstr (literalbuf, literallen,false );
485486yylval.str =litbufdup ();
486487return SCONST;
@@ -526,16 +527,16 @@ other.
526527
527528check_escape_warning ();
528529addlitchar (c);
529- if (IS_HIGHBIT_SET (c))
530- saw_high_bit =true ;
530+ if (c == ' \0 ' || IS_HIGHBIT_SET (c))
531+ saw_non_ascii =true ;
531532}
532533<xe >{xehexesc } {
533534unsigned char c =strtoul (yytext+2 ,NULL ,16 );
534535
535536check_escape_warning ();
536537addlitchar (c);
537- if (IS_HIGHBIT_SET (c))
538- saw_high_bit =true ;
538+ if (c == ' \0 ' || IS_HIGHBIT_SET (c))
539+ saw_non_ascii =true ;
539540}
540541<xq ,xe ,xus >{quotecontinue } {
541542/* ignore */
@@ -1083,21 +1084,18 @@ litbuf_udeescape(unsigned char escape)
10831084}
10841085
10851086*out =' \0 ' ;
1087+ /*
1088+ * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1089+ * codes; but it's probably not worth the trouble, since this isn't
1090+ * likely to be a performance-critical path.
1091+ */
10861092pg_verifymbstr (new , out -new ,false );
10871093return new ;
10881094}
10891095
10901096static unsigned char
10911097unescape_single_char (unsigned char c)
10921098{
1093- /* Normally we wouldn't expect to see \n where n has its high bit set
1094- * but we set the flag to check the string if we do get it, so
1095- * that this doesn't become a way of getting around the coding validity
1096- * checks.
1097- */
1098- if (IS_HIGHBIT_SET (c))
1099- saw_high_bit =true ;
1100-
11011099switch (c)
11021100{
11031101case ' b' :
@@ -1111,6 +1109,10 @@ unescape_single_char(unsigned char c)
11111109case ' t' :
11121110return ' \t ' ;
11131111default :
1112+ /* check for backslash followed by non-7-bit-ASCII */
1113+ if (c ==' \0 ' ||IS_HIGHBIT_SET (c))
1114+ saw_non_ascii =true ;
1115+
11141116return c;
11151117}
11161118}