Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit be92ad4

Browse files
committed
Change case-folding of keywords to conform to SQL99 and fix misbehavior
in Turkish locale. Keywords are now checked under pure ASCII case-folding rules ('A'-'Z'->'a'-'z' and nothing else). However, once a word is determined not to be a keyword, it will be case-folded under the current locale, same as before. See pghackers discussion 20-Feb-01.
1 parent 496373e commit be92ad4

File tree

6 files changed

+230
-107
lines changed

6 files changed

+230
-107
lines changed

‎src/backend/parser/keywords.c

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,22 @@
11
/*-------------------------------------------------------------------------
22
*
33
* keywords.c
4-
* lexical token lookup for reserved words in postgres SQL
4+
* lexical token lookup for reserved words in PostgreSQL
55
*
66
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
15-
#include<ctype.h>
16-
1715
#include"postgres.h"
1816

17+
#include<ctype.h>
18+
1919
#include"nodes/parsenodes.h"
20-
#include"nodes/pg_list.h"
2120
#include"parser/keywords.h"
2221
#include"parser/parse.h"
2322

@@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = {
286285
{"zone",ZONE},
287286
};
288287

288+
/*
289+
* ScanKeywordLookup - see if a given word is a keyword
290+
*
291+
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
292+
*
293+
* The match is done case-insensitively. Note that we deliberately use a
294+
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
295+
* even if we are in a locale where tolower() would produce more or different
296+
* translations. This is to conform to the SQL99 spec, which says that
297+
* keywords are to be matched in this way even though non-keyword identifiers
298+
* receive a different case-normalization mapping.
299+
*/
289300
ScanKeyword*
290301
ScanKeywordLookup(char*text)
291302
{
292-
ScanKeyword*low=&ScanKeywords[0];
293-
ScanKeyword*high=endof(ScanKeywords)-1;
294-
ScanKeyword*middle;
295-
intdifference;
303+
intlen,
304+
i;
305+
charword[NAMEDATALEN];
306+
ScanKeyword*low;
307+
ScanKeyword*high;
308+
309+
len=strlen(text);
310+
/* We assume all keywords are shorter than NAMEDATALEN. */
311+
if (len >=NAMEDATALEN)
312+
returnNULL;
313+
314+
/*
315+
* Apply an ASCII-only downcasing. We must not use tolower() since
316+
* it may produce the wrong translation in some locales (eg, Turkish),
317+
* and we don't trust isupper() very much either. In an ASCII-based
318+
* encoding the tests against A and Z are sufficient, but we also check
319+
* isupper() so that we will work correctly under EBCDIC. The actual
320+
* case conversion step should work for either ASCII or EBCDIC.
321+
*/
322+
for (i=0;i<len;i++)
323+
{
324+
charch=text[i];
296325

326+
if (ch >='A'&&ch <='Z'&&isupper((unsignedchar)ch))
327+
ch+='a'-'A';
328+
word[i]=ch;
329+
}
330+
word[len]='\0';
331+
332+
/*
333+
* Now do a binary search using plain strcmp() comparison.
334+
*/
335+
low=&ScanKeywords[0];
336+
high=endof(ScanKeywords)-1;
297337
while (low <=high)
298338
{
339+
ScanKeyword*middle;
340+
intdifference;
341+
299342
middle=low+ (high-low) /2;
300-
difference=strcmp(middle->name,text);
343+
difference=strcmp(middle->name,word);
301344
if (difference==0)
302345
returnmiddle;
303346
elseif (difference<0)

‎src/backend/parser/scan.l

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
/*-------------------------------------------------------------------------
33
*
44
* scan.l
5-
* lexical scanner for POSTGRES
5+
* lexical scanner for PostgreSQL
66
*
77
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
*
1111
* IDENTIFICATION
12-
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.86 2001/02/03 20:13:05 petere Exp $
12+
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.87 2001/02/21 18:53:47 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -477,12 +477,27 @@ other.
477477

478478

479479
{identifier}{
480-
int i;
481-
ScanKeyword*keyword;
480+
ScanKeyword *keyword;
481+
inti;
482482

483-
for(i =0; yytext[i]; i++)
483+
/* Is it a keyword? */
484+
keyword =ScanKeywordLookup((char*) yytext);
485+
if (keyword !=NULL)
486+
return keyword->value;
487+
488+
/*
489+
* No. Convert the identifier to lower case, and truncate
490+
* if necessary.
491+
*
492+
* Note: here we use a locale-dependent case conversion,
493+
* which seems appropriate under SQL99 rules, whereas
494+
* the keyword comparison was NOT locale-dependent.
495+
*/
496+
for (i =0; yytext[i]; i++)
497+
{
484498
if (isupper((unsignedchar) yytext[i]))
485499
yytext[i] =tolower((unsignedchar) yytext[i]);
500+
}
486501
if (i >= NAMEDATALEN)
487502
{
488503
#ifdef MULTIBYTE
@@ -497,15 +512,8 @@ other.
497512
yytext[NAMEDATALEN-1] ='\0';
498513
#endif
499514
}
500-
keyword =ScanKeywordLookup((char*)yytext);
501-
if (keyword !=NULL) {
502-
return keyword->value;
503-
}
504-
else
505-
{
506-
yylval.str =pstrdup((char*)yytext);
507-
return IDENT;
508-
}
515+
yylval.str =pstrdup((char*) yytext);
516+
return IDENT;
509517
}
510518

511519
{other}{return yytext[0]; }

‎src/backend/utils/adt/ruleutils.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*back to source text
44
*
55
* IDENTIFICATION
6-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.72 2001/02/14 21:35:05 tgl Exp $
6+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.73 2001/02/21 18:53:47 tgl Exp $
77
*
88
* This software is copyrighted by Jan Wieck - Hamburg.
99
*
@@ -2563,8 +2563,8 @@ quote_identifier(char *ident)
25632563
* but the parser doesn't provide any easy way to test for whether
25642564
* an identifier is safe or not... so be safe not sorry.
25652565
*
2566-
* Note: ScanKeywordLookup() expects an all-lower-case input, but
2567-
* we've already checked we have that.
2566+
* Note: ScanKeywordLookup() does case-insensitive comparison,
2567+
* but that's fine, since we already know we have all-lower-case.
25682568
*/
25692569
if (ScanKeywordLookup(ident)!=NULL)
25702570
safe= false;

‎src/interfaces/ecpg/preproc/ecpg_keywords.c

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
/*-------------------------------------------------------------------------
22
*
3-
*keywords.c
3+
*ecpg_keywords.c
44
* lexical token lookup for reserved words in postgres embedded SQL
55
*
6+
* IDENTIFICATION
7+
* $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.22 2001/02/21 18:53:47 tgl Exp $
8+
*
69
*-------------------------------------------------------------------------
710
*/
811
#include"postgres_fe.h"
@@ -12,6 +15,7 @@
1215
#include"extern.h"
1316
#include"preproc.h"
1417

18+
1519
/*
1620
* List of (keyword-name, keyword-token-value) pairs.
1721
*
@@ -73,18 +77,62 @@ static ScanKeyword ScanKeywords[] = {
7377
{"whenever",SQL_WHENEVER},
7478
};
7579

80+
/*
81+
* ScanECPGKeywordLookup - see if a given word is a keyword
82+
*
83+
* Returns a pointer to the ScanKeyword table entry, or NULL if no match.
84+
*
85+
* The match is done case-insensitively. Note that we deliberately use a
86+
* dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
87+
* even if we are in a locale where tolower() would produce more or different
88+
* translations. This is to conform to the SQL99 spec, which says that
89+
* keywords are to be matched in this way even though non-keyword identifiers
90+
* receive a different case-normalization mapping.
91+
*/
7692
ScanKeyword*
7793
ScanECPGKeywordLookup(char*text)
7894
{
79-
ScanKeyword*low=&ScanKeywords[0];
80-
ScanKeyword*high=endof(ScanKeywords)-1;
81-
ScanKeyword*middle;
82-
intdifference;
95+
intlen,
96+
i;
97+
charword[NAMEDATALEN];
98+
ScanKeyword*low;
99+
ScanKeyword*high;
83100

101+
len=strlen(text);
102+
/* We assume all keywords are shorter than NAMEDATALEN. */
103+
if (len >=NAMEDATALEN)
104+
returnNULL;
105+
106+
/*
107+
* Apply an ASCII-only downcasing. We must not use tolower() since
108+
* it may produce the wrong translation in some locales (eg, Turkish),
109+
* and we don't trust isupper() very much either. In an ASCII-based
110+
* encoding the tests against A and Z are sufficient, but we also check
111+
* isupper() so that we will work correctly under EBCDIC. The actual
112+
* case conversion step should work for either ASCII or EBCDIC.
113+
*/
114+
for (i=0;i<len;i++)
115+
{
116+
charch=text[i];
117+
118+
if (ch >='A'&&ch <='Z'&&isupper((unsignedchar)ch))
119+
ch+='a'-'A';
120+
word[i]=ch;
121+
}
122+
word[len]='\0';
123+
124+
/*
125+
* Now do a binary search using plain strcmp() comparison.
126+
*/
127+
low=&ScanKeywords[0];
128+
high=endof(ScanKeywords)-1;
84129
while (low <=high)
85130
{
131+
ScanKeyword*middle;
132+
intdifference;
133+
86134
middle=low+ (high-low) /2;
87-
difference=strcmp(middle->name,text);
135+
difference=strcmp(middle->name,word);
88136
if (difference==0)
89137
returnmiddle;
90138
elseif (difference<0)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp