NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commitbe92ad4

committed

Change case-folding of keywords to conform to SQL99 and fix misbehavior

in Turkish locale. Keywords are now checked under pure ASCII case-folding rules ('A'-'Z'->'a'-'z' and nothing else). However, once a word is determined not to be a keyword, it will be case-folded under the current locale, same as before. See pghackers discussion 20-Feb-01.

1 parent496373e commitbe92ad4Copy full SHA for be92ad4

File tree

6 files changed

+230

-107

lines changed

src
- backend
  - parser
    - keywords.c
    - scan.l
  - utils/adt
    - ruleutils.c
- interfaces/ecpg/preproc

6 files changed

+230

-107

lines changed

`‎src/backend/parser/keywords.c`

Lines changed: 53 additions & 10 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,23 +1,22 @@`
`1`	`1`	`/*-------------------------------------------------------------------------`
`2`	`2`	`*`
`3`	`3`	`* keywords.c`
`4`		`- * lexical token lookup for reserved words in postgres SQL`
	`4`	`+ * lexical token lookup for reserved words in PostgreSQL`
`5`	`5`	`*`
`6`	`6`	`* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group`
`7`	`7`	`* Portions Copyright (c) 1994, Regents of the University of California`
`8`	`8`	`*`
`9`	`9`	`*`
`10`	`10`	`* IDENTIFICATION`
`11`		`- * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $`
	`11`	`+ * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $`
`12`	`12`	`*`
`13`	`13`	`*-------------------------------------------------------------------------`
`14`	`14`	`*/`
`15`		`-#include<ctype.h>`
`16`		`-`
`17`	`15`	`#include"postgres.h"`
`18`	`16`
	`17`	`+#include<ctype.h>`
	`18`	`+`
`19`	`19`	`#include"nodes/parsenodes.h"`
`20`		`-#include"nodes/pg_list.h"`
`21`	`20`	`#include"parser/keywords.h"`
`22`	`21`	`#include"parser/parse.h"`
`23`	`22`
`@@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = {`
`286`	`285`	`{"zone",ZONE},`
`287`	`286`	`};`
`288`	`287`
	`288`	`+/*`
	`289`	`+ * ScanKeywordLookup - see if a given word is a keyword`
	`290`	`+ *`
	`291`	`+ * Returns a pointer to the ScanKeyword table entry, or NULL if no match.`
	`292`	`+ *`
	`293`	`+ * The match is done case-insensitively. Note that we deliberately use a`
	`294`	`+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',`
	`295`	`+ * even if we are in a locale where tolower() would produce more or different`
	`296`	`+ * translations. This is to conform to the SQL99 spec, which says that`
	`297`	`+ * keywords are to be matched in this way even though non-keyword identifiers`
	`298`	`+ * receive a different case-normalization mapping.`
	`299`	`+ */`
`289`	`300`	`ScanKeyword*`
`290`	`301`	`ScanKeywordLookup(char*text)`
`291`	`302`	`{`
`292`		`-ScanKeyword*low=&ScanKeywords[0];`
`293`		`-ScanKeyword*high=endof(ScanKeywords)-1;`
`294`		`-ScanKeyword*middle;`
`295`		`-intdifference;`
	`303`	`+intlen,`
	`304`	`+i;`
	`305`	`+charword[NAMEDATALEN];`
	`306`	`+ScanKeyword*low;`
	`307`	`+ScanKeyword*high;`
	`308`	`+`
	`309`	`+len=strlen(text);`
	`310`	`+/* We assume all keywords are shorter than NAMEDATALEN. */`
	`311`	`+if (len >=NAMEDATALEN)`
	`312`	`+returnNULL;`
	`313`	`+`
	`314`	`+/*`
	`315`	`+ * Apply an ASCII-only downcasing. We must not use tolower() since`
	`316`	`+ * it may produce the wrong translation in some locales (eg, Turkish),`
	`317`	`+ * and we don't trust isupper() very much either. In an ASCII-based`
	`318`	`+ * encoding the tests against A and Z are sufficient, but we also check`
	`319`	`+ * isupper() so that we will work correctly under EBCDIC. The actual`
	`320`	`+ * case conversion step should work for either ASCII or EBCDIC.`
	`321`	`+ */`
	`322`	`+for (i=0;i<len;i++)`
	`323`	`+{`
	`324`	`+charch=text[i];`
`296`	`325`
	`326`	`+if (ch >='A'&&ch <='Z'&&isupper((unsignedchar)ch))`
	`327`	`+ch+='a'-'A';`
	`328`	`+word[i]=ch;`
	`329`	`+}`
	`330`	`+word[len]='\0';`
	`331`	`+`
	`332`	`+/*`
	`333`	`+ * Now do a binary search using plain strcmp() comparison.`
	`334`	`+ */`
	`335`	`+low=&ScanKeywords[0];`
	`336`	`+high=endof(ScanKeywords)-1;`
`297`	`337`	`while (low <=high)`
`298`	`338`	`{`
	`339`	`+ScanKeyword*middle;`
	`340`	`+intdifference;`
	`341`	`+`
`299`	`342`	`middle=low+ (high-low) /2;`
`300`		`-difference=strcmp(middle->name,text);`
	`343`	`+difference=strcmp(middle->name,word);`
`301`	`344`	`if (difference==0)`
`302`	`345`	`returnmiddle;`
`303`	`346`	`elseif (difference<0)`

`‎src/backend/parser/scan.l`

Lines changed: 22 additions & 14 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,14 +2,14 @@`
`2`	`2`	`/*-------------------------------------------------------------------------`
`3`	`3`	`*`
`4`	`4`	`* scan.l`
`5`		`- * lexical scanner for POSTGRES`
	`5`	`+ * lexical scanner for PostgreSQL`
`6`	`6`	`*`
`7`	`7`	`* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group`
`8`	`8`	`* Portions Copyright (c) 1994, Regents of the University of California`
`9`	`9`	`*`
`10`	`10`	`*`
`11`	`11`	`* IDENTIFICATION`
`12`		`- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.86 2001/02/03 20:13:05 petere Exp $`
	`12`	`+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.87 2001/02/21 18:53:47 tgl Exp $`
`13`	`13`	`*`
`14`	`14`	`*-------------------------------------------------------------------------`
`15`	`15`	`*/`
`@@ -477,12 +477,27 @@ other.`
`477`	`477`
`478`	`478`
`479`	`479`	`{identifier}{`
`480`		`-int i;`
`481`		`-ScanKeyword*keyword;`
	`480`	`+ScanKeyword *keyword;`
	`481`	`+inti;`
`482`	`482`
`483`		`-for(i =0; yytext[i]; i++)`
	`483`	`+/* Is it a keyword? */`
	`484`	`+keyword =ScanKeywordLookup((char*) yytext);`
	`485`	`+if (keyword !=NULL)`
	`486`	`+return keyword->value;`
	`487`	`+`
	`488`	`+/*`
	`489`	`+ * No. Convert the identifier to lower case, and truncate`
	`490`	`+ * if necessary.`
	`491`	`+ *`
	`492`	`+ * Note: here we use a locale-dependent case conversion,`
	`493`	`+ * which seems appropriate under SQL99 rules, whereas`
	`494`	`+ * the keyword comparison was NOT locale-dependent.`
	`495`	`+ */`
	`496`	`+for (i =0; yytext[i]; i++)`
	`497`	`+{`
`484`	`498`	`if (isupper((unsignedchar) yytext[i]))`
`485`	`499`	`yytext[i] =tolower((unsignedchar) yytext[i]);`
	`500`	`+}`
`486`	`501`	`if (i >= NAMEDATALEN)`
`487`	`502`	`{`
`488`	`503`	`#ifdef MULTIBYTE`
`@@ -497,15 +512,8 @@ other.`
`497`	`512`	`yytext[NAMEDATALEN-1] ='\0';`
`498`	`513`	`#endif`
`499`	`514`	`}`
`500`		`-keyword =ScanKeywordLookup((char*)yytext);`
`501`		`-if (keyword !=NULL) {`
`502`		`-return keyword->value;`
`503`		`-}`
`504`		`-else`
`505`		`-{`
`506`		`-yylval.str =pstrdup((char*)yytext);`
`507`		`-return IDENT;`
`508`		`-}`
	`515`	`+yylval.str =pstrdup((char*) yytext);`
	`516`	`+return IDENT;`
`509`	`517`	`}`
`510`	`518`
`511`	`519`	`{other}{return yytext[0]; }`

`‎src/backend/utils/adt/ruleutils.c`

Lines changed: 3 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`*back to source text`
`4`	`4`	`*`
`5`	`5`	`* IDENTIFICATION`
`6`		`- * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.72 2001/02/14 21:35:05 tgl Exp $`
	`6`	`+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.73 2001/02/21 18:53:47 tgl Exp $`
`7`	`7`	`*`
`8`	`8`	`* This software is copyrighted by Jan Wieck - Hamburg.`
`9`	`9`	`*`
`@@ -2563,8 +2563,8 @@ quote_identifier(char *ident)`
`2563`	`2563`	`* but the parser doesn't provide any easy way to test for whether`
`2564`	`2564`	`* an identifier is safe or not... so be safe not sorry.`
`2565`	`2565`	`*`
`2566`		`- * Note: ScanKeywordLookup() expects an all-lower-case input, but`
`2567`		`- * we've already checked we have that.`
	`2566`	`+ * Note: ScanKeywordLookup() does case-insensitive comparison,`
	`2567`	`+ * but that's fine, since we already know we have all-lower-case.`
`2568`	`2568`	`*/`
`2569`	`2569`	`if (ScanKeywordLookup(ident)!=NULL)`
`2570`	`2570`	`safe= false;`

`‎src/interfaces/ecpg/preproc/ecpg_keywords.c`

Lines changed: 54 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,11 @@`
`1`	`1`	`/*-------------------------------------------------------------------------`
`2`	`2`	`*`
`3`		`- *keywords.c`
	`3`	`+ *ecpg_keywords.c`
`4`	`4`	`* lexical token lookup for reserved words in postgres embedded SQL`
`5`	`5`	`*`
	`6`	`+ * IDENTIFICATION`
	`7`	`+ * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/ecpg_keywords.c,v 1.22 2001/02/21 18:53:47 tgl Exp $`
	`8`	`+ *`
`6`	`9`	`*-------------------------------------------------------------------------`
`7`	`10`	`*/`
`8`	`11`	`#include"postgres_fe.h"`
`@@ -12,6 +15,7 @@`
`12`	`15`	`#include"extern.h"`
`13`	`16`	`#include"preproc.h"`
`14`	`17`
	`18`	`+`
`15`	`19`	`/*`
`16`	`20`	`* List of (keyword-name, keyword-token-value) pairs.`
`17`	`21`	`*`
`@@ -73,18 +77,62 @@ static ScanKeyword ScanKeywords[] = {`
`73`	`77`	`{"whenever",SQL_WHENEVER},`
`74`	`78`	`};`
`75`	`79`
	`80`	`+/*`
	`81`	`+ * ScanECPGKeywordLookup - see if a given word is a keyword`
	`82`	`+ *`
	`83`	`+ * Returns a pointer to the ScanKeyword table entry, or NULL if no match.`
	`84`	`+ *`
	`85`	`+ * The match is done case-insensitively. Note that we deliberately use a`
	`86`	`+ * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',`
	`87`	`+ * even if we are in a locale where tolower() would produce more or different`
	`88`	`+ * translations. This is to conform to the SQL99 spec, which says that`
	`89`	`+ * keywords are to be matched in this way even though non-keyword identifiers`
	`90`	`+ * receive a different case-normalization mapping.`
	`91`	`+ */`
`76`	`92`	`ScanKeyword*`
`77`	`93`	`ScanECPGKeywordLookup(char*text)`
`78`	`94`	`{`
`79`		`-ScanKeyword*low=&ScanKeywords[0];`
`80`		`-ScanKeyword*high=endof(ScanKeywords)-1;`
`81`		`-ScanKeyword*middle;`
`82`		`-intdifference;`
	`95`	`+intlen,`
	`96`	`+i;`
	`97`	`+charword[NAMEDATALEN];`
	`98`	`+ScanKeyword*low;`
	`99`	`+ScanKeyword*high;`
`83`	`100`
	`101`	`+len=strlen(text);`
	`102`	`+/* We assume all keywords are shorter than NAMEDATALEN. */`
	`103`	`+if (len >=NAMEDATALEN)`
	`104`	`+returnNULL;`
	`105`	`+`
	`106`	`+/*`
	`107`	`+ * Apply an ASCII-only downcasing. We must not use tolower() since`
	`108`	`+ * it may produce the wrong translation in some locales (eg, Turkish),`
	`109`	`+ * and we don't trust isupper() very much either. In an ASCII-based`
	`110`	`+ * encoding the tests against A and Z are sufficient, but we also check`
	`111`	`+ * isupper() so that we will work correctly under EBCDIC. The actual`
	`112`	`+ * case conversion step should work for either ASCII or EBCDIC.`
	`113`	`+ */`
	`114`	`+for (i=0;i<len;i++)`
	`115`	`+{`
	`116`	`+charch=text[i];`
	`117`	`+`
	`118`	`+if (ch >='A'&&ch <='Z'&&isupper((unsignedchar)ch))`
	`119`	`+ch+='a'-'A';`
	`120`	`+word[i]=ch;`
	`121`	`+}`
	`122`	`+word[len]='\0';`
	`123`	`+`
	`124`	`+/*`
	`125`	`+ * Now do a binary search using plain strcmp() comparison.`
	`126`	`+ */`
	`127`	`+low=&ScanKeywords[0];`
	`128`	`+high=endof(ScanKeywords)-1;`
`84`	`129`	`while (low <=high)`
`85`	`130`	`{`
	`131`	`+ScanKeyword*middle;`
	`132`	`+intdifference;`
	`133`	`+`
`86`	`134`	`middle=low+ (high-low) /2;`
`87`		`-difference=strcmp(middle->name,text);`
	`135`	`+difference=strcmp(middle->name,word);`
`88`	`136`	`if (difference==0)`
`89`	`137`	`returnmiddle;`
`90`	`138`	`elseif (difference<0)`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitbe92ad4

File tree

6 files changed

6 files changed

`‎src/backend/parser/keywords.c`

`‎src/backend/parser/scan.l`

`‎src/backend/utils/adt/ruleutils.c`

`‎src/interfaces/ecpg/preproc/ecpg_keywords.c`

0 commit comments