Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit59f9a0b

Browse files
committed
Implement a solution to the 'Turkish locale downcases I incorrectly'
problem, per previous discussion. Make some additional changes to centralize the knowledge of just how identifier downcasing is done, in hopes of simplifying any future tweaking in this area.
1 parent 1d567ae, commit 59f9a0b

File tree

10 files changed

+158
-125
lines changed

10 files changed

+158
-125
lines changed

‎src/backend/commands/define.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/commands/define.c,v 1.85 2003/11/29 19:51:47 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/commands/define.c,v 1.86 2004/02/21 00:34:52 tgl Exp $
1313
*
1414
* DESCRIPTION
1515
* The "DefineFoo" routines take the parse tree and pick out the
@@ -38,24 +38,19 @@
3838
#include"catalog/namespace.h"
3939
#include"commands/defrem.h"
4040
#include"parser/parse_type.h"
41+
#include"parser/scansup.h"
4142
#include"utils/int8.h"
4243

4344

4445
/*
45-
* Translate the input language name to lower case.
46+
* Translate the input language name to lower case, and truncate if needed.
4647
*
47-
*Output buffer must be NAMEDATALEN long.
48+
*Returns a palloc'd string
4849
*/
49-
void
50-
case_translate_language_name(constchar*input,char*output)
50+
char*
51+
case_translate_language_name(constchar*input)
5152
{
52-
inti;
53-
54-
MemSet(output,0,NAMEDATALEN);/* ensure result Name is
55-
* zero-filled */
56-
57-
for (i=0;i<NAMEDATALEN-1&&input[i];++i)
58-
output[i]=tolower((unsignedchar)input[i]);
53+
returndowncase_truncate_identifier(input,strlen(input), false);
5954
}
6055

6156

‎src/backend/commands/functioncmds.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.43 2004/01/06 23:55:18 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.44 2004/02/21 00:34:52 tgl Exp $
1414
*
1515
* DESCRIPTION
1616
* These routines take the parse tree and pick out the
@@ -401,7 +401,7 @@ CreateFunction(CreateFunctionStmt *stmt)
401401
Oidprorettype;
402402
boolreturnsSet;
403403
char*language;
404-
charlanguageName[NAMEDATALEN];
404+
char*languageName;
405405
OidlanguageOid;
406406
OidlanguageValidator;
407407
char*funcname;
@@ -437,7 +437,7 @@ CreateFunction(CreateFunctionStmt *stmt)
437437
&as_clause,&language,&volatility,&isStrict,&security);
438438

439439
/* Convert language name to canonical case */
440-
case_translate_language_name(language,languageName);
440+
languageName=case_translate_language_name(language);
441441

442442
/* Look up the language and validate permissions */
443443
languageTuple=SearchSysCache(LANGNAME,

‎src/backend/commands/proclang.c

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/commands/proclang.c,v 1.52 2003/11/29 19:51:47 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/backend/commands/proclang.c,v 1.53 2004/02/21 00:34:52 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -40,11 +40,12 @@
4040
void
4141
CreateProceduralLanguage(CreatePLangStmt*stmt)
4242
{
43-
charlanguageName[NAMEDATALEN];
43+
char*languageName;
4444
OidprocOid,
4545
valProcOid;
4646
Oidfuncrettype;
4747
Oidtypev[FUNC_MAX_ARGS];
48+
NameDatalangname;
4849
charnulls[Natts_pg_language];
4950
Datumvalues[Natts_pg_language];
5051
Relationrel;
@@ -66,7 +67,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
6667
* Translate the language name and check that this language doesn't
6768
* already exist
6869
*/
69-
case_translate_language_name(stmt->plname,languageName);
70+
languageName=case_translate_language_name(stmt->plname);
7071

7172
if (SearchSysCacheExists(LANGNAME,
7273
PointerGetDatum(languageName),
@@ -124,12 +125,13 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
124125
}
125126

126127
i=0;
127-
values[i++]=PointerGetDatum(languageName);
128-
values[i++]=BoolGetDatum(true);/* lanispl */
129-
values[i++]=BoolGetDatum(stmt->pltrusted);
130-
values[i++]=ObjectIdGetDatum(procOid);
131-
values[i++]=ObjectIdGetDatum(valProcOid);
132-
nulls[i]='n';/* lanacl */
128+
namestrcpy(&langname,languageName);
129+
values[i++]=NameGetDatum(&langname);/* lanname */
130+
values[i++]=BoolGetDatum(true);/* lanispl */
131+
values[i++]=BoolGetDatum(stmt->pltrusted);/* lanpltrusted */
132+
values[i++]=ObjectIdGetDatum(procOid);/* lanplcallfoid */
133+
values[i++]=ObjectIdGetDatum(valProcOid);/* lanvalidator */
134+
nulls[i]='n';/* lanacl */
133135

134136
rel=heap_openr(LanguageRelationName,RowExclusiveLock);
135137

@@ -173,7 +175,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
173175
void
174176
DropProceduralLanguage(DropPLangStmt*stmt)
175177
{
176-
charlanguageName[NAMEDATALEN];
178+
char*languageName;
177179
HeapTuplelangTup;
178180
ObjectAddressobject;
179181

@@ -189,7 +191,7 @@ DropProceduralLanguage(DropPLangStmt *stmt)
189191
* Translate the language name, check that this language exist and is
190192
* a PL
191193
*/
192-
case_translate_language_name(stmt->plname,languageName);
194+
languageName=case_translate_language_name(stmt->plname);
193195

194196
langTup=SearchSysCache(LANGNAME,
195197
CStringGetDatum(languageName),

‎src/backend/parser/keywords.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.144 2003/11/29 19:51:51 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.145 2004/02/21 00:34:52 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -369,17 +369,13 @@ ScanKeywordLookup(const char *text)
369369

370370
/*
371371
* Apply an ASCII-only downcasing. We must not use tolower() since it
372-
* may produce the wrong translation in some locales (eg, Turkish),
373-
* and we don't trust isupper() very much either. In an ASCII-based
374-
* encoding the tests against A and Z are sufficient, but we also
375-
* check isupper() so that we will work correctly under EBCDIC. The
376-
* actual case conversion step should work for either ASCII or EBCDIC.
372+
* may produce the wrong translation in some locales (eg, Turkish).
377373
*/
378374
for (i=0;i<len;i++)
379375
{
380376
charch=text[i];
381377

382-
if (ch >='A'&&ch <='Z'&&isupper((unsignedchar)ch))
378+
if (ch >='A'&&ch <='Z')
383379
ch+='a'-'A';
384380
word[i]=ch;
385381
}

‎src/backend/parser/scan.l

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1994, Regents of the University of California
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.113 2004/02/19 19:11:30 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.114 2004/02/21 00:34:52 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -27,6 +27,7 @@
2727
#include"parser/keywords.h"
2828
/* Not needed now that this file is compiled as part of gram.y*/
2929
/* #include "parser/parse.h"*/
30+
#include"parser/scansup.h"
3031
#include"utils/builtins.h"
3132
#include"mb/pg_wchar.h"
3233

@@ -395,23 +396,15 @@ other.
395396
startlit();
396397
}
397398
<xd>{xdstop}{
399+
char *ident;
400+
398401
BEGIN(INITIAL);
399402
if (literallen ==0)
400403
yyerror("zero-length delimited identifier");
404+
ident =litbufdup();
401405
if (literallen >= NAMEDATALEN)
402-
{
403-
int len;
404-
405-
len =pg_mbcliplen(literalbuf, literallen,
406-
NAMEDATALEN-1);
407-
ereport(NOTICE,
408-
(errcode(ERRCODE_NAME_TOO_LONG),
409-
errmsg("identifier\"%s\" will be truncated to\"%.*s\"",
410-
literalbuf, len, literalbuf)));
411-
literalbuf[len] ='\0';
412-
literallen = len;
413-
}
414-
yylval.str =litbufdup();
406+
truncate_identifier(ident, literallen,true);
407+
yylval.str = ident;
415408
return IDENT;
416409
}
417410
<xd>{xddouble} {
@@ -537,7 +530,6 @@ other.
537530
{identifier}{
538531
const ScanKeyword *keyword;
539532
char *ident;
540-
inti;
541533

542534
/* Is it a keyword? */
543535
keyword =ScanKeywordLookup(yytext);
@@ -550,28 +542,8 @@ other.
550542
/*
551543
* No. Convert the identifier to lower case, and truncate
552544
* if necessary.
553-
*
554-
* Note: here we use a locale-dependent case conversion,
555-
* which seems appropriate under standard SQL rules, whereas
556-
* the keyword comparison was NOT locale-dependent.
557545
*/
558-
ident =pstrdup(yytext);
559-
for (i =0; ident[i]; i++)
560-
{
561-
if (isupper((unsignedchar) ident[i]))
562-
ident[i] =tolower((unsignedchar) ident[i]);
563-
}
564-
if (i >= NAMEDATALEN)
565-
{
566-
int len;
567-
568-
len =pg_mbcliplen(ident, i, NAMEDATALEN-1);
569-
ereport(NOTICE,
570-
(errcode(ERRCODE_NAME_TOO_LONG),
571-
errmsg("identifier\"%s\" will be truncated to\"%.*s\"",
572-
ident, len, ident)));
573-
ident[len] ='\0';
574-
}
546+
ident =downcase_truncate_identifier(yytext, yyleng,true);
575547
yylval.str = ident;
576548
return IDENT;
577549
}

‎src/backend/parser/scansup.c

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.25 2003/11/29 19:51:52 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.26 2004/02/21 00:34:53 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -19,6 +19,8 @@
1919

2020
#include"miscadmin.h"
2121
#include"parser/scansup.h"
22+
#include"mb/pg_wchar.h"
23+
2224

2325
/* ----------------
2426
*scanstr
@@ -32,7 +34,7 @@
3234
*/
3335

3436
char*
35-
scanstr(char*s)
37+
scanstr(constchar*s)
3638
{
3739
char*newStr;
3840
intlen,
@@ -109,3 +111,75 @@ scanstr(char *s)
109111
newStr[j]='\0';
110112
returnnewStr;
111113
}
114+
115+
116+
/*
117+
* downcase_truncate_identifier() --- do appropriate downcasing and
118+
* truncation of an unquoted identifier. Optionally warn of truncation.
119+
*
120+
* Returns a palloc'd string containing the adjusted identifier.
121+
*
122+
* Note: in some usages the passed string is not null-terminated.
123+
*
124+
* Note: the API of this function is designed to allow for downcasing
125+
* transformations that increase the string length, but we don't yet
126+
* support that. If you want to implement it, you'll need to fix
127+
* SplitIdentifierString() in utils/adt/varlena.c.
128+
*/
129+
char*
130+
downcase_truncate_identifier(constchar*ident,intlen,boolwarn)
131+
{
132+
char*result;
133+
inti;
134+
135+
result=palloc(len+1);
136+
/*
137+
* SQL99 specifies Unicode-aware case normalization, which we don't yet
138+
* have the infrastructure for. Instead we use tolower() to provide a
139+
* locale-aware translation. However, there are some locales where this
140+
* is not right either (eg, Turkish may do strange things with 'i' and
141+
* 'I'). Our current compromise is to use tolower() for characters with
142+
* the high bit set, and use an ASCII-only downcasing for 7-bit
143+
* characters.
144+
*/
145+
for (i=0;i<len;i++)
146+
{
147+
unsignedcharch= (unsignedchar)ident[i];
148+
149+
if (ch >='A'&&ch <='Z')
150+
ch+='a'-'A';
151+
elseif (ch >=0x80&&isupper(ch))
152+
ch=tolower(ch);
153+
result[i]= (char)ch;
154+
}
155+
result[i]='\0';
156+
157+
if (i >=NAMEDATALEN)
158+
truncate_identifier(result,i,warn);
159+
160+
returnresult;
161+
}
162+
163+
/*
164+
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
165+
*
166+
* The given string is modified in-place, if necessary. A warning is
167+
* issued if requested.
168+
*
169+
* We require the caller to pass in the string length since this saves a
170+
* strlen() call in some common usages.
171+
*/
172+
void
173+
truncate_identifier(char*ident,intlen,boolwarn)
174+
{
175+
if (len >=NAMEDATALEN)
176+
{
177+
len=pg_mbcliplen(ident,len,NAMEDATALEN-1);
178+
if (warn)
179+
ereport(NOTICE,
180+
(errcode(ERRCODE_NAME_TOO_LONG),
181+
errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
182+
ident,len,ident)));
183+
ident[len]='\0';
184+
}
185+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp