Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f2a01b0

Browse files
committed
Fix localization support for multibyte encoding and C locale.
Slightly reworked patch from Tatsuo Ishii
1 parent 7021d6f, commit f2a01b0

File tree

3 files changed

+144
-52
lines changed

3 files changed

+144
-52
lines changed

‎contrib/tsearch2/ts_locale.c

Lines changed: 32 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,13 @@
1212
size_t
1313
wchar2char(char*to,constwchar_t*from,size_tlen)
1414
{
15+
if (len==0)
16+
return0;
17+
1518
if (GetDatabaseEncoding()==PG_UTF8)
1619
{
1720
intr;
1821

19-
if (len==0)
20-
return0;
21-
2222
r=WideCharToMultiByte(CP_UTF8,0,from,-1,to,len,
2323
NULL,NULL);
2424

@@ -34,17 +34,19 @@ wchar2char(char *to, const wchar_t *from, size_t len)
3434

3535
returnwcstombs(to,from,len);
3636
}
37+
#endif/* WIN32 */
3738

3839
size_t
3940
char2wchar(wchar_t*to,constchar*from,size_tlen)
4041
{
42+
if (len==0)
43+
return0;
44+
45+
#ifdefWIN32
4146
if (GetDatabaseEncoding()==PG_UTF8)
4247
{
4348
intr;
4449

45-
if (len==0)
46-
return0;
47-
4850
r=MultiByteToWideChar(CP_UTF8,0,from,len,to,len);
4951

5052
if (!r)
@@ -60,29 +62,44 @@ char2wchar(wchar_t *to, const char *from, size_t len)
6062

6163
returnr;
6264
}
65+
else
66+
#endif/* WIN32 */
67+
if (lc_ctype_is_c() )
68+
{
69+
/*
70+
* pg_mb2wchar_with_len always adds trailing '\0', so
71+
* 'to' should be allocated with sufficient space
72+
*/
73+
returnpg_mb2wchar_with_len(from, (pg_wchar*)to,len);
74+
}
6375

6476
returnmbstowcs(to,from,len);
6577
}
66-
#endif/* WIN32 */
6778

6879
int
6980
_t_isalpha(constchar*ptr)
7081
{
71-
wchar_tcharacter;
82+
wchar_tcharacter[2];
83+
84+
if (lc_ctype_is_c())
85+
returnisalpha(TOUCHAR(ptr));
7286

73-
char2wchar(&character,ptr,1);
87+
char2wchar(character,ptr,1);
7488

75-
returniswalpha((wint_t)character);
89+
returniswalpha((wint_t)*character);
7690
}
7791

7892
int
7993
_t_isprint(constchar*ptr)
8094
{
81-
wchar_tcharacter;
95+
wchar_tcharacter[2];
96+
97+
if (lc_ctype_is_c())
98+
returnisprint(TOUCHAR(ptr));
8299

83-
char2wchar(&character,ptr,1);
100+
char2wchar(character,ptr,1);
84101

85-
returniswprint((wint_t)character);
102+
returniswprint((wint_t)*character);
86103
}
87104
#endif/* TS_USE_WIDE */
88105

@@ -126,7 +143,7 @@ lowerstr(char *str)
126143
if (wlen<0 )
127144
ereport(ERROR,
128145
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
129-
errmsg("transalation failed from server encoding to wchar_t")));
146+
errmsg("translation failed from server encoding to wchar_t")));
130147

131148
Assert(wlen<=len);
132149
wstr[wlen]=0;
@@ -152,7 +169,7 @@ lowerstr(char *str)
152169
if (wlen<0 )
153170
ereport(ERROR,
154171
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
155-
errmsg("transalation failed from wchar_t to server encoding %d",errno)));
172+
errmsg("translation failed from wchar_t to server encoding %d",errno)));
156173
Assert(wlen<=len);
157174
out[wlen]='\0';
158175
}

‎contrib/tsearch2/ts_locale.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,17 @@
3030
#defineTOUCHAR(x)(*((unsigned char*)(x)))
3131

3232
#ifdefTS_USE_WIDE
33+
size_tchar2wchar(wchar_t*to,constchar*from,size_tlen);
3334

3435
#ifdefWIN32
3536

3637
size_twchar2char(char*to,constwchar_t*from,size_tlen);
37-
size_tchar2wchar(wchar_t*to,constchar*from,size_tlen);
38+
3839
#else/* WIN32 */
3940

40-
/* correct mbstowcs */
41-
#definechar2wchar mbstowcs
41+
/* correct wcstombs */
4242
#definewchar2char wcstombs
43+
4344
#endif/* WIN32 */
4445

4546
#definet_isdigit(x)( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
@@ -55,10 +56,10 @@ extern int_t_isprint(const char *ptr);
5556
*/
5657
#definet_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
5758

58-
#defineCOPYCHAR(d,s)do {\
59-
int lll = pg_mblen( s );\
60-
\
61-
while( lll-- )\
59+
#defineCOPYCHAR(d,s)do {\
60+
int lll = pg_mblen( s );\
61+
\
62+
while( lll-- )\
6263
TOUCHAR((d)+lll) = TOUCHAR((s)+lll);\
6364
} while(0)
6465

‎contrib/tsearch2/wordparser/parser.c

Lines changed: 104 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11 2006/10/04 00:29:47 momjian Exp $ */
1+
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.12 2007/01/15 15:16:28 teodor Exp $ */
22

33
#include"postgres.h"
44

@@ -40,16 +40,13 @@ TParserInit(char *str, int len)
4040
#ifdefTS_USE_WIDE
4141

4242
/*
43-
* Use wide char code only when max encoding length > 1 and ctype != C.
44-
* Some operating systems fail with multi-byte encodings and a C locale.
45-
* Also, for a C locale there is no need to process as multibyte. From
46-
* backend/utils/adt/oracle_compat.c Teodor
43+
* Use wide char code only when max encoding length > 1.
4744
*/
4845

49-
if (prs->charmaxlen>1&& !lc_ctype_is_c())
46+
if (prs->charmaxlen>1)
5047
{
5148
prs->usewide= true;
52-
prs->wstr= (wchar_t*)palloc(sizeof(wchar_t)*prs->lenstr);
49+
prs->wstr= (wchar_t*)palloc(sizeof(wchar_t)*(prs->lenstr+1));
5350
prs->lenwstr=char2wchar(prs->wstr,prs->str,prs->lenstr);
5451
}
5552
else
@@ -83,25 +80,99 @@ TParserClose(TParser * prs)
8380

8481
/*
8582
* defining support functions, equivalent to the is* macros, but
86-
* working with any possible encodings and locales
83+
* working with any possible encodings and locales. Note,
84+
* that with multibyte encoding and C-locale isw* function may fail
85+
* or give wrong result. Note 2: multibyte encoding and C-locale
86+
* often are used for Asian languages.
8787
*/
8888

8989
#ifdefTS_USE_WIDE
9090

91-
#definep_iswhat(type)\
92-
static int\
93-
p_is##type(TParser *prs) {\
94-
Assert( prs->state );\
95-
return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
96-
is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) );\
97-
}\
98-
\
99-
static int\
100-
p_isnot##type(TParser *prs) {\
101-
return !p_is##type(prs);\
91+
#definep_iswhat(type)\
92+
static int\
93+
p_is##type(TParser *prs) {\
94+
Assert( prs->state );\
95+
if ( prs->usewide )\
96+
{\
97+
if ( lc_ctype_is_c() )\
98+
return is##type( 0xff & *( prs->wstr + prs->state->poschar) );\
99+
\
100+
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) );\
101+
}\
102+
\
103+
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) );\
104+
}\
105+
\
106+
static int\
107+
p_isnot##type(TParser *prs) {\
108+
return !p_is##type(prs);\
102109
}
103110

111+
staticint
112+
p_isalnum(TParser*prs)
113+
{
114+
Assert(prs->state );
115+
116+
if (prs->usewide)
117+
{
118+
if (lc_ctype_is_c())
119+
{
120+
unsignedintc=*(unsignedint*)(prs->wstr+prs->state->poschar);
121+
122+
/*
123+
* any non-ascii symbol with multibyte encoding
124+
* with C-locale is an alpha character
125+
*/
126+
if (c>0x7f )
127+
return1;
128+
129+
returnisalnum(0xff&c);
130+
}
131+
132+
returniswalnum( (wint_t)*(prs->wstr+prs->state->poschar));
133+
}
104134

135+
returnisalnum(*(unsignedchar*)(prs->str+prs->state->posbyte ));
136+
}
137+
138+
staticint
139+
p_isnotalnum(TParser*prs)
140+
{
141+
return !p_isalnum(prs);
142+
}
143+
144+
staticint
145+
p_isalpha(TParser*prs)
146+
{
147+
Assert(prs->state );
148+
149+
if (prs->usewide)
150+
{
151+
if (lc_ctype_is_c())
152+
{
153+
unsignedintc=*(prs->wstr+prs->state->poschar);
154+
155+
/*
156+
* any non-ascii symbol with multibyte encoding
157+
* with C-locale is an alpha character
158+
*/
159+
if (c>0x7f )
160+
return1;
161+
162+
returnisalpha(0xff&c);
163+
}
164+
165+
returniswalpha( (wint_t)*(prs->wstr+prs->state->poschar));
166+
}
167+
168+
returnisalpha(*(unsignedchar*)(prs->str+prs->state->posbyte ));
169+
}
170+
171+
staticint
172+
p_isnotalpha(TParser*prs)
173+
{
174+
return !p_isalpha(prs);
175+
}
105176

106177
/* p_iseq should be used only for ascii symbols */
107178

@@ -111,18 +182,19 @@ p_iseq(TParser * prs, char c)
111182
Assert(prs->state);
112183
return ((prs->state->charlen==1&&*(prs->str+prs->state->posbyte)==c)) ?1 :0;
113184
}
185+
114186
#else/* TS_USE_WIDE */
115187

116-
#definep_iswhat(type)\
117-
static int\
118-
p_is##type(TParser *prs) {\
119-
Assert( prs->state );\
120-
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );\
121-
}\
122-
\
123-
static int\
124-
p_isnot##type(TParser *prs) {\
125-
return !p_is##type(prs);\
188+
#definep_iswhat(type)\
189+
static int\
190+
p_is##type(TParser *prs) {\
191+
Assert( prs->state );\
192+
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );\
193+
}\
194+
\
195+
static int\
196+
p_isnot##type(TParser *prs) {\
197+
return !p_is##type(prs);\
126198
}
127199

128200

@@ -132,10 +204,12 @@ p_iseq(TParser * prs, char c)
132204
Assert(prs->state);
133205
return (*(prs->str+prs->state->posbyte)==c) ?1 :0;
134206
}
135-
#endif/* TS_USE_WIDE */
136207

137208
p_iswhat(alnum)
138209
p_iswhat(alpha)
210+
211+
#endif/* TS_USE_WIDE */
212+
139213
p_iswhat(digit)
140214
p_iswhat(lower)
141215
p_iswhat(print)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp