Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 654dcfb

Browse files
committed
Clean up ts_locale.h/.c. Fix broken and not-consistent-across-platforms
behavior of wchar2char/char2wchar; this should resolve bug #3730. Avoid excess computations of pg_mblen in t_isalpha and friends. Const-ify APIs where possible.
1 parent 83290b6 commit 654dcfb

File tree

5 files changed

+150
-112
lines changed

5 files changed

+150
-112
lines changed

‎src/backend/tsearch/ts_locale.c

Lines changed: 121 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
/*-------------------------------------------------------------------------
22
*
33
* ts_locale.c
4-
*localecompatiblility layer for tsearch
4+
* locale compatibility layer for tsearch
55
*
66
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.3 2007/11/09 22:37:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,113 +16,174 @@
1616
#include"tsearch/ts_locale.h"
1717
#include"tsearch/ts_public.h"
1818

19-
#ifdefTS_USE_WIDE
2019

21-
#ifdefWIN32
20+
#ifdefTS_USE_WIDE
2221

22+
/*
23+
* wchar2char --- convert wide characters to multibyte format
24+
*
25+
* This has the same API as the standard wcstombs() function; in particular,
26+
* tolen is the maximum number of bytes to store at *to, and *from should be
27+
* zero-terminated. The output will be zero-terminated iff there is room.
28+
*/
2329
size_t
24-
wchar2char(char*to,constwchar_t*from,size_tlen)
30+
wchar2char(char*to,constwchar_t*from,size_ttolen)
2531
{
26-
if (len==0)
32+
if (tolen==0)
2733
return0;
2834

35+
#ifdefWIN32
2936
if (GetDatabaseEncoding()==PG_UTF8)
3037
{
3138
intr;
3239

33-
r=WideCharToMultiByte(CP_UTF8,0,from,-1,to,len,
40+
r=WideCharToMultiByte(CP_UTF8,0,from,-1,to,tolen,
3441
NULL,NULL);
3542

36-
if (r==0)
37-
ereport(ERROR,
38-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
39-
errmsg("UTF-16 to UTF-8 translation failed: %lu",
40-
GetLastError())));
41-
Assert(r <=len);
43+
if (r <=0)
44+
return (size_t)-1;
45+
46+
Assert(r <=tolen);
4247

43-
returnr;
48+
/* Microsoft counts the zero terminator in the result */
49+
returnr-1;
4450
}
51+
#endif/* WIN32 */
4552

46-
returnwcstombs(to,from,len);
53+
returnwcstombs(to,from,tolen);
4754
}
48-
#endif/* WIN32 */
4955

56+
/*
57+
* char2wchar --- convert multibyte characters to wide characters
58+
*
59+
* This has almost the API of mbstowcs(), except that *from need not be
60+
* null-terminated; instead, the number of input bytes is specified as
61+
* fromlen. Also, we ereport() rather than returning -1 for invalid
62+
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
63+
* The output will be zero-terminated iff there is room.
64+
*/
5065
size_t
51-
char2wchar(wchar_t*to,constchar*from,size_tlen)
66+
char2wchar(wchar_t*to,size_ttolen,constchar*from,size_tfromlen)
5267
{
53-
if (len==0)
68+
if (tolen==0)
5469
return0;
5570

5671
#ifdefWIN32
5772
if (GetDatabaseEncoding()==PG_UTF8)
5873
{
5974
intr;
6075

61-
r=MultiByteToWideChar(CP_UTF8,0,from,len,to,len);
76+
r=MultiByteToWideChar(CP_UTF8,0,from,fromlen,to,tolen);
6277

63-
if (!r)
78+
if (r <=0)
6479
{
65-
pg_verifymbstr(from,len, false);
80+
pg_verifymbstr(from,fromlen, false);
6681
ereport(ERROR,
6782
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
6883
errmsg("invalid multibyte character for locale"),
6984
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
7085
}
7186

72-
Assert(r <=len);
87+
Assert(r <=tolen);
7388

74-
returnr;
89+
/* Microsoft counts the zero terminator in the result */
90+
returnr-1;
7591
}
76-
else
7792
#endif/* WIN32 */
93+
7894
if (lc_ctype_is_c())
7995
{
8096
/*
8197
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
8298
* allocated with sufficient space
8399
*/
84-
returnpg_mb2wchar_with_len(from, (pg_wchar*)to,len);
100+
returnpg_mb2wchar_with_len(from, (pg_wchar*)to,fromlen);
85101
}
86102
else
87103
{
88104
/*
89-
* mbstowcs require ending '\0'
105+
* mbstowcs requires ending '\0'
90106
*/
91-
char*str=pnstrdup(from,len);
92-
size_ttolen;
107+
char*str=pnstrdup(from,fromlen);
108+
size_tresult;
109+
110+
result=mbstowcs(to,str,tolen);
93111

94-
tolen=mbstowcs(to,str,len);
95112
pfree(str);
96113

97-
returntolen;
114+
if (result== (size_t)-1)
115+
{
116+
pg_verifymbstr(from,fromlen, false);
117+
ereport(ERROR,
118+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
119+
errmsg("invalid multibyte character for locale"),
120+
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
121+
}
122+
123+
if (result<tolen)
124+
to[result]=0;
125+
126+
returnresult;
98127
}
99128
}
100129

130+
101131
int
102-
_t_isalpha(constchar*ptr)
132+
t_isdigit(constchar*ptr)
103133
{
134+
intclen=pg_mblen(ptr);
104135
wchar_tcharacter[2];
105136

106-
if (lc_ctype_is_c())
137+
if (clen==1||lc_ctype_is_c())
138+
returnisdigit(TOUCHAR(ptr));
139+
140+
char2wchar(character,2,ptr,clen);
141+
142+
returniswdigit((wint_t)character[0]);
143+
}
144+
145+
int
146+
t_isspace(constchar*ptr)
147+
{
148+
intclen=pg_mblen(ptr);
149+
wchar_tcharacter[2];
150+
151+
if (clen==1||lc_ctype_is_c())
152+
returnisspace(TOUCHAR(ptr));
153+
154+
char2wchar(character,2,ptr,clen);
155+
156+
returniswspace((wint_t)character[0]);
157+
}
158+
159+
int
160+
t_isalpha(constchar*ptr)
161+
{
162+
intclen=pg_mblen(ptr);
163+
wchar_tcharacter[2];
164+
165+
if (clen==1||lc_ctype_is_c())
107166
returnisalpha(TOUCHAR(ptr));
108167

109-
char2wchar(character,ptr,1);
168+
char2wchar(character,2,ptr,clen);
110169

111-
returniswalpha((wint_t)*character);
170+
returniswalpha((wint_t)character[0]);
112171
}
113172

114173
int
115-
_t_isprint(constchar*ptr)
174+
t_isprint(constchar*ptr)
116175
{
176+
intclen=pg_mblen(ptr);
117177
wchar_tcharacter[2];
118178

119-
if (lc_ctype_is_c())
179+
if (clen==1||lc_ctype_is_c())
120180
returnisprint(TOUCHAR(ptr));
121181

122-
char2wchar(character,ptr,1);
182+
char2wchar(character,2,ptr,clen);
123183

124-
returniswprint((wint_t)*character);
184+
returniswprint((wint_t)character[0]);
125185
}
186+
126187
#endif/* TS_USE_WIDE */
127188

128189

@@ -168,19 +229,27 @@ t_readline(FILE *fp)
168229
returnrecoded;
169230
}
170231

232+
/*
233+
* lowerstr --- fold null-terminated string to lower case
234+
*
235+
* Returned string is palloc'd
236+
*/
171237
char*
172-
lowerstr(char*str)
238+
lowerstr(constchar*str)
173239
{
174240
returnlowerstr_with_len(str,strlen(str));
175241
}
176242

177243
/*
244+
* lowerstr_with_len --- fold string to lower case
245+
*
246+
* Input string need not be null-terminated.
247+
*
178248
* Returned string is palloc'd
179249
*/
180250
char*
181-
lowerstr_with_len(char*str,intlen)
251+
lowerstr_with_len(constchar*str,intlen)
182252
{
183-
char*ptr=str;
184253
char*out;
185254

186255
if (len==0)
@@ -202,23 +271,13 @@ lowerstr_with_len(char *str, int len)
202271

203272
/*
204273
* alloc number of wchar_t for worst case, len contains number of
205-
* bytes<= number of characters and alloc 1 wchar_t for 0, because
206-
* wchar2char(wcstombs in really) wants zero-terminated string
274+
* bytes >= number of characters and alloc 1 wchar_t for 0, because
275+
* wchar2char wants zero-terminated string
207276
*/
208277
wptr=wstr= (wchar_t*)palloc(sizeof(wchar_t)* (len+1));
209278

210-
/*
211-
* str SHOULD be cstring, so wlen contains number of converted
212-
* character
213-
*/
214-
wlen=char2wchar(wstr,str,len);
215-
if (wlen<0)
216-
ereport(ERROR,
217-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
218-
errmsg("translation failed from server encoding to wchar_t")));
219-
279+
wlen=char2wchar(wstr,len+1,str,len);
220280
Assert(wlen <=len);
221-
wstr[wlen]=0;
222281

223282
while (*wptr)
224283
{
@@ -229,31 +288,29 @@ lowerstr_with_len(char *str, int len)
229288
/*
230289
* Alloc result string for worst case + '\0'
231290
*/
232-
len=sizeof(char)*pg_database_encoding_max_length()*(wlen+1);
291+
len=pg_database_encoding_max_length()*wlen+1;
233292
out= (char*)palloc(len);
234293

235-
/*
236-
* wlen now is number of bytes which is always >= number of characters
237-
*/
238294
wlen=wchar2char(out,wstr,len);
295+
239296
pfree(wstr);
240297

241298
if (wlen<0)
242299
ereport(ERROR,
243300
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
244-
errmsg("translation failed from wchar_t to server encoding %d",errno)));
245-
Assert(wlen <=len);
246-
out[wlen]='\0';
301+
errmsg("translation from wchar_t to server encoding failed: %m")));
302+
Assert(wlen<len);
247303
}
248304
else
249-
#endif
305+
#endif/* TS_USE_WIDE */
250306
{
307+
constchar*ptr=str;
251308
char*outptr;
252309

253310
outptr=out= (char*)palloc(sizeof(char)* (len+1));
254-
while (*ptr&&ptr-str<len)
311+
while ((ptr-str)<len&&*ptr)
255312
{
256-
*outptr++=tolower(*(unsignedchar*)ptr);
313+
*outptr++=tolower(TOUCHAR(ptr));
257314
ptr++;
258315
}
259316
*outptr='\0';

‎src/backend/tsearch/ts_utils.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.4 2007/09/04 02:16:56 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.5 2007/11/09 22:37:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -75,7 +75,7 @@ comparestr(const void *a, const void *b)
7575
* or palloc a new version.
7676
*/
7777
void
78-
readstoplist(constchar*fname,StopList*s,char*(*wordop) (char*))
78+
readstoplist(constchar*fname,StopList*s,char*(*wordop) (constchar*))
7979
{
8080
char**stop=NULL;
8181

‎src/backend/tsearch/wparser_def.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.7 2007/10/27 19:03:45 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.8 2007/11/09 22:37:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -294,12 +294,12 @@ TParserInit(char *str, int len)
294294
/*
295295
* Use wide char code only when max encoding length > 1.
296296
*/
297-
298297
if (prs->charmaxlen>1)
299298
{
300299
prs->usewide= true;
301300
prs->wstr= (wchar_t*)palloc(sizeof(wchar_t)* (prs->lenstr+1));
302-
prs->lenwstr=char2wchar(prs->wstr,prs->str,prs->lenstr);
301+
prs->lenwstr=char2wchar(prs->wstr,prs->lenstr+1,
302+
prs->str,prs->lenstr);
303303
}
304304
else
305305
#endif

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp