NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit 654dcfb

committed

Clean up ts_locale.h/.c. Fix broken and not-consistent-across-platforms

behavior of wchar2char/char2wchar; this should resolve bug #3730. Avoid excess computations of pg_mblen in t_isalpha and friends. Const-ify APIs where possible.

1 parent 83290b6 commit 654dcfb Copy full SHA for 654dcfb

File tree

5 files changed

+150

-112

lines changed

src
- backend/tsearch
- include/tsearch
  - ts_locale.h
  - ts_public.h

5 files changed

+150

-112

lines changed

`‎src/backend/tsearch/ts_locale.c`

Lines changed: 121 additions & 64 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,13 +1,13 @@`
`1`	`1`	`/*-------------------------------------------------------------------------`
`2`	`2`	`*`
`3`	`3`	`* ts_locale.c`
`4`		`- *localecompatiblility layer for tsearch`
	`4`	`+ *locale compatibility layer for tsearch`
`5`	`5`	`*`
`6`	`6`	`* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group`
`7`	`7`	`*`
`8`	`8`	`*`
`9`	`9`	`* IDENTIFICATION`
`10`		`- * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.2 2007/08/25 00:03:59 tgl Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.3 2007/11/09 22:37:35 tgl Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -16,113 +16,174 @@`
`16`	`16`	`#include"tsearch/ts_locale.h"`
`17`	`17`	`#include"tsearch/ts_public.h"`
`18`	`18`
`19`		`-#ifdefTS_USE_WIDE`
`20`	`19`
`21`		`-#ifdefWIN32`
	`20`	`+#ifdefTS_USE_WIDE`
`22`	`21`
	`22`	`+/*`
	`23`	`+ * wchar2char --- convert wide characters to multibyte format`
	`24`	`+ *`
	`25`	`+ * This has the same API as the standard wcstombs() function; in particular,`
	`26`	`+ * tolen is the maximum number of bytes to store at to, and from should be`
	`27`	`+ * zero-terminated. The output will be zero-terminated iff there is room.`
	`28`	`+ */`
`23`	`29`	`size_t`
`24`		`-wchar2char(charto,constwchar_tfrom,size_tlen)`
	`30`	`+wchar2char(charto,constwchar_tfrom,size_ttolen)`
`25`	`31`	`{`
`26`		`-if (len==0)`
	`32`	`+if (tolen==0)`
`27`	`33`	`return0;`
`28`	`34`
	`35`	`+#ifdefWIN32`
`29`	`36`	`if (GetDatabaseEncoding()==PG_UTF8)`
`30`	`37`	`{`
`31`	`38`	`intr;`
`32`	`39`
`33`		`-r=WideCharToMultiByte(CP_UTF8,0,from,-1,to,len,`
	`40`	`+r=WideCharToMultiByte(CP_UTF8,0,from,-1,to,tolen,`
`34`	`41`	`NULL,NULL);`
`35`	`42`
`36`		`-if (r==0)`
`37`		`-ereport(ERROR,`
`38`		`-(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),`
`39`		`-errmsg("UTF-16 to UTF-8 translation failed: %lu",`
`40`		`-GetLastError())));`
`41`		`-Assert(r <=len);`
	`43`	`+if (r <=0)`
	`44`	`+return (size_t)-1;`
	`45`	`+`
	`46`	`+Assert(r <=tolen);`
`42`	`47`
`43`		`-returnr;`
	`48`	`+/* Microsoft counts the zero terminator in the result */`
	`49`	`+returnr-1;`
`44`	`50`	`}`
	`51`	`+#endif/* WIN32 */`
`45`	`52`
`46`		`-returnwcstombs(to,from,len);`
	`53`	`+returnwcstombs(to,from,tolen);`
`47`	`54`	`}`
`48`		`-#endif/* WIN32 */`
`49`	`55`
	`56`	`+/*`
	`57`	`+ * char2wchar --- convert multibyte characters to wide characters`
	`58`	`+ *`
	`59`	`+ * This has almost the API of mbstowcs(), except that *from need not be`
	`60`	`+ * null-terminated; instead, the number of input bytes is specified as`
	`61`	`+ * fromlen. Also, we ereport() rather than returning -1 for invalid`
	`62`	`+ * input encoding. tolen is the maximum number of wchar_t's to store at *to.`
	`63`	`+ * The output will be zero-terminated iff there is room.`
	`64`	`+ */`
`50`	`65`	`size_t`
`51`		`-char2wchar(wchar_tto,constcharfrom,size_tlen)`
	`66`	`+char2wchar(wchar_tto,size_ttolen,constcharfrom,size_tfromlen)`
`52`	`67`	`{`
`53`		`-if (len==0)`
	`68`	`+if (tolen==0)`
`54`	`69`	`return0;`
`55`	`70`
`56`	`71`	`#ifdefWIN32`
`57`	`72`	`if (GetDatabaseEncoding()==PG_UTF8)`
`58`	`73`	`{`
`59`	`74`	`intr;`
`60`	`75`
`61`		`-r=MultiByteToWideChar(CP_UTF8,0,from,len,to,len);`
	`76`	`+r=MultiByteToWideChar(CP_UTF8,0,from,fromlen,to,tolen);`
`62`	`77`
`63`		`-if (!r)`
	`78`	`+if (r <=0)`
`64`	`79`	`{`
`65`		`-pg_verifymbstr(from,len, false);`
	`80`	`+pg_verifymbstr(from,fromlen, false);`
`66`	`81`	`ereport(ERROR,`
`67`	`82`	`(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),`
`68`	`83`	`errmsg("invalid multibyte character for locale"),`
`69`	`84`	`errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));`
`70`	`85`	`}`
`71`	`86`
`72`		`-Assert(r <=len);`
	`87`	`+Assert(r <=tolen);`
`73`	`88`
`74`		`-returnr;`
	`89`	`+/* Microsoft counts the zero terminator in the result */`
	`90`	`+returnr-1;`
`75`	`91`	`}`
`76`		`-else`
`77`	`92`	`#endif/* WIN32 */`
	`93`	`+`
`78`	`94`	`if (lc_ctype_is_c())`
`79`	`95`	`{`
`80`	`96`	`/*`
`81`	`97`	`* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be`
`82`	`98`	`* allocated with sufficient space`
`83`	`99`	`*/`
`84`		`-returnpg_mb2wchar_with_len(from, (pg_wchar*)to,len);`
	`100`	`+returnpg_mb2wchar_with_len(from, (pg_wchar*)to,fromlen);`
`85`	`101`	`}`
`86`	`102`	`else`
`87`	`103`	`{`
`88`	`104`	`/*`
`89`		`- * mbstowcsrequire ending '\0'`
	`105`	`+ * mbstowcs requires ending '\0'`
`90`	`106`	`*/`
`91`		`-char*str=pnstrdup(from,len);`
`92`		`-size_ttolen;`
	`107`	`+char*str=pnstrdup(from,fromlen);`
	`108`	`+size_tresult;`
	`109`	`+`
	`110`	`+result=mbstowcs(to,str,tolen);`
`93`	`111`
`94`		`-tolen=mbstowcs(to,str,len);`
`95`	`112`	`pfree(str);`
`96`	`113`
`97`		`-returntolen;`
	`114`	`+if (result== (size_t)-1)`
	`115`	`+{`
	`116`	`+pg_verifymbstr(from,fromlen, false);`
	`117`	`+ereport(ERROR,`
	`118`	`+(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),`
	`119`	`+errmsg("invalid multibyte character for locale"),`
	`120`	`+errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));`
	`121`	`+}`
	`122`	`+`
	`123`	`+if (result<tolen)`
	`124`	`+to[result]=0;`
	`125`	`+`
	`126`	`+returnresult;`
`98`	`127`	`}`
`99`	`128`	`}`
`100`	`129`
	`130`	`+`
`101`	`131`	`int`
`102`		`-_t_isalpha(constchar*ptr)`
	`132`	`+t_isdigit(constchar*ptr)`
`103`	`133`	`{`
	`134`	`+intclen=pg_mblen(ptr);`
`104`	`135`	`wchar_tcharacter[2];`
`105`	`136`
`106`		`-if (lc_ctype_is_c())`
	`137`	`+if (clen==1\|\|lc_ctype_is_c())`
	`138`	`+returnisdigit(TOUCHAR(ptr));`
	`139`	`+`
	`140`	`+char2wchar(character,2,ptr,clen);`
	`141`	`+`
	`142`	`+returniswdigit((wint_t)character[0]);`
	`143`	`+}`
	`144`	`+`
	`145`	`+int`
	`146`	`+t_isspace(constchar*ptr)`
	`147`	`+{`
	`148`	`+intclen=pg_mblen(ptr);`
	`149`	`+wchar_tcharacter[2];`
	`150`	`+`
	`151`	`+if (clen==1\|\|lc_ctype_is_c())`
	`152`	`+returnisspace(TOUCHAR(ptr));`
	`153`	`+`
	`154`	`+char2wchar(character,2,ptr,clen);`
	`155`	`+`
	`156`	`+returniswspace((wint_t)character[0]);`
	`157`	`+}`
	`158`	`+`
	`159`	`+int`
	`160`	`+t_isalpha(constchar*ptr)`
	`161`	`+{`
	`162`	`+intclen=pg_mblen(ptr);`
	`163`	`+wchar_tcharacter[2];`
	`164`	`+`
	`165`	`+if (clen==1\|\|lc_ctype_is_c())`
`107`	`166`	`returnisalpha(TOUCHAR(ptr));`
`108`	`167`
`109`		`-char2wchar(character,ptr,1);`
	`168`	`+char2wchar(character,2,ptr,clen);`
`110`	`169`
`111`		`-returniswalpha((wint_t)*character);`
	`170`	`+returniswalpha((wint_t)character[0]);`
`112`	`171`	`}`
`113`	`172`
`114`	`173`	`int`
`115`		`-_t_isprint(constchar*ptr)`
	`174`	`+t_isprint(constchar*ptr)`
`116`	`175`	`{`
	`176`	`+intclen=pg_mblen(ptr);`
`117`	`177`	`wchar_tcharacter[2];`
`118`	`178`
`119`		`-if (lc_ctype_is_c())`
	`179`	`+if (clen==1\|\|lc_ctype_is_c())`
`120`	`180`	`returnisprint(TOUCHAR(ptr));`
`121`	`181`
`122`		`-char2wchar(character,ptr,1);`
	`182`	`+char2wchar(character,2,ptr,clen);`
`123`	`183`
`124`		`-returniswprint((wint_t)*character);`
	`184`	`+returniswprint((wint_t)character[0]);`
`125`	`185`	`}`
	`186`	`+`
`126`	`187`	`#endif/* TS_USE_WIDE */`
`127`	`188`
`128`	`189`
`@@ -168,19 +229,27 @@ t_readline(FILE *fp)`
`168`	`229`	`returnrecoded;`
`169`	`230`	`}`
`170`	`231`
	`232`	`+/*`
	`233`	`+ * lowerstr --- fold null-terminated string to lower case`
	`234`	`+ *`
	`235`	`+ * Returned string is palloc'd`
	`236`	`+ */`
`171`	`237`	`char*`
`172`		`-lowerstr(char*str)`
	`238`	`+lowerstr(constchar*str)`
`173`	`239`	`{`
`174`	`240`	`returnlowerstr_with_len(str,strlen(str));`
`175`	`241`	`}`
`176`	`242`
`177`	`243`	`/*`
	`244`	`+ * lowerstr_with_len --- fold string to lower case`
	`245`	`+ *`
	`246`	`+ * Input string need not be null-terminated.`
	`247`	`+ *`
`178`	`248`	`* Returned string is palloc'd`
`179`	`249`	`*/`
`180`	`250`	`char*`
`181`		`-lowerstr_with_len(char*str,intlen)`
	`251`	`+lowerstr_with_len(constchar*str,intlen)`
`182`	`252`	`{`
`183`		`-char*ptr=str;`
`184`	`253`	`char*out;`
`185`	`254`
`186`	`255`	`if (len==0)`
`@@ -202,23 +271,13 @@ lowerstr_with_len(char *str, int len)`
`202`	`271`
`203`	`272`	`/*`
`204`	`273`	`* alloc number of wchar_t for worst case, len contains number of`
`205`		`- * bytes<= number of characters and alloc 1 wchar_t for 0, because`
`206`		`- * wchar2char(wcstombs in really) wants zero-terminated string`
	`274`	`+ * bytes >= number of characters and alloc 1 wchar_t for 0, because`
	`275`	`+ * wchar2char wants zero-terminated string`
`207`	`276`	`*/`
`208`	`277`	`wptr=wstr= (wchar_t)palloc(sizeof(wchar_t) (len+1));`
`209`	`278`
`210`		`-/*`
`211`		`- * str SHOULD be cstring, so wlen contains number of converted`
`212`		`- * character`
`213`		`- */`
`214`		`-wlen=char2wchar(wstr,str,len);`
`215`		`-if (wlen<0)`
`216`		`-ereport(ERROR,`
`217`		`-(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),`
`218`		`-errmsg("translation failed from server encoding to wchar_t")));`
`219`		`-`
	`279`	`+wlen=char2wchar(wstr,len+1,str,len);`
`220`	`280`	`Assert(wlen <=len);`
`221`		`-wstr[wlen]=0;`
`222`	`281`
`223`	`282`	`while (*wptr)`
`224`	`283`	`{`
`@@ -229,31 +288,29 @@ lowerstr_with_len(char *str, int len)`
`229`	`288`	`/*`
`230`	`289`	`* Alloc result string for worst case + '\0'`
`231`	`290`	`*/`
`232`		`-len=sizeof(char)pg_database_encoding_max_length()(wlen+1);`
	`291`	`+len=pg_database_encoding_max_length()*wlen+1;`
`233`	`292`	`out= (char*)palloc(len);`
`234`	`293`
`235`		`-/*`
`236`		`- * wlen now is number of bytes which is always >= number of characters`
`237`		`- */`
`238`	`294`	`wlen=wchar2char(out,wstr,len);`
	`295`	`+`
`239`	`296`	`pfree(wstr);`
`240`	`297`
`241`	`298`	`if (wlen<0)`
`242`	`299`	`ereport(ERROR,`
`243`	`300`	`(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),`
`244`		`-errmsg("translation failed from wchar_t to server encoding %d",errno)));`
`245`		`-Assert(wlen <=len);`
`246`		`-out[wlen]='\0';`
	`301`	`+errmsg("translation from wchar_t to server encoding failed: %m")));`
	`302`	`+Assert(wlen<len);`
`247`	`303`	`}`
`248`	`304`	`else`
`249`		`-#endif`
	`305`	`+#endif/* TS_USE_WIDE */`
`250`	`306`	`{`
	`307`	`+constchar*ptr=str;`
`251`	`308`	`char*outptr;`
`252`	`309`
`253`	`310`	`outptr=out= (char)palloc(sizeof(char) (len+1));`
`254`		`-while (*ptr&&ptr-str<len)`
	`311`	`+while ((ptr-str)<len&&*ptr)`
`255`	`312`	`{`
`256`		`-outptr++=tolower((unsignedchar*)ptr);`
	`313`	`+*outptr++=tolower(TOUCHAR(ptr));`
`257`	`314`	`ptr++;`
`258`	`315`	`}`
`259`	`316`	`*outptr='\0';`

`‎src/backend/tsearch/ts_utils.c`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`*`
`8`	`8`	`*`
`9`	`9`	`* IDENTIFICATION`
`10`		`- * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.4 2007/09/04 02:16:56 tgl Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.5 2007/11/09 22:37:35 tgl Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -75,7 +75,7 @@ comparestr(const void a, const void b)`
`75`	`75`	`* or palloc a new version.`
`76`	`76`	`*/`
`77`	`77`	`void`
`78`		`-readstoplist(constcharfname,StopLists,char(wordop) (char*))`
	`78`	`+readstoplist(constcharfname,StopLists,char(wordop) (constchar*))`
`79`	`79`	`{`
`80`	`80`	`char**stop=NULL;`
`81`	`81`

`‎src/backend/tsearch/wparser_def.c`

Lines changed: 3 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`*`
`8`	`8`	`*`
`9`	`9`	`* IDENTIFICATION`
`10`		`- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.7 2007/10/27 19:03:45 tgl Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.8 2007/11/09 22:37:35 tgl Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -294,12 +294,12 @@ TParserInit(char *str, int len)`
`294`	`294`	`/*`
`295`	`295`	`* Use wide char code only when max encoding length > 1.`
`296`	`296`	`*/`
`297`		`-`
`298`	`297`	`if (prs->charmaxlen>1)`
`299`	`298`	`{`
`300`	`299`	`prs->usewide= true;`
`301`	`300`	`prs->wstr= (wchar_t)palloc(sizeof(wchar_t) (prs->lenstr+1));`
`302`		`-prs->lenwstr=char2wchar(prs->wstr,prs->str,prs->lenstr);`
	`301`	`+prs->lenwstr=char2wchar(prs->wstr,prs->lenstr+1,`
	`302`	`+prs->str,prs->lenstr);`
`303`	`303`	`}`
`304`	`304`	`else`
`305`	`305`	`#endif`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit654dcfb

File tree

5 files changed

5 files changed

`‎src/backend/tsearch/ts_locale.c`

`‎src/backend/tsearch/ts_utils.c`

`‎src/backend/tsearch/wparser_def.c`

0 commit comments