Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6c55760

Browse files
committed
Add direct conversion routines between EUC_TW and Big5.
Conversions between EUC_TW and Big5 were previously implemented by converting the whole input to MIC first, and then from MIC to the target encoding. Implement functions to convert directly between the two.

The reason to do this now is that I'm working on a patch that will change the conversion function signature so that if the input is invalid, we convert as much as we can and return the number of bytes successfully converted. That's not possible if we use an intermediary format, because if an error happens in the intermediary -> final conversion, we lose track of the location of the invalid character in the original input. Avoiding the intermediate step makes the conversions faster, too.

Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/b9e3167f-f84b-7aa4-5738-be578a4db924%40iki.fi
1 parent b80e106 commit 6c55760

File tree

1 file changed

+134
-10
lines changed

1 file changed

+134
-10
lines changed

‎src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c

Lines changed: 134 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ PG_FUNCTION_INFO_V1(mic_to_big5);
3737
* ----------
3838
*/
3939

40+
staticvoideuc_tw2big5(constunsignedchar*euc,unsignedchar*p,intlen);
41+
staticvoidbig52euc_tw(constunsignedchar*euc,unsignedchar*p,intlen);
4042
staticvoidbig52mic(constunsignedchar*big5,unsignedchar*p,intlen);
4143
staticvoidmic2big5(constunsignedchar*mic,unsignedchar*p,intlen);
4244
staticvoideuc_tw2mic(constunsignedchar*euc,unsignedchar*p,intlen);
@@ -48,14 +50,10 @@ euc_tw_to_big5(PG_FUNCTION_ARGS)
4850
unsignedchar*src= (unsignedchar*)PG_GETARG_CSTRING(2);
4951
unsignedchar*dest= (unsignedchar*)PG_GETARG_CSTRING(3);
5052
intlen=PG_GETARG_INT32(4);
51-
unsignedchar*buf;
5253

5354
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW,PG_BIG5);
5455

55-
buf=palloc(len*ENCODING_GROWTH_RATE+1);
56-
euc_tw2mic(src,buf,len);
57-
mic2big5(buf,dest,strlen((char*)buf));
58-
pfree(buf);
56+
euc_tw2big5(src,dest,len);
5957

6058
PG_RETURN_VOID();
6159
}
@@ -66,14 +64,10 @@ big5_to_euc_tw(PG_FUNCTION_ARGS)
6664
unsignedchar*src= (unsignedchar*)PG_GETARG_CSTRING(2);
6765
unsignedchar*dest= (unsignedchar*)PG_GETARG_CSTRING(3);
6866
intlen=PG_GETARG_INT32(4);
69-
unsignedchar*buf;
7067

7168
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5,PG_EUC_TW);
7269

73-
buf=palloc(len*ENCODING_GROWTH_RATE+1);
74-
big52mic(src,buf,len);
75-
mic2euc_tw(buf,dest,strlen((char*)buf));
76-
pfree(buf);
70+
big52euc_tw(src,dest,len);
7771

7872
PG_RETURN_VOID();
7973
}
@@ -134,6 +128,136 @@ mic_to_big5(PG_FUNCTION_ARGS)
134128
PG_RETURN_VOID();
135129
}
136130

131+
132+
/*
133+
* EUC_TW ---> Big5
134+
*/
135+
staticvoid
136+
euc_tw2big5(constunsignedchar*euc,unsignedchar*p,intlen)
137+
{
138+
unsignedcharc1;
139+
unsigned shortbig5buf,
140+
cnsBuf;
141+
unsignedcharlc;
142+
intl;
143+
144+
while (len>0)
145+
{
146+
c1=*euc;
147+
if (IS_HIGHBIT_SET(c1))
148+
{
149+
/* Verify and decode the next EUC_TW input character */
150+
l=pg_encoding_verifymbchar(PG_EUC_TW, (constchar*)euc,len);
151+
if (l<0)
152+
report_invalid_encoding(PG_EUC_TW,
153+
(constchar*)euc,len);
154+
if (c1==SS2)
155+
{
156+
c1=euc[1];/* plane No. */
157+
if (c1==0xa1)
158+
lc=LC_CNS11643_1;
159+
elseif (c1==0xa2)
160+
lc=LC_CNS11643_2;
161+
else
162+
lc=c1-0xa3+LC_CNS11643_3;
163+
cnsBuf= (euc[2] <<8) |euc[3];
164+
}
165+
else
166+
{/* CNS11643-1 */
167+
lc=LC_CNS11643_1;
168+
cnsBuf= (c1 <<8) |euc[1];
169+
}
170+
171+
/* Write it out in Big5 */
172+
big5buf=CNStoBIG5(cnsBuf,lc);
173+
if (big5buf==0)
174+
report_untranslatable_char(PG_EUC_TW,PG_BIG5,
175+
(constchar*)euc,len);
176+
*p++= (big5buf >>8)&0x00ff;
177+
*p++=big5buf&0x00ff;
178+
179+
euc+=l;
180+
len-=l;
181+
}
182+
else
183+
{/* should be ASCII */
184+
if (c1==0)
185+
report_invalid_encoding(PG_EUC_TW,
186+
(constchar*)euc,len);
187+
*p++=c1;
188+
euc++;
189+
len--;
190+
}
191+
}
192+
*p='\0';
193+
}
194+
195+
/*
196+
* Big5 ---> EUC_TW
197+
*/
198+
staticvoid
199+
big52euc_tw(constunsignedchar*big5,unsignedchar*p,intlen)
200+
{
201+
unsigned shortc1;
202+
unsigned shortbig5buf,
203+
cnsBuf;
204+
unsignedcharlc;
205+
intl;
206+
207+
while (len>0)
208+
{
209+
/* Verify and decode the next Big5 input character */
210+
c1=*big5;
211+
if (IS_HIGHBIT_SET(c1))
212+
{
213+
l=pg_encoding_verifymbchar(PG_BIG5, (constchar*)big5,len);
214+
if (l<0)
215+
report_invalid_encoding(PG_BIG5,
216+
(constchar*)big5,len);
217+
big5buf= (c1 <<8) |big5[1];
218+
cnsBuf=BIG5toCNS(big5buf,&lc);
219+
220+
if (lc==LC_CNS11643_1)
221+
{
222+
*p++= (cnsBuf >>8)&0x00ff;
223+
*p++=cnsBuf&0x00ff;
224+
}
225+
elseif (lc==LC_CNS11643_2)
226+
{
227+
*p++=SS2;
228+
*p++=0xa2;
229+
*p++= (cnsBuf >>8)&0x00ff;
230+
*p++=cnsBuf&0x00ff;
231+
}
232+
elseif (lc >=LC_CNS11643_3&&lc <=LC_CNS11643_7)
233+
{
234+
*p++=SS2;
235+
*p++=lc-LC_CNS11643_3+0xa3;
236+
*p++= (cnsBuf >>8)&0x00ff;
237+
*p++=cnsBuf&0x00ff;
238+
}
239+
else
240+
report_untranslatable_char(PG_BIG5,PG_EUC_TW,
241+
(constchar*)big5,len);
242+
243+
big5+=l;
244+
len-=l;
245+
}
246+
else
247+
{
248+
/* ASCII */
249+
if (c1==0)
250+
report_invalid_encoding(PG_BIG5,
251+
(constchar*)big5,len);
252+
*p++=c1;
253+
big5++;
254+
len--;
255+
continue;
256+
}
257+
}
258+
*p='\0';
259+
}
260+
137261
/*
138262
* EUC_TW ---> MIC
139263
*/

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp