Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 75c6519

Browse files
committed
Add new encodings EUC_JIS_2004 and SHIFT_JIS_2004,
along with new conversions among EUC_JIS_2004, SHIFT_JIS_2004 and UTF-8. The catalog version has been bumped.
1 parent 7b4726e, commit 75c6519

File tree

41 files changed

+70346
-120
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+70346
-120
lines changed

‎doc/src/sgml/charset.sgml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.81 2007/01/31 20:56:16 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.82 2007/03/25 11:56:01 ishii Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -364,6 +364,14 @@ initdb --locale=sv_SE
364364
<entry>1-3</entry>
365365
<entry></entry>
366366
</row>
367+
<row>
368+
<entry><literal>EUC_JIS_2004</literal></entry>
369+
<entry>Extended UNIX Code-JP, JIS X 0213</entry>
370+
<entry>Japanese</entry>
371+
<entry>Yes</entry>
372+
<entry>1-3</entry>
373+
<entry></entry>
374+
</row>
367375
<row>
368376
<entry><literal>EUC_KR</literal></entry>
369377
<entry>Extended UNIX Code-KR</entry>
@@ -540,6 +548,14 @@ initdb --locale=sv_SE
540548
<entry>1-2</entry>
541549
<entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry>
542550
</row>
551+
<row>
552+
<entry><literal>SHIFT_JIS_2004</literal></entry>
553+
<entry>Shift JIS, JIS X 0213</entry>
554+
<entry>Japanese</entry>
555+
<entry>No</entry>
556+
<entry>1-2</entry>
557+
<entry></entry>
558+
</row>
543559
<row>
544560
<entry><literal>SQL_ASCII</literal></entry>
545561
<entry>unspecified (see text)</entry>

‎doc/src/sgml/func.sgml

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.371 2007/03/25 11:56:01 ishii Exp $ -->
22

33
<chapter id="functions">
44
<title>Functions and Operators</title>
@@ -2394,6 +2394,42 @@
23942394
<entry><literal>UTF8</literal></entry>
23952395
</row>
23962396

2397+
<row>
2398+
<entry><literal>euc_jis_2004_to_utf8</literal></entry>
2399+
<entry><literal>EUC_JIS_2004</literal></entry>
2400+
<entry><literal>UTF8</literal></entry>
2401+
</row>
2402+
2403+
<row>
2404+
<entry><literal>utf8_to_euc_jis_2004</literal></entry>
2405+
<entry><literal>UTF8</literal></entry>
2406+
<entry><literal>EUC_JIS_2004</literal></entry>
2407+
</row>
2408+
2409+
<row>
2410+
<entry><literal>shift_jis_2004_to_utf8</literal></entry>
2411+
<entry><literal>SHIFT_JIS_2004</literal></entry>
2412+
<entry><literal>UTF8</literal></entry>
2413+
</row>
2414+
2415+
<row>
2416+
<entry><literal>utf8_to_shift_jis_2004</literal></entry>
2417+
<entry><literal>UTF8</literal></entry>
2418+
<entry><literal>SHIFT_JIS_2004</literal></entry>
2419+
</row>
2420+
2421+
<row>
2422+
<entry><literal>euc_jis_2004_to_shift_jis_2004</literal></entry>
2423+
<entry><literal>EUC_JIS_2004</literal></entry>
2424+
<entry><literal>SHIFT_JIS_2004</literal></entry>
2425+
</row>
2426+
2427+
<row>
2428+
<entry><literal>shift_jis_2004_to_euc_jis_2004</literal></entry>
2429+
<entry><literal>SHIFT_JIS_2004</literal></entry>
2430+
<entry><literal>EUC_JIS_2004</literal></entry>
2431+
</row>
2432+
23972433
</tbody>
23982434
</tgroup>
23992435
</table>
‎src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
#! /usr/bin/perl
2+
#
3+
# Copyright (c) 2007, PostgreSQL Global Development Group
4+
#
5+
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl,v 1.1 2007/03/25 11:56:02 ishii Exp $
6+
#
7+
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
8+
# "euc-jis-2004-std.txt" (http://x0213.org)
9+
10+
require"ucs2utf.pl";
11+
12+
$TEST = 1;
13+
14+
# first generate UTF-8 --> EUC_JIS_2004 table
15+
16+
$in_file ="euc-jis-2004-std.txt";
17+
18+
open( FILE,$in_file ) ||die("cannot open$in_file" );
19+
20+
reset'array';
21+
reset'array1';
22+
reset'comment';
23+
reset'comment1';
24+
25+
while($line = <FILE> ){
26+
if ($line =~/^0x(.*)[\t]*U\+(.*)\+(.*)[\t]*#(.*)$/) {
27+
$c =$1;
28+
$u1 =$2;
29+
$u2 =$3;
30+
$rest ="U+" .$u1 ."+" .$u2 .$4;
31+
$code =hex($c);
32+
$ucs =hex($u1);
33+
$utf1 = &ucs2utf($ucs);
34+
$ucs =hex($u2);
35+
$utf2 = &ucs2utf($ucs);
36+
$str =sprintf"%08x%08x",$utf1,$utf2;
37+
$array1{$str } =$code;
38+
$comment1{$str } =$rest;
39+
$count1++;
40+
next;
41+
}elsif ($line =~/^0x(.*)[\t]*U\+(.*)[\t]*#(.*)$/) {
42+
$c =$1;
43+
$u =$2;
44+
$rest ="U+" .$u .$3;
45+
}else {
46+
next;
47+
}
48+
49+
$ucs =hex($u);
50+
$code =hex($c);
51+
$utf = &ucs2utf($ucs);
52+
if($array{$utf }ne"" ){
53+
printfSTDERR"Warning: duplicate UTF8: %04x\n",$ucs;
54+
next;
55+
}
56+
$count++;
57+
58+
$array{$utf } =$code;
59+
$comment{$code } =$rest;
60+
}
61+
close( FILE );
62+
63+
$file ="utf8_to_euc_jis_2004.map";
64+
open( FILE,">$file" ) ||die("cannot open$file" );
65+
print FILE"/*\n";
66+
print FILE" * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
67+
print FILE" */\n";
68+
print FILE"static pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
69+
70+
for$index (sort {$a<=>$b}keys(%array ) ){
71+
$code =$array{$index };
72+
$count--;
73+
if($count == 0 ){
74+
printf FILE" {0x%08x, 0x%06x}/*%s */\n",$index,$code,$comment{$code };
75+
}else {
76+
printf FILE" {0x%08x, 0x%06x},/*%s */\n",$index,$code,$comment{$code };
77+
}
78+
}
79+
80+
print FILE"};\n";
81+
close(FILE);
82+
83+
if ($TEST == 1) {
84+
$file1 ="utf8.data";
85+
$file2 ="euc_jis_2004.data";
86+
open( FILE1,">$file1" ) ||die("cannot open$file1" );
87+
open( FILE2,">$file2" ) ||die("cannot open$file2" );
88+
89+
for$index (sort {$a<=>$b}keys(%array ) ){
90+
$code =$array{$index };
91+
if ($code > 0x00 &&$code != 0x09 &&$code != 0x0a &&$code != 0x0d &&
92+
$code != 0x5c &&
93+
($code < 0x80 ||
94+
($code >= 0x8ea1 &&$code <= 0x8efe) ||
95+
($code >= 0x8fa1a1 &&$code <= 0x8ffefe) ||
96+
($code >= 0xa1a1 &&$code <= 0x8fefe))) {
97+
for ($i = 3;$i >= 0;$i--) {
98+
$s =$i * 8;
99+
$mask = 0xff <<$s;
100+
print FILE1pack("C", ($index &$mask) >>$s)if$index &$mask;
101+
print FILE2pack("C", ($code &$mask) >>$s)if$code &$mask;
102+
}
103+
print FILE1"\n";
104+
print FILE2"\n";
105+
}
106+
}
107+
}
108+
109+
$file ="utf8_to_euc_jis_2004_combined.map";
110+
open( FILE,">$file" ) ||die("cannot open$file" );
111+
print FILE"/*\n";
112+
print FILE" * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
113+
print FILE" */\n";
114+
print FILE"static pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
115+
116+
for$index (sort {$acmp$b}keys(%array1 ) ){
117+
$code =$array1{$index };
118+
$count1--;
119+
if($count1 == 0 ){
120+
printf FILE" {0x%s, 0x%s, 0x%06x}/*%s */\n",substr($index, 0, 8),substr($index, 8, 8),$code,$comment1{$index };
121+
}else {
122+
printf FILE" {0x%s, 0x%s, 0x%06x},/*%s */\n",substr($index, 0, 8),substr($index, 8, 8),$code,$comment1{$index };
123+
}
124+
}
125+
126+
print FILE"};\n";
127+
close(FILE);
128+
129+
if ($TEST == 1) {
130+
for$index (sort {$acmp$b}keys(%array1 ) ){
131+
$code =$array1{$index };
132+
if ($code > 0x00 &&$code != 0x09 &&$code != 0x0a &&$code != 0x0d &&
133+
$code != 0x5c &&
134+
($code < 0x80 ||
135+
($code >= 0x8ea1 &&$code <= 0x8efe) ||
136+
($code >= 0x8fa1a1 &&$code <= 0x8ffefe) ||
137+
($code >= 0xa1a1 &&$code <= 0x8fefe))) {
138+
139+
$v1 =hex(substr($index, 0, 8));
140+
$v2 =hex(substr($index, 8, 8));
141+
142+
for ($i = 3;$i >= 0;$i--) {
143+
$s =$i * 8;
144+
$mask = 0xff <<$s;
145+
print FILE1pack("C", ($v1 &$mask) >>$s)if$v1 &$mask;
146+
print FILE2pack("C", ($code &$mask) >>$s)if$code &$mask;
147+
}
148+
for ($i = 3;$i >= 0;$i--) {
149+
$s =$i * 8;
150+
$mask = 0xff <<$s;
151+
print FILE1pack("C", ($v2 &$mask) >>$s)if$v2 &$mask;
152+
}
153+
print FILE1"\n";
154+
print FILE2"\n";
155+
}
156+
}
157+
close(FILE1);
158+
close(FILE2);
159+
}
160+
161+
# then generate EUC_JIS_2004 --> UTF-8 table
162+
163+
$in_file ="euc-jis-2004-std.txt";
164+
165+
open( FILE,$in_file ) ||die("cannot open$in_file" );
166+
167+
reset'array';
168+
reset'array1';
169+
reset'comment';
170+
reset'comment1';
171+
172+
while($line = <FILE> ){
173+
if ($line =~/^0x(.*)[\t]*U\+(.*)\+(.*)[\t]*#(.*)$/) {
174+
$c =$1;
175+
$u1 =$2;
176+
$u2 =$3;
177+
$rest ="U+" .$u1 ."+" .$u2 .$4;
178+
$code =hex($c);
179+
$ucs =hex($u1);
180+
$utf1 = &ucs2utf($ucs);
181+
$ucs =hex($u2);
182+
$utf2 = &ucs2utf($ucs);
183+
$str =sprintf"%08x%08x",$utf1,$utf2;
184+
$array1{$code } =$str;
185+
$comment1{$code } =$rest;
186+
$count1++;
187+
next;
188+
}elsif ($line =~/^0x(.*)[\t]*U\+(.*)[\t]*#(.*)$/) {
189+
$c =$1;
190+
$u =$2;
191+
$rest ="U+" .$u .$3;
192+
}else {
193+
next;
194+
}
195+
196+
$ucs =hex($u);
197+
$code =hex($c);
198+
$utf = &ucs2utf($ucs);
199+
if($array{$code }ne"" ){
200+
printfSTDERR"Warning: duplicate UTF8: %04x\n",$ucs;
201+
next;
202+
}
203+
$count++;
204+
205+
$array{$code } =$utf;
206+
$comment{$utf } =$rest;
207+
}
208+
close( FILE );
209+
210+
$file ="euc_jis_2004_to_utf8.map";
211+
open( FILE,">$file" ) ||die("cannot open$file" );
212+
print FILE"/*\n";
213+
print FILE" * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
214+
print FILE" */\n";
215+
print FILE"static pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
216+
217+
for$index (sort {$a<=>$b}keys(%array ) ){
218+
$code =$array{$index };
219+
$count--;
220+
if($count == 0 ){
221+
printf FILE" {0x%06x, 0x%08x}/*%s */\n",$index,$code,$comment{$code };
222+
}else {
223+
printf FILE" {0x%06x, 0x%08x},/*%s */\n",$index,$code,$comment{$code };
224+
}
225+
}
226+
227+
print FILE"};\n";
228+
close(FILE);
229+
230+
$file ="euc_jis_2004_to_utf8_combined.map";
231+
open( FILE,">$file" ) ||die("cannot open$file" );
232+
print FILE"/*\n";
233+
print FILE" * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
234+
print FILE" */\n";
235+
print FILE"static pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
236+
237+
for$index (sort {$a<=>$b}keys(%array1 ) ){
238+
$code =$array1{$index };
239+
$count1--;
240+
if($count1 == 0 ){
241+
printf FILE" {0x%06x, 0x%s, 0x%s}/*%s */\n",$index,substr($code, 0, 8),substr($code, 8, 8),$comment1{$index };
242+
}else {
243+
printf FILE" {0x%06x, 0x%s, 0x%s},/*%s */\n",$index,substr($code, 0, 8),substr($code, 8, 8),$comment1{$index };
244+
}
245+
}
246+
247+
print FILE"};\n";
248+
close(FILE);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp