|
2 | 2 | #
|
3 | 3 | # Copyright 2001 by PostgreSQL Global Development Group
|
4 | 4 | #
|
5 |
| -# $Id: UCS_to_SJIS.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $ |
| 5 | +# $Id: UCS_to_SJIS.pl,v 1.2 2001/02/23 08:44:33 ishii Exp $ |
6 | 6 | #
|
7 | 7 | # Generate UTF-8 <--> SJIS code conversion tables from
|
8 | 8 | # map files provided by Unicode organization.
|
|
21 | 21 |
|
22 | 22 | # first generate UTF-8 --> SJIS table
|
23 | 23 |
|
24 |
| -$in_file ="SHIFTJIS.TXT"; |
| 24 | +$in_file ="CP932.TXT"; |
| 25 | +$count = 0; |
25 | 26 |
|
26 | 27 | open( FILE,$in_file ) ||die("cannot open$in_file" );
|
27 | 28 |
|
28 | 29 | while( <FILE> ){
|
29 |
| -chop; |
30 |
| -if(/^#/ ){ |
31 |
| -next; |
32 |
| -} |
33 |
| -($c,$u,$rest ) =split; |
34 |
| -$ucs =hex($u); |
35 |
| -$code =hex($c); |
36 |
| -if($code >= 0x80 &&$ucs >= 0x100 ){ |
37 |
| -$utf = &ucs2utf($ucs); |
38 |
| -if($array{$utf }ne"" ){ |
39 |
| -printfSTDERR"Warning: duplicate unicode: %04x\n",$ucs; |
40 |
| -next; |
41 |
| -} |
42 |
| -$count++; |
43 |
| - |
44 |
| -$array{$utf } =$code; |
45 |
| -} |
| 30 | +chop; |
| 31 | +if(/^#/ ){ |
| 32 | +next; |
| 33 | + } |
| 34 | + ($c,$u,$rest ) =split; |
| 35 | +$ucs =hex($u); |
| 36 | +$code =hex($c); |
| 37 | +if($code >= 0x80 &&$ucs >= 0x100 ){ |
| 38 | +$utf = &ucs2utf($ucs); |
| 39 | +if((($code >= 0xed40 ) |
| 40 | +&& ($code <= 0xeefc )) |
| 41 | + || (($code >= 0x8754 ) |
| 42 | + &&($code <= 0x875d )) |
| 43 | + || ($code == 0x878a ) |
| 44 | + || ($code == 0x8782 ) |
| 45 | + || ($code == 0x8784 ) |
| 46 | + || ($code == 0xfa5b ) |
| 47 | + || ($code == 0xfa54 ) |
| 48 | + || (($code >= 0x8790 ) |
| 49 | + && ($code <= 0x8792 )) |
| 50 | + || (($code >= 0x8795 ) |
| 51 | + && ($code <= 0x8797 )) |
| 52 | + || (($code >= 0x879a ) |
| 53 | + && ($code <= 0x879c ))) |
| 54 | + { |
| 55 | +printfSTDERR"Warning: duplicate unicode : UCS=0x%04x SJIS=0x%04x\n",$ucs,$code; |
| 56 | +next; |
| 57 | + } |
| 58 | +$count++; |
| 59 | +$array{$utf } =$code; |
| 60 | + } |
46 | 61 | }
|
| 62 | + |
47 | 63 | close( FILE );
|
48 | 64 |
|
49 | 65 | #
|
|
57 | 73 | for$index (sort {$a<=>$b}keys(%array ) ){
|
58 | 74 | $code =$array{$index };
|
59 | 75 | $count--;
|
60 |
| -if($count == 0 ){ |
| 76 | +if($count == 0 ){ |
61 | 77 | printf FILE" {0x%04x, 0x%04x}\n",$index,$code;
|
62 | 78 | }else {
|
63 | 79 | printf FILE" {0x%04x, 0x%04x},\n",$index,$code;
|
|
68 | 84 | close(FILE);
|
69 | 85 |
|
70 | 86 | #
|
71 |
| -# then generate EUC_JP --> UTF8 table |
| 87 | +# then generate SJIS --> UTF8 table |
72 | 88 | #
|
73 | 89 |
|
74 | 90 | open( FILE,$in_file ) ||die("cannot open$in_file" );
|
75 | 91 |
|
76 | 92 | reset'array';
|
| 93 | +$count = 0; |
77 | 94 |
|
78 | 95 | while( <FILE> ){
|
79 | 96 | chop;
|
|
85 | 102 | $code =hex($c);
|
86 | 103 | if($code >= 0x80 &&$ucs >= 0x100 ){
|
87 | 104 | $utf = &ucs2utf($ucs);
|
88 |
| -if($array{$code }ne"" ){ |
89 |
| -printfSTDERR"Warning: duplicate code: %04x\n",$ucs; |
90 |
| -next; |
91 |
| -} |
92 | 105 | $count++;
|
93 | 106 |
|
94 | 107 | $array{$code } =$utf;
|
|