Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitaa64aa5

Browse files
Support KS X 1026-1
1 parenta6a221a commitaa64aa5

File tree

7 files changed

+500
-29
lines changed

7 files changed

+500
-29
lines changed

‎.github/workflows/rust.yml‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ env:
1111
CARGO_TERM_COLOR:always
1212
RUST_BACKTRACE:1
1313
RUSTFLAGS:-D warnings
14-
RUSTDOCFLAGS:-D warnings --cfg docsrs
14+
RUSTDOCFLAGS:-D warnings
1515

1616
jobs:
1717
build:
@@ -43,6 +43,8 @@ jobs:
4343
run:cd $(find target/package/ -maxdepth 1 -mindepth 1 -type d) && cargo test --no-default-features
4444
-name:Build docs
4545
if:matrix.rust == 'nightly'
46+
env:
47+
RUSTDOCFLAGS:-D warnings --cfg docsrs
4648
run:cargo doc --all-features --verbose
4749
-name:Check formatting
4850
if:matrix.rust == 'stable'

‎Cargo.toml‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,8 @@ features = ["alloc"]
4040

4141
[features]
4242
default = ["std"]
43+
ks_x_1026-1 = []
4344
std = []
45+
46+
[package.metadata.docs.rs]
47+
rustc-args = ["--cfg","feature=\"ks_x_1026-1\""]

‎README.md‎

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ fn main() {
2626

2727
##crates.io
2828

29-
You can use this package in your project by adding the following
30-
to your`Cargo.toml`:
29+
You can use this package in your project by adding the following to your
30+
`Cargo.toml`:
3131

3232
```toml
3333
[dependencies]
@@ -36,4 +36,15 @@ unicode-normalization = "0.1.23"
3636

3737
##`no_std` +`alloc` support
3838

39-
This crate is completely`no_std` +`alloc` compatible. This can be enabled by disabling the`std` feature, i.e. specifying`default-features = false` for this crate on your`Cargo.toml`.
39+
This crate is completely`no_std` +`alloc` compatible. This can be enabled by
40+
disabling the`std` feature, i.e. specifying`default-features = false` for this
41+
crate on your`Cargo.toml`.
42+
43+
##KS X 1026-1
44+
45+
Korean Standard KS X 1026-1 ([Korean](https://standard.go.kr/KSCI/standardIntro/getStandardSearchView.do?ksNo=KSX1026-1),
46+
[English](http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3422.pdf)) is an ROK government
47+
standard that corrects some defects and makes some changes to the Unicode NFC,
48+
NFKC, and NFKD normalization forms for certain Korean characters. The
49+
`ks_x_1026-1` crate feature (disabled by default) adds methods to support these
50+
alternate normalizations.

‎src/ks_x_1026_1.rs‎

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
//! <http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3422.pdf> Annex B
2+
3+
use core::{
4+
convert::{TryFrom,TryInto},
5+
iter::FusedIterator,
6+
};
7+
8+
use tinyvec::ArrayVec;
9+
10+
// § B.1.1
11+
12+
usecrate::normalize::hangul_constants::{
13+
L_BASE,L_LAST,N_COUNT,S_BASE,S_COUNT,T_BASE,T_COUNT,T_LAST,V_BASE,V_LAST,
14+
};
15+
16+
// § B.1.2
17+
18+
/// Returns `true` if `t` lies in one of the two old-jongseong ranges tested
/// here: U+11C3..=U+11FF or U+D7CB..=U+D7FB.
fn is_old_jongseong(t: char) -> bool {
    matches!(t, '\u{11C3}'..='\u{11FF}' | '\u{D7CB}'..='\u{D7FB}')
}
21+
22+
/// Iterator that decomposes each modern Hangul LV syllable that is immediately
/// followed by an old Hangul T jamo into its L and V jamo, so that the three
/// characters form an L V T sequence, as specified in KS X 1026-1 annex B.1.5.
#[derive(Clone, Debug)]
pub struct RecomposeHangul<I> {
    /// Medial vowel of a decomposed LV syllable, waiting to be yielded
    v: Option<char>,
    /// Character yielded by inner iterator in last call to its `next()`
    last: Option<char>,
    inner: I,
}
32+
33+
impl<I:Iterator<Item =char>>IteratorforRecomposeHangul<I>{
34+
typeItem =char;
35+
36+
fnnext(&mutself) ->Option<Self::Item>{
37+
ifletSome(v) =self.v{
38+
// If an LV syllable was decomposed in the last call to `next`,
39+
// yield its medial vowel.
40+
self.v =None;
41+
Some(v)
42+
}else{
43+
let prev =self.last;
44+
self.last =self.inner.next();
45+
46+
iflet(Some(prev),Some(next)) =(prev,self.last){
47+
let s_index = u32::from(prev).wrapping_sub(S_BASE);
48+
if s_index <S_COUNT && s_index %T_COUNT ==0 &&is_old_jongseong(next){
49+
// We have an LV syllable followed by an old jongseong, decompose into L V
50+
let l:char =(L_BASE + s_index /N_COUNT).try_into().unwrap();
51+
self.v =Some((V_BASE +(s_index %N_COUNT) /T_COUNT).try_into().unwrap());
52+
returnSome(l);
53+
}
54+
}
55+
56+
prev
57+
}
58+
}
59+
60+
#[inline]
61+
fnsize_hint(&self) ->(usize,Option<usize>){
62+
let(inner_lo, inner_hi) =self.inner.size_hint();
63+
let add_factor:usize =self.v.map_or(0, |_|1) +self.last.map_or(0, |_|1);
64+
(
65+
inner_lo.saturating_add(add_factor),
66+
inner_hi
67+
.and_then(|h| h.checked_mul(2))
68+
.and_then(|h| h.checked_add(add_factor)),
69+
)
70+
}
71+
}
72+
73+
impl<I:Iterator<Item =char> +FusedIterator>FusedIteratorforRecomposeHangul<I>{}
74+
75+
impl<I:Iterator<Item =char>>RecomposeHangul<I>{
76+
#[inline]
77+
pub(crate)fnnew(mutiter:I) ->Self{
78+
RecomposeHangul{
79+
v:None,
80+
last: iter.next(),
81+
inner: iter,
82+
}
83+
}
84+
}
85+
86+
// § B.2.1
87+
88+
/// Replacement jamo for the Hangul compatibility letters U+3131..=U+318E,
/// indexed by the code point's offset from U+3131.
static CP_JAMO: [char; 94] = [
    '\u{1100}', '\u{1101}', '\u{11AA}', '\u{1102}', '\u{11AC}', '\u{11AD}', '\u{1103}', '\u{1104}',
    '\u{1105}', '\u{11B0}', '\u{11B1}', '\u{11B2}', '\u{11B3}', '\u{11B4}', '\u{11B5}', '\u{111A}',
    '\u{1106}', '\u{1107}', '\u{1108}', '\u{1121}', '\u{1109}', '\u{110A}', '\u{110B}', '\u{110C}',
    '\u{110D}', '\u{110E}', '\u{110F}', '\u{1110}', '\u{1111}', '\u{1112}', '\u{1161}', '\u{1162}',
    '\u{1163}', '\u{1164}', '\u{1165}', '\u{1166}', '\u{1167}', '\u{1168}', '\u{1169}', '\u{116A}',
    '\u{116B}', '\u{116C}', '\u{116D}', '\u{116E}', '\u{116F}', '\u{1170}', '\u{1171}', '\u{1172}',
    '\u{1173}', '\u{1174}', '\u{1175}', '\u{1160}', '\u{1114}', '\u{1115}', '\u{11C7}', '\u{11C8}',
    '\u{11CC}', '\u{11CE}', '\u{11D3}', '\u{11D7}', '\u{11D9}', '\u{111C}', '\u{11DD}', '\u{11DF}',
    '\u{111D}', '\u{111E}', '\u{1120}', '\u{1122}', '\u{1123}', '\u{1127}', '\u{1129}', '\u{112B}',
    '\u{112C}', '\u{112D}', '\u{112E}', '\u{112F}', '\u{1132}', '\u{1136}', '\u{1140}', '\u{1147}',
    '\u{114C}', '\u{11F1}', '\u{11F2}', '\u{1157}', '\u{1158}', '\u{1159}', '\u{1184}', '\u{1185}',
    '\u{1188}', '\u{1191}', '\u{1192}', '\u{1194}', '\u{119E}', '\u{11A1}',
];
102+
103+
// § B.2.2
104+
105+
/// Replacement characters for the halfwidth Hangul range U+FFA0..=U+FFDF,
/// indexed by the code point's offset from U+FFA0.
///
/// Some entries in the U+FFxx range (e.g. U+FFBF) are mapped to themselves
/// rather than to a jamo.
static HW_JAMO: [char; 64] = [
    '\u{1160}', '\u{1100}', '\u{1101}', '\u{11AA}', '\u{1102}', '\u{11AC}', '\u{11AD}', '\u{1103}',
    '\u{1104}', '\u{1105}', '\u{11B0}', '\u{11B1}', '\u{11B2}', '\u{11B3}', '\u{11B4}', '\u{11B5}',
    '\u{111A}', '\u{1106}', '\u{1107}', '\u{1108}', '\u{1121}', '\u{1109}', '\u{110A}', '\u{110B}',
    '\u{110C}', '\u{110D}', '\u{110E}', '\u{110F}', '\u{1110}', '\u{1111}', '\u{1112}', '\u{FFBF}',
    '\u{FFC0}', '\u{FFC1}', '\u{1161}', '\u{1162}', '\u{1163}', '\u{1164}', '\u{1165}', '\u{1166}',
    '\u{FFC8}', '\u{FFC9}', '\u{1167}', '\u{1168}', '\u{1169}', '\u{116A}', '\u{116B}', '\u{116C}',
    '\u{FFD0}', '\u{FFD1}', '\u{116D}', '\u{116E}', '\u{116F}', '\u{1170}', '\u{1171}', '\u{1172}',
    '\u{FFD8}', '\u{FFD9}', '\u{1173}', '\u{1174}', '\u{1175}', '\u{FFDD}', '\u{FFDE}', '\u{FFDF}',
];
115+
116+
// § B.2.3
117+
118+
/// Choseong for the parenthesized (U+3200..=U+320D) and circled
/// (U+3260..=U+326D) Hangul letters, indexed by the code point's offset from
/// the start of its range.
static PC_JAMO: [char; 14] = [
    '\u{1100}', '\u{1102}', '\u{1103}', '\u{1105}', '\u{1106}', '\u{1107}', '\u{1109}', '\u{110B}',
    '\u{110C}', '\u{110E}', '\u{110F}', '\u{1110}', '\u{1111}', '\u{1112}',
];
122+
123+
// § B.2.4
124+
125+
/// Iterator that decomposes compatibility characters containing Hangul jamo
126+
/// in a manner that avoids introducing new nonstandard jamo sequences,
127+
/// as specified in KS X 1026-1 annex B.2.4.
128+
#[derive(Clone,Debug)]
129+
pubstructNormalizeJamoKdkc<I>{
130+
inner:I,
131+
// Buffer for when a character normalizes into multiple.
132+
// Characters are pushed to and popped from the end.
133+
// Length 3 is sufficient, as the longest possible expansion
134+
// is for a parenthesized choseong like U+3200,
135+
// which expands into ['(', <choseong>, '\u{1160}', ')'] (length 4).
136+
// (There are no parenthesized jungseong or jongseong.)
137+
buf:ArrayVec<[char;3]>,
138+
}
139+
140+
impl<I:Iterator<Item =char>>IteratorforNormalizeJamoKdkc<I>{
141+
typeItem =char;
142+
143+
fnnext(&mutself) ->Option<Self::Item>{
144+
ifletSome(c) =self.buf.pop(){
145+
// Empty buffer before yielding from underlying iterator.
146+
Some(c)
147+
}else{
148+
let ch =self.inner.next()?;
149+
// Whether ch is a parenthesized Hangul letter
150+
letmut pf =false;
151+
152+
let uch:u32 = ch.into();
153+
let base_jamo:char =match uch{
154+
// Hangul compatibility letter
155+
0x3131..=0x318E =>CP_JAMO[usize::try_from(uch -0x3131).unwrap()],
156+
157+
// Parenthesized Hangul letter
158+
0x3200..=0x320D =>{
159+
pf =true;
160+
self.buf.push(')');
161+
PC_JAMO[usize::try_from(uch -0x3200).unwrap()]
162+
}
163+
164+
// Circled Hangul letter
165+
0x3260..=0x326D =>PC_JAMO[usize::try_from(uch -0x3260).unwrap()],
166+
167+
// Halfwidth Hangul letter
168+
0xFFA0..=0xFFDF =>HW_JAMO[usize::try_from(uch -0xFFA0).unwrap()],
169+
170+
_ =>returnSome(ch),
171+
};
172+
173+
// Insert fillers
174+
let first_ret:char =match base_jamo.into(){
175+
// `base_jamo` is choseong, yield a jungseong filler after
176+
L_BASE..=L_LAST =>{
177+
self.buf.push('\u{1160}');
178+
base_jamo
179+
}
180+
181+
// `base_jamo` is jungseong, yield a choseong filler before
182+
V_BASE..=V_LAST =>{
183+
self.buf.push(base_jamo);
184+
'\u{115F}'
185+
}
186+
187+
// `base_jamo` is jongseong, yield a choseong and a jungseong filler before
188+
T_BASE..=T_LAST =>{
189+
self.buf.push(base_jamo);
190+
self.buf.push('\u{1160}');
191+
'\u{115F}'
192+
}
193+
194+
_ =>unreachable!("`base_jamo` shluld be a jamo, but is not"),
195+
};
196+
197+
if pf{
198+
// Parenthesized Hangul letter, yield open paren before
199+
self.buf.push(first_ret);
200+
Some('(')
201+
}else{
202+
Some(first_ret)
203+
}
204+
}
205+
}
206+
207+
#[inline]
208+
fnsize_hint(&self) ->(usize,Option<usize>){
209+
let(inner_lo, inner_hi) =self.inner.size_hint();
210+
let add_factor:usize =self.buf.len();
211+
(
212+
inner_lo.saturating_add(add_factor),
213+
inner_hi
214+
.and_then(|h| h.checked_mul(4))// Why 4? See comment on `buf` field
215+
.and_then(|h| h.checked_add(add_factor)),
216+
)
217+
}
218+
}
219+
220+
impl<I:Iterator<Item =char> +FusedIterator>FusedIteratorforNormalizeJamoKdkc<I>{}
221+
222+
impl<I:Iterator<Item =char>>NormalizeJamoKdkc<I>{
223+
#[inline]
224+
pub(crate)fnnew(iter:I) ->Self{
225+
NormalizeJamoKdkc{
226+
inner: iter,
227+
buf:ArrayVec::new(),
228+
}
229+
}
230+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp