Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 887d390

Browse files
Add API to transform into KS X 1026-1 standard Korean syllables
Gated behind the `ks_x_1026-1` feature.
1 parent c24ac7f commit 887d390

File tree

3 files changed

+262
-60
lines changed

3 files changed

+262
-60
lines changed

‎src/lib.rs‎

Lines changed: 40 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,10 @@ pub use crate::quick_check::{
7373
};
7474
pubusecrate::recompose::Recompositions;
7575
pubusecrate::replace::Replacements;
76-
pubusecrate::standardize_korean_syllables::StandardKoreanSyllables;
76+
pubusecrate::standardize_korean_syllables::StandardizeKoreanSyllables;
77+
#[cfg(feature ="ks_x_1026-1")]
78+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
79+
pubusecrate::standardize_korean_syllables::StandardizeKoreanSyllablesKsX1026_1;
7780
pubusecrate::stream_safe::StreamSafe;
7881
pubusecrate::tables::UNICODE_VERSION;
7982
use core::{option, str::Chars};
@@ -148,9 +151,9 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
148151
/// inserted according to the Stream-Safe Text Process ([UAX15-D4](https://unicode.org/reports/tr15/#UAX15-D4))
149152
fnstream_safe(self) ->StreamSafe<I>;
150153

151-
/// An iterator over the string with Hangul choseong and jugseong filler characters inserted
154+
/// An iterator over the string with Hangul choseong and jungseong filler characters inserted
152155
/// to ensure that all Korean syllable blocks are in standard form according to [UAX29](https://www.unicode.org/reports/tr29/#Transforming_Into_SKS).
153-
fnstandard_korean_syllables(self) ->StandardKoreanSyllables<I>;
156+
fnstandard_korean_syllables(self) ->StandardizeKoreanSyllables<I>;
154157

155158
/// An iterator over the string in the variant of Unicode Normalization Form KD
156159
/// defined by Korean Standard X 1026-1. This normalization differs from that defined by Unicode
@@ -183,6 +186,12 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
183186
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
184187

185188
fnnfkc_ks_x_1026_1(self) ->RecomposeHangul<Recompositions<NormalizeJamoKdkc<I>>>;
189+
190+
/// An iterator over the string with Hangul choseong and jungseong filler characters inserted
191+
/// to ensure that all Korean syllable blocks are in standard form according to KS X 1026-1 § 7.8.
192+
#[cfg(feature ="ks_x_1026-1")]
193+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
194+
fnstandard_korean_syllables_ks_x_1026_1(self) ->StandardizeKoreanSyllablesKsX1026_1<I>;
186195
}
187196

188197
impl<'a>UnicodeNormalization<Chars<'a>>for&'astr{
@@ -217,8 +226,8 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
217226
}
218227

219228
#[inline]
220-
fnstandard_korean_syllables(self) ->StandardKoreanSyllables<Chars<'a>>{
221-
StandardKoreanSyllables::new(self.chars())
229+
fnstandard_korean_syllables(self) ->StandardizeKoreanSyllables<Chars<'a>>{
230+
StandardizeKoreanSyllables::new(self.chars())
222231
}
223232

224233
#[cfg(feature ="ks_x_1026-1")]
@@ -243,6 +252,14 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
243252
self.chars(),
244253
)))
245254
}
255+
256+
#[cfg(feature ="ks_x_1026-1")]
257+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
258+
fnstandard_korean_syllables_ks_x_1026_1(
259+
self,
260+
) ->StandardizeKoreanSyllablesKsX1026_1<Chars<'a>>{
261+
StandardizeKoreanSyllablesKsX1026_1::new(self.chars())
262+
}
246263
}
247264

248265
implUnicodeNormalization<option::IntoIter<char>>forchar{
@@ -277,8 +294,8 @@ impl UnicodeNormalization<option::IntoIter<char>> for char {
277294
}
278295

279296
#[inline]
280-
fnstandard_korean_syllables(self) ->StandardKoreanSyllables<option::IntoIter<char>>{
281-
StandardKoreanSyllables::new(Some(self).into_iter())
297+
fnstandard_korean_syllables(self) ->StandardizeKoreanSyllables<option::IntoIter<char>>{
298+
StandardizeKoreanSyllables::new(Some(self).into_iter())
282299
}
283300

284301
#[cfg(feature ="ks_x_1026-1")]
@@ -305,6 +322,14 @@ impl UnicodeNormalization<option::IntoIter<char>> for char {
305322
Some(self).into_iter(),
306323
)))
307324
}
325+
326+
#[cfg(feature ="ks_x_1026-1")]
327+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
328+
fnstandard_korean_syllables_ks_x_1026_1(
329+
self,
330+
) ->StandardizeKoreanSyllablesKsX1026_1<option::IntoIter<char>>{
331+
StandardizeKoreanSyllablesKsX1026_1::new(Some(self).into_iter())
332+
}
308333
}
309334

310335
impl<I:Iterator<Item =char>>UnicodeNormalization<I>forI{
@@ -339,8 +364,8 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
339364
}
340365

341366
#[inline]
342-
fnstandard_korean_syllables(self) ->StandardKoreanSyllables<I>{
343-
StandardKoreanSyllables::new(self)
367+
fnstandard_korean_syllables(self) ->StandardizeKoreanSyllables<I>{
368+
StandardizeKoreanSyllables::new(self)
344369
}
345370

346371
#[cfg(feature ="ks_x_1026-1")]
@@ -363,4 +388,10 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
363388
fnnfkc_ks_x_1026_1(self) ->RecomposeHangul<Recompositions<NormalizeJamoKdkc<I>>>{
364389
RecomposeHangul::new(recompose::new_compatible(NormalizeJamoKdkc::new(self)))
365390
}
391+
392+
#[cfg(feature ="ks_x_1026-1")]
393+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
394+
fnstandard_korean_syllables_ks_x_1026_1(self) ->StandardizeKoreanSyllablesKsX1026_1<I>{
395+
StandardizeKoreanSyllablesKsX1026_1::new(self)
396+
}
366397
}

‎src/standardize_korean_syllables.rs‎

Lines changed: 172 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
use core::iter::FusedIterator;
1+
use core::{iter::FusedIterator, marker::PhantomData};
22

33
use tinyvec::ArrayVec;
44

@@ -42,17 +42,27 @@ impl JamoKind {
4242
}
4343
}
4444

45-
/// Iterator over a string's characters, with '\u{115F}' and '\u{1160}' inserted
46-
/// where needed to ensure all Korean syllable blocks are in standard form
47-
/// by [UAX29 rules](https://www.unicode.org/reports/tr29/#Standard_Korean_Syllables).
45+
traitNormalizeKoreanSyllables{
46+
fninsert_fillers(
47+
next_c:Option<char>,
48+
prev_end_jamo_kind:Option<JamoKind>,
49+
next_start_jamo_kind:Option<JamoKind>,
50+
buf:&mutArrayVec<[Option<char>;3]>,
51+
) ->Option<char>;
52+
}
53+
54+
// Used to abstract over UAX29 and KS X 1026-1 rules
4855
#[derive(Clone,Debug)]
49-
pubstructStandardKoreanSyllables<I>{
56+
structStandardizeKoreanSyllablesInner<I,N>{
5057
prev_end_jamo_kind:Option<JamoKind>,
5158
buf:ArrayVec<[Option<char>;3]>,
5259
inner:I,
60+
normalizer:PhantomData<N>,
5361
}
5462

55-
impl<I:Iterator<Item =char>>IteratorforStandardKoreanSyllables<I>{
63+
impl<I:Iterator<Item =char>,N:NormalizeKoreanSyllables>Iterator
64+
forStandardizeKoreanSyllablesInner<I,N>
65+
{
5666
typeItem =char;
5767

5868
fnnext(&mutself) ->Option<Self::Item>{
@@ -65,7 +75,7 @@ impl<I: Iterator<Item = char>> Iterator for StandardKoreanSyllables<I> {
6575
next_c.map_or((None,None),JamoKind::of);
6676
self.prev_end_jamo_kind = next_end_jamo_kind;
6777

68-
insert_fillers(
78+
N::insert_fillers(
6979
next_c,
7080
prev_end_jamo_kind,
7181
next_start_jamo_kind,
@@ -87,50 +97,169 @@ impl<I: Iterator<Item = char>> Iterator for StandardKoreanSyllables<I> {
8797
}
8898
}
8999

90-
impl<I:Iterator<Item =char> +FusedIterator>FusedIteratorforStandardKoreanSyllables<I>{}
100+
impl<I:Iterator<Item =char> +FusedIterator,N:NormalizeKoreanSyllables>FusedIterator
101+
forStandardizeKoreanSyllablesInner<I,N>
102+
{
103+
}
91104

92-
#[inline]
93-
fninsert_fillers(
94-
next_c:Option<char>,
95-
prev_end_jamo_kind:Option<JamoKind>,
96-
next_start_jamo_kind:Option<JamoKind>,
97-
buf:&mutArrayVec<[Option<char>;3]>,
98-
) ->Option<char>{
99-
match(prev_end_jamo_kind, next_start_jamo_kind){
100-
// Insert choseong filler before V not preceded by L or V
101-
(None,Some(JamoKind::V)) |(Some(JamoKind::T),Some(JamoKind::V)) =>{
102-
buf.push(next_c);
103-
Some('\u{115F}')
104-
}
105-
// Insert choseong and jungseong fillers before T preceded non-jamo
106-
(None,Some(JamoKind::T)) =>{
107-
buf.push(next_c);
108-
buf.push(Some('\u{1160}'));
109-
Some('\u{115F}')
110-
}
111-
// Insert V filler between L and non-jamo
112-
(Some(JamoKind::L),None) =>{
113-
buf.push(next_c);
114-
Some('\u{1160}')
105+
impl<I,N>StandardizeKoreanSyllablesInner<I,N>{
106+
#[inline]
107+
fnnew(iter:I) ->Self{
108+
Self{
109+
prev_end_jamo_kind:None,
110+
buf:ArrayVec::new(),
111+
inner: iter,
112+
normalizer:PhantomData,
115113
}
116-
// For L followed by T, insert V filler, L filler, then another V filler
117-
(Some(JamoKind::L),Some(JamoKind::T)) =>{
118-
buf.push(next_c);
119-
buf.push(Some('\u{1160}'));
120-
buf.push(Some('\u{115F}'));
121-
Some('\u{1160}')
114+
}
115+
}
116+
117+
// UAX 29 normalization
118+
119+
#[derive(Clone,Debug)]
120+
structUax29;
121+
122+
implNormalizeKoreanSyllablesforUax29{
123+
#[inline]
124+
fninsert_fillers(
125+
next_c:Option<char>,
126+
prev_end_jamo_kind:Option<JamoKind>,
127+
next_start_jamo_kind:Option<JamoKind>,
128+
buf:&mutArrayVec<[Option<char>;3]>,
129+
) ->Option<char>{
130+
match(prev_end_jamo_kind, next_start_jamo_kind){
131+
// Insert choseong filler before V not preceded by L or V
132+
(None,Some(JamoKind::V)) |(Some(JamoKind::T),Some(JamoKind::V)) =>{
133+
buf.push(next_c);
134+
Some('\u{115F}')
135+
}
136+
// Insert choseong and jungseong fillers before T preceded by non-jamo
137+
(None,Some(JamoKind::T)) =>{
138+
buf.push(next_c);
139+
buf.push(Some('\u{1160}'));
140+
Some('\u{115F}')
141+
}
142+
// Insert V filler between L and non-jamo
143+
(Some(JamoKind::L),None) =>{
144+
buf.push(next_c);
145+
Some('\u{1160}')
146+
}
147+
// For L followed by T, insert V filler, L filler, then another V filler
148+
(Some(JamoKind::L),Some(JamoKind::T)) =>{
149+
buf.push(next_c);
150+
buf.push(Some('\u{1160}'));
151+
buf.push(Some('\u{115F}'));
152+
Some('\u{1160}')
153+
}
154+
_ => next_c,
122155
}
123-
_ => next_c,
124156
}
125157
}
126158

127-
impl<I>StandardKoreanSyllables<I>{
159+
/// Iterator over a string's characters, with U+115F and U+1160 inserted
160+
/// where needed to ensure all Korean syllable blocks are in standard form
161+
/// by [UAX29 rules](https://www.unicode.org/reports/tr29/#Standard_Korean_Syllables).
162+
#[derive(Clone,Debug)]
163+
pubstructStandardizeKoreanSyllables<I>(StandardizeKoreanSyllablesInner<I,Uax29>);
164+
165+
impl<I>StandardizeKoreanSyllables<I>{
128166
#[inline]
129167
pub(crate)fnnew(iter:I) ->Self{
130-
Self{
131-
prev_end_jamo_kind:None,
132-
buf:ArrayVec::new(),
133-
inner: iter,
168+
Self(StandardizeKoreanSyllablesInner::new(iter))
169+
}
170+
}
171+
172+
impl<I:Iterator<Item =char>>IteratorforStandardizeKoreanSyllables<I>{
173+
typeItem =char;
174+
175+
fnnext(&mutself) ->Option<Self::Item>{
176+
self.0.next()
177+
}
178+
179+
fnsize_hint(&self) ->(usize,Option<usize>){
180+
self.0.size_hint()
181+
}
182+
}
183+
184+
impl<I:Iterator<Item =char> +FusedIterator>FusedIteratorforStandardizeKoreanSyllables<I>{}
185+
186+
// KS X 1026-1 normalization
187+
188+
#[cfg(feature ="ks_x_1026-1")]
189+
#[derive(Clone,Debug)]
190+
structKsX1026_1;
191+
192+
#[cfg(feature ="ks_x_1026-1")]
193+
implNormalizeKoreanSyllablesforKsX1026_1{
194+
#[inline]
195+
fninsert_fillers(
196+
next_c:Option<char>,
197+
prev_end_jamo_kind:Option<JamoKind>,
198+
next_start_jamo_kind:Option<JamoKind>,
199+
buf:&mutArrayVec<[Option<char>;3]>,
200+
) ->Option<char>{
201+
match(prev_end_jamo_kind, next_start_jamo_kind){
202+
// Insert choseong filler before V preceded by V, T or non-jamo
203+
(None,Some(JamoKind::V))
204+
|(Some(JamoKind::V),Some(JamoKind::V))
205+
|(Some(JamoKind::T),Some(JamoKind::V)) =>{
206+
buf.push(next_c);
207+
Some('\u{115F}')
208+
}
209+
// Insert choseong and jungseong fillers before T preceded by T or non-jamo
210+
(None,Some(JamoKind::T)) |(Some(JamoKind::T),Some(JamoKind::T)) =>{
211+
buf.push(next_c);
212+
buf.push(Some('\u{1160}'));
213+
Some('\u{115F}')
214+
}
215+
// Insert V filler between L and non-jamo or other L
216+
(Some(JamoKind::L),None) |(Some(JamoKind::L),Some(JamoKind::L)) =>{
217+
buf.push(next_c);
218+
Some('\u{1160}')
219+
}
220+
// For L followed by T, insert V filler, L filler, then another V filler
221+
(Some(JamoKind::L),Some(JamoKind::T)) =>{
222+
buf.push(next_c);
223+
buf.push(Some('\u{1160}'));
224+
buf.push(Some('\u{115F}'));
225+
Some('\u{1160}')
226+
}
227+
_ => next_c,
134228
}
135229
}
136230
}
231+
232+
/// Iterator over a string's characters, with U+115F and U+1160 inserted
233+
/// where needed to ensure all Korean syllable blocks are in standard form
234+
/// by [KS X 1026-1](http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3422.pdf) rules.
235+
#[cfg(feature ="ks_x_1026-1")]
236+
#[cfg_attr(docsrs, doc(cfg(feature ="ks_x_1026-1")))]
237+
#[derive(Clone,Debug)]
238+
pubstructStandardizeKoreanSyllablesKsX1026_1<I>(StandardizeKoreanSyllablesInner<I,KsX1026_1>);
239+
240+
#[cfg(feature ="ks_x_1026-1")]
241+
impl<I>StandardizeKoreanSyllablesKsX1026_1<I>{
242+
#[inline]
243+
pub(crate)fnnew(iter:I) ->Self{
244+
Self(StandardizeKoreanSyllablesInner::new(iter))
245+
}
246+
}
247+
248+
#[cfg(feature ="ks_x_1026-1")]
249+
impl<I:Iterator<Item =char>>IteratorforStandardizeKoreanSyllablesKsX1026_1<I>{
250+
typeItem =char;
251+
252+
fnnext(&mutself) ->Option<Self::Item>{
253+
self.0.next()
254+
}
255+
256+
fnsize_hint(&self) ->(usize,Option<usize>){
257+
self.0.size_hint()
258+
}
259+
}
260+
261+
#[cfg(feature ="ks_x_1026-1")]
262+
impl<I:Iterator<Item =char> +FusedIterator>FusedIterator
263+
forStandardizeKoreanSyllablesKsX1026_1<I>
264+
{
265+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp