Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Expose all iterator constructors, add hyperlinks to Unicode glossary/technical reports #106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Manishearth merged 1 commit into unicode-rs:master from ShE3py:iter-ctors
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion — src/decompose.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -53,7 +53,7 @@ impl<I: Iterator<Item = char>> Decompositions<I> {

/// Create a new decomposition iterator for compatibility decompositions (NFKD)
///
/// Note that this iterator can also be obtained by directly calling [`.nfd()`](crate::UnicodeNormalization::nfd)
/// Note that this iterator can also be obtained by directly calling [`.nfkd()`](crate::UnicodeNormalization::nfkd)
/// on the iterator.
#[inline]
pub fn new_compatible(iter: I) -> Decompositions<I> {
Expand Down
17 changes: 11 additions & 6 deletions — src/lib.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -113,8 +113,8 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
/// (compatibility decomposition followed by canonical composition).
fn nfkc(self) -> Recompositions<I>;

/// A transformation which replaces CJK Compatibility Ideograph codepoints
/// with normal forms using Standardized Variation Sequences. This is not
/// A transformation which replaces [CJK Compatibility Ideograph] codepoints
/// with normal forms using [Standardized Variation Sequences]. This is not
/// part of the canonical or compatibility decomposition algorithms, but
/// performing it before those algorithms produces normalized output which
/// better preserves the intent of the original text.
Expand All@@ -123,10 +123,15 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
/// may not immediately help text display as intended, but they at
/// least preserve the information in a standardized form, giving
/// implementations the option to recognize them.
///
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
fn cjk_compat_variants(self) -> Replacements<I>;

/// An Iterator over the string with Conjoining Grapheme Joiner characters
/// inserted according to the Stream-Safe Text Process (UAX15-D4)
/// inserted according to the Stream-Safe Text Process ([UAX15-D4]).
///
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
fn stream_safe(self) -> StreamSafe<I>;
}

Expand All@@ -153,7 +158,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {

#[inline]
fn cjk_compat_variants(self) -> Replacements<Chars<'a>> {
replace::new_cjk_compat_variants(self.chars())
Replacements::new_cjk_compat_variants(self.chars())
}

#[inline]
Expand DownExpand Up@@ -185,7 +190,7 @@ impl UnicodeNormalization<option::IntoIter<char>> for char {

#[inline]
fn cjk_compat_variants(self) -> Replacements<option::IntoIter<char>> {
replace::new_cjk_compat_variants(Some(self).into_iter())
Replacements::new_cjk_compat_variants(Some(self).into_iter())
}

#[inline]
Expand DownExpand Up@@ -217,7 +222,7 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {

#[inline]
fn cjk_compat_variants(self) -> Replacements<I> {
replace::new_cjk_compat_variants(self)
Replacements::new_cjk_compat_variants(self)
}

#[inline]
Expand Down
11 changes: 6 additions & 5 deletions — src/normalize.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -41,11 +41,12 @@ pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
///
/// [Standardized Variation Sequences] are used instead of the standard canonical
/// decompositions, notably for CJK codepoints with singleton canonical decompositions,
/// to avoid losing information. See the
/// [Unicode Variation Sequence FAQ](http://unicode.org/faq/vs.html) and the
/// "Other Enhancements" section of the
/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary)
/// for more information.
/// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the
/// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information.
///
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
/// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html
/// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary
#[inline]
pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F)
where
Expand Down
15 changes: 12 additions & 3 deletions — src/replace.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -22,9 +22,18 @@ pub struct Replacements<I> {
buffer: Option<char>,
}

#[inline]
pub fn new_cjk_compat_variants<I: Iterator<Item = char>>(iter: I) -> Replacements<I> {
Replacements { iter, buffer: None }
impl<I: Iterator<Item = char>> Replacements<I> {
/// Create a new iterator that replaces [CJK Compatibility Ideograph] codepoints with normal forms using [Standardized Variation Sequences].
///
/// Note that this iterator can also be obtained by directly calling [`.cjk_compat_variants()`] on the iterator.
///
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
/// [`.cjk_compat_variants()`]: crate::UnicodeNormalization::cjk_compat_variants
#[inline]
pub fn new_cjk_compat_variants(iter: I) -> Replacements<I> {
Replacements { iter, buffer: None }
}
}

impl<I: Iterator<Item = char>> Iterator for Replacements<I> {
Expand Down
13 changes: 10 additions & 3 deletions — src/stream_safe.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -10,17 +10,24 @@ use crate::tables::stream_safe_leading_nonstarters;
pub(crate) const MAX_NONSTARTERS: usize = 30;
const COMBINING_GRAPHEME_JOINER: char = '\u{034F}';

/// UAX15-D4: This iterator keeps track of how many non-starters there have been
/// [UAX15-D4]: This iterator keeps track of how many non-starters there have been
/// since the last starter in *NFKD* and will emit a Combining Grapheme Joiner
/// (U+034F) if the count exceeds 30.
///
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
pub struct StreamSafe<I> {
iter: I,
nonstarter_count: usize,
buffer: Option<char>,
}

impl<I> StreamSafe<I> {
pub(crate) fn new(iter: I) -> Self {
impl<I: Iterator<Item = char>> StreamSafe<I> {
/// Create a new stream safe iterator.
///
/// Note that this iterator can also be obtained by directly calling [`.stream_safe()`](crate::UnicodeNormalization::stream_safe)
/// on the iterator.
#[inline]
pub fn new(iter: I) -> Self {
Self {
iter,
nonstarter_count: 0,
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp