Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb726540

Browse files
authored
Merge branch 'master' into patch-1
2 parents0772fc9 +9b20974 commitb726540

File tree

7 files changed

+124
-18
lines changed

7 files changed

+124
-18
lines changed

‎fuzz/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,11 @@ path = "fuzz_targets/streaming.rs"
2323
test =false
2424
doc =false
2525

26+
[[bin]]
27+
name ="process"
28+
path ="fuzz_targets/process.rs"
29+
test =false
30+
doc =false
31+
2632
# Work around https://github.com/rust-lang/cargo/issues/8338
2733
[workspace]

‎fuzz/fuzz_targets/process.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// This fuzzing harness fuzz-tests some of the
2+
// unicode string normalization processing
3+
4+
#![no_main]
5+
6+
#[macro_use]
7+
externcrate libfuzzer_sys;
8+
externcrate unicode_normalization;
9+
10+
use unicode_normalization::{
11+
char::{
12+
canonical_combining_class, compose, decompose_canonical, decompose_compatible,
13+
is_combining_mark,
14+
},
15+
UnicodeNormalization,
16+
};
17+
18+
fuzz_target!(|data:(u8,String)|{
19+
let(function_index, string_data) = data;
20+
21+
// Create an iterator for characters
22+
letmut chars = string_data.chars();
23+
24+
// Randomly fuzz a target function
25+
match function_index %10{
26+
0 =>{
27+
// Fuzz compose with the first two characters of the input
28+
iflet(Some(c1),Some(c2)) =(chars.next(), chars.next()){
29+
let _ = compose(c1, c2);
30+
}
31+
}
32+
1 =>{
33+
// Fuzz canonical_combining_class
34+
ifletSome(c) = chars.next(){
35+
let _ = canonical_combining_class(c);
36+
}
37+
}
38+
2 =>{
39+
// Fuzz is_combining_mark
40+
ifletSome(c) = chars.next(){
41+
let _ = is_combining_mark(c);
42+
}
43+
}
44+
3 =>{
45+
// Fuzz NFC
46+
let _ = string_data.nfc().collect::<String>();
47+
}
48+
4 =>{
49+
// Fuzz NFKD
50+
let _ = string_data.nfkd().collect::<String>();
51+
}
52+
5 =>{
53+
// Fuzz NFD
54+
let _ = string_data.nfd().collect::<String>();
55+
}
56+
6 =>{
57+
// Fuzz NFKC
58+
let _ = string_data.nfkc().collect::<String>();
59+
}
60+
7 =>{
61+
// Fuzz stream_safe
62+
let _ = string_data.stream_safe().collect::<String>();
63+
}
64+
8 =>{
65+
// Fuzz decompose_canonical
66+
ifletSome(c) = chars.next(){
67+
decompose_canonical(c, |_|{});
68+
}
69+
}
70+
9 =>{
71+
// Fuzz decompose_compatible
72+
ifletSome(c) = chars.next(){
73+
decompose_compatible(c, |_|{});
74+
}
75+
}
76+
_ =>{}
77+
}
78+
});

‎src/decompose.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl<I: Iterator<Item = char>> Decompositions<I> {
5353

5454
/// Create a new decomposition iterator for compatibility decompositions (NFKD)
5555
///
56-
/// Note that this iterator can also be obtained by directly calling [`.nfd()`](crate::UnicodeNormalization::nfd)
56+
/// Note that this iterator can also be obtained by directly calling [`.nfkd()`](crate::UnicodeNormalization::nfkd)
5757
/// on the iterator.
5858
#[inline]
5959
pubfnnew_compatible(iter:I) ->Decompositions<I>{

‎src/lib.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
113113
/// (compatibility decomposition followed by canonical composition).
114114
fnnfkc(self) ->Recompositions<I>;
115115

116-
/// A transformation which replaces CJK Compatibility Ideograph codepoints
117-
/// with normal forms using Standardized Variation Sequences. This is not
116+
/// A transformation which replaces [CJK Compatibility Ideograph] codepoints
117+
/// with normal forms using [Standardized Variation Sequences]. This is not
118118
/// part of the canonical or compatibility decomposition algorithms, but
119119
/// performing it before those algorithms produces normalized output which
120120
/// better preserves the intent of the original text.
@@ -123,10 +123,15 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
123123
/// may not immediately help text display as intended, but they at
124124
/// least preserve the information in a standardized form, giving
125125
/// implementations the option to recognize them.
126+
///
127+
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
128+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
126129
fncjk_compat_variants(self) ->Replacements<I>;
127130

128131
/// An Iterator over the string with Conjoining Grapheme Joiner characters
129-
/// inserted according to the Stream-Safe Text Process (UAX15-D4)
132+
/// inserted according to the Stream-Safe Text Process ([UAX15-D4]).
133+
///
134+
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
130135
fnstream_safe(self) ->StreamSafe<I>;
131136
}
132137

@@ -153,7 +158,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
153158

154159
#[inline]
155160
fncjk_compat_variants(self) ->Replacements<Chars<'a>>{
156-
replace::new_cjk_compat_variants(self.chars())
161+
Replacements::new_cjk_compat_variants(self.chars())
157162
}
158163

159164
#[inline]
@@ -185,7 +190,7 @@ impl UnicodeNormalization<option::IntoIter<char>> for char {
185190

186191
#[inline]
187192
fncjk_compat_variants(self) ->Replacements<option::IntoIter<char>>{
188-
replace::new_cjk_compat_variants(Some(self).into_iter())
193+
Replacements::new_cjk_compat_variants(Some(self).into_iter())
189194
}
190195

191196
#[inline]
@@ -217,7 +222,7 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
217222

218223
#[inline]
219224
fncjk_compat_variants(self) ->Replacements<I>{
220-
replace::new_cjk_compat_variants(self)
225+
Replacements::new_cjk_compat_variants(self)
221226
}
222227

223228
#[inline]

‎src/normalize.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
4141
///
4242
/// [Standardized Variation Sequences] are used instead of the standard canonical
4343
/// decompositions, notably for CJK codepoints with singleton canonical decompositions,
44-
/// to avoid losing information. See the
45-
/// [Unicode Variation Sequence FAQ](http://unicode.org/faq/vs.html) and the
46-
/// "Other Enhancements" section of the
47-
/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary)
48-
/// for more information.
44+
/// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the
45+
/// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information.
46+
///
47+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
48+
/// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html
49+
/// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary
4950
#[inline]
5051
pubfndecompose_cjk_compat_variants<F>(c:char,mutemit_char:F)
5152
where

‎src/replace.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,18 @@ pub struct Replacements<I> {
2222
buffer:Option<char>,
2323
}
2424

25-
#[inline]
26-
pubfnnew_cjk_compat_variants<I:Iterator<Item =char>>(iter:I) ->Replacements<I>{
27-
Replacements{ iter,buffer:None}
25+
impl<I:Iterator<Item =char>>Replacements<I>{
26+
/// Create a new iterator that replaces [CJK Compatibility Ideograph] codepoints with normal forms using [Standardized Variation Sequences].
27+
///
28+
/// Note that this iterator can also be obtained by directly calling [`.cjk_compat_variants()`] on the iterator.
29+
///
30+
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
31+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
32+
/// [`.cjk_compat_variants()`]: crate::UnicodeNormalization::cjk_compat_variants
33+
#[inline]
34+
pubfnnew_cjk_compat_variants(iter:I) ->Replacements<I>{
35+
Replacements{ iter,buffer:None}
36+
}
2837
}
2938

3039
impl<I:Iterator<Item =char>>IteratorforReplacements<I>{

‎src/stream_safe.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,24 @@ use crate::tables::stream_safe_leading_nonstarters;
1010
pub(crate)constMAX_NONSTARTERS:usize =30;
1111
constCOMBINING_GRAPHEME_JOINER:char ='\u{034F}';
1212

13-
/// UAX15-D4: This iterator keeps track of how many non-starters there have been
13+
/// [UAX15-D4]: This iterator keeps track of how many non-starters there have been
1414
/// since the last starter in *NFKD* and will emit a Combining Grapheme Joiner
1515
/// (U+034F) if the count exceeds 30.
16+
///
17+
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
1618
pubstructStreamSafe<I>{
1719
iter:I,
1820
nonstarter_count:usize,
1921
buffer:Option<char>,
2022
}
2123

22-
impl<I>StreamSafe<I>{
23-
pub(crate)fnnew(iter:I) ->Self{
24+
impl<I:Iterator<Item =char>>StreamSafe<I>{
25+
/// Create a new stream safe iterator.
26+
///
27+
/// Note that this iterator can also be obtained by directly calling [`.stream_safe()`](crate::UnicodeNormalization::stream_safe)
28+
/// on the iterator.
29+
#[inline]
30+
pubfnnew(iter:I) ->Self{
2431
Self{
2532
iter,
2633
nonstarter_count:0,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp