Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit af87c8d

Browse files
authored
Add ascii fast path for unicode_word_indices and unicode_words (#147)
* add benchmark
* add ascii fastpath
* add test case IP
* add log to benches
* restore iterators
* add backwards iterator
* restore test
* replace Box with Enum
* add comments with reference to the spec
* remove unused alloc
* readd Debug derive
* use import
* remove pub
1 parent 9e3f88c, commit af87c8d

File tree

8 files changed

+426
-42
lines changed

8 files changed

+426
-42
lines changed

‎Cargo.toml‎

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
2424
[dev-dependencies]
2525
quickcheck ="0.7"
2626
criterion ="0.5"
27+
proptest ="1.7.0"
2728

2829
[[bench]]
2930
name ="chars"
@@ -36,3 +37,8 @@ harness = false
3637
[[bench]]
3738
name ="word_bounds"
3839
harness =false
40+
41+
[[bench]]
42+
name ="unicode_word_indices"
43+
harness =false
44+

‎benches/chars.rs‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
4141
for fileinFILES{
4242
group.bench_with_input(
4343
BenchmarkId::new("grapheme", file),
44-
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
44+
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
4545
|b, content| b.iter(||grapheme(content)),
4646
);
4747
}
4848

4949
for fileinFILES{
5050
group.bench_with_input(
5151
BenchmarkId::new("scalar", file),
52-
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
52+
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
5353
|b, content| b.iter(||scalar(content)),
5454
);
5555
}

‎benches/texts/log.txt‎

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
2018-07-12 13:59:01 UTC | ERROR | (worker.go:131 in process) | Too many errors for endpoint 'dummy/api/v1/check_run?api_key=*************************00000': retrying later

‎benches/unicode_word_indices.rs‎

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
use criterion::{black_box, criterion_group, criterion_main,BenchmarkId,Criterion};
2+
3+
use std::fs;
4+
use unicode_segmentation::UnicodeSegmentation;
5+
6+
constFILES:&[&str] =&[
7+
"log",//"arabic",
8+
"english",
9+
//"hindi",
10+
"japanese",
11+
//"korean",
12+
//"mandarin",
13+
//"russian",
14+
//"source_code",
15+
];
16+
17+
#[inline(always)]
18+
fngrapheme(text:&str){
19+
for win text.unicode_word_indices(){
20+
black_box(w);
21+
}
22+
}
23+
24+
fnbench_all(c:&mutCriterion){
25+
letmut group = c.benchmark_group("unicode_word_indices");
26+
27+
for fileinFILES{
28+
let input = fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap();
29+
group.throughput(criterion::Throughput::Bytes(input.len()asu64));
30+
group.bench_with_input(BenchmarkId::from_parameter(file),&input, |b, content|{
31+
b.iter(||grapheme(content))
32+
});
33+
}
34+
}
35+
36+
criterion_group!(benches, bench_all);
37+
criterion_main!(benches);

‎benches/word_bounds.rs‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ fn bench_all(c: &mut Criterion) {
2727
for fileinFILES{
2828
group.bench_with_input(
2929
BenchmarkId::new("grapheme", file),
30-
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
30+
&fs::read_to_string(format!("benches/texts/{file}.txt",)).unwrap(),
3131
|b, content| b.iter(||grapheme(content)),
3232
);
3333
}

‎benches/words.rs‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
4141
for fileinFILES{
4242
group.bench_with_input(
4343
BenchmarkId::new("grapheme", file),
44-
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
44+
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
4545
|b, content| b.iter(||grapheme(content)),
4646
);
4747
}
4848

4949
for fileinFILES{
5050
group.bench_with_input(
5151
BenchmarkId::new("scalar", file),
52-
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
52+
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
5353
|b, content| b.iter(||scalar(content)),
5454
);
5555
}

‎src/lib.rs‎

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,16 @@
5656
)]
5757
#![no_std]
5858

59+
#[cfg(test)]
60+
externcrate std;
61+
5962
pubuse grapheme::{GraphemeCursor,GraphemeIncomplete};
6063
pubuse grapheme::{GraphemeIndices,Graphemes};
6164
pubuse sentence::{USentenceBoundIndices,USentenceBounds,UnicodeSentences};
6265
pubuse tables::UNICODE_VERSION;
63-
pubuse word::{UWordBoundIndices,UWordBounds,UnicodeWordIndices,UnicodeWords};
66+
pubuse word::{UWordBoundIndices,UWordBounds};
67+
68+
usecrate::word::{UnicodeWordIndices,UnicodeWords};
6469

6570
mod grapheme;
6671
mod sentence;
@@ -248,7 +253,7 @@ pub trait UnicodeSegmentation {
248253

249254
implUnicodeSegmentationforstr{
250255
#[inline]
251-
fngraphemes(&self,is_extended:bool) ->Graphemes{
256+
fngraphemes(&self,is_extended:bool) ->Graphemes<'_>{
252257
grapheme::new_graphemes(self, is_extended)
253258
}
254259

@@ -258,32 +263,32 @@ impl UnicodeSegmentation for str {
258263
}
259264

260265
#[inline]
261-
fnunicode_words(&self) ->UnicodeWords{
266+
fnunicode_words(&self) ->UnicodeWords<'_>{
262267
word::new_unicode_words(self)
263268
}
264269

265270
#[inline]
266-
fnunicode_word_indices(&self) ->UnicodeWordIndices{
271+
fnunicode_word_indices(&self) ->UnicodeWordIndices<'_>{
267272
word::new_unicode_word_indices(self)
268273
}
269274

270275
#[inline]
271-
fnsplit_word_bounds(&self) ->UWordBounds{
276+
fnsplit_word_bounds(&self) ->UWordBounds<'_>{
272277
word::new_word_bounds(self)
273278
}
274279

275280
#[inline]
276-
fnsplit_word_bound_indices(&self) ->UWordBoundIndices{
281+
fnsplit_word_bound_indices(&self) ->UWordBoundIndices<'_>{
277282
word::new_word_bound_indices(self)
278283
}
279284

280285
#[inline]
281-
fnunicode_sentences(&self) ->UnicodeSentences{
286+
fnunicode_sentences(&self) ->UnicodeSentences<'_>{
282287
sentence::new_unicode_sentences(self)
283288
}
284289

285290
#[inline]
286-
fnsplit_sentence_bounds(&self) ->USentenceBounds{
291+
fnsplit_sentence_bounds(&self) ->USentenceBounds<'_>{
287292
sentence::new_sentence_bounds(self)
288293
}
289294

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp