99// except according to those terms.
1010
1111use core:: cmp;
12+ use core:: iter:: Filter ;
1213
1314// All of the logic for forward iteration over sentences
1415mod fwd{
@@ -40,6 +41,7 @@ mod fwd {
4041StatePart :: Sot
4142] ) ;
4243
44+ #[ derive( Clone ) ]
4345pub struct SentenceBreaks < ' a > {
4446pub string : & ' a str ,
4547pos : usize ,
@@ -256,13 +258,32 @@ mod fwd {
256258
257259}
258260
261+ /// An iterator over the substrings of a string which, after splitting the string on
262+ /// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries),
263+ /// contain any characters with the
264+ /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
265+ /// property, or with
266+ /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
267+ #[ derive( Clone ) ]
268+ pub struct UnicodeSentences < ' a > {
269+ inner : Filter < USentenceBounds < ' a > , fn ( & & str ) ->bool > ,
270+ }
271+
259272/// External iterator for a string's
260273/// [sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
274+ #[ derive( Clone ) ]
261275pub struct USentenceBounds < ' a > {
262276iter : fwd:: SentenceBreaks < ' a > ,
263277sentence_start : Option < usize >
264278}
265279
280+ /// External iterator for sentence boundaries and byte offsets.
281+ #[ derive( Clone ) ]
282+ pub struct USentenceBoundIndices < ' a > {
283+ start_offset : usize ,
284+ iter : USentenceBounds < ' a > ,
285+ }
286+
266287#[ inline]
267288pub fn new_sentence_bounds < ' a > ( source : & ' a str ) ->USentenceBounds < ' a > {
268289USentenceBounds {
@@ -271,6 +292,32 @@ pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> {
271292}
272293}
273294
295+ #[ inline]
296+ pub fn new_sentence_bound_indices < ' a > ( source : & ' a str ) ->USentenceBoundIndices < ' a > {
297+ USentenceBoundIndices {
298+ start_offset : source. as_ptr ( ) as usize ,
299+ iter : new_sentence_bounds ( source)
300+ }
301+ }
302+
303+ #[ inline]
304+ pub fn new_unicode_sentences < ' b > ( s : & ' b str ) ->UnicodeSentences < ' b > {
305+ use super :: UnicodeSegmentation ;
306+ use tables:: util:: is_alphanumeric;
307+
308+ fn has_alphanumeric ( s : & & str ) ->bool { s. chars ( ) . any ( |c|is_alphanumeric ( c) ) }
309+ let has_alphanumeric: fn ( & & str ) ->bool = has_alphanumeric; // coerce to fn pointer
310+
311+ UnicodeSentences { inner : s. split_sentence_bounds ( ) . filter ( has_alphanumeric) }
312+ }
313+
314+ impl < ' a > Iterator for UnicodeSentences < ' a > {
315+ type Item =& ' a str ;
316+
317+ #[ inline]
318+ fn next ( & mut self ) ->Option < & ' a str > { self . inner . next ( ) }
319+ }
320+
274321impl < ' a > Iterator for USentenceBounds < ' a > {
275322type Item =& ' a str ;
276323
@@ -300,3 +347,17 @@ impl<'a> Iterator for USentenceBounds<'a> {
300347}
301348}
302349}
350+
351+ impl < ' a > Iterator for USentenceBoundIndices < ' a > {
352+ type Item =( usize , & ' a str ) ;
353+
354+ #[ inline]
355+ fn next ( & mut self ) ->Option < ( usize , & ' a str ) > {
356+ self . iter . next ( ) . map ( |s|( s. as_ptr ( ) as usize -self . start_offset , s) )
357+ }
358+
359+ #[ inline]
360+ fn size_hint ( & self ) ->( usize , Option < usize > ) {
361+ self . iter . size_hint ( )
362+ }
363+ }