Jun 20, 2019 · Jun 20, 2019 · Jun 20, 2019 · Jun 20, 2019 · Jun 20, 2019
diff --git a/.travis.yml b/.travis.yml
 script:
  - cargo build --verbose
  - cargo test --verbose
  - rustdoc --test README.md -L target/debug -L target/debug/deps
  - cargo doc
 after_success: |
  [ $TRAVIS_RUST_VERSION = stable ] &&
diff --git a/Cargo.toml b/Cargo.toml
 name = "unicode-segmentation"
 version = "1.3.0"
 authors = ["kwantam <kwantam@gmail.com>"]

 edition = "2018"
 homepage = "https://github.com/unicode-rs/unicode-segmentation"
 repository = "https://github.com/unicode-rs/unicode-segmentation"
 documentation = "https://unicode-rs.github.io/unicode-segmentation"
diff --git a/scripts/unicode.py b/scripts/unicode.py
    #[inline]
    fn is_alphabetic(c: char) -> bool {
        match c {
            'a' ... 'z' | 'A' ... 'Z' => true,
            'a' ..= 'z' | 'A' ..= 'Z' => true,
            c if c > '\x7f' => super::derived_property::Alphabetic(c),
            _ => false,
        }
    #[inline]
    fn is_numeric(c: char) -> bool {
        match c {
            '0' ... '9' => true,
            '0' ..= '9' => true,
            c if c > '\x7f' => super::general_category::N(c),
            _ => false,
        }
diff --git a/src/grapheme.rs b/src/grapheme.rs

 use core::cmp;

 use tables::grapheme::GraphemeCat;
 use crate::tables::grapheme::GraphemeCat;

 /// External iterator for grapheme clusters and byte offsets.
 #[derive(Clone)]
 }

 fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
    use tables::grapheme::GraphemeCat::*;
    use crate::tables::grapheme::GraphemeCat::*;
    use self::PairResult::*;
    match (before, after) {
        (GC_CR, GC_LF) => NotBreak,  // GB3
    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
    /// ```
    pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
        self.pre_context_offset = None;
        if self.is_extended && chunk_start + chunk.len() == self.offset {
    }

    fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        let mut ris_count = self.ris_count.unwrap_or(0);
        for ch in chunk.chars().rev() {
            if gr::grapheme_category(ch) != gr::GC_Regional_Indicator {
    }

    fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        for ch in chunk.chars().rev() {
            match gr::grapheme_category(ch) {
                gr::GC_Extend => (),
    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
    /// ```
    pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        if self.state == GraphemeState::Break {
            return Ok(true)
        }
    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
    /// ```
    pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        if self.offset == self.len {
            return Ok(None);
        }
    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
    /// ```
    pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
        use tables::grapheme as gr;
        use crate::tables::grapheme as gr;
        if self.offset == 0 {
            return Ok(None);
        }
diff --git a/src/lib.rs b/src/lib.rs
 //! to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
 //!
 //! ```rust
 //! extern crate unicode_segmentation;
 //!
 //! use unicode_segmentation::UnicodeSegmentation;
 //!
 //! fn main() {
 #[macro_use]
 extern crate quickcheck;

 pub use grapheme::{Graphemes, GraphemeIndices};
 pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
 pub use tables::UNICODE_VERSION;
 pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
 pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
 pub use crate::grapheme::{Graphemes, GraphemeIndices};
 pub use crate::grapheme::{GraphemeCursor, GraphemeIncomplete};
 pub use crate::tables::UNICODE_VERSION;
 pub use crate::word::{UWordBounds, UWordBoundIndices, UnicodeWords};
 pub use crate::sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};

 mod grapheme;
 mod tables;
diff --git a/src/sentence.rs b/src/sentence.rs

 // All of the logic for forward iteration over sentences
 mod fwd {
    use tables::sentence::SentenceCat;
    use crate::tables::sentence::SentenceCat;
    use core::cmp;

    // Describe a parsed part of source string as described in this table:
        if parts[idx] == StatePart::ClosePlus { idx -= 1 }

        if parts[idx] == StatePart::ATerm {
            use tables::sentence as se;
            use crate::tables::sentence as se;

            for next_char in ahead.chars() {
                //( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower

        #[inline]
        fn next(&mut self) -> Option<usize> {
            use tables::sentence as se;
            use crate::tables::sentence as se;

            for next_char in self.string[self.pos..].chars() {
                let position_before = self.pos;
 #[inline]
 pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> {
    use super::UnicodeSegmentation;
    use tables::util::is_alphanumeric;
    use crate::tables::util::is_alphanumeric;

    fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| is_alphanumeric(c)) }
    let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
diff --git a/src/tables.rs b/src/tables.rs
    #[inline]
    fn is_alphabetic(c: char) -> bool {
        match c {
            'a' ... 'z' | 'A' ... 'Z' => true,
            'a' ..= 'z' | 'A' ..= 'Z' => true,
            c if c > '\x7f' => super::derived_property::Alphabetic(c),
            _ => false,
        }
    #[inline]
    fn is_numeric(c: char) -> bool {
        match c {
            '0' ... '9' => true,
            '0' ..= '9' => true,
            c if c > '\x7f' => super::general_category::N(c),
            _ => false,
        }
diff --git a/src/test.rs b/src/test.rs

 #[test]
 fn test_graphemes() {
    use testdata::{TEST_SAME, TEST_DIFF};
    use crate::testdata::{TEST_SAME, TEST_DIFF};

    pub const EXTRA_DIFF: &'static [(&'static str,
                                     &'static [&'static str],

 #[test]
 fn test_words() {
    use testdata::TEST_WORD;
    use crate::testdata::TEST_WORD;

    // Unicode's official tests don't really test longer chains of flag emoji
    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ

 #[test]
 fn test_sentences() {
    use testdata::TEST_SENTENCE;
    use crate::testdata::TEST_SENTENCE;

    for &(s, w) in TEST_SENTENCE.iter() {
        macro_rules! assert_ {
diff --git a/src/word.rs b/src/word.rs
 use core::cmp;
 use core::iter::Filter;

 use tables::word::WordCat;
 use crate::tables::word::WordCat;

 /// An iterator over the substrings of a string which, after splitting the string on
 /// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
    fn next(&mut self) -> Option<&'a str> {
        use self::UWordBoundsState::*;
        use self::FormatExtendType::*;
        use tables::word as wd;
        use crate::tables::word as wd;
        if self.string.len() == 0 {
            return None;
        }
    fn next_back(&mut self) -> Option<&'a str> {
        use self::UWordBoundsState::*;
        use self::FormatExtendType::*;
        use tables::word as wd;
        use crate::tables::word as wd;
        if self.string.len() == 0 {
            return None;
        }

    #[inline]
    fn get_next_cat(&self, idx: usize) -> Option<WordCat> {
        use tables::word as wd;
        use crate::tables::word as wd;
        let nidx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
        if nidx < self.string.len() {
            let nch = self.string[nidx..].chars().next().unwrap();

    #[inline]
    fn get_prev_cat(&self, idx: usize) -> Option<WordCat> {
        use tables::word as wd;
        use crate::tables::word as wd;
        if idx > 0 {
            let nch = self.string[..idx].chars().next_back().unwrap();
            Some(wd::word_category(nch))
 #[inline]
 pub fn new_unicode_words<'b>(s: &'b str) -> UnicodeWords<'b> {
    use super::UnicodeSegmentation;
    use tables::util::is_alphanumeric;
    use crate::tables::util::is_alphanumeric;

    fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| is_alphanumeric(c)) }
    let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,7 +6,6 @@ os: linux
		script:
		- cargo build --verbose
		- cargo test --verbose
		- rustdoc --test README.md -L target/debug -L target/debug/deps
		- cargo doc
		after_success: \|
		[ $TRAVIS_RUST_VERSION = stable ] &&
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,7 +3,7 @@
		name = "unicode-segmentation"
		version = "1.3.0"
		authors = ["kwantam <kwantam@gmail.com>"]

		edition = "2018"
		homepage = "https://github.com/unicode-rs/unicode-segmentation"
		repository = "https://github.com/unicode-rs/unicode-segmentation"
		documentation = "https://unicode-rs.github.io/unicode-segmentation"
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -221,7 +221,7 @@ def emit_util_mod(f):
		#[inline]
		fn is_alphabetic(c: char) -> bool {
		match c {
		'a' ... 'z' \| 'A' ... 'Z' => true,
		'a' ..= 'z' \| 'A' ..= 'Z' => true,
		c if c > '\x7f' => super::derived_property::Alphabetic(c),
		_ => false,
		}
Expand All		@@ -230,7 +230,7 @@ def emit_util_mod(f):
		#[inline]
		fn is_numeric(c: char) -> bool {
		match c {
		'0' ... '9' => true,
		'0' ..= '9' => true,
		c if c > '\x7f' => super::general_category::N(c),
		_ => false,
		}
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -10,7 +10,7 @@

		use core::cmp;

		use tables::grapheme::GraphemeCat;
		use crate::tables::grapheme::GraphemeCat;

		/// External iterator for grapheme clusters and byte offsets.
		#[derive(Clone)]
Expand DownExpand Up		@@ -215,7 +215,7 @@ enum PairResult {
		}

		fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
		use tables::grapheme::GraphemeCat::*;
		use crate::tables::grapheme::GraphemeCat::*;
		use self::PairResult::*;
		match (before, after) {
		(GC_CR, GC_LF) => NotBreak, // GB3
Expand DownExpand Up		@@ -348,7 +348,7 @@ impl GraphemeCursor {
		/// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
		/// ```
		pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
		self.pre_context_offset = None;
		if self.is_extended && chunk_start + chunk.len() == self.offset {
Expand DownExpand Up		@@ -394,7 +394,7 @@ impl GraphemeCursor {
		}

		fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		let mut ris_count = self.ris_count.unwrap_or(0);
		for ch in chunk.chars().rev() {
		if gr::grapheme_category(ch) != gr::GC_Regional_Indicator {
Expand All		@@ -414,7 +414,7 @@ impl GraphemeCursor {
		}

		fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		for ch in chunk.chars().rev() {
		match gr::grapheme_category(ch) {
		gr::GC_Extend => (),
Expand DownExpand Up		@@ -460,7 +460,7 @@ impl GraphemeCursor {
		/// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
		/// ```
		pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		if self.state == GraphemeState::Break {
		return Ok(true)
		}
Expand DownExpand Up		@@ -550,7 +550,7 @@ impl GraphemeCursor {
		/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
		/// ```
		pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		if self.offset == self.len {
		return Ok(None);
		}
Expand DownExpand Up		@@ -626,7 +626,7 @@ impl GraphemeCursor {
		/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
		/// ```
		pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
		use tables::grapheme as gr;
		use crate::tables::grapheme as gr;
		if self.offset == 0 {
		return Ok(None);
		}
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -12,8 +12,6 @@
		//! to the [Unicode Standard Annex #29](http://www.unicode.org/reports/tr29/) rules.
		//!
		//! ```rust
		//! extern crate unicode_segmentation;
		//!
		//! use unicode_segmentation::UnicodeSegmentation;
		//!
		//! fn main() {
Expand DownExpand Up		@@ -63,11 +61,11 @@ extern crate std;
		#[macro_use]
		extern crate quickcheck;

		pub use grapheme::{Graphemes, GraphemeIndices};
		pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
		pub use tables::UNICODE_VERSION;
		pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
		pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
		pub use crate::grapheme::{Graphemes, GraphemeIndices};
		pub use crate::grapheme::{GraphemeCursor, GraphemeIncomplete};
		pub use crate::tables::UNICODE_VERSION;
		pub use crate::word::{UWordBounds, UWordBoundIndices, UnicodeWords};
		pub use crate::sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};

		mod grapheme;
		mod tables;
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,7 +13,7 @@ use core::iter::Filter;

		// All of the logic for forward iteration over sentences
		mod fwd {
		use tables::sentence::SentenceCat;
		use crate::tables::sentence::SentenceCat;
		use core::cmp;

		// Describe a parsed part of source string as described in this table:
Expand DownExpand Up		@@ -111,7 +111,7 @@ mod fwd {
		if parts[idx] == StatePart::ClosePlus { idx -= 1 }

		if parts[idx] == StatePart::ATerm {
		use tables::sentence as se;
		use crate::tables::sentence as se;

		for next_char in ahead.chars() {
		//( ¬(OLetter \| Upper \| Lower \| ParaSep \| SATerm) )* Lower
Expand DownExpand Up		@@ -176,7 +176,7 @@ mod fwd {

		#[inline]
		fn next(&mut self) -> Option<usize> {
		use tables::sentence as se;
		use crate::tables::sentence as se;

		for next_char in self.string[self.pos..].chars() {
		let position_before = self.pos;
Expand DownExpand Up		@@ -313,7 +313,7 @@ pub fn new_sentence_bound_indices<'a>(source: &'a str) -> USentenceBoundIndices<
		#[inline]
		pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> {
		use super::UnicodeSegmentation;
		use tables::util::is_alphanumeric;
		use crate::tables::util::is_alphanumeric;

		fn has_alphanumeric(s: &&str) -> bool { s.chars().any(\|c\| is_alphanumeric(c)) }
		let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -30,7 +30,7 @@ pub mod util {
		#[inline]
		fn is_alphabetic(c: char) -> bool {
		match c {
		'a' ... 'z' \| 'A' ... 'Z' => true,
		'a' ..= 'z' \| 'A' ..= 'Z' => true,
		c if c > '\x7f' => super::derived_property::Alphabetic(c),
		_ => false,
		}
Expand All		@@ -39,7 +39,7 @@ pub mod util {
		#[inline]
		fn is_numeric(c: char) -> bool {
		match c {
		'0' ... '9' => true,
		'0' ..= '9' => true,
		c if c > '\x7f' => super::general_category::N(c),
		_ => false,
		}
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -14,7 +14,7 @@ use std::prelude::v1::*;

		#[test]
		fn test_graphemes() {
		use testdata::{TEST_SAME, TEST_DIFF};
		use crate::testdata::{TEST_SAME, TEST_DIFF};

		pub const EXTRA_DIFF: &'static [(&'static str,
		&'static [&'static str],
Expand DownExpand Up		@@ -88,7 +88,7 @@ fn test_graphemes() {

		#[test]
		fn test_words() {
		use testdata::TEST_WORD;
		use crate::testdata::TEST_WORD;

		// Unicode's official tests don't really test longer chains of flag emoji
		// TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
Expand DownExpand Up		@@ -144,7 +144,7 @@ fn test_words() {

		#[test]
		fn test_sentences() {
		use testdata::TEST_SENTENCE;
		use crate::testdata::TEST_SENTENCE;

		for &(s, w) in TEST_SENTENCE.iter() {
		macro_rules! assert_ {
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,7 +11,7 @@
		use core::cmp;
		use core::iter::Filter;

		use tables::word::WordCat;
		use crate::tables::word::WordCat;

		/// An iterator over the substrings of a string which, after splitting the string on
		/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
Expand DownExpand Up		@@ -135,7 +135,7 @@ impl<'a> Iterator for UWordBounds<'a> {
		fn next(&mut self) -> Option<&'a str> {
		use self::UWordBoundsState::*;
		use self::FormatExtendType::*;
		use tables::word as wd;
		use crate::tables::word as wd;
		if self.string.len() == 0 {
		return None;
		}
Expand DownExpand Up		@@ -364,7 +364,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
		fn next_back(&mut self) -> Option<&'a str> {
		use self::UWordBoundsState::*;
		use self::FormatExtendType::*;
		use tables::word as wd;
		use crate::tables::word as wd;
		if self.string.len() == 0 {
		return None;
		}
Expand DownExpand Up		@@ -605,7 +605,7 @@ impl<'a> UWordBounds<'a> {

		#[inline]
		fn get_next_cat(&self, idx: usize) -> Option<WordCat> {
		use tables::word as wd;
		use crate::tables::word as wd;
		let nidx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
		if nidx < self.string.len() {
		let nch = self.string[nidx..].chars().next().unwrap();
Expand All		@@ -617,7 +617,7 @@ impl<'a> UWordBounds<'a> {

		#[inline]
		fn get_prev_cat(&self, idx: usize) -> Option<WordCat> {
		use tables::word as wd;
		use crate::tables::word as wd;
		if idx > 0 {
		let nch = self.string[..idx].chars().next_back().unwrap();
		Some(wd::word_category(nch))
Expand All		@@ -640,7 +640,7 @@ pub fn new_word_bound_indices<'b>(s: &'b str) -> UWordBoundIndices<'b> {
		#[inline]
		pub fn new_unicode_words<'b>(s: &'b str) -> UnicodeWords<'b> {
		use super::UnicodeSegmentation;
		use tables::util::is_alphanumeric;
		use crate::tables::util::is_alphanumeric;

		fn has_alphanumeric(s: &&str) -> bool { s.chars().any(\|c\| is_alphanumeric(c)) }
		let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
Expand Down