Expand Up @@ -43,7 +43,9 @@ impl<'a> Iterator for GraphemeIndices<'a> { #[inline] fn next(&mut self) -> Option<(usize, &'a str)> { self.iter.next().map(|s| (s.as_ptr() as usize - self.start_offset, s)) self.iter .next() .map(|s| (s.as_ptr() as usize - self.start_offset, s)) } #[inline] Expand All @@ -55,7 +57,9 @@ impl<'a> Iterator for GraphemeIndices<'a> { impl<'a> DoubleEndedIterator for GraphemeIndices<'a> { #[inline] fn next_back(&mut self) -> Option<(usize, &'a str)> { self.iter.next_back().map(|s| (s.as_ptr() as usize - self.start_offset, s)) self.iter .next_back() .map(|s| (s.as_ptr() as usize - self.start_offset, s)) } } Expand Down Expand Up @@ -114,7 +118,11 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> { if end == self.cursor.cur_cursor() { return None; } let prev = self.cursor_back.prev_boundary(self.string, 0).unwrap().unwrap(); let prev = self .cursor_back .prev_boundary(self.string, 0) .unwrap() .unwrap(); Some(&self.string[prev..end]) } } Expand All @@ -131,7 +139,10 @@ pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> { #[inline] pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> { GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) } GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended), } } // maybe unify with PairResult? Expand Down Expand Up @@ -198,7 +209,7 @@ pub enum GraphemeIncomplete { /// current chunk, so the chunk after that is requested. This will only be /// returned if the chunk ends before the `len` parameter provided on /// creation of the cursor. NextChunk, // requesting chunk following the one given NextChunk, // requesting chunk following the one given /// An error returned when the chunk given does not contain the cursor position. InvalidOffset, Expand All @@ -207,41 +218,41 @@ pub enum GraphemeIncomplete { // An enum describing the result from lookup of a pair of categories. #[derive(PartialEq, Eq)] enum PairResult { NotBreak, // definitely not a break Break, // definitely a break Extended, // a break iff not in extended mode Regional, // a break if preceded by an even number of RIS Emoji, // a break if preceded by emoji base and (Extend)* NotBreak, // definitely not a break Break, // definitely a break Extended, // a break iff not in extended mode Regional, // a break if preceded by an even number of RIS Emoji, // a break if preceded by emoji base and (Extend)* } fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult { use tables::grapheme::GraphemeCat::*; use self::PairResult::*; use tables::grapheme::GraphemeCat::*; match (before, after) { (GC_CR, GC_LF) => NotBreak, // GB3 (GC_Control, _) => Break, // GB4 (GC_CR, _) => Break, // GB4 (GC_LF, _) => Break, // GB4 (_, GC_Control) => Break, // GB5 (_, GC_CR) => Break, // GB5 (_, GC_LF) => Break, // GB5 (GC_L, GC_L) => NotBreak, // GB6 (GC_L, GC_V) => NotBreak, // GB6 (GC_L, GC_LV) => NotBreak, // GB6 (GC_L, GC_LVT) => NotBreak, // GB6 (GC_LV, GC_V) => NotBreak, // GB7 (GC_LV, GC_T) => NotBreak, // GB7 (GC_V, GC_V) => NotBreak, // GB7 (GC_V, GC_T) => NotBreak, // GB7 (GC_LVT, GC_T) => NotBreak, // GB8 (GC_T, GC_T) => NotBreak, // GB8 (_, GC_Extend) => NotBreak, // GB9 (_, GC_ZWJ) => NotBreak, // GB9 (_, GC_SpacingMark) => Extended, // GB9a (GC_Prepend, _) => Extended, // GB9b (GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11 (GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13 (_, _) => Break, // GB999 (GC_CR, GC_LF) => NotBreak, // GB3 (GC_Control, _) => Break, // GB4 (GC_CR, _) => Break, // GB4 (GC_LF, _) => Break, // GB4 (_, GC_Control) => Break, // GB5 (_, GC_CR) => Break, // GB5 (_, GC_LF) => Break, // GB5 (GC_L, GC_L) => NotBreak, // GB6 (GC_L, GC_V) => NotBreak, // GB6 (GC_L, GC_LV) => NotBreak, // GB6 (GC_L, GC_LVT) => NotBreak, // GB6 (GC_LV, GC_V) => NotBreak, // GB7 (GC_LV, GC_T) => NotBreak, // GB7 (GC_V, GC_V) => NotBreak, // GB7 (GC_V, GC_T) => NotBreak, // GB7 (GC_LVT, GC_T) => NotBreak, // GB8 (GC_T, GC_T) => NotBreak, // GB8 (_, GC_Extend) => NotBreak, // GB9 (_, GC_ZWJ) => NotBreak, // GB9 (_, GC_SpacingMark) => Extended, // GB9a (GC_Prepend, _) => Extended, // GB9b (GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11 (GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13 (_, _) => Break, // GB999 } } Expand Down Expand Up @@ -350,17 +361,19 @@ impl GraphemeCursor { if self.is_extended && chunk_start + chunk.len() == self.offset { let ch = chunk.chars().rev().next().unwrap(); if gr::grapheme_category(ch) == gr::GC_Prepend { self.decide(false); // GB9b self.decide(false); // GB9b return; } } match self.state { GraphemeState::Regional => self.handle_regional(chunk, chunk_start), GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start), _ => if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start { let ch = chunk.chars().rev().next().unwrap(); self.cat_before = Some(gr::grapheme_category(ch)); }, _ => { if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start { let ch = chunk.chars().rev().next().unwrap(); self.cat_before = Some(gr::grapheme_category(ch)); } } } } Expand Down Expand Up @@ -462,17 +475,21 @@ impl GraphemeCursor { /// cursor.set_cursor(12); /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false)); /// ``` pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> { pub fn is_boundary( &mut self, chunk: &str, chunk_start: usize, ) -> Result<bool, GraphemeIncomplete> { use tables::grapheme as gr; if self.state == GraphemeState::Break { return Ok(true) return Ok(true); } if self.state == GraphemeState::NotBreak { return Ok(false) return Ok(false); } if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() { if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() { return Err(GraphemeIncomplete::InvalidOffset) return Err(GraphemeIncomplete::InvalidOffset); } } if let Some(pre_context_offset) = self.pre_context_offset { Expand Down Expand Up @@ -552,7 +569,11 @@ impl GraphemeCursor { /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4))); /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None)); /// ``` pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> { pub fn next_boundary( &mut self, chunk: &str, chunk_start: usize, ) -> Result<Option<usize>, GraphemeIncomplete> { use tables::grapheme as gr; if self.offset == self.len { return Ok(None); Expand Down Expand Up @@ -628,7 +649,11 @@ impl GraphemeCursor { /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0))); /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None)); /// ``` pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> { pub fn prev_boundary( &mut self, chunk: &str, chunk_start: usize, ) -> Result<Option<usize>, GraphemeIncomplete> { use tables::grapheme as gr; if self.offset == 0 { return Ok(None); Expand All @@ -650,7 +675,11 @@ impl GraphemeCursor { self.cat_after = self.cat_before.take(); self.state = GraphemeState::Unknown; if let Some(ris_count) = self.ris_count { self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None }; self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None }; } if let Some(prev_ch) = iter.next() { ch = prev_ch; Expand All @@ -677,7 +706,10 @@ impl GraphemeCursor { fn test_grapheme_cursor_ris_precontext() { let s = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}"; let mut c = GraphemeCursor::new(8, s.len(), true); assert_eq!(c.is_boundary(&s[4..], 4), Err(GraphemeIncomplete::PreContext(4))); assert_eq!( c.is_boundary(&s[4..], 4), Err(GraphemeIncomplete::PreContext(4)) ); c.provide_context(&s[..4], 0); assert_eq!(c.is_boundary(&s[4..], 4), Ok(true)); } Expand All @@ -686,7 +718,10 @@ fn test_grapheme_cursor_ris_precontext() { fn test_grapheme_cursor_chunk_start_require_precontext() { let s = "\r\n"; let mut c = GraphemeCursor::new(1, s.len(), true); assert_eq!(c.is_boundary(&s[1..], 1), Err(GraphemeIncomplete::PreContext(1))); assert_eq!( c.is_boundary(&s[1..], 1), Err(GraphemeIncomplete::PreContext(1)) ); c.provide_context(&s[..1], 0); assert_eq!(c.is_boundary(&s[1..], 1), Ok(false)); } Expand All @@ -695,14 +730,20 @@ fn test_grapheme_cursor_chunk_start_require_precontext() { fn test_grapheme_cursor_prev_boundary() { let s = "abcd"; let mut c = GraphemeCursor::new(3, s.len(), true); assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk)); assert_eq!( c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk) ); assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2))); } #[test] fn test_grapheme_cursor_prev_boundary_chunk_start() { let s = "abcd"; let mut c = GraphemeCursor::new(2, s.len(), true); assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk)); assert_eq!( c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk) ); assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1))); }