@@ -102,6 +102,7 @@ enum UWordBoundsState {
102102FormatExtend ( FormatExtendType ) ,
103103Zwj ,
104104Emoji ,
105+ WSegSpace ,
105106}
106107
107108// subtypes for FormatExtend state in UWordBoundsState
@@ -156,6 +157,8 @@ impl<'a> Iterator for UWordBounds<'a> {
156157// Whether or not the previous category was ZWJ
157158// ZWJs get collapsed, so this handles precedence of WB3c over WB4
158159let mut prev_zwj;
160+ // Whether any Extend/Format/ZWJ characters were skipped. Handles precedence of WB3d over WB4
161+ let mut skipped_format_extend =false ;
159162for ( curr, ch) in self . string . char_indices ( ) {
160163 idx = curr;
161164 prev_zwj = cat == wd:: WC_ZWJ ;
@@ -177,6 +180,7 @@ impl<'a> Iterator for UWordBounds<'a> {
177180if state !=Start {
178181match cat{
179182 wd:: WC_Extend | wd:: WC_Format | wd:: WC_ZWJ =>{
183+ skipped_format_extend =true ;
180184continue
181185}
182186 _ =>{ }
@@ -219,6 +223,7 @@ impl<'a> Iterator for UWordBounds<'a> {
219223 wd:: WC_Regional_Indicator =>Regional ( RegionalState :: Half ) , // rule WB13c
220224 wd:: WC_LF | wd:: WC_Newline =>break , // rule WB3a
221225 wd:: WC_ZWJ =>Zwj , // rule WB3c
226+ wd:: WC_WSegSpace =>WSegSpace , // rule WB3d
222227 _ =>{
223228if let Some ( ncat) =self . get_next_cat ( idx) { // rule WB4
224229if ncat == wd:: WC_Format || ncat == wd:: WC_Extend || ncat == wd:: WC_ZWJ {
@@ -230,6 +235,13 @@ impl<'a> Iterator for UWordBounds<'a> {
230235break ; // rule WB999
231236}
232237} ,
238+ WSegSpace =>match cat{
239+ wd:: WC_WSegSpace if !skipped_format_extend =>WSegSpace ,
240+ _ =>{
241+ take_curr =false ;
242+ break ;
243+ }
244+ } ,
233245Zwj =>{
234246// We already handle WB3c above.
235247 take_curr =false ;
@@ -371,6 +383,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
371383let mut savestate =Start ;
372384let mut cat = wd:: WC_Any ;
373385
386+ let mut skipped_format_extend =false ;
387+
374388for ( curr, ch) in self . string . char_indices ( ) . rev ( ) {
375389 previdx = idx;
376390 idx = curr;
@@ -409,6 +423,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
409423 state = savestate;
410424 previdx = saveidx;
411425 take_cat =false ;
426+ skipped_format_extend =true ;
412427}
413428
414429// Don't use `continue` in this match without updating `catb`
@@ -427,6 +442,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
427442 saveidx = idx;
428443FormatExtend ( AcceptQLetter ) // rule WB7a
429444} ,
445+ wd:: WC_WSegSpace =>WSegSpace ,
430446 wd:: WC_CR | wd:: WC_LF | wd:: WC_Newline =>{
431447if state ==Start {
432448if cat == wd:: WC_LF {
@@ -451,6 +467,15 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
451467break ;
452468}
453469} ,
470+ WSegSpace =>match cat{ // rule WB3d
471+ wd:: WC_WSegSpace if !skipped_format_extend =>{
472+ WSegSpace
473+ }
474+ _ =>{
475+ take_curr =false ;
476+ break ;
477+ }
478+ } ,
454479Letter |HLetter =>match cat{
455480 wd:: WC_ALetter =>Letter , // rule WB5
456481 wd:: WC_Hebrew_Letter =>HLetter , // rule WB5