Commit 14dbeb8

committed

tests pass

1 parent 1c3e5af · commit 14dbeb8 (Copy full SHA for 14dbeb8)

File tree

1 file changed

+273

-14

lines changed

src
- sentence.rs

1 file changed

+273

-14

lines changed

`‎src/sentence.rs‎`

Lines changed: 273 additions & 14 deletions

Original file line number	Diff line number	Diff line change
`@@ -9,33 +9,292 @@`
`9`	`9`	`// except according to those terms.`
`10`	`10`
`11`	`11`	`use core::cmp;`
`12`		`-use core::iter::Filter;`
`13`	`12`
`14`		`-use tables::sentence::SentenceCat;`
	`13`	`+mod fwd{`
	`14`	`+use tables::sentence::SentenceCat;`
	`15`	`+use core::cmp;`
`15`	`16`
`16`		`-/// TODO`
`17`		`-#[derive(Clone)]`
	`17`	`+#[derive(Clone,Copy,PartialEq,Eq)]`
	`18`	`+enumStatePart{`
	`19`	`+Sot,`
	`20`	`+Eot,`
	`21`	`+Other,`
	`22`	`+CR,`
	`23`	`+LF,`
	`24`	`+Sep,`
	`25`	`+ATerm,`
	`26`	`+UpperLower,`
	`27`	`+ClosePlus,`
	`28`	`+SpPlus,`
	`29`	`+STerm`
	`30`	`+}`
	`31`	`+`
	`32`	`+#[derive(Clone,PartialEq,Eq)]`
	`33`	`+structSentenceBreaksState(pub[StatePart;4]);`
	`34`	`+`
	`35`	`+constINITIAL_STATE:SentenceBreaksState =SentenceBreaksState([`
	`36`	`+StatePart::Sot,`
	`37`	`+StatePart::Sot,`
	`38`	`+StatePart::Sot,`
	`39`	`+StatePart::Sot`
	`40`	`+]);`
	`41`	`+`
	`42`	`+pubstructSentenceBreaks<'a>{`
	`43`	`+pubstring:&'astr,`
	`44`	`+pos:usize,`
	`45`	`+state:SentenceBreaksState`
	`46`	`+}`
	`47`	`+`
	`48`	`+implSentenceBreaksState{`
	`49`	`+fnnext(&self,cat:SentenceCat) ->SentenceBreaksState{`
	`50`	`+let&SentenceBreaksState(parts) =self;`
	`51`	`+let parts =match(parts[3], cat){`
	`52`	`+(StatePart::ClosePlus,SentenceCat::SC_Close) => parts,`
	`53`	`+(StatePart::SpPlus,SentenceCat::SC_Sp) => parts,`
	`54`	`+ _ =>[`
	`55`	`+ parts[1],`
	`56`	`+ parts[2],`
	`57`	`+ parts[3],`
	`58`	`+match cat{`
	`59`	`+SentenceCat::SC_CR =>StatePart::CR,`
	`60`	`+SentenceCat::SC_LF =>StatePart::LF,`
	`61`	`+SentenceCat::SC_Sep =>StatePart::Sep,`
	`62`	`+SentenceCat::SC_ATerm =>StatePart::ATerm,`
	`63`	`+SentenceCat::SC_Upper \|`
	`64`	`+SentenceCat::SC_Lower =>StatePart::UpperLower,`
	`65`	`+SentenceCat::SC_Close =>StatePart::ClosePlus,`
	`66`	`+SentenceCat::SC_Sp =>StatePart::SpPlus,`
	`67`	`+SentenceCat::SC_STerm =>StatePart::STerm,`
	`68`	`+ _ =>StatePart::Other`
	`69`	`+}`
	`70`	`+]`
	`71`	`+};`
	`72`	`+SentenceBreaksState(parts)`
	`73`	`+}`
	`74`	`+`
	`75`	`+fnend(&self) ->SentenceBreaksState{`
	`76`	`+let&SentenceBreaksState(parts) =self;`
	`77`	`+SentenceBreaksState([`
	`78`	`+ parts[1],`
	`79`	`+ parts[2],`
	`80`	`+ parts[3],`
	`81`	`+StatePart::Eot`
	`82`	`+])`
	`83`	`+}`
	`84`	`+`
	`85`	`+fnmatch1(&self,part:StatePart) ->bool{`
	`86`	`+let&SentenceBreaksState(parts) =self;`
	`87`	`+ part == parts[3]`
	`88`	`+}`
	`89`	`+`
	`90`	`+fnmatch2(&self,part1:StatePart,part2:StatePart) ->bool{`
	`91`	`+let&SentenceBreaksState(parts) =self;`
	`92`	`+ part1 == parts[2] && part2 == parts[3]`
	`93`	`+}`
	`94`	`+}`
	`95`	`+`
	`96`	`+fnmatch_sb8(state:&SentenceBreaksState,ahead:&str) ->bool{`
	`97`	`+let aterm_part ={`
	`98`	`+// ATerm Close* Sp*`
	`99`	`+let&SentenceBreaksState(parts) = state;`
	`100`	`+letmut idx =if parts[3] ==StatePart::SpPlus{2}else{3};`
	`101`	`+if parts[idx] ==StatePart::ClosePlus{ idx -=1}`
	`102`	`+ parts[idx]`
	`103`	`+};`
	`104`	`+`
	`105`	`+if aterm_part ==StatePart::ATerm{`
	`106`	`+use tables::sentenceas se;`
	`107`	`+`
	`108`	`+for next_charin ahead.chars(){`
	`109`	`+//( ¬(OLetter \| Upper \| Lower \| ParaSep \| SATerm) )* Lower`
	`110`	`+match se::sentence_category(next_char){`
	`111`	`+ se::SC_Lower =>returntrue,`
	`112`	`+ se::SC_OLetter \|`
	`113`	`+ se::SC_Upper \|`
	`114`	`+ se::SC_Sep \| se::SC_CR \| se::SC_LF \|`
	`115`	`+ se::SC_STerm \| se::SC_ATerm =>returnfalse,`
	`116`	`+ _ =>continue`
	`117`	`+}`
	`118`	`+}`
	`119`	`+}`
	`120`	`+`
	`121`	`+false`
	`122`	`+}`
	`123`	`+`
	`124`	`+fnmatch_sb8a(state:&SentenceBreaksState) ->bool{`
	`125`	`+// SATerm Close* Sp*`
	`126`	`+let&SentenceBreaksState(parts) = state;`
	`127`	`+letmut idx =if parts[3] ==StatePart::SpPlus{2}else{3};`
	`128`	`+if parts[idx] ==StatePart::ClosePlus{ idx -=1}`
	`129`	`+ parts[idx] ==StatePart::STerm \|\| parts[idx] ==StatePart::ATerm`
	`130`	`+}`
	`131`	`+`
	`132`	`+fnmatch_sb9(state:&SentenceBreaksState) ->bool{`
	`133`	`+// SATerm Close*`
	`134`	`+let&SentenceBreaksState(parts) = state;`
	`135`	`+let idx =if parts[3] ==StatePart::ClosePlus{2}else{3};`
	`136`	`+ parts[idx] ==StatePart::STerm \|\| parts[idx] ==StatePart::ATerm`
	`137`	`+}`
	`138`	`+`
	`139`	`+fnmatch_sb11(state:&SentenceBreaksState) ->bool{`
	`140`	`+// SATerm Close* Sp* ParaSep?`
	`141`	`+let&SentenceBreaksState(parts) = state;`
	`142`	`+letmut idx =match parts[3]{`
	`143`	`+StatePart::Sep \|`
	`144`	`+StatePart::CR \|`
	`145`	`+StatePart::LF =>2,`
	`146`	`+ _ =>3`
	`147`	`+};`
	`148`	`+`
	`149`	`+if parts[idx] ==StatePart::SpPlus{ idx -=1}`
	`150`	`+if parts[idx] ==StatePart::ClosePlus{ idx -=1}`
	`151`	`+`
	`152`	`+ parts[idx] ==StatePart::STerm \|\| parts[idx] ==StatePart::ATerm`
	`153`	`+}`
	`154`	`+`
	`155`	`+impl<'a>IteratorforSentenceBreaks<'a>{`
	`156`	`+// Returns the index of the character which follows a break`
	`157`	`+typeItem =usize;`
	`158`	`+`
	`159`	`+#[inline]`
	`160`	`+fnsize_hint(&self) ->(usize,Option<usize>){`
	`161`	`+let slen =self.string.len();`
	`162`	`+// A sentence could be one character`
	`163`	`+(cmp::min(slen,2),Some(slen +1))`
	`164`	`+}`
	`165`	`+`
	`166`	`+#[inline]`
	`167`	`+fnnext(&mutself) ->Option<usize>{`
	`168`	`+use tables::sentenceas se;`
	`169`	`+`
	`170`	`+for next_charinself.string[self.pos..].chars(){`
	`171`	`+let position_before =self.pos;`
	`172`	`+let state_before =self.state.clone();`
	`173`	`+`
	`174`	`+let next_cat = se::sentence_category(next_char);`
	`175`	`+`
	`176`	`+self.pos += next_char.len_utf8();`
	`177`	`+self.state =self.state.next(next_cat);`
	`178`	`+`
	`179`	`+match next_cat{`
	`180`	`+// SB1`
	`181`	`+ _if state_before.match1(StatePart::Sot) =>`
	`182`	`+returnSome(position_before),`
	`183`	`+`
	`184`	`+// SB3`
	`185`	`+SentenceCat::SC_LFif state_before.match1(StatePart::CR) =>`
	`186`	`+continue,`
	`187`	`+`
	`188`	`+// SB4`
	`189`	`+ _if state_before.match1(StatePart::Sep)`
	`190`	`+ \|\| state_before.match1(StatePart::CR)`
	`191`	`+ \|\| state_before.match1(StatePart::LF)`
	`192`	`+ =>returnSome(position_before),`
	`193`	`+`
	`194`	`+// SB5`
	`195`	`+SentenceCat::SC_Extend \|`
	`196`	`+SentenceCat::SC_Format =>self.state = state_before,`
	`197`	`+`
	`198`	`+// SB6`
	`199`	`+SentenceCat::SC_Numericif state_before.match1(StatePart::ATerm) =>`
	`200`	`+continue,`
	`201`	`+`
	`202`	`+// SB7`
	`203`	`+SentenceCat::SC_Upperif state_before.match2(StatePart::UpperLower,StatePart::ATerm) =>`
	`204`	`+continue,`
	`205`	`+`
	`206`	`+// SB8`
	`207`	`+ _ifmatch_sb8(&state_before,&self.string[position_before..]) =>`
	`208`	`+continue,`
	`209`	`+`
	`210`	`+// SB8a`
	`211`	`+SentenceCat::SC_SContinue \|`
	`212`	`+SentenceCat::SC_STerm \|`
	`213`	`+SentenceCat::SC_ATermifmatch_sb8a(&state_before) =>`
	`214`	`+continue,`
	`215`	`+`
	`216`	`+// SB9`
	`217`	`+SentenceCat::SC_Close \|`
	`218`	`+SentenceCat::SC_Sp \|`
	`219`	`+SentenceCat::SC_Sep \|`
	`220`	`+SentenceCat::SC_CR \|`
	`221`	`+SentenceCat::SC_LFifmatch_sb9(&state_before) =>`
	`222`	`+continue,`
	`223`	`+`
	`224`	`+// SB10`
	`225`	`+SentenceCat::SC_Sp \|`
	`226`	`+SentenceCat::SC_Sep \|`
	`227`	`+SentenceCat::SC_CR \|`
	`228`	`+SentenceCat::SC_LFifmatch_sb8a(&state_before) =>`
	`229`	`+continue,`
	`230`	`+`
	`231`	`+// SB11`
	`232`	`+ _ifmatch_sb11(&state_before) =>`
	`233`	`+returnSome(position_before),`
	`234`	`+`
	`235`	`+// SB998`
	`236`	`+ _ =>continue`
	`237`	`+}`
	`238`	`+}`
	`239`	`+`
	`240`	`+// SB2`
	`241`	`+ifself.state.match1(StatePart::Sot){`
	`242`	`+None`
	`243`	`+}elseifself.state.match1(StatePart::Eot){`
	`244`	`+None`
	`245`	`+}else{`
	`246`	`+self.state =self.state.end();`
	`247`	`+Some(self.pos)`
	`248`	`+}`
	`249`	`+}`
	`250`	`+}`
	`251`	`+`
	`252`	`+pubfnnew_sentence_breaks<'a>(source:&'astr) ->SentenceBreaks<'a>{`
	`253`	`+SentenceBreaks{string: source,pos:0,state:INITIAL_STATE}`
	`254`	`+}`
	`255`	`+`
	`256`	`+}`
	`257`	`+`
	`258`	`+/// TODO docs`
`18`	`259`	`pubstructUSentenceBounds<'a>{`
`19`		`-string:&'astr`
`20`		`-// state?`
	`260`	`+iter: fwd::SentenceBreaks<'a>,`
	`261`	`+sentence_start:Option<usize>`
	`262`	`+}`
	`263`	`+`
	`264`	`+/// TODO docs`
	`265`	`+pubfnnew_sentence_bounds<'a>(source:&'astr) ->USentenceBounds<'a>{`
	`266`	`+USentenceBounds{`
	`267`	`+iter: fwd::new_sentence_breaks(source),`
	`268`	`+sentence_start:None`
	`269`	`+}`
`21`	`270`	`}`
`22`	`271`
`23`	`272`	`impl<'a>IteratorforUSentenceBounds<'a>{`
`24`	`273`	`typeItem =&'astr;`
`25`	`274`
`26`	`275`	`#[inline]`
`27`	`276`	`fnsize_hint(&self) ->(usize,Option<usize>){`
`28`		`-letslen=self.string.len();`
`29`		`-(cmp::min(slen,1),Some(slen))`
	`277`	`+let(lower, upper)=self.iter.size_hint();`
	`278`	`+(cmp::max(0, lower -1),upper.map(\|u\| cmp::max(0, u -1)))`
`30`	`279`	`}`
`31`	`280`
`32`	`281`	`#[inline]`
`33`	`282`	`fnnext(&mutself) ->Option<&'astr>{`
`34`		`-panic!("todo")`
`35`		`-}`
`36`		`-}`
	`283`	`+ifself.sentence_start ==None{`
	`284`	`+ifletSome(start_pos) =self.iter.next(){`
	`285`	`+self.sentence_start =Some(start_pos)`
	`286`	`+}else{`
	`287`	`+returnNone`
	`288`	`+}`
	`289`	`+}`
`37`	`290`
`38`		`-#[inline]`
`39`		`-pubfnnew_sentence_bounds<'b>(s:&'bstr) ->USentenceBounds<'b>{`
`40`		`-USentenceBounds{string: s}`
	`291`	`+ifletSome(break_pos) =self.iter.next(){`
	`292`	`+let start_pos =self.sentence_start.unwrap();`
	`293`	`+let sentence =&self.iter.string[start_pos..break_pos];`
	`294`	`+self.sentence_start =Some(break_pos);`
	`295`	`+Some(sentence)`
	`296`	`+}else{`
	`297`	`+None`
	`298`	`+}`
	`299`	`+}`
`41`	`300`	`}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit14dbeb8

File tree

1 file changed

1 file changed

`‎src/sentence.rs‎`

0 commit comments