Commit 9c7abf2

committed

Documentation and code reorg

1 parent 50058a5 commit 9c7abf2 Copy full SHA for 9c7abf2

File tree

1 file changed

+31

-21

lines changed

src
- sentence.rs

1 file changed

+31

-21

lines changed

`‎src/sentence.rs‎`

Lines changed: 31 additions & 21 deletions

Original file line number	Diff line number	Diff line change
`@@ -16,6 +16,8 @@ mod fwd {`
`16`	`16`	`use tables::sentence::SentenceCat;`
`17`	`17`	`use core::cmp;`
`18`	`18`
	`19`	`+// Describe a parsed part of source string as described in this table:`
	`20`	`+// https://unicode.org/reports/tr29/#Default_Sentence_Boundaries`
`19`	`21`	`#[derive(Clone,Copy,PartialEq,Eq)]`
`20`	`22`	`enum StatePart{`
`21`	`23`	`Sot,`
`@@ -49,6 +51,8 @@ mod fwd {`
`49`	`51`	`}`
`50`	`52`
`51`	`53`	`impl SentenceBreaksState{`
	`54`	`+// Attempt to advance the internal state by one part`
	`55`	`+// Whitespace and some punctuation will be collapsed`
`52`	`56`	`fn next(&self,cat:SentenceCat) ->SentenceBreaksState{`
`53`	`57`	`let&SentenceBreaksState(parts) =self;`
`54`	`58`	`let parts =match(parts[3], cat){`
`@@ -85,27 +89,28 @@ mod fwd {`
`85`	`89`	`])`
`86`	`90`	`}`
`87`	`91`
	`92`	+// Helper function to check if state head matches a single `StatePart`
`88`	`93`	`fn match1(&self,part:StatePart) ->bool{`
`89`	`94`	`let&SentenceBreaksState(parts) =self;`
`90`	`95`	`part == parts[3]`
`91`	`96`	`}`
`92`	`97`
	`98`	+// Helper function to check if first two `StateParts` in state match
	`99`	`+// the given two`
`93`	`100`	`fn match2(&self,part1:StatePart,part2:StatePart) ->bool{`
`94`	`101`	`let&SentenceBreaksState(parts) =self;`
`95`	`102`	`part1 == parts[2] && part2 == parts[3]`
`96`	`103`	`}`
`97`	`104`	`}`
`98`	`105`
	`106`	`+// https://unicode.org/reports/tr29/#SB8`
	`107`	`+// TODO cache this, it is currently quadratic`
`99`	`108`	`fn match_sb8(state:&SentenceBreaksState,ahead:&str) ->bool{`
`100`		`-let aterm_part ={`
`101`		`-// ATerm Close* Sp*`
`102`		`-let&SentenceBreaksState(parts) = state;`
`103`		`-letmut idx =if parts[3] ==StatePart::SpPlus{2}else{3};`
`104`		`-if parts[idx] ==StatePart::ClosePlus{ idx -=1}`
`105`		`- parts[idx]`
`106`		`-};`
	`109`	`+let&SentenceBreaksState(parts) = state;`
	`110`	`+let mut idx =if parts[3] ==StatePart::SpPlus{2}else{3};`
	`111`	`+if parts[idx] ==StatePart::ClosePlus{ idx -=1}`
`107`	`112`
`108`		`-ifaterm_part ==StatePart::ATerm{`
	`113`	`+if parts[idx] ==StatePart::ATerm{`
`109`	`114`	`use tables::sentence as se;`
`110`	`115`
`111`	`116`	`for next_char in ahead.chars(){`
`@@ -124,6 +129,7 @@ mod fwd {`
`124`	`129`	`false`
`125`	`130`	`}`
`126`	`131`
	`132`	`+// https://unicode.org/reports/tr29/#SB8a`
`127`	`133`	`fn match_sb8a(state:&SentenceBreaksState) ->bool{`
`128`	`134`	`// SATerm Close* Sp*`
`129`	`135`	`let&SentenceBreaksState(parts) = state;`
`@@ -132,13 +138,15 @@ mod fwd {`
`132`	`138`	`parts[idx] ==StatePart::STerm \|\| parts[idx] ==StatePart::ATerm`
`133`	`139`	`}`
`134`	`140`
	`141`	`+// https://unicode.org/reports/tr29/#SB9`
`135`	`142`	`fn match_sb9(state:&SentenceBreaksState) ->bool{`
`136`	`143`	`// SATerm Close*`
`137`	`144`	`let&SentenceBreaksState(parts) = state;`
`138`	`145`	`let idx =if parts[3] ==StatePart::ClosePlus{2}else{3};`
`139`	`146`	`parts[idx] ==StatePart::STerm \|\| parts[idx] ==StatePart::ATerm`
`140`	`147`	`}`
`141`	`148`
	`149`	`+// https://unicode.org/reports/tr29/#SB11`
`142`	`150`	`fn match_sb11(state:&SentenceBreaksState) ->bool{`
`143`	`151`	`// SATerm Close* Sp* ParaSep?`
`144`	`152`	`let&SentenceBreaksState(parts) = state;`
`@@ -180,67 +188,69 @@ mod fwd {`
`180`	`188`	`self.state =self.state.next(next_cat);`
`181`	`189`
`182`	`190`	`match next_cat{`
`183`		`-// SB1`
	`191`	`+// SB1 https://unicode.org/reports/tr29/#SB1`
`184`	`192`	`_if state_before.match1(StatePart::Sot) =>`
`185`	`193`	`returnSome(position_before),`
`186`	`194`
`187`		`-// SB3`
	`195`	`+// SB2 is handled when inner iterator (chars) is finished`
	`196`	`+`
	`197`	`+// SB3 https://unicode.org/reports/tr29/#SB3`
`188`	`198`	`SentenceCat::SC_LFif state_before.match1(StatePart::CR) =>`
`189`	`199`	`continue,`
`190`	`200`
`191`		`-// SB4`
	`201`	`+// SB4 https://unicode.org/reports/tr29/#SB4`
`192`	`202`	`_if state_before.match1(StatePart::Sep)`
`193`	`203`	`\|\| state_before.match1(StatePart::CR)`
`194`	`204`	`\|\| state_before.match1(StatePart::LF)`
`195`	`205`	`=>returnSome(position_before),`
`196`	`206`
`197`		`-// SB5`
	`207`	`+// SB5 https://unicode.org/reports/tr29/#SB5`
`198`	`208`	`SentenceCat::SC_Extend \|`
`199`	`209`	`SentenceCat::SC_Format =>self.state = state_before,`
`200`	`210`
`201`		`-// SB6`
	`211`	`+// SB6 https://unicode.org/reports/tr29/#SB6`
`202`	`212`	`SentenceCat::SC_Numericif state_before.match1(StatePart::ATerm) =>`
`203`	`213`	`continue,`
`204`	`214`
`205`		`-// SB7`
	`215`	`+// SB7 https://unicode.org/reports/tr29/#SB7`
`206`	`216`	`SentenceCat::SC_Upperif state_before.match2(StatePart::UpperLower,StatePart::ATerm) =>`
`207`	`217`	`continue,`
`208`	`218`
`209`		`-// SB8`
	`219`	`+// SB8 https://unicode.org/reports/tr29/#SB8`
`210`	`220`	`_ifmatch_sb8(&state_before,&self.string[position_before..]) =>`
`211`	`221`	`continue,`
`212`	`222`
`213`		`-// SB8a`
	`223`	`+// SB8a https://unicode.org/reports/tr29/#SB8a`
`214`	`224`	`SentenceCat::SC_SContinue \|`
`215`	`225`	`SentenceCat::SC_STerm \|`
`216`	`226`	`SentenceCat::SC_ATermifmatch_sb8a(&state_before) =>`
`217`	`227`	`continue,`
`218`	`228`
`219`		`-// SB9`
	`229`	`+// SB9 https://unicode.org/reports/tr29/#SB9`
`220`	`230`	`SentenceCat::SC_Close \|`
`221`	`231`	`SentenceCat::SC_Sp \|`
`222`	`232`	`SentenceCat::SC_Sep \|`
`223`	`233`	`SentenceCat::SC_CR \|`
`224`	`234`	`SentenceCat::SC_LFifmatch_sb9(&state_before) =>`
`225`	`235`	`continue,`
`226`	`236`
`227`		`-// SB10`
	`237`	`+// SB10 https://unicode.org/reports/tr29/#SB10`
`228`	`238`	`SentenceCat::SC_Sp \|`
`229`	`239`	`SentenceCat::SC_Sep \|`
`230`	`240`	`SentenceCat::SC_CR \|`
`231`	`241`	`SentenceCat::SC_LFifmatch_sb8a(&state_before) =>`
`232`	`242`	`continue,`
`233`	`243`
`234`		`-// SB11`
	`244`	`+// SB11 https://unicode.org/reports/tr29/#SB11`
`235`	`245`	`_ifmatch_sb11(&state_before) =>`
`236`	`246`	`returnSome(position_before),`
`237`	`247`
`238`		`-// SB998`
	`248`	`+// SB998 https://unicode.org/reports/tr29/#SB998`
`239`	`249`	`_ =>continue`
`240`	`250`	`}`
`241`	`251`	`}`
`242`	`252`
`243`		`-// SB2`
	`253`	`+// SB2 https://unicode.org/reports/tr29/#SB2`
`244`	`254`	`ifself.state.match1(StatePart::Sot){`
`245`	`255`	`None`
`246`	`256`	`}elseifself.state.match1(StatePart::Eot){`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 9c7abf2

File tree

1 file changed

1 file changed

`‎src/sentence.rs‎`

0 commit comments