Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 14dbeb8

Browse files
committed
tests pass
1 parent 1c3e5af, commit 14dbeb8

File tree

1 file changed

+273
-14
lines changed

1 file changed

+273
-14
lines changed

‎src/sentence.rs

Lines changed: 273 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,292 @@
99
// except according to those terms.
1010

1111
use core::cmp;
12-
use core::iter::Filter;
1312

14-
use tables::sentence::SentenceCat;
13+
mod fwd{
14+
use tables::sentence::SentenceCat;
15+
use core::cmp;
1516

16-
/// TODO
17-
#[derive(Clone)]
17+
/// One slot of the look-behind window used while scanning for sentence breaks.
///
/// Each consumed character is reduced to one of these summaries; runs of
/// `Close` or `Sp` characters are folded into a single `ClosePlus` / `SpPlus`
/// slot so that a fixed-size window is enough to evaluate the break rules.
#[derive(Clone, Copy, PartialEq, Eq)]
enum StatePart {
    /// Start of text: fills the window before anything has been consumed.
    Sot,
    /// End of text: appended once the final break has been reported.
    Eot,
    /// Any category that has no dedicated variant.
    Other,
    /// Carriage return.
    CR,
    /// Line feed.
    LF,
    /// Separator category.
    Sep,
    /// `ATerm` category.
    ATerm,
    /// Either an `Upper` or a `Lower` character.
    UpperLower,
    /// One or more consecutive `Close` characters.
    ClosePlus,
    /// One or more consecutive `Sp` characters.
    SpPlus,
    /// `STerm` category.
    STerm,
}
31+
32+
#[derive(Clone,PartialEq,Eq)]
33+
structSentenceBreaksState(pub[StatePart;4]);
34+
35+
constINITIAL_STATE:SentenceBreaksState =SentenceBreaksState([
36+
StatePart::Sot,
37+
StatePart::Sot,
38+
StatePart::Sot,
39+
StatePart::Sot
40+
]);
41+
42+
pubstructSentenceBreaks<'a>{
43+
pubstring:&'astr,
44+
pos:usize,
45+
state:SentenceBreaksState
46+
}
47+
48+
implSentenceBreaksState{
49+
fnnext(&self,cat:SentenceCat) ->SentenceBreaksState{
50+
let&SentenceBreaksState(parts) =self;
51+
let parts =match(parts[3], cat){
52+
(StatePart::ClosePlus,SentenceCat::SC_Close) => parts,
53+
(StatePart::SpPlus,SentenceCat::SC_Sp) => parts,
54+
_ =>[
55+
parts[1],
56+
parts[2],
57+
parts[3],
58+
match cat{
59+
SentenceCat::SC_CR =>StatePart::CR,
60+
SentenceCat::SC_LF =>StatePart::LF,
61+
SentenceCat::SC_Sep =>StatePart::Sep,
62+
SentenceCat::SC_ATerm =>StatePart::ATerm,
63+
SentenceCat::SC_Upper |
64+
SentenceCat::SC_Lower =>StatePart::UpperLower,
65+
SentenceCat::SC_Close =>StatePart::ClosePlus,
66+
SentenceCat::SC_Sp =>StatePart::SpPlus,
67+
SentenceCat::SC_STerm =>StatePart::STerm,
68+
_ =>StatePart::Other
69+
}
70+
]
71+
};
72+
SentenceBreaksState(parts)
73+
}
74+
75+
fnend(&self) ->SentenceBreaksState{
76+
let&SentenceBreaksState(parts) =self;
77+
SentenceBreaksState([
78+
parts[1],
79+
parts[2],
80+
parts[3],
81+
StatePart::Eot
82+
])
83+
}
84+
85+
fnmatch1(&self,part:StatePart) ->bool{
86+
let&SentenceBreaksState(parts) =self;
87+
part == parts[3]
88+
}
89+
90+
fnmatch2(&self,part1:StatePart,part2:StatePart) ->bool{
91+
let&SentenceBreaksState(parts) =self;
92+
part1 == parts[2] && part2 == parts[3]
93+
}
94+
}
95+
96+
fnmatch_sb8(state:&SentenceBreaksState,ahead:&str) ->bool{
97+
let aterm_part ={
98+
// ATerm Close* Sp*
99+
let&SentenceBreaksState(parts) = state;
100+
letmut idx =if parts[3] ==StatePart::SpPlus{2}else{3};
101+
if parts[idx] ==StatePart::ClosePlus{ idx -=1}
102+
parts[idx]
103+
};
104+
105+
if aterm_part ==StatePart::ATerm{
106+
use tables::sentenceas se;
107+
108+
for next_charin ahead.chars(){
109+
//( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower
110+
match se::sentence_category(next_char){
111+
se::SC_Lower =>returntrue,
112+
se::SC_OLetter |
113+
se::SC_Upper |
114+
se::SC_Sep | se::SC_CR | se::SC_LF |
115+
se::SC_STerm | se::SC_ATerm =>returnfalse,
116+
_ =>continue
117+
}
118+
}
119+
}
120+
121+
false
122+
}
123+
124+
fnmatch_sb8a(state:&SentenceBreaksState) ->bool{
125+
// SATerm Close* Sp*
126+
let&SentenceBreaksState(parts) = state;
127+
letmut idx =if parts[3] ==StatePart::SpPlus{2}else{3};
128+
if parts[idx] ==StatePart::ClosePlus{ idx -=1}
129+
parts[idx] ==StatePart::STerm || parts[idx] ==StatePart::ATerm
130+
}
131+
132+
fnmatch_sb9(state:&SentenceBreaksState) ->bool{
133+
// SATerm Close*
134+
let&SentenceBreaksState(parts) = state;
135+
let idx =if parts[3] ==StatePart::ClosePlus{2}else{3};
136+
parts[idx] ==StatePart::STerm || parts[idx] ==StatePart::ATerm
137+
}
138+
139+
fnmatch_sb11(state:&SentenceBreaksState) ->bool{
140+
// SATerm Close* Sp* ParaSep?
141+
let&SentenceBreaksState(parts) = state;
142+
letmut idx =match parts[3]{
143+
StatePart::Sep |
144+
StatePart::CR |
145+
StatePart::LF =>2,
146+
_ =>3
147+
};
148+
149+
if parts[idx] ==StatePart::SpPlus{ idx -=1}
150+
if parts[idx] ==StatePart::ClosePlus{ idx -=1}
151+
152+
parts[idx] ==StatePart::STerm || parts[idx] ==StatePart::ATerm
153+
}
154+
155+
impl<'a>IteratorforSentenceBreaks<'a>{
156+
// Returns the index of the character which follows a break
157+
typeItem =usize;
158+
159+
#[inline]
160+
fnsize_hint(&self) ->(usize,Option<usize>){
161+
let slen =self.string.len();
162+
// A sentence could be one character
163+
(cmp::min(slen,2),Some(slen +1))
164+
}
165+
166+
#[inline]
167+
fnnext(&mutself) ->Option<usize>{
168+
use tables::sentenceas se;
169+
170+
for next_charinself.string[self.pos..].chars(){
171+
let position_before =self.pos;
172+
let state_before =self.state.clone();
173+
174+
let next_cat = se::sentence_category(next_char);
175+
176+
self.pos += next_char.len_utf8();
177+
self.state =self.state.next(next_cat);
178+
179+
match next_cat{
180+
// SB1
181+
_if state_before.match1(StatePart::Sot) =>
182+
returnSome(position_before),
183+
184+
// SB3
185+
SentenceCat::SC_LFif state_before.match1(StatePart::CR) =>
186+
continue,
187+
188+
// SB4
189+
_if state_before.match1(StatePart::Sep)
190+
|| state_before.match1(StatePart::CR)
191+
|| state_before.match1(StatePart::LF)
192+
=>returnSome(position_before),
193+
194+
// SB5
195+
SentenceCat::SC_Extend |
196+
SentenceCat::SC_Format =>self.state = state_before,
197+
198+
// SB6
199+
SentenceCat::SC_Numericif state_before.match1(StatePart::ATerm) =>
200+
continue,
201+
202+
// SB7
203+
SentenceCat::SC_Upperif state_before.match2(StatePart::UpperLower,StatePart::ATerm) =>
204+
continue,
205+
206+
// SB8
207+
_ifmatch_sb8(&state_before,&self.string[position_before..]) =>
208+
continue,
209+
210+
// SB8a
211+
SentenceCat::SC_SContinue |
212+
SentenceCat::SC_STerm |
213+
SentenceCat::SC_ATermifmatch_sb8a(&state_before) =>
214+
continue,
215+
216+
// SB9
217+
SentenceCat::SC_Close |
218+
SentenceCat::SC_Sp |
219+
SentenceCat::SC_Sep |
220+
SentenceCat::SC_CR |
221+
SentenceCat::SC_LFifmatch_sb9(&state_before) =>
222+
continue,
223+
224+
// SB10
225+
SentenceCat::SC_Sp |
226+
SentenceCat::SC_Sep |
227+
SentenceCat::SC_CR |
228+
SentenceCat::SC_LFifmatch_sb8a(&state_before) =>
229+
continue,
230+
231+
// SB11
232+
_ifmatch_sb11(&state_before) =>
233+
returnSome(position_before),
234+
235+
// SB998
236+
_ =>continue
237+
}
238+
}
239+
240+
// SB2
241+
ifself.state.match1(StatePart::Sot){
242+
None
243+
}elseifself.state.match1(StatePart::Eot){
244+
None
245+
}else{
246+
self.state =self.state.end();
247+
Some(self.pos)
248+
}
249+
}
250+
}
251+
252+
pubfnnew_sentence_breaks<'a>(source:&'astr) ->SentenceBreaks<'a>{
253+
SentenceBreaks{string: source,pos:0,state:INITIAL_STATE}
254+
}
255+
256+
}
257+
258+
/// TODO docs
18259
pubstructUSentenceBounds<'a>{
19-
string:&'astr
20-
// state?
260+
iter: fwd::SentenceBreaks<'a>,
261+
sentence_start:Option<usize>
262+
}
263+
264+
/// TODO docs
265+
pubfnnew_sentence_bounds<'a>(source:&'astr) ->USentenceBounds<'a>{
266+
USentenceBounds{
267+
iter: fwd::new_sentence_breaks(source),
268+
sentence_start:None
269+
}
21270
}
22271

23272
impl<'a>IteratorforUSentenceBounds<'a>{
24273
typeItem =&'astr;
25274

26275
#[inline]
27276
fnsize_hint(&self) ->(usize,Option<usize>){
28-
letslen=self.string.len();
29-
(cmp::min(slen,1),Some(slen))
277+
let(lower, upper)=self.iter.size_hint();
278+
(cmp::max(0, lower -1),upper.map(|u| cmp::max(0, u -1)))
30279
}
31280

32281
#[inline]
33282
fnnext(&mutself) ->Option<&'astr>{
34-
panic!("todo")
35-
}
36-
}
283+
ifself.sentence_start ==None{
284+
ifletSome(start_pos) =self.iter.next(){
285+
self.sentence_start =Some(start_pos)
286+
}else{
287+
returnNone
288+
}
289+
}
37290

38-
#[inline]
39-
pubfnnew_sentence_bounds<'b>(s:&'bstr) ->USentenceBounds<'b>{
40-
USentenceBounds{string: s}
291+
ifletSome(break_pos) =self.iter.next(){
292+
let start_pos =self.sentence_start.unwrap();
293+
let sentence =&self.iter.string[start_pos..break_pos];
294+
self.sentence_start =Some(break_pos);
295+
Some(sentence)
296+
}else{
297+
None
298+
}
299+
}
41300
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp