Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd5b9759

Browse files
committed
- changed replacement detection to use sorting instead of a map
- introduced additional tests
1 parentde2e6f9 commitd5b9759

File tree

5 files changed

+196
-47
lines changed

5 files changed

+196
-47
lines changed

‎src/main/java/com/indoqa/fsa/character/CharAcceptorBuilder.java‎

Lines changed: 71 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
publicclassCharAcceptorBuilderimplementsAcceptorBuilder {
2929

30+
privatestaticfinalfloatLOAD_FACTOR =0.9f;
3031
publicstaticfinalintFILE_VERSION =2;
3132
publicstaticfinalintDEFAULT_CAPACITY_INCREMENT =16 *1024;
3233

@@ -82,6 +83,10 @@ public static CharAcceptor read(InputStream inputStream) throws IOException {
8283
returnnewCharAcceptor(data,caseSensitive);
8384
}
8485

86+
privatestatic <K,V>ConcurrentHashMap<K,V>createHashMap(intsize) {
87+
returnnewConcurrentHashMap<>((int) (size /LOAD_FACTOR +1),LOAD_FACTOR,1);
88+
}
89+
8590
privatestaticStringgetKey(char[]node) {
8691
StringBuilderstringBuilder =newStringBuilder();
8792

@@ -102,41 +107,18 @@ private static String getKey(char[] node) {
102107
@Override
103108
publicvoidaddAcceptedInput(CharSequence...value) {
104109
for (CharSequenceeachValue :value) {
105-
this.addAcceptedInput(eachValue);
110+
this.addAcceptedInput(eachValue,0,eachValue.length());
106111
}
107112
}
108113

109114
publicvoidaddAcceptedInput(CharSequencevalue,intstart,intlength) {
110-
if (this.minified ||this.remapped) {
111-
thrownewIllegalStateException("The data have already been minified / remapped.");
112-
}
113-
114-
intnode =0;
115-
116-
for (inti =start;i <start +length;i++) {
117-
booleanlastChar =i ==start +length -1;
118-
119-
intarc =CharDataAccessor.getArc(this.nodes[node],0,value.charAt(i),this.caseSensitive);
120-
if (arc == -1) {
121-
this.addArc(node,value.charAt(i),this.nodeCount,lastChar);
122-
node =this.nodeCount;
123-
this.addNode();
124-
continue;
125-
}
126-
127-
if (lastChar) {
128-
CharDataAccessor.setTerminal(this.nodes[node],arc,true);
129-
break;
130-
}
131-
132-
node =getTarget(this.nodes[node],arc);
133-
}
115+
this.addAcceptedInput(value,start,length,0,true);
134116
}
135117

136118
@Override
137119
publicvoidaddAcceptedInput(Iterable<?extendsCharSequence>value) {
138120
for (CharSequenceeachValue :value) {
139-
this.addAcceptedInput(eachValue);
121+
this.addAcceptedInput(eachValue,0,eachValue.length());
140122
}
141123
}
142124

@@ -168,11 +150,36 @@ public void write(OutputStream outputStream) throws IOException {
168150
dataOutputStream.flush();
169151
}
170152

171-
privatevoidaddAcceptedInput(CharSequencevalue) {
172-
this.addAcceptedInput(value,0,value.length());
153+
protectedintaddAcceptedInput(CharSequencevalue,intstart,intlength,intstartNode,booleanmakeTerminal) {
154+
if (this.minified ||this.remapped) {
155+
thrownewIllegalStateException("The data have already been minified / remapped.");
156+
}
157+
158+
intnode =startNode;
159+
160+
for (inti =start;i <start +length;i++) {
161+
booleanterminal =makeTerminal &&i ==start +length -1;
162+
163+
intarc =CharDataAccessor.getArc(this.nodes[node],0,value.charAt(i),this.caseSensitive);
164+
if (arc == -1) {
165+
this.addArc(node,value.charAt(i),this.nodeCount,terminal);
166+
node =this.nodeCount;
167+
this.addNode();
168+
continue;
169+
}
170+
171+
if (terminal) {
172+
CharDataAccessor.setTerminal(this.nodes[node],arc,true);
173+
break;
174+
}
175+
176+
node =getTarget(this.nodes[node],arc);
177+
}
178+
179+
returnnode;
173180
}
174181

175-
privatevoidaddArc(intnode,charlabel,inttarget,booleanterminal) {
182+
protectedvoidaddArc(intnode,charlabel,inttarget,booleanterminal) {
176183
char[]oldNodeData =this.nodes[node];
177184

178185
this.nodes[node] =newchar[oldNodeData.length +CharDataAccessor.NODE_SIZE];
@@ -283,7 +290,9 @@ private char[] buildData() {
283290
}
284291

285292
privateMap<String,List<Integer>>buildGroups() {
286-
Map<String,List<Integer>>result =newTreeMap<>();
293+
this.sendMessage("Building groups");
294+
295+
Map<String,List<Integer>>result =createHashMap(1000);
287296

288297
for (inti =0;i <this.nodeCount;i++) {
289298
char[]node =this.nodes[i];
@@ -295,7 +304,7 @@ private Map<String, List<Integer>> buildGroups() {
295304

296305
List<Integer>indexes =result.get(key);
297306
if (indexes ==null) {
298-
indexes =newArrayList<>();
307+
indexes =newLinkedList<>();
299308
result.put(key,indexes);
300309
}
301310

@@ -308,19 +317,29 @@ private Map<String, List<Integer>> buildGroups() {
308317
privateMap<Integer,Integer>findReplacements(List<Integer>group) {
309318
Map<Integer,Integer>result =newHashMap<>();
310319

311-
Map<String,Integer>hashes =newConcurrentHashMap<>(group.size());
320+
List<NodeReference>references =newArrayList<>(group.size());
321+
322+
for (Iterator<Integer>iterator =group.iterator();iterator.hasNext();) {
323+
Integerindex =iterator.next();
312324

313-
for (inti =0;i <group.size();i++) {
314-
IntegereachIndex =group.get(i);
315-
char[]node =this.nodes[eachIndex];
325+
char[]node =this.nodes[index];
316326
if (node ==null) {
327+
iterator.remove();
317328
continue;
318329
}
319330

320-
Integerprevious =hashes.putIfAbsent(newString(node),eachIndex);
321-
if (previous !=null) {
322-
result.put(eachIndex,previous);
323-
this.nodes[eachIndex] =null;
331+
references.add(newNodeReference(node,index));
332+
}
333+
334+
references.sort(null);
335+
336+
NodeReferencelastReference =null;
337+
for (NodeReferenceeachReference :references) {
338+
if (lastReference ==null || !eachReference.equals(lastReference)) {
339+
lastReference =eachReference;
340+
}else {
341+
result.put(eachReference.getIndex(),lastReference.getIndex());
342+
this.nodes[eachReference.getIndex()] =null;
324343
}
325344
}
326345

@@ -332,18 +351,25 @@ private void minify() {
332351
return;
333352
}
334353

335-
this.sendMessage("Minifying " +this.nodeCount +" nodes ...");
354+
this.sendMessage("Minifying " +this.nodeCount +" nodes");
336355

337356
Map<Integer,Integer>replacements =this.replaceEndNodes();
338-
Set<String>changedGroups =this.applyReplacements(replacements);
357+
this.applyReplacements(replacements);
339358

340359
Map<String,List<Integer>>groups =this.buildGroups();
360+
Set<String>changedGroups =newHashSet<>(groups.keySet());
341361

342362
while (true) {
343363
replacements.clear();
344364

365+
this.sendMessage("Finding duplicates in " +changedGroups.size() +" groups");
345366
for (StringeachChangedGroup :changedGroups) {
346-
replacements.putAll(this.findReplacements(groups.get(eachChangedGroup)));
367+
List<Integer>group =groups.get(eachChangedGroup);
368+
if (group ==null ||group.size() <2) {
369+
continue;
370+
}
371+
372+
replacements.putAll(this.findReplacements(group));
347373
}
348374

349375
if (replacements.isEmpty()) {
@@ -364,7 +390,7 @@ private void remap() {
364390
this.sendMessage("Remapping node addresses ...");
365391
this.requiredLength =0;
366392

367-
Map<Integer,Integer>replacements =newHashMap<>();
393+
Map<Integer,Integer>replacements =createHashMap(this.nodeCount);
368394

369395
for (inti =0;i <this.nodeCount;i++) {
370396
char[]node =this.nodes[i];
@@ -386,10 +412,10 @@ private void remap() {
386412
}
387413

388414
privateMap<Integer,Integer>replaceEndNodes() {
389-
Map<Integer,Integer>result =newHashMap<>();
415+
Map<Integer,Integer>result =createHashMap(1000);
390416

391417
for (inti =0;i <this.nodeCount;i++) {
392-
if (this.nodes[i].length !=0) {
418+
if (this.nodes[i] ==null ||this.nodes[i].length !=0) {
393419
continue;
394420
}
395421

‎src/main/java/com/indoqa/fsa/character/CharTransducerBuilder.java‎

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
importjava.io.IOException;
2020
importjava.io.InputStream;
2121
importjava.io.OutputStream;
22+
importjava.util.ArrayList;
2223
importjava.util.Arrays;
24+
importjava.util.List;
2325
importjava.util.function.Consumer;
2426

2527
importcom.indoqa.fsa.TransducerBuilder;
@@ -68,6 +70,26 @@ public static CharTransducer read(InputStream inputStream) throws IOException {
6870
returnnewCharTransducer(charAcceptor,separator);
6971
}
7072

73+
publicvoidadd(Iterable<?extendsCharSequence>input,Stringoutput) {
74+
List<Integer>nodes =newArrayList<>();
75+
76+
for (CharSequenceeachInput :input) {
77+
nodes.add(this.acceptorBuilder.addAcceptedInput(eachInput,0,eachInput.length(),0,false));
78+
}
79+
80+
if (nodes.isEmpty()) {
81+
return;
82+
}
83+
84+
intseparatorNode =this.acceptorBuilder.addAcceptedInput(String.valueOf(this.separator),0,1,nodes.get(0),false);
85+
86+
for (inti =1;i <nodes.size();i++) {
87+
this.acceptorBuilder.addArc(nodes.get(i),this.separator,separatorNode,false);
88+
}
89+
90+
this.acceptorBuilder.addAcceptedInput(output,0,output.length(),separatorNode,true);
91+
}
92+
7193
@Override
7294
publicvoidadd(Stringinput,Stringoutput) {
7395
this.acceptorBuilder.addAcceptedInput(input +this.separator +output);
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Licensed to the Indoqa Software Design und Beratung GmbH (Indoqa) under
3+
* one or more contributor license agreements. See the NOTICE file distributed
4+
* with this work for additional information regarding copyright ownership.
5+
* Indoqa licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
packagecom.indoqa.fsa.character;
18+
19+
importjava.util.Arrays;
20+
21+
publicclassNodeReferenceimplementsComparable<NodeReference> {
22+
23+
privatefinalchar[]data;
24+
privatefinalIntegerindex;
25+
26+
publicNodeReference(char[]data,Integerindex) {
27+
super();
28+
this.data =data;
29+
this.index =index;
30+
}
31+
32+
@Override
33+
publicintcompareTo(NodeReferenceother) {
34+
for (inti =0;i <this.data.length;i++) {
35+
if (other.data.length <=i) {
36+
return1;
37+
}
38+
39+
if (this.data[i] <other.data[i]) {
40+
return -1;
41+
}
42+
43+
if (this.data[i] >other.data[i]) {
44+
return1;
45+
}
46+
}
47+
48+
if (other.data.length >this.data.length) {
49+
return -1;
50+
}
51+
52+
returnthis.index.compareTo(other.index);
53+
}
54+
55+
@Override
56+
publicbooleanequals(Objectobj) {
57+
if (this ==obj) {
58+
returntrue;
59+
}
60+
61+
NodeReferenceother = (NodeReference)obj;
62+
returnArrays.equals(this.data,other.data);
63+
}
64+
65+
publicIntegergetIndex() {
66+
returnthis.index;
67+
}
68+
69+
@Override
70+
publicinthashCode() {
71+
returnArrays.hashCode(this.data);
72+
}
73+
}

‎src/test/java/com/indoqa/fsa/character/CharAcceptorTest.java‎

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
publicclassCharAcceptorTest {
3131

32-
privatestaticfinalintSTRING_COUNT =10000;
32+
privatestaticfinalintSTRING_COUNT =10_000;
3333

3434
privatestaticString[]getValues(List<Token>tokens) {
3535
returntokens.stream().map(Token::getValue).toArray(String[]::new);
@@ -126,6 +126,22 @@ public void overlapping1() {
126126
assertFalse(acceptor.accepts("sse"));
127127
}
128128

129+
@Test
130+
publicvoidoverlapping2() {
131+
CharAcceptorBuilderbuilder =newCharAcceptorBuilder(false);
132+
133+
builder.addAcceptedInput("12");
134+
builder.addAcceptedInput("22");
135+
builder.addAcceptedInput("123");
136+
137+
CharAcceptoracceptor =builder.build();
138+
139+
assertTrue(acceptor.accepts("12"));
140+
assertTrue(acceptor.accepts("22"));
141+
assertTrue(acceptor.accepts("123"));
142+
assertFalse(acceptor.accepts("223"));
143+
}
144+
129145
@Test
130146
publicvoidrandom() {
131147
Set<String>inputs =TestUtils.generateRandomStrings(STRING_COUNT);

‎src/test/java/com/indoqa/fsa/character/CharTransducerTest.java‎

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717
packagecom.indoqa.fsa.character;
1818

1919
importstaticcom.indoqa.fsa.TestUtils.generateRandomStrings;
20-
importstaticorg.junit.Assert.assertEquals;
20+
importstaticorg.junit.Assert.*;
2121

2222
importjava.util.ArrayList;
23+
importjava.util.Arrays;
2324
importjava.util.List;
2425

2526
importorg.junit.Test;
@@ -120,6 +121,17 @@ public void test3() {
120121
assertEquals(1,tokens.size());
121122
}
122123

124+
@Test
125+
publicvoidtest4() {
126+
CharTransducerBuilderbuilder =newCharTransducerBuilder(true);
127+
builder.add(Arrays.asList("A","B","C","D","F","G"),"A");
128+
Transducertransducer =builder.build();
129+
130+
List<Token>tokens =transducer.getAllOccurrences("ABCDEFG");
131+
assertEquals(6,tokens.size());
132+
assertTrue(tokens.stream().map(Token::getValue).allMatch(value ->value.equals("A")));
133+
}
134+
123135
@Test
124136
publicvoidtransduce() {
125137
Transducertransducer =CharTransducerBuilder.build(false,"#","Auto#PKW");

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp