Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit475aee2

Browse files
committed
optimized minifying and remapping; added transducer methods
1 parent49f42db commit475aee2

File tree

9 files changed

+353
-152
lines changed

9 files changed

+353
-152
lines changed

‎src/main/java/com/indoqa/fsa/Transducer.java‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020

2121
publicinterfaceTransducer {
2222

23+
List<Token>getAllTransducedMatches(Stringword);
24+
25+
List<Token>getAllTransducedMatches(Stringword,intstart,intlength);
26+
2327
StringgetLongestTransducedMatch(CharSequencesequence);
2428

2529
List<Token>getLongestTransducedTokens(CharSequencesequence);

‎src/main/java/com/indoqa/fsa/character/CharAcceptor.java‎

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,33 @@ public List<Token> getLongestTokens(CharSequence sequence, int start, int length
191191
returnTokenCandidate.eliminateOverlapping(this.getAllTokens(sequence,start,length));
192192
}
193193

194+
protectedList<Token>getAllPrefixes(CharSequencesequence,intstart,intlength,charseparator) {
195+
List<Token>result =newArrayList<>();
196+
197+
intmatchedLength =0;
198+
intindex =0;
199+
intarc =0;
200+
201+
for (inti =start;i <start +length;i++) {
202+
arc =getArc(this.data,index,sequence.charAt(i),this.caseSensitive);
203+
if (arc == -1) {
204+
break;
205+
}
206+
207+
index =getTarget(this.data,arc);
208+
if (index ==0) {
209+
break;
210+
}
211+
212+
matchedLength++;
213+
if (getArc(this.data,index,separator,this.caseSensitive) != -1) {
214+
result.add(Token.create(start,sequence.subSequence(start,start +matchedLength).toString()));
215+
}
216+
}
217+
218+
returnresult;
219+
}
220+
194221
protectedStringgetInput(intstartIndex) {
195222
StringBuilderstringBuilder =newStringBuilder();
196223

‎src/main/java/com/indoqa/fsa/character/CharAcceptorBuilder.java‎

Lines changed: 195 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,13 @@
2222
importjava.util.Arrays;
2323
importjava.util.HashMap;
2424
importjava.util.Map;
25+
importjava.util.Map.Entry;
26+
importjava.util.TreeMap;
2527
importjava.util.function.Consumer;
2628

2729
importcom.indoqa.fsa.AcceptorBuilder;
30+
importcom.indoqa.fsa.utils.IntList;
31+
importcom.indoqa.fsa.utils.NodeData;
2832

2933
publicclassCharAcceptorBuilderimplementsAcceptorBuilder {
3034

@@ -38,6 +42,10 @@ public class CharAcceptorBuilder implements AcceptorBuilder {
3842

3943
privateConsumer<String>messageConsumer;
4044

45+
privatebooleanminified;
46+
privatebooleanremapped;
47+
privateintrequiredLength;
48+
4149
publicCharAcceptorBuilder(booleancaseSensitive) {
4250
this(caseSensitive,DEFAULT_CAPACITY_INCREMENT);
4351
}
@@ -73,39 +81,77 @@ public static CharAcceptor read(InputStream inputStream) throws IOException {
7381
returnnewCharAcceptor(data,caseSensitive);
7482
}
7583

76-
@Override
77-
publicvoidaddAcceptedInput(CharSequence...value) {
78-
this.addAcceptedInput(Arrays.asList(value));
84+
privatestaticStringgetKey(char[]node) {
85+
StringBuilderstringBuilder =newStringBuilder();
86+
87+
if (node.length >0) {
88+
stringBuilder.append(node[0]);
89+
}
90+
91+
if (node.length >CharDataAccessor.NODE_SIZE) {
92+
stringBuilder.append(node[CharDataAccessor.NODE_SIZE]);
93+
}
94+
95+
if (node.length >CharDataAccessor.NODE_SIZE *2) {
96+
stringBuilder.append(node[CharDataAccessor.NODE_SIZE *2]);
97+
}
98+
99+
stringBuilder.append('_');
100+
stringBuilder.append(node.length);
101+
102+
returnstringBuilder.toString();
79103
}
80104

81105
@Override
82-
publicvoidaddAcceptedInput(Iterable<?extendsCharSequence>value) {
106+
publicvoidaddAcceptedInput(CharSequence...value) {
83107
for (CharSequenceeachValue :value) {
84-
intnode =0;
108+
this.addAcceptedInput(eachValue);
109+
}
110+
}
111+
112+
publicvoidaddAcceptedInput(CharSequencevalue) {
113+
this.addAcceptedInput(value,0,value.length());
114+
}
85115

86-
for (inti =0;i <eachValue.length();i++) {
87-
booleanlastChar =i ==eachValue.length() -1;
116+
publicvoidaddAcceptedInput(CharSequencevalue,intstart,intlength) {
117+
if (this.minified ||this.remapped) {
118+
thrownewIllegalStateException("The data have already been minified / remapped.");
119+
}
88120

89-
intarc =CharDataAccessor.getArc(this.nodes[node],0,eachValue.charAt(i),this.caseSensitive);
90-
if (arc == -1) {
91-
this.addArc(node,eachValue.charAt(i),this.nodeCount,lastChar);
92-
node =this.nodeCount;
93-
this.addNode();
94-
continue;
95-
}
121+
intnode =0;
96122

97-
if (lastChar) {
98-
CharDataAccessor.setTerminal(this.nodes[node],arc,true);
99-
break;
100-
}
123+
for (inti =start;i <start +length;i++) {
124+
booleanlastChar =i ==start +length -1;
125+
126+
intarc =CharDataAccessor.getArc(this.nodes[node],0,value.charAt(i),this.caseSensitive);
127+
if (arc == -1) {
128+
this.addArc(node,value.charAt(i),this.nodeCount,lastChar);
129+
node =this.nodeCount;
130+
this.addNode();
131+
continue;
132+
}
101133

102-
node =getTarget(this.nodes[node],arc);
134+
if (lastChar) {
135+
CharDataAccessor.setTerminal(this.nodes[node],arc,true);
136+
break;
103137
}
138+
139+
node =getTarget(this.nodes[node],arc);
140+
}
141+
}
142+
143+
@Override
144+
publicvoidaddAcceptedInput(Iterable<?extendsCharSequence>value) {
145+
for (CharSequenceeachValue :value) {
146+
this.addAcceptedInput(eachValue);
104147
}
105148
}
106149

107150
@Override
108151
publicCharAcceptorbuild() {
152+
this.minify();
153+
this.remap();
154+
109155
char[]data =this.buildData();
110156
returnnewCharAcceptor(data,this.caseSensitive);
111157
}
@@ -116,15 +162,14 @@ public void setMessageConsumer(Consumer<String> messageConsumer) {
116162

117163
@Override
118164
publicvoidwrite(OutputStreamoutputStream)throwsIOException {
119-
char[]data =this.buildData();
165+
this.minify();
166+
this.remap();
120167

121168
DataOutputStreamdataOutputStream =newDataOutputStream(outputStream);
122169
dataOutputStream.writeBoolean(this.caseSensitive);
123-
dataOutputStream.writeInt(data.length);
170+
dataOutputStream.writeInt(this.requiredLength);
124171

125-
for (chareachChar :data) {
126-
dataOutputStream.writeChar(eachChar);
127-
}
172+
this.serialize(dataOutputStream);
128173

129174
dataOutputStream.flush();
130175
}
@@ -205,86 +250,177 @@ private boolean applyReplacements(char[] nodeData, Map<Integer, Integer> replace
205250
returnresult;
206251
}
207252

208-
privatechar[]buildData() {
209-
this.minify();
253+
privatevoidapplyReplacements(Map<Integer,Integer>replacements) {
254+
this.sendMessage("Applying " +replacements.size() +" replacements");
210255

211-
intoffset =0;
256+
for (inti =0;i <this.nodeCount;i++) {
257+
char[]node =this.nodes[i];
258+
if (node ==null) {
259+
continue;
260+
}
212261

213-
Map<Integer,Integer>replacements =newHashMap<>();
262+
this.applyReplacements(node,replacements);
263+
}
264+
}
265+
266+
privatechar[]buildData() {
267+
char[]data =newchar[this.requiredLength];
268+
intoffset =0;
214269

215270
for (inti =0;i <this.nodeCount;i++) {
216271
char[]node =this.nodes[i];
217272
if (node ==null) {
218273
continue;
219274
}
220275

221-
if (node.length ==0) {
222-
replacements.put(i,0);
223-
}else {
224-
replacements.put(i,offset);
225-
offset +=node.length;
226-
}
276+
System.arraycopy(node,0,data,offset,node.length);
277+
offset +=node.length;
227278
}
279+
returndata;
280+
}
228281

229-
char[]result =newchar[offset];
230-
offset =0;
282+
privateMap<String,IntList>buildGroups() {
283+
Map<String,IntList>result =newTreeMap<>();
231284

232285
for (inti =0;i <this.nodeCount;i++) {
233286
char[]node =this.nodes[i];
234287
if (node ==null) {
235288
continue;
236289
}
237290

238-
this.applyReplacements(node,replacements);
291+
Stringkey =getKey(node);
239292

240-
System.arraycopy(node,0,result,offset,node.length);
241-
offset +=node.length;
293+
IntListindexes =result.get(key);
294+
if (indexes ==null) {
295+
indexes =newIntList();
296+
result.put(key,indexes);
297+
}
298+
299+
indexes.add(i);
300+
}
301+
302+
returnresult;
303+
}
304+
305+
privateMap<Integer,Integer>findReplacements(IntListgroup) {
306+
Map<Integer,Integer>result =newHashMap<>();
307+
308+
Map<NodeData,Integer>hashes =newHashMap<>();
309+
310+
for (inti =0;i <group.size();i++) {
311+
inteachIndex =group.get(i);
312+
char[]node =this.nodes[eachIndex];
313+
if (node ==null) {
314+
continue;
315+
}
316+
317+
Integerprevious =hashes.putIfAbsent(newNodeData(node),eachIndex);
318+
319+
if (previous !=null) {
320+
result.put(eachIndex,previous);
321+
}
242322
}
243323

244324
returnresult;
245325
}
246326

247327
privatevoidminify() {
248-
if (this.messageConsumer !=null) {
249-
this.messageConsumer.accept("Minifying node data ...");
328+
if (this.minified) {
329+
return;
250330
}
251331

252-
Map<Integer,Integer>replacements =newHashMap<>();
253-
HashNodehashNode =newHashNode();
332+
this.sendMessage("Minifying " +this.nodeCount +" nodes ...");
333+
334+
Map<Integer,Integer>replacements =this.replaceEndNodes();
335+
this.applyReplacements(replacements);
336+
337+
Map<String,IntList>groups =this.buildGroups();
254338

255339
while (true) {
256340
replacements.clear();
257341

258-
for (inti =0;i <this.nodeCount;i++) {
259-
char[]node =this.nodes[i];
260-
if (node ==null) {
342+
for (IntListeachGroup :groups.values()) {
343+
if (eachGroup.size() <2) {
261344
continue;
262345
}
263346

264-
intprevious =hashNode.addIfMissing(node,i);
265-
if (previous == -1 ||previous ==i) {
266-
continue;
347+
Map<Integer,Integer>groupReplacements =this.findReplacements(eachGroup);
348+
for (Entry<Integer,Integer>eachReplacement :groupReplacements.entrySet()) {
349+
this.nodes[eachReplacement.getKey()] =null;
267350
}
268351

269-
replacements.put(i,previous);
270-
this.nodes[i] =null;
352+
replacements.putAll(groupReplacements);
271353
}
272354

273355
if (replacements.isEmpty()) {
274356
break;
275357
}
276358

277-
if (this.messageConsumer !=null) {
278-
this.messageConsumer.accept("Applying " +replacements.size() +" replacements...");
359+
this.applyReplacements(replacements);
360+
}
361+
362+
this.minified =true;
363+
}
364+
365+
privatevoidremap() {
366+
if (this.remapped) {
367+
return;
368+
}
369+
370+
this.sendMessage("Remapping node addresses ...");
371+
this.requiredLength =0;
372+
373+
Map<Integer,Integer>replacements =newHashMap<>();
374+
375+
for (inti =0;i <this.nodeCount;i++) {
376+
char[]node =this.nodes[i];
377+
if (node ==null) {
378+
continue;
379+
}
380+
381+
if (node.length ==0) {
382+
replacements.put(i,0);
383+
}else {
384+
replacements.put(i,this.requiredLength);
385+
this.requiredLength +=node.length;
279386
}
387+
}
280388

281-
for (inti =0;i <this.nodeCount;i++) {
282-
char[]node =this.nodes[i];
283-
if (node ==null) {
284-
continue;
285-
}
389+
this.applyReplacements(replacements);
390+
391+
this.remapped =true;
392+
}
393+
394+
privateMap<Integer,Integer>replaceEndNodes() {
395+
Map<Integer,Integer>result =newHashMap<>();
396+
397+
for (inti =0;i <this.nodeCount;i++) {
398+
if (this.nodes[i].length !=0) {
399+
continue;
400+
}
401+
402+
this.nodes[i] =null;
403+
result.put(i,0);
404+
}
405+
406+
returnresult;
407+
}
408+
409+
privatevoidsendMessage(Stringmessage) {
410+
if (this.messageConsumer !=null) {
411+
this.messageConsumer.accept(message);
412+
}
413+
}
414+
415+
privatevoidserialize(DataOutputStreamoutputStream)throwsIOException {
416+
for (inti =0;i <this.nodeCount;i++) {
417+
char[]node =this.nodes[i];
418+
if (node ==null) {
419+
continue;
420+
}
286421

287-
this.applyReplacements(node,replacements);
422+
for (chareachChar :node) {
423+
outputStream.writeChar(eachChar);
288424
}
289425
}
290426
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp