2222import java .util .Arrays ;
2323import java .util .HashMap ;
2424import java .util .Map ;
25+ import java .util .Map .Entry ;
26+ import java .util .TreeMap ;
2527import java .util .function .Consumer ;
2628
2729import com .indoqa .fsa .AcceptorBuilder ;
30+ import com .indoqa .fsa .utils .IntList ;
31+ import com .indoqa .fsa .utils .NodeData ;
2832
2933public class CharAcceptorBuilder implements AcceptorBuilder {
3034
@@ -38,6 +42,10 @@ public class CharAcceptorBuilder implements AcceptorBuilder {
3842
3943private Consumer <String >messageConsumer ;
4044
45+ private boolean minified ;
46+ private boolean remapped ;
47+ private int requiredLength ;
48+
4149public CharAcceptorBuilder (boolean caseSensitive ) {
4250this (caseSensitive ,DEFAULT_CAPACITY_INCREMENT );
4351 }
@@ -73,39 +81,77 @@ public static CharAcceptor read(InputStream inputStream) throws IOException {
7381return new CharAcceptor (data ,caseSensitive );
7482 }
7583
76- @ Override
77- public void addAcceptedInput (CharSequence ...value ) {
78- this .addAcceptedInput (Arrays .asList (value ));
84+ private static String getKey (char []node ) {
85+ StringBuilder stringBuilder =new StringBuilder ();
86+
87+ if (node .length >0 ) {
88+ stringBuilder .append (node [0 ]);
89+ }
90+
91+ if (node .length >CharDataAccessor .NODE_SIZE ) {
92+ stringBuilder .append (node [CharDataAccessor .NODE_SIZE ]);
93+ }
94+
95+ if (node .length >CharDataAccessor .NODE_SIZE *2 ) {
96+ stringBuilder .append (node [CharDataAccessor .NODE_SIZE *2 ]);
97+ }
98+
99+ stringBuilder .append ('_' );
100+ stringBuilder .append (node .length );
101+
102+ return stringBuilder .toString ();
79103 }
80104
81105@ Override
82- public void addAcceptedInput (Iterable <? extends CharSequence > value ) {
106+ public void addAcceptedInput (CharSequence ... value ) {
83107for (CharSequence eachValue :value ) {
84- int node =0 ;
108+ this .addAcceptedInput (eachValue );
109+ }
110+ }
111+
112+ public void addAcceptedInput (CharSequence value ) {
113+ this .addAcceptedInput (value ,0 ,value .length ());
114+ }
85115
86- for (int i =0 ;i <eachValue .length ();i ++) {
87- boolean lastChar =i ==eachValue .length () -1 ;
116+ public void addAcceptedInput (CharSequence value ,int start ,int length ) {
117+ if (this .minified ||this .remapped ) {
118+ throw new IllegalStateException ("The data have already been minified / remapped." );
119+ }
88120
89- int arc =CharDataAccessor .getArc (this .nodes [node ],0 ,eachValue .charAt (i ),this .caseSensitive );
90- if (arc == -1 ) {
91- this .addArc (node ,eachValue .charAt (i ),this .nodeCount ,lastChar );
92- node =this .nodeCount ;
93- this .addNode ();
94- continue ;
95- }
121+ int node =0 ;
96122
97- if (lastChar ) {
98- CharDataAccessor .setTerminal (this .nodes [node ],arc ,true );
99- break ;
100- }
123+ for (int i =start ;i <start +length ;i ++) {
124+ boolean lastChar =i ==start +length -1 ;
125+
126+ int arc =CharDataAccessor .getArc (this .nodes [node ],0 ,value .charAt (i ),this .caseSensitive );
127+ if (arc == -1 ) {
128+ this .addArc (node ,value .charAt (i ),this .nodeCount ,lastChar );
129+ node =this .nodeCount ;
130+ this .addNode ();
131+ continue ;
132+ }
101133
102- node =getTarget (this .nodes [node ],arc );
134+ if (lastChar ) {
135+ CharDataAccessor .setTerminal (this .nodes [node ],arc ,true );
136+ break ;
103137 }
138+
139+ node =getTarget (this .nodes [node ],arc );
140+ }
141+ }
142+
143+ @ Override
144+ public void addAcceptedInput (Iterable <?extends CharSequence >value ) {
145+ for (CharSequence eachValue :value ) {
146+ this .addAcceptedInput (eachValue );
104147 }
105148 }
106149
107150@ Override
108151public CharAcceptor build () {
152+ this .minify ();
153+ this .remap ();
154+
109155char []data =this .buildData ();
110156return new CharAcceptor (data ,this .caseSensitive );
111157 }
@@ -116,15 +162,14 @@ public void setMessageConsumer(Consumer<String> messageConsumer) {
116162
117163@ Override
118164public void write (OutputStream outputStream )throws IOException {
119- char []data =this .buildData ();
165+ this .minify ();
166+ this .remap ();
120167
121168DataOutputStream dataOutputStream =new DataOutputStream (outputStream );
122169dataOutputStream .writeBoolean (this .caseSensitive );
123- dataOutputStream .writeInt (data . length );
170+ dataOutputStream .writeInt (this . requiredLength );
124171
125- for (char eachChar :data ) {
126- dataOutputStream .writeChar (eachChar );
127- }
172+ this .serialize (dataOutputStream );
128173
129174dataOutputStream .flush ();
130175 }
@@ -205,86 +250,177 @@ private boolean applyReplacements(char[] nodeData, Map<Integer, Integer> replace
205250return result ;
206251 }
207252
208- private char [] buildData ( ) {
209- this .minify ( );
253+ private void applyReplacements ( Map < Integer , Integer > replacements ) {
254+ this .sendMessage ( "Applying " + replacements . size () + " replacements" );
210255
211- int offset =0 ;
256+ for (int i =0 ;i <this .nodeCount ;i ++) {
257+ char []node =this .nodes [i ];
258+ if (node ==null ) {
259+ continue ;
260+ }
212261
213- Map <Integer ,Integer >replacements =new HashMap <>();
262+ this .applyReplacements (node ,replacements );
263+ }
264+ }
265+
266+ private char []buildData () {
267+ char []data =new char [this .requiredLength ];
268+ int offset =0 ;
214269
215270for (int i =0 ;i <this .nodeCount ;i ++) {
216271char []node =this .nodes [i ];
217272if (node ==null ) {
218273continue ;
219274 }
220275
221- if (node .length ==0 ) {
222- replacements .put (i ,0 );
223- }else {
224- replacements .put (i ,offset );
225- offset +=node .length ;
226- }
276+ System .arraycopy (node ,0 ,data ,offset ,node .length );
277+ offset +=node .length ;
227278 }
279+ return data ;
280+ }
228281
229- char [] result = new char [ offset ];
230- offset = 0 ;
282+ private Map < String , IntList > buildGroups () {
283+ Map < String , IntList > result = new TreeMap <>() ;
231284
232285for (int i =0 ;i <this .nodeCount ;i ++) {
233286char []node =this .nodes [i ];
234287if (node ==null ) {
235288continue ;
236289 }
237290
238- this . applyReplacements (node , replacements );
291+ String key = getKey (node );
239292
240- System .arraycopy (node ,0 ,result ,offset ,node .length );
241- offset +=node .length ;
293+ IntList indexes =result .get (key );
294+ if (indexes ==null ) {
295+ indexes =new IntList ();
296+ result .put (key ,indexes );
297+ }
298+
299+ indexes .add (i );
300+ }
301+
302+ return result ;
303+ }
304+
305+ private Map <Integer ,Integer >findReplacements (IntList group ) {
306+ Map <Integer ,Integer >result =new HashMap <>();
307+
308+ Map <NodeData ,Integer >hashes =new HashMap <>();
309+
310+ for (int i =0 ;i <group .size ();i ++) {
311+ int eachIndex =group .get (i );
312+ char []node =this .nodes [eachIndex ];
313+ if (node ==null ) {
314+ continue ;
315+ }
316+
317+ Integer previous =hashes .putIfAbsent (new NodeData (node ),eachIndex );
318+
319+ if (previous !=null ) {
320+ result .put (eachIndex ,previous );
321+ }
242322 }
243323
244324return result ;
245325 }
246326
247327private void minify () {
248- if (this .messageConsumer != null ) {
249- this . messageConsumer . accept ( "Minifying node data ..." ) ;
328+ if (this .minified ) {
329+ return ;
250330 }
251331
252- Map <Integer ,Integer >replacements =new HashMap <>();
253- HashNode hashNode =new HashNode ();
332+ this .sendMessage ("Minifying " +this .nodeCount +" nodes ..." );
333+
334+ Map <Integer ,Integer >replacements =this .replaceEndNodes ();
335+ this .applyReplacements (replacements );
336+
337+ Map <String ,IntList >groups =this .buildGroups ();
254338
255339while (true ) {
256340replacements .clear ();
257341
258- for (int i =0 ;i <this .nodeCount ;i ++) {
259- char []node =this .nodes [i ];
260- if (node ==null ) {
342+ for (IntList eachGroup :groups .values ()) {
343+ if (eachGroup .size () <2 ) {
261344continue ;
262345 }
263346
264- int previous = hashNode . addIfMissing ( node , i );
265- if ( previous == - 1 || previous == i ) {
266- continue ;
347+ Map < Integer , Integer > groupReplacements = this . findReplacements ( eachGroup );
348+ for ( Entry < Integer , Integer > eachReplacement : groupReplacements . entrySet () ) {
349+ this . nodes [ eachReplacement . getKey ()] = null ;
267350 }
268351
269- replacements .put (i ,previous );
270- this .nodes [i ] =null ;
352+ replacements .putAll (groupReplacements );
271353 }
272354
273355if (replacements .isEmpty ()) {
274356break ;
275357 }
276358
277- if (this .messageConsumer !=null ) {
278- this .messageConsumer .accept ("Applying " +replacements .size () +" replacements..." );
359+ this .applyReplacements (replacements );
360+ }
361+
362+ this .minified =true ;
363+ }
364+
365+ private void remap () {
366+ if (this .remapped ) {
367+ return ;
368+ }
369+
370+ this .sendMessage ("Remapping node addresses ..." );
371+ this .requiredLength =0 ;
372+
373+ Map <Integer ,Integer >replacements =new HashMap <>();
374+
375+ for (int i =0 ;i <this .nodeCount ;i ++) {
376+ char []node =this .nodes [i ];
377+ if (node ==null ) {
378+ continue ;
379+ }
380+
381+ if (node .length ==0 ) {
382+ replacements .put (i ,0 );
383+ }else {
384+ replacements .put (i ,this .requiredLength );
385+ this .requiredLength +=node .length ;
279386 }
387+ }
280388
281- for (int i =0 ;i <this .nodeCount ;i ++) {
282- char []node =this .nodes [i ];
283- if (node ==null ) {
284- continue ;
285- }
389+ this .applyReplacements (replacements );
390+
391+ this .remapped =true ;
392+ }
393+
394+ private Map <Integer ,Integer >replaceEndNodes () {
395+ Map <Integer ,Integer >result =new HashMap <>();
396+
397+ for (int i =0 ;i <this .nodeCount ;i ++) {
398+ if (this .nodes [i ].length !=0 ) {
399+ continue ;
400+ }
401+
402+ this .nodes [i ] =null ;
403+ result .put (i ,0 );
404+ }
405+
406+ return result ;
407+ }
408+
409+ private void sendMessage (String message ) {
410+ if (this .messageConsumer !=null ) {
411+ this .messageConsumer .accept (message );
412+ }
413+ }
414+
415+ private void serialize (DataOutputStream outputStream )throws IOException {
416+ for (int i =0 ;i <this .nodeCount ;i ++) {
417+ char []node =this .nodes [i ];
418+ if (node ==null ) {
419+ continue ;
420+ }
286421
287- this .applyReplacements (node ,replacements );
422+ for (char eachChar :node ) {
423+ outputStream .writeChar (eachChar );
288424 }
289425 }
290426 }