2020
2121import java .io .*;
2222import java .util .*;
23- import java .util .concurrent . ConcurrentHashMap ;
23+ import java .util .Map . Entry ;
2424import java .util .function .Consumer ;
2525
2626import com .indoqa .fsa .AcceptorBuilder ;
@@ -30,14 +30,14 @@ public class CharAcceptorBuilder implements AcceptorBuilder {
3030public static final int FILE_VERSION =2 ;
3131public static final int DEFAULT_CAPACITY_INCREMENT =16 *1024 ;
3232
33- private static final float LOAD_FACTOR =0.9f ;
34-
3533private final boolean caseSensitive ;
3634
3735private char [][]nodes =new char [0 ][];
3836private int nodeCount ;
3937private int capacityIncrement ;
4038
39+ private Replacements replacements ;
40+
4141private Consumer <String >messageConsumer ;
4242
4343private boolean minified ;
@@ -88,25 +88,30 @@ public static CharAcceptor read(InputStream inputStream) throws IOException {
8888return new CharAcceptor (data ,caseSensitive );
8989 }
9090
91- private static <K ,V >ConcurrentHashMap <K ,V >createHashMap (int size ) {
92- return new ConcurrentHashMap <>((int ) (size /LOAD_FACTOR +1 ),LOAD_FACTOR ,1 );
93- }
94-
9591private static String getKey (char []node ) {
9692StringBuilder stringBuilder =new StringBuilder ();
9793
98- for (int i =0 ;i <10 ;i ++) {
99- if (node .length <=CharDataAccessor .NODE_SIZE *i ) {
100- break ;
101- }
94+ for (int i =0 ;i <node .length ;i +=CharDataAccessor .NODE_SIZE ) {
95+ stringBuilder .append (CharDataAccessor .getLabel (node ,i ));
96+ }
10297
103- stringBuilder .append (node [CharDataAccessor .NODE_SIZE *i ]);
98+ return stringBuilder .toString ();
99+ }
100+
101+ private static int sort (NodeReference n1 ,NodeReference n2 ) {
102+ if (n1 ==null &&n2 ==null ) {
103+ return 0 ;
104104 }
105105
106- stringBuilder .append ('_' );
107- stringBuilder .append (node .length );
106+ if (n1 ==null ) {
107+ return 1 ;
108+ }
108109
109- return stringBuilder .toString ();
110+ if (n2 ==null ) {
111+ return -1 ;
112+ }
113+
114+ return n1 .compareTo (n2 );
110115 }
111116
112117@ Override
@@ -116,8 +121,10 @@ public void addAcceptedInput(CharSequence value, int start, int length) {
116121
117122@ Override
118123public CharAcceptor build () {
124+ this .replacements =new Replacements (this .nodeCount );
119125this .minify ();
120126this .remap ();
127+ this .replacements =null ;
121128
122129char []data =this .buildData ();
123130return new CharAcceptor (data ,this .caseSensitive );
@@ -129,8 +136,10 @@ public void setMessageConsumer(Consumer<String> messageConsumer) {
129136
130137@ Override
131138public void write (OutputStream outputStream )throws IOException {
139+ this .replacements =new Replacements (this .nodeCount );
132140this .minify ();
133141this .remap ();
142+ this .replacements =null ;
134143
135144DataOutputStream dataOutputStream =new DataOutputStream (new BufferedOutputStream (outputStream ));
136145dataOutputStream .writeInt (FILE_VERSION );
@@ -222,14 +231,34 @@ private void addNode() {
222231this .nodeCount ++;
223232 }
224233
225- private boolean applyReplacements (char []nodeData ,Map <Integer ,Integer >replacements ) {
234+ private Set <String >applyReplacements () {
235+ this .sendMessage ("Applying " +this .replacements .getCount () +" replacements" );
236+
237+ Set <String >result =new HashSet <>();
238+
239+ for (int i =0 ;i <this .nodeCount ;i ++) {
240+ char []node =this .nodes [i ];
241+ if (node ==null ) {
242+ continue ;
243+ }
244+
245+ boolean updated =this .applyReplacements (node );
246+ if (updated ) {
247+ result .add (getKey (node ));
248+ }
249+ }
250+
251+ return result ;
252+ }
253+
254+ private boolean applyReplacements (char []nodeData ) {
226255boolean result =false ;
227256
228257for (int i =0 ;i <nodeData .length ;i +=CharDataAccessor .NODE_SIZE ) {
229258int target =getTarget (nodeData ,i );
230259
231- Integer replacement =replacements .get (target );
232- if (replacement ==null ) {
260+ int replacement =this . replacements .getReplacement (target );
261+ if (replacement ==- 1 ) {
233262continue ;
234263 }
235264
@@ -246,25 +275,6 @@ private boolean applyReplacements(char[] nodeData, Map<Integer, Integer> replace
246275return result ;
247276 }
248277
249- private Set <String >applyReplacements (Map <Integer ,Integer >replacements ) {
250- this .sendMessage ("Applying " +replacements .size () +" replacements" );
251-
252- Set <String >result =new HashSet <>();
253-
254- for (int i =0 ;i <this .nodeCount ;i ++) {
255- char []node =this .nodes [i ];
256- if (node ==null ) {
257- continue ;
258- }
259-
260- if (this .applyReplacements (node ,replacements )) {
261- result .add (getKey (node ));
262- }
263- }
264-
265- return result ;
266- }
267-
268278private char []buildData () {
269279char []data =new char [this .requiredLength ];
270280int offset =0 ;
@@ -281,10 +291,8 @@ private char[] buildData() {
281291return data ;
282292 }
283293
284- private Map <String ,List <Integer >>buildGroups () {
285- this .sendMessage ("Building groups" );
286-
287- Map <String ,List <Integer >>result =createHashMap (1000 );
294+ private Map <String ,List <NodeReference >>buildGroups () {
295+ Map <String ,List <NodeReference >>result =new HashMap <>();
288296
289297for (int i =0 ;i <this .nodeCount ;i ++) {
290298char []node =this .nodes [i ];
@@ -294,48 +302,55 @@ private Map<String, List<Integer>> buildGroups() {
294302
295303String key =getKey (node );
296304
297- List <Integer >indexes =result .get (key );
305+ List <NodeReference >indexes =result .get (key );
298306if (indexes ==null ) {
299- indexes =new LinkedList <>();
307+ indexes =new ArrayList <>();
300308result .put (key ,indexes );
301309 }
302310
303- indexes .add (i );
311+ indexes .add (new NodeReference ( node , i ) );
304312 }
305313
314+ for (Iterator <Entry <String ,List <NodeReference >>>iterator =result .entrySet ().iterator ();iterator .hasNext ();) {
315+ if (iterator .next ().getValue ().size () <2 ) {
316+ iterator .remove ();
317+ }
318+ }
319+
320+ this .sendMessage ("Built " +result .size () +" groups" );
306321return result ;
307322 }
308323
309- private Map <Integer ,Integer >findReplacements (List <Integer >group ) {
310- Map <Integer ,Integer >result =new HashMap <>();
311-
312- List <NodeReference >references =new ArrayList <>(group .size ());
313-
314- for (Iterator <Integer >iterator =group .iterator ();iterator .hasNext ();) {
315- Integer index =iterator .next ();
316-
317- char []node =this .nodes [index ];
318- if (node ==null ) {
319- iterator .remove ();
324+ private void findEndNodeReplacements () {
325+ for (int i =0 ;i <this .nodeCount ;i ++) {
326+ if (this .nodes [i ] ==null ||this .nodes [i ].length !=0 ) {
320327continue ;
321328 }
322329
323- references .add (new NodeReference (node ,index ));
330+ this .nodes [i ] =null ;
331+ this .replacements .setReplacement (i ,0 );
324332 }
333+ }
325334
326- references .sort (null );
335+ private void findReplacements (List <NodeReference >group ) {
336+ group .sort (CharAcceptorBuilder ::sort );
327337
328338NodeReference lastReference =null ;
329- for (NodeReference eachReference :references ) {
330- if (lastReference ==null || !eachReference .equals (lastReference )) {
331- lastReference =eachReference ;
332- }else {
333- result .put (eachReference .getIndex (),lastReference .getIndex ());
334- this .nodes [eachReference .getIndex ()] =null ;
339+ for (ListIterator <NodeReference >iterator =group .listIterator ();iterator .hasNext ();) {
340+ NodeReference reference =iterator .next ();
341+ if (reference ==null ) {
342+ break ;
335343 }
336- }
337344
338- return result ;
345+ if (lastReference ==null || !reference .equals (lastReference )) {
346+ lastReference =reference ;
347+ continue ;
348+ }
349+
350+ this .replacements .setReplacement (reference .getIndex (),lastReference .getIndex ());
351+ this .nodes [reference .getIndex ()] =null ;
352+ iterator .set (null );
353+ }
339354 }
340355
341356private void minify () {
@@ -345,30 +360,28 @@ private void minify() {
345360
346361this .sendMessage ("Minifying " +this .nodeCount +" nodes" );
347362
348- Map <Integer ,Integer >replacements =this .replaceEndNodes ();
349- this .applyReplacements (replacements );
350-
351- Map <String ,List <Integer >>groups =this .buildGroups ();
352- Set <String >changedGroups =new HashSet <>(groups .keySet ());
363+ Map <String ,List <NodeReference >>groups =this .buildGroups ();
364+ this .replacements .clear ();
365+ this .findEndNodeReplacements ();
366+ Set <String >changedGroups =this .applyReplacements ();
353367
354368while (true ) {
355- replacements .clear ();
369+ this . replacements .clear ();
356370
357- this .sendMessage ("Finding duplicates in " +changedGroups .size () +" groups" );
358371for (String eachChangedGroup :changedGroups ) {
359- List <Integer >group =groups .get (eachChangedGroup );
360- if (group ==null || group . size () < 2 ) {
372+ List <NodeReference >group =groups .get (eachChangedGroup );
373+ if (group ==null ) {
361374continue ;
362375 }
363376
364- replacements . putAll ( this .findReplacements (group ) );
377+ this .findReplacements (group );
365378 }
366379
367- if (replacements .isEmpty () ) {
380+ if (this . replacements .getCount () == 0 ) {
368381break ;
369382 }
370383
371- changedGroups =this .applyReplacements (replacements );
384+ changedGroups =this .applyReplacements ();
372385 }
373386
374387this .minified =true ;
@@ -382,44 +395,27 @@ private void remap() {
382395this .sendMessage ("Remapping node addresses ..." );
383396this .requiredLength =0 ;
384397
385- Map <Integer ,Integer >replacements =createHashMap (this .nodeCount );
386-
387398for (int i =0 ;i <this .nodeCount ;i ++) {
388399char []node =this .nodes [i ];
389400if (node ==null ) {
390401continue ;
391402 }
392403
393404if (node .length ==0 ) {
394- replacements .put (i ,0 );
405+ this . replacements .setReplacement (i ,0 );
395406 }else {
396- replacements .put (i ,this .requiredLength );
407+ this . replacements .setReplacement (i ,this .requiredLength );
397408this .requiredLength +=node .length ;
398409 }
399410 }
400411
401412this .sendMessage ("RequiredLength: " +this .requiredLength );
402413
403- this .applyReplacements (replacements );
414+ this .applyReplacements ();
404415
405416this .remapped =true ;
406417 }
407418
408- private Map <Integer ,Integer >replaceEndNodes () {
409- Map <Integer ,Integer >result =createHashMap (1000 );
410-
411- for (int i =0 ;i <this .nodeCount ;i ++) {
412- if (this .nodes [i ] ==null ||this .nodes [i ].length !=0 ) {
413- continue ;
414- }
415-
416- this .nodes [i ] =null ;
417- result .put (i ,0 );
418- }
419-
420- return result ;
421- }
422-
423419private void sendMessage (String message ) {
424420if (this .messageConsumer !=null ) {
425421this .messageConsumer .accept (message );