2727
2828public class CharAcceptorBuilder implements AcceptorBuilder {
2929
30+ private static final float LOAD_FACTOR =0.9f ;
3031public static final int FILE_VERSION =2 ;
3132public static final int DEFAULT_CAPACITY_INCREMENT =16 *1024 ;
3233
@@ -82,6 +83,10 @@ public static CharAcceptor read(InputStream inputStream) throws IOException {
8283return new CharAcceptor (data ,caseSensitive );
8384 }
8485
86+ private static <K ,V >ConcurrentHashMap <K ,V >createHashMap (int size ) {
87+ return new ConcurrentHashMap <>((int ) (size /LOAD_FACTOR +1 ),LOAD_FACTOR ,1 );
88+ }
89+
8590private static String getKey (char []node ) {
8691StringBuilder stringBuilder =new StringBuilder ();
8792
@@ -102,41 +107,18 @@ private static String getKey(char[] node) {
102107@ Override
103108public void addAcceptedInput (CharSequence ...value ) {
104109for (CharSequence eachValue :value ) {
105- this .addAcceptedInput (eachValue );
110+ this .addAcceptedInput (eachValue , 0 , eachValue . length () );
106111 }
107112 }
108113
109114public void addAcceptedInput (CharSequence value ,int start ,int length ) {
110- if (this .minified ||this .remapped ) {
111- throw new IllegalStateException ("The data have already been minified / remapped." );
112- }
113-
114- int node =0 ;
115-
116- for (int i =start ;i <start +length ;i ++) {
117- boolean lastChar =i ==start +length -1 ;
118-
119- int arc =CharDataAccessor .getArc (this .nodes [node ],0 ,value .charAt (i ),this .caseSensitive );
120- if (arc == -1 ) {
121- this .addArc (node ,value .charAt (i ),this .nodeCount ,lastChar );
122- node =this .nodeCount ;
123- this .addNode ();
124- continue ;
125- }
126-
127- if (lastChar ) {
128- CharDataAccessor .setTerminal (this .nodes [node ],arc ,true );
129- break ;
130- }
131-
132- node =getTarget (this .nodes [node ],arc );
133- }
115+ this .addAcceptedInput (value ,start ,length ,0 ,true );
134116 }
135117
136118@ Override
137119public void addAcceptedInput (Iterable <?extends CharSequence >value ) {
138120for (CharSequence eachValue :value ) {
139- this .addAcceptedInput (eachValue );
121+ this .addAcceptedInput (eachValue , 0 , eachValue . length () );
140122 }
141123 }
142124
@@ -168,11 +150,36 @@ public void write(OutputStream outputStream) throws IOException {
168150dataOutputStream .flush ();
169151 }
170152
171- private void addAcceptedInput (CharSequence value ) {
172- this .addAcceptedInput (value ,0 ,value .length ());
153+ protected int addAcceptedInput (CharSequence value ,int start ,int length ,int startNode ,boolean makeTerminal ) {
154+ if (this .minified ||this .remapped ) {
155+ throw new IllegalStateException ("The data have already been minified / remapped." );
156+ }
157+
158+ int node =startNode ;
159+
160+ for (int i =start ;i <start +length ;i ++) {
161+ boolean terminal =makeTerminal &&i ==start +length -1 ;
162+
163+ int arc =CharDataAccessor .getArc (this .nodes [node ],0 ,value .charAt (i ),this .caseSensitive );
164+ if (arc == -1 ) {
165+ this .addArc (node ,value .charAt (i ),this .nodeCount ,terminal );
166+ node =this .nodeCount ;
167+ this .addNode ();
168+ continue ;
169+ }
170+
171+ if (terminal ) {
172+ CharDataAccessor .setTerminal (this .nodes [node ],arc ,true );
173+ break ;
174+ }
175+
176+ node =getTarget (this .nodes [node ],arc );
177+ }
178+
179+ return node ;
173180 }
174181
175- private void addArc (int node ,char label ,int target ,boolean terminal ) {
182+ protected void addArc (int node ,char label ,int target ,boolean terminal ) {
176183char []oldNodeData =this .nodes [node ];
177184
178185this .nodes [node ] =new char [oldNodeData .length +CharDataAccessor .NODE_SIZE ];
@@ -283,7 +290,9 @@ private char[] buildData() {
283290 }
284291
285292private Map <String ,List <Integer >>buildGroups () {
286- Map <String ,List <Integer >>result =new TreeMap <>();
293+ this .sendMessage ("Building groups" );
294+
295+ Map <String ,List <Integer >>result =createHashMap (1000 );
287296
288297for (int i =0 ;i <this .nodeCount ;i ++) {
289298char []node =this .nodes [i ];
@@ -295,7 +304,7 @@ private Map<String, List<Integer>> buildGroups() {
295304
296305List <Integer >indexes =result .get (key );
297306if (indexes ==null ) {
298- indexes =new ArrayList <>();
307+ indexes =new LinkedList <>();
299308result .put (key ,indexes );
300309 }
301310
@@ -308,19 +317,29 @@ private Map<String, List<Integer>> buildGroups() {
308317private Map <Integer ,Integer >findReplacements (List <Integer >group ) {
309318Map <Integer ,Integer >result =new HashMap <>();
310319
311- Map <String ,Integer >hashes =new ConcurrentHashMap <>(group .size ());
320+ List <NodeReference >references =new ArrayList <>(group .size ());
321+
322+ for (Iterator <Integer >iterator =group .iterator ();iterator .hasNext ();) {
323+ Integer index =iterator .next ();
312324
313- for (int i =0 ;i <group .size ();i ++) {
314- Integer eachIndex =group .get (i );
315- char []node =this .nodes [eachIndex ];
325+ char []node =this .nodes [index ];
316326if (node ==null ) {
327+ iterator .remove ();
317328continue ;
318329 }
319330
320- Integer previous =hashes .putIfAbsent (new String (node ),eachIndex );
321- if (previous !=null ) {
322- result .put (eachIndex ,previous );
323- this .nodes [eachIndex ] =null ;
331+ references .add (new NodeReference (node ,index ));
332+ }
333+
334+ references .sort (null );
335+
336+ NodeReference lastReference =null ;
337+ for (NodeReference eachReference :references ) {
338+ if (lastReference ==null || !eachReference .equals (lastReference )) {
339+ lastReference =eachReference ;
340+ }else {
341+ result .put (eachReference .getIndex (),lastReference .getIndex ());
342+ this .nodes [eachReference .getIndex ()] =null ;
324343 }
325344 }
326345
@@ -332,18 +351,25 @@ private void minify() {
332351return ;
333352 }
334353
335- this .sendMessage ("Minifying " +this .nodeCount +" nodes ... " );
354+ this .sendMessage ("Minifying " +this .nodeCount +" nodes" );
336355
337356Map <Integer ,Integer >replacements =this .replaceEndNodes ();
338- Set < String > changedGroups = this .applyReplacements (replacements );
357+ this .applyReplacements (replacements );
339358
340359Map <String ,List <Integer >>groups =this .buildGroups ();
360+ Set <String >changedGroups =new HashSet <>(groups .keySet ());
341361
342362while (true ) {
343363replacements .clear ();
344364
365+ this .sendMessage ("Finding duplicates in " +changedGroups .size () +" groups" );
345366for (String eachChangedGroup :changedGroups ) {
346- replacements .putAll (this .findReplacements (groups .get (eachChangedGroup )));
367+ List <Integer >group =groups .get (eachChangedGroup );
368+ if (group ==null ||group .size () <2 ) {
369+ continue ;
370+ }
371+
372+ replacements .putAll (this .findReplacements (group ));
347373 }
348374
349375if (replacements .isEmpty ()) {
@@ -364,7 +390,7 @@ private void remap() {
364390this .sendMessage ("Remapping node addresses ..." );
365391this .requiredLength =0 ;
366392
367- Map <Integer ,Integer >replacements =new HashMap <>( );
393+ Map <Integer ,Integer >replacements =createHashMap ( this . nodeCount );
368394
369395for (int i =0 ;i <this .nodeCount ;i ++) {
370396char []node =this .nodes [i ];
@@ -386,10 +412,10 @@ private void remap() {
386412 }
387413
388414private Map <Integer ,Integer >replaceEndNodes () {
389- Map <Integer ,Integer >result =new HashMap <>( );
415+ Map <Integer ,Integer >result =createHashMap ( 1000 );
390416
391417for (int i =0 ;i <this .nodeCount ;i ++) {
392- if (this .nodes [i ].length !=0 ) {
418+ if (this .nodes [i ] == null || this . nodes [ i ] .length !=0 ) {
393419continue ;
394420 }
395421