112112 *of identical bytes like trailing spaces) and for bigger ones
113113 *our 4K maximum look-back distance is too small.
114114 *
115- *The compressor creates a table for8192 lists of positions.
115+ *The compressor creates a table for lists of positions.
116116 *For each input position (except the last 3), a hash key is
117117 *built from the 4 next input bytes and the position remembered
118118 *in the appropriate list. Thus, the table points to linked
119119 *lists of likely to be at least in the first 4 characters
120120 *matching strings. This is done on the fly while the input
121121 *is compressed into the output area. Table entries are only
122122 *kept for the last 4096 input positions, since we cannot use
123- *back-pointers larger than that anyway.
123+ *back-pointers larger than that anyway. The size of the hash
124+ *table is chosen based on the size of the input - a larger table
125+ *has a larger startup cost, as it needs to be initialized to
126+ *zero, but reduces the number of hash collisions on long inputs.
124127 *
125- *For each byte in the input,it's hash key (built from this
128+ *For each byte in the input,its hash key (built from this
126129 *byte and the next 3) is used to find the appropriate list
127130 *in the table. The lists remember the positions of all bytes
128131 *that had the same hash key in the past in increasing backward
180183 * Local definitions
181184 * ----------
182185 */
183- #define PGLZ_HISTORY_LISTS 8192/* must be power of 2 */
184- #define PGLZ_HISTORY_MASK (PGLZ_HISTORY_LISTS - 1)
186+ #define PGLZ_MAX_HISTORY_LISTS 8192/* must be power of 2 */
185187#define PGLZ_HISTORY_SIZE 4096
186188#define PGLZ_MAX_MATCH 273
187189
@@ -241,9 +243,15 @@ const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
241243 * Statically allocated work arrays for history
242244 * ----------
243245 */
244- static PGLZ_HistEntry * hist_start [PGLZ_HISTORY_LISTS ];
245- static PGLZ_HistEntry hist_entries [PGLZ_HISTORY_SIZE ];
246+ static int16 hist_start [PGLZ_MAX_HISTORY_LISTS ];
247+ static PGLZ_HistEntry hist_entries [PGLZ_HISTORY_SIZE + 1 ];
246248
249+ /*
250+ * Element 0 in hist_entries is unused, and means 'invalid'. Likewise,
251+ * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'.
252+ */
253+ #define INVALID_ENTRY 0
254+ #define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY])
247255
248256/* ----------
249257 * pglz_hist_idx -
@@ -257,10 +265,10 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
257265 * hash keys more.
258266 * ----------
259267 */
260- #define pglz_hist_idx (_s ,_e ) ( \
268+ #define pglz_hist_idx (_s ,_e , _mask ) (\
261269((((_e) - (_s)) < 4) ? (int) (_s)[0] :\
262- (((_s)[0] <<9 ) ^ ((_s)[1] <<6 ) ^\
263- ((_s)[2] <<3 ) ^ (_s)[3])) & (PGLZ_HISTORY_MASK )\
270+ (((_s)[0] <<6 ) ^ ((_s)[1] <<4 ) ^\
271+ ((_s)[2] <<2 ) ^ (_s)[3])) & (_mask )\
264272)
265273
266274
@@ -276,28 +284,35 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
276284 * _hn and _recycle are modified in the macro.
277285 * ----------
278286 */
279- #define pglz_hist_add (_hs ,_he ,_hn ,_recycle ,_s ,_e ) \
287+ #define pglz_hist_add (_hs ,_he ,_hn ,_recycle ,_s ,_e , _mask ) \
280288do {\
281- int __hindex = pglz_hist_idx((_s),(_e)); \
282- PGLZ_HistEntry ** __myhsp = &(_hs)[__hindex];\
289+ int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \
290+ int16 * __myhsp = &(_hs)[__hindex]; \
283291PGLZ_HistEntry *__myhe = &(_he)[_hn];\
284292if (_recycle) {\
285293if (__myhe->prev == NULL)\
286- (_hs)[__myhe->hindex] = __myhe->next; \
294+ (_hs)[__myhe->hindex] = __myhe->next - (_he); \
287295else\
288296__myhe->prev->next = __myhe->next;\
289297if (__myhe->next != NULL)\
290298__myhe->next->prev = __myhe->prev;\
291299}\
292- __myhe->next = *__myhsp; \
300+ __myhe->next =&(_he)[ *__myhsp]; \
293301__myhe->prev = NULL;\
294302__myhe->hindex = __hindex;\
295303__myhe->pos = (_s);\
296- if (*__myhsp != NULL)\
297- (*__myhsp)->prev = __myhe;\
298- *__myhsp = __myhe;\
299- if (++(_hn) >= PGLZ_HISTORY_SIZE) {\
300- (_hn) = 0;\
304+ /* If there was an existing entry in this hash slot, link */ \
305+ /* this new entry to it. However, the 0th entry in the */ \
306+ /* entries table is unused, so we can freely scribble on it. */ \
307+ /* So don't bother checking if the slot was used - we'll */ \
308+ /* scribble on the unused entry if it was not, but that's */ \
309+ /* harmless. Avoiding the branch in this critical path */ \
310+ /* speeds this up a little bit. */ \
311+ /* if (*__myhsp != INVALID_ENTRY) */ \
312+ (_he )[(* __myhsp )].prev = __myhe ;\
313+ * __myhsp = _hn ;\
314+ if (++ (_hn ) >=PGLZ_HISTORY_SIZE + 1 ) {\
315+ (_hn )= 1 ;\
301316(_recycle )= true;\
302317}\
303318}while (0 )
@@ -372,18 +387,20 @@ do { \
372387 * ----------
373388 */
374389static inline int
375- pglz_find_match (PGLZ_HistEntry * * hstart ,const char * input ,const char * end ,
376- int * lenp ,int * offp ,int good_match ,int good_drop )
390+ pglz_find_match (int16 * hstart ,const char * input ,const char * end ,
391+ int * lenp ,int * offp ,int good_match ,int good_drop , int mask )
377392{
378393PGLZ_HistEntry * hent ;
394+ int16 hentno ;
379395int32 len = 0 ;
380396int32 off = 0 ;
381397
382398/*
383399 * Traverse the linked history list until a good enough match is found.
384400 */
385- hent = hstart [pglz_hist_idx (input ,end )];
386- while (hent )
401+ hentno = hstart [pglz_hist_idx (input ,end ,mask )];
402+ hent = & hist_entries [hentno ];
403+ while (hent != INVALID_ENTRY_PTR )
387404{
388405const char * ip = input ;
389406const char * hp = hent -> pos ;
@@ -484,7 +501,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
484501{
485502unsignedchar * bp = ((unsignedchar * )dest )+ sizeof (PGLZ_Header );
486503unsignedchar * bstart = bp ;
487- int hist_next = 0 ;
504+ int hist_next = 1 ;
488505bool hist_recycle = false;
489506const char * dp = source ;
490507const char * dend = source + slen ;
@@ -500,6 +517,8 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
500517int32 result_size ;
501518int32 result_max ;
502519int32 need_rate ;
520+ int hashsz ;
521+ int mask ;
503522
504523/*
505524 * Our fallback strategy is the default.
@@ -555,11 +574,29 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
555574else
556575result_max = (slen * (100 - need_rate )) /100 ;
557576
577+ /*
578+ * Experiments suggest that these hash sizes work pretty well. A large
579+ * hash table minimizes collision, but has a higher startup cost. For
580+ * a small input, the startup cost dominates. The table size must be
581+ * a power of two.
582+ */
583+ if (slen < 128 )
584+ hashsz = 512 ;
585+ else if (slen < 256 )
586+ hashsz = 1024 ;
587+ else if (slen < 512 )
588+ hashsz = 2048 ;
589+ else if (slen < 1024 )
590+ hashsz = 4096 ;
591+ else
592+ hashsz = 8192 ;
593+ mask = hashsz - 1 ;
594+
558595/*
559596 * Initialize the history lists to empty. We do not need to zero the
560597 * hist_entries[] array; its entries are initialized as they are used.
561598 */
562- memset (hist_start ,0 ,sizeof (hist_start ));
599+ memset (hist_start ,0 ,hashsz * sizeof (int16 ));
563600
564601/*
565602 * Compress the source directly into the output buffer.
@@ -589,7 +626,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
589626 * Try to find a match in the history
590627 */
591628if (pglz_find_match (hist_start ,dp ,dend ,& match_len ,
592- & match_off ,good_match ,good_drop ))
629+ & match_off ,good_match ,good_drop , mask ))
593630{
594631/*
595632 * Create the tag and add history entries for all matched
@@ -600,7 +637,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
600637{
601638pglz_hist_add (hist_start ,hist_entries ,
602639hist_next ,hist_recycle ,
603- dp ,dend );
640+ dp ,dend , mask );
604641dp ++ ;/* Do not do this ++ in the line above! */
605642/* The macro would do it four times - Jan.*/
606643}
@@ -614,7 +651,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
614651pglz_out_literal (ctrlp ,ctrlb ,ctrl ,bp ,* dp );
615652pglz_hist_add (hist_start ,hist_entries ,
616653hist_next ,hist_recycle ,
617- dp ,dend );
654+ dp ,dend , mask );
618655dp ++ ;/* Do not do this ++ in the line above! */
619656/* The macro would do it four times - Jan.*/
620657}