Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit031cc55

Browse files
committed
Optimize pglz compressor for small inputs.
The pglz compressor has a significant startup cost, because it has toinitialize to zeros the history-tracking hash table. On a 64-bit system, thehash table was 64kB in size. While clearing memory is pretty fast, for veryshort inputs the relative cost of that was quite large.This patch alleviates that in two ways. First, instead of storing pointersin the hash table, store 16-bit indexes into the hist_entries array. Thatslashes the size of the hash table to 1/2 or 1/4 of the original, dependingon the pointer width. Secondly, adjust the size of the hash table based oninput size. For very small inputs, you don't need a large hash table toavoid collisions.Review by Amit Kapila.
1 parent79ce29c commit031cc55

File tree

1 file changed

+66
-29
lines changed

1 file changed

+66
-29
lines changed

‎src/backend/utils/adt/pg_lzcompress.c

Lines changed: 66 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -112,17 +112,20 @@
112112
*of identical bytes like trailing spaces) and for bigger ones
113113
*our 4K maximum look-back distance is too small.
114114
*
115-
*The compressor creates a table for8192lists of positions.
115+
*The compressor creates a table for lists of positions.
116116
*For each input position (except the last 3), a hash key is
117117
*built from the 4 next input bytes and the position remembered
118118
*in the appropriate list. Thus, the table points to linked
119119
*lists of likely to be at least in the first 4 characters
120120
*matching strings. This is done on the fly while the input
121121
*is compressed into the output area. Table entries are only
122122
*kept for the last 4096 input positions, since we cannot use
123-
*back-pointers larger than that anyway.
123+
*back-pointers larger than that anyway. The size of the hash
124+
*table is chosen based on the size of the input - a larger table
125+
*has a larger startup cost, as it needs to be initialized to
126+
*zero, but reduces the number of hash collisions on long inputs.
124127
*
125-
*For each byte in the input,it's hash key (built from this
128+
*For each byte in the input,its hash key (built from this
126129
*byte and the next 3) is used to find the appropriate list
127130
*in the table. The lists remember the positions of all bytes
128131
*that had the same hash key in the past in increasing backward
@@ -180,8 +183,7 @@
180183
* Local definitions
181184
* ----------
182185
*/
183-
#definePGLZ_HISTORY_LISTS8192/* must be power of 2 */
184-
#definePGLZ_HISTORY_MASK(PGLZ_HISTORY_LISTS - 1)
186+
#definePGLZ_MAX_HISTORY_LISTS8192/* must be power of 2 */
185187
#definePGLZ_HISTORY_SIZE4096
186188
#definePGLZ_MAX_MATCH273
187189

@@ -241,9 +243,15 @@ const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
241243
* Statically allocated work arrays for history
242244
* ----------
243245
*/
244-
staticPGLZ_HistEntry*hist_start[PGLZ_HISTORY_LISTS];
245-
staticPGLZ_HistEntryhist_entries[PGLZ_HISTORY_SIZE];
246+
staticint16hist_start[PGLZ_MAX_HISTORY_LISTS];
247+
staticPGLZ_HistEntryhist_entries[PGLZ_HISTORY_SIZE+1];
246248

249+
/*
250+
* Element 0 in hist_entries is unused, and means 'invalid'. Likewise,
251+
* INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'.
252+
*/
253+
#defineINVALID_ENTRY0
254+
#defineINVALID_ENTRY_PTR(&hist_entries[INVALID_ENTRY])
247255

248256
/* ----------
249257
* pglz_hist_idx -
@@ -257,10 +265,10 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
257265
* hash keys more.
258266
* ----------
259267
*/
260-
#definepglz_hist_idx(_s,_e) (\
268+
#definepglz_hist_idx(_s,_e,_mask) (\
261269
((((_e) - (_s)) < 4) ? (int) (_s)[0] :\
262-
(((_s)[0] <<9) ^ ((_s)[1] <<6) ^\
263-
((_s)[2] <<3) ^ (_s)[3])) & (PGLZ_HISTORY_MASK)\
270+
(((_s)[0] <<6) ^ ((_s)[1] <<4) ^\
271+
((_s)[2] <<2) ^ (_s)[3])) & (_mask)\
264272
)
265273

266274

@@ -276,28 +284,35 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
276284
* _hn and _recycle are modified in the macro.
277285
* ----------
278286
*/
279-
#definepglz_hist_add(_hs,_he,_hn,_recycle,_s,_e)\
287+
#definepglz_hist_add(_hs,_he,_hn,_recycle,_s,_e,_mask)\
280288
do {\
281-
int __hindex = pglz_hist_idx((_s),(_e));\
282-
PGLZ_HistEntry **__myhsp = &(_hs)[__hindex];\
289+
int __hindex = pglz_hist_idx((_s),(_e), (_mask));\
290+
int16 *__myhsp = &(_hs)[__hindex];\
283291
PGLZ_HistEntry *__myhe = &(_he)[_hn];\
284292
if (_recycle) {\
285293
if (__myhe->prev == NULL)\
286-
(_hs)[__myhe->hindex] = __myhe->next;\
294+
(_hs)[__myhe->hindex] = __myhe->next - (_he);\
287295
else\
288296
__myhe->prev->next = __myhe->next;\
289297
if (__myhe->next != NULL)\
290298
__myhe->next->prev = __myhe->prev;\
291299
}\
292-
__myhe->next = *__myhsp;\
300+
__myhe->next =&(_he)[*__myhsp];\
293301
__myhe->prev = NULL;\
294302
__myhe->hindex = __hindex;\
295303
__myhe->pos = (_s);\
296-
if (*__myhsp != NULL)\
297-
(*__myhsp)->prev = __myhe;\
298-
*__myhsp = __myhe;\
299-
if (++(_hn) >= PGLZ_HISTORY_SIZE) {\
300-
(_hn) = 0;\
304+
/* If there was an existing entry in this hash slot, link */\
305+
/* this new entry to it. However, the 0th entry in the */\
306+
/* entries table is unused, so we can freely scribble on it. */ \
307+
/* So don't bother checking if the slot was used - we'll */\
308+
/* scribble on the unused entry if it was not, but that's */\
309+
/* harmless. Avoiding the branch in this critical path */\
310+
/* speeds this up a little bit. */\
311+
/* if (*__myhsp != INVALID_ENTRY) */\
312+
(_he)[(*__myhsp)].prev=__myhe;\
313+
*__myhsp=_hn;\
314+
if (++(_hn) >=PGLZ_HISTORY_SIZE+1) {\
315+
(_hn)=1;\
301316
(_recycle)= true;\
302317
}\
303318
}while (0)
@@ -372,18 +387,20 @@ do { \
372387
* ----------
373388
*/
374389
staticinlineint
375-
pglz_find_match(PGLZ_HistEntry**hstart,constchar*input,constchar*end,
376-
int*lenp,int*offp,intgood_match,intgood_drop)
390+
pglz_find_match(int16*hstart,constchar*input,constchar*end,
391+
int*lenp,int*offp,intgood_match,intgood_drop,intmask)
377392
{
378393
PGLZ_HistEntry*hent;
394+
int16hentno;
379395
int32len=0;
380396
int32off=0;
381397

382398
/*
383399
* Traverse the linked history list until a good enough match is found.
384400
*/
385-
hent=hstart[pglz_hist_idx(input,end)];
386-
while (hent)
401+
hentno=hstart[pglz_hist_idx(input,end,mask)];
402+
hent=&hist_entries[hentno];
403+
while (hent!=INVALID_ENTRY_PTR)
387404
{
388405
constchar*ip=input;
389406
constchar*hp=hent->pos;
@@ -484,7 +501,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
484501
{
485502
unsignedchar*bp= ((unsignedchar*)dest)+sizeof(PGLZ_Header);
486503
unsignedchar*bstart=bp;
487-
inthist_next=0;
504+
inthist_next=1;
488505
boolhist_recycle= false;
489506
constchar*dp=source;
490507
constchar*dend=source+slen;
@@ -500,6 +517,8 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
500517
int32result_size;
501518
int32result_max;
502519
int32need_rate;
520+
inthashsz;
521+
intmask;
503522

504523
/*
505524
* Our fallback strategy is the default.
@@ -555,11 +574,29 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
555574
else
556575
result_max= (slen* (100-need_rate)) /100;
557576

577+
/*
578+
* Experiments suggest that these hash sizes work pretty well. A large
579+
* hash table minimizes collision, but has a higher startup cost. For
580+
* a small input, the startup cost dominates. The table size must be
581+
* a power of two.
582+
*/
583+
if (slen<128)
584+
hashsz=512;
585+
elseif (slen<256)
586+
hashsz=1024;
587+
elseif (slen<512)
588+
hashsz=2048;
589+
elseif (slen<1024)
590+
hashsz=4096;
591+
else
592+
hashsz=8192;
593+
mask=hashsz-1;
594+
558595
/*
559596
* Initialize the history lists to empty. We do not need to zero the
560597
* hist_entries[] array; its entries are initialized as they are used.
561598
*/
562-
memset(hist_start,0,sizeof(hist_start));
599+
memset(hist_start,0,hashsz*sizeof(int16));
563600

564601
/*
565602
* Compress the source directly into the output buffer.
@@ -589,7 +626,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
589626
* Try to find a match in the history
590627
*/
591628
if (pglz_find_match(hist_start,dp,dend,&match_len,
592-
&match_off,good_match,good_drop))
629+
&match_off,good_match,good_drop,mask))
593630
{
594631
/*
595632
* Create the tag and add history entries for all matched
@@ -600,7 +637,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
600637
{
601638
pglz_hist_add(hist_start,hist_entries,
602639
hist_next,hist_recycle,
603-
dp,dend);
640+
dp,dend,mask);
604641
dp++;/* Do not do this ++ in the line above! */
605642
/* The macro would do it four times - Jan.*/
606643
}
@@ -614,7 +651,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
614651
pglz_out_literal(ctrlp,ctrlb,ctrl,bp,*dp);
615652
pglz_hist_add(hist_start,hist_entries,
616653
hist_next,hist_recycle,
617-
dp,dend);
654+
dp,dend,mask);
618655
dp++;/* Do not do this ++ in the line above! */
619656
/* The macro would do it four times - Jan.*/
620657
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp