Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit75ae538

Browse files
committed
Use more efficient hashtable for tidbitmap.c to speed up bitmap scans.
Use the new simplehash.h to speed up tidbitmap.c uses. For bitmap scanheavy queries speedups of over 100% have been measured. Both lossy andexact scans benefit, but the wins are bigger for mostly exact scans.The conversion is mostly trivial, except that tbm_lossify() now restartslossifying at the point it previously stopped. Otherwise the hash tablebecomes unbalanced because the scan in done in hash-order, leaving theend of the hashtable more densely filled then the beginning. That causedperformance issues with dynahash as well, but due to the open chainingthey were less pronounced than with the linear adressing fromsimplehash.h.Reviewed-By: Tomas VondraDiscussion: <20160727004333.r3e2k2y6fvk2ntup@alap3.anarazel.de>
1 parentb30d3ea commit75ae538

File tree

1 file changed

+97
-72
lines changed

1 file changed

+97
-72
lines changed

‎src/backend/nodes/tidbitmap.c

Lines changed: 97 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
#include"access/htup_details.h"
4444
#include"nodes/bitmapset.h"
4545
#include"nodes/tidbitmap.h"
46-
#include"utils/hsearch.h"
4746

4847
/*
4948
* The maximum number of tuples per page is not large (typically 256 with
@@ -61,12 +60,12 @@
6160
* for that page in the page table.
6261
*
6362
* We actually store both exact pages and lossy chunks in the same hash
64-
* table, using identical data structures. (This is becausedynahash.c's
65-
*memorymanagement doesn't allow space to be transferred easily from one
66-
* hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the
67-
* same as MAX_TUPLES_PER_PAGE, or at least not too different. But we
68-
* also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer
69-
* remainder operations. So, define it like this:
63+
* table, using identical data structures. (This is becausethe memory
64+
* managementfor hashtablesdoesn'teasily/efficientlyallow space to be
65+
*transferred easily from onehashtable to another.) Therefore it's best
66+
*if PAGES_PER_CHUNK is thesame as MAX_TUPLES_PER_PAGE, or at least not
67+
*too different. But wealso want PAGES_PER_CHUNK to be a power of 2 to
68+
*avoid expensive integerremainder operations. So, define it like this:
7069
*/
7170
#definePAGES_PER_CHUNK (BLCKSZ / 32)
7271

@@ -97,21 +96,22 @@
9796
typedefstructPagetableEntry
9897
{
9998
BlockNumberblockno;/* page number (hashtable key) */
99+
charstatus;/* hash entry status */
100100
boolischunk;/* T = lossy storage, F = exact */
101101
boolrecheck;/* should the tuples be rechecked? */
102102
bitmapwordwords[Max(WORDS_PER_PAGE,WORDS_PER_CHUNK)];
103103
}PagetableEntry;
104104

105105
/*
106-
*dynahash.c is optimized for relatively large, long-lived hash tables.
107-
*This isnotideal for TIDBitMap, particularlywhen we are using a bitmap
108-
* scan on the inside of a nestloop join: a bitmap may well live only long
109-
* enough to accumulate one entry in such cases. We therefore avoid creating
110-
* an actual hashtable until we need two pagetable entries. When just one
111-
* pagetable entry is needed, we store it in a fixed field of TIDBitMap.
112-
* (NOTE: we don't get rid of the hashtable if the bitmap later shrinks down
113-
* to zero or one page again. So, status can be TBM_HASH even when nentries
114-
* is zero or one.)
106+
*We want to avoid the overhead of creating the hashtable, which is
107+
*comparatively large, whennotnecessary. Particularlywhen we are using a
108+
*bitmapscan on the inside of a nestloop join: a bitmap may well live only
109+
*longenough to accumulate one entry in such cases. We therefore avoid
110+
*creatingan actual hashtable until we need two pagetable entries. When
111+
*just onepagetable entry is needed, we store it in a fixed field of
112+
*TIDBitMap.(NOTE: we don't get rid of the hashtable if the bitmap later
113+
*shrinks downto zero or one page again. So, status can be TBM_HASH even
114+
*when nentriesis zero or one.)
115115
*/
116116
typedefenum
117117
{
@@ -128,12 +128,13 @@ struct TIDBitmap
128128
NodeTagtype;/* to make it a valid Node */
129129
MemoryContextmcxt;/* memory context containing me */
130130
TBMStatusstatus;/* see codes above */
131-
HTAB*pagetable;/* hash table of PagetableEntry's */
131+
structpagetable_hash*pagetable;/* hash table of PagetableEntry's */
132132
intnentries;/* number of entries in pagetable */
133133
intmaxentries;/* limit on same to meet maxbytes */
134134
intnpages;/* number of exact entries in pagetable */
135135
intnchunks;/* number of lossy entries in pagetable */
136136
booliterating;/* tbm_begin_iterate called? */
137+
uint32lossify_start;/* offset to start lossifying hashtable at */
137138
PagetableEntryentry1;/* used when status == TBM_ONE_PAGE */
138139
/* these are valid when iterating is true: */
139140
PagetableEntry**spages;/* sorted exact-page list, or NULL */
@@ -168,6 +169,35 @@ static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
168169
staticvoidtbm_lossify(TIDBitmap*tbm);
169170
staticinttbm_comparator(constvoid*left,constvoid*right);
170171

172+
/*
173+
* Simple inline murmur hash implementation for the exact width required, for
174+
* performance.
175+
*/
176+
staticinlineuint32
177+
hash_blockno(BlockNumberb)
178+
{
179+
uint32h=b;
180+
181+
h ^=h >>16;
182+
h *=0x85ebca6b;
183+
h ^=h >>13;
184+
h *=0xc2b2ae35;
185+
h ^=h >>16;
186+
returnh;
187+
}
188+
189+
/* define hashtable mapping block numbers to PagetableEntry's */
190+
#defineSH_PREFIX pagetable
191+
#defineSH_ELEMENT_TYPE PagetableEntry
192+
#defineSH_KEY_TYPE BlockNumber
193+
#defineSH_KEY blockno
194+
#defineSH_HASH_KEY(tb,key) hash_blockno(key)
195+
#defineSH_EQUAL(tb,a,b) a == b
196+
#defineSH_SCOPE static inline
197+
#defineSH_DEFINE
198+
#defineSH_DECLARE
199+
#include"lib/simplehash.h"
200+
171201

172202
/*
173203
* tbm_create - create an initially-empty bitmap
@@ -190,17 +220,16 @@ tbm_create(long maxbytes)
190220

191221
/*
192222
* Estimate number of hashtable entries we can have within maxbytes. This
193-
* estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT)) plus a
194-
* pointer per hash entry, which is crude but good enough for our purpose.
195-
* Also count an extra Pointer per entry for the arrays created during
196-
* iteration readout.
223+
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
224+
* for our purpose. Also count an extra Pointer per entry for the arrays
225+
* created during iteration readout.
197226
*/
198227
nbuckets=maxbytes /
199-
(MAXALIGN(sizeof(HASHELEMENT))+MAXALIGN(sizeof(PagetableEntry))
200-
+sizeof(Pointer)+sizeof(Pointer));
228+
(sizeof(PagetableEntry)+sizeof(Pointer)+sizeof(Pointer));
201229
nbuckets=Min(nbuckets,INT_MAX-1);/* safety limit */
202230
nbuckets=Max(nbuckets,16);/* sanity limit */
203231
tbm->maxentries= (int)nbuckets;
232+
tbm->lossify_start=0;
204233

205234
returntbm;
206235
}
@@ -212,32 +241,25 @@ tbm_create(long maxbytes)
212241
staticvoid
213242
tbm_create_pagetable(TIDBitmap*tbm)
214243
{
215-
HASHCTLhash_ctl;
216-
217244
Assert(tbm->status!=TBM_HASH);
218245
Assert(tbm->pagetable==NULL);
219246

220-
/* Create the hashtable proper */
221-
MemSet(&hash_ctl,0,sizeof(hash_ctl));
222-
hash_ctl.keysize=sizeof(BlockNumber);
223-
hash_ctl.entrysize=sizeof(PagetableEntry);
224-
hash_ctl.hcxt=tbm->mcxt;
225-
tbm->pagetable=hash_create("TIDBitmap",
226-
128,/* start small and extend */
227-
&hash_ctl,
228-
HASH_ELEM |HASH_BLOBS |HASH_CONTEXT);
247+
tbm->pagetable=pagetable_create(tbm->mcxt,128);
229248

230249
/* If entry1 is valid, push it into the hashtable */
231250
if (tbm->status==TBM_ONE_PAGE)
232251
{
233252
PagetableEntry*page;
234253
boolfound;
254+
charoldstatus;
235255

236-
page=(PagetableEntry*)hash_search(tbm->pagetable,
237-
(void*)&tbm->entry1.blockno,
238-
HASH_ENTER,&found);
256+
page=pagetable_insert(tbm->pagetable,
257+
tbm->entry1.blockno,
258+
&found);
239259
Assert(!found);
260+
oldstatus=page->status;
240261
memcpy(page,&tbm->entry1,sizeof(PagetableEntry));
262+
page->status=oldstatus;
241263
}
242264

243265
tbm->status=TBM_HASH;
@@ -250,7 +272,7 @@ void
250272
tbm_free(TIDBitmap*tbm)
251273
{
252274
if (tbm->pagetable)
253-
hash_destroy(tbm->pagetable);
275+
pagetable_destroy(tbm->pagetable);
254276
if (tbm->spages)
255277
pfree(tbm->spages);
256278
if (tbm->schunks)
@@ -357,12 +379,12 @@ tbm_union(TIDBitmap *a, const TIDBitmap *b)
357379
tbm_union_page(a,&b->entry1);
358380
else
359381
{
360-
HASH_SEQ_STATUSstatus;
382+
pagetable_iteratori;
361383
PagetableEntry*bpage;
362384

363385
Assert(b->status==TBM_HASH);
364-
hash_seq_init(&status,b->pagetable);
365-
while ((bpage=(PagetableEntry*)hash_seq_search(&status))!=NULL)
386+
pagetable_start_iterate(b->pagetable,&i);
387+
while ((bpage=pagetable_iterate(b->pagetable,&i))!=NULL)
366388
tbm_union_page(a,bpage);
367389
}
368390
}
@@ -449,12 +471,12 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
449471
}
450472
else
451473
{
452-
HASH_SEQ_STATUSstatus;
474+
pagetable_iteratori;
453475
PagetableEntry*apage;
454476

455477
Assert(a->status==TBM_HASH);
456-
hash_seq_init(&status,a->pagetable);
457-
while ((apage=(PagetableEntry*)hash_seq_search(&status))!=NULL)
478+
pagetable_start_iterate(a->pagetable,&i);
479+
while ((apage=pagetable_iterate(a->pagetable,&i))!=NULL)
458480
{
459481
if (tbm_intersect_page(a,apage,b))
460482
{
@@ -464,9 +486,7 @@ tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
464486
else
465487
a->npages--;
466488
a->nentries--;
467-
if (hash_search(a->pagetable,
468-
(void*)&apage->blockno,
469-
HASH_REMOVE,NULL)==NULL)
489+
if (!pagetable_delete(a->pagetable,apage->blockno))
470490
elog(ERROR,"hash table corrupted");
471491
}
472492
}
@@ -606,7 +626,7 @@ tbm_begin_iterate(TIDBitmap *tbm)
606626
*/
607627
if (tbm->status==TBM_HASH&& !tbm->iterating)
608628
{
609-
HASH_SEQ_STATUSstatus;
629+
pagetable_iteratori;
610630
PagetableEntry*page;
611631
intnpages;
612632
intnchunks;
@@ -620,9 +640,9 @@ tbm_begin_iterate(TIDBitmap *tbm)
620640
MemoryContextAlloc(tbm->mcxt,
621641
tbm->nchunks*sizeof(PagetableEntry*));
622642

623-
hash_seq_init(&status,tbm->pagetable);
624643
npages=nchunks=0;
625-
while ((page= (PagetableEntry*)hash_seq_search(&status))!=NULL)
644+
pagetable_start_iterate(tbm->pagetable,&i);
645+
while ((page=pagetable_iterate(tbm->pagetable,&i))!=NULL)
626646
{
627647
if (page->ischunk)
628648
tbm->schunks[nchunks++]=page;
@@ -791,9 +811,7 @@ tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)
791811
returnpage;
792812
}
793813

794-
page= (PagetableEntry*)hash_search(tbm->pagetable,
795-
(void*)&pageno,
796-
HASH_FIND,NULL);
814+
page=pagetable_lookup(tbm->pagetable,pageno);
797815
if (page==NULL)
798816
returnNULL;
799817
if (page->ischunk)
@@ -834,15 +852,16 @@ tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)
834852
}
835853

836854
/* Look up or create an entry */
837-
page= (PagetableEntry*)hash_search(tbm->pagetable,
838-
(void*)&pageno,
839-
HASH_ENTER,&found);
855+
page=pagetable_insert(tbm->pagetable,pageno,&found);
840856
}
841857

842858
/* Initialize it if not present before */
843859
if (!found)
844860
{
861+
charoldstatus=page->status;
862+
845863
MemSet(page,0,sizeof(PagetableEntry));
864+
page->status=oldstatus;
846865
page->blockno=pageno;
847866
/* must count it too */
848867
tbm->nentries++;
@@ -869,9 +888,9 @@ tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)
869888

870889
bitno=pageno %PAGES_PER_CHUNK;
871890
chunk_pageno=pageno-bitno;
872-
page= (PagetableEntry*)hash_search(tbm->pagetable,
873-
(void*)&chunk_pageno,
874-
HASH_FIND,NULL);
891+
892+
page=pagetable_lookup(tbm->pagetable,chunk_pageno);
893+
875894
if (page!=NULL&&page->ischunk)
876895
{
877896
intwordnum=WORDNUM(bitno);
@@ -912,9 +931,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
912931
*/
913932
if (bitno!=0)
914933
{
915-
if (hash_search(tbm->pagetable,
916-
(void*)&pageno,
917-
HASH_REMOVE,NULL)!=NULL)
934+
if (pagetable_delete(tbm->pagetable,pageno))
918935
{
919936
/* It was present, so adjust counts */
920937
tbm->nentries--;
@@ -923,14 +940,15 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
923940
}
924941

925942
/* Look up or create entry for chunk-header page */
926-
page= (PagetableEntry*)hash_search(tbm->pagetable,
927-
(void*)&chunk_pageno,
928-
HASH_ENTER,&found);
943+
page=pagetable_insert(tbm->pagetable,chunk_pageno,&found);
929944

930945
/* Initialize it if not present before */
931946
if (!found)
932947
{
948+
charoldstatus=page->status;
949+
933950
MemSet(page,0,sizeof(PagetableEntry));
951+
page->status=oldstatus;
934952
page->blockno=chunk_pageno;
935953
page->ischunk= true;
936954
/* must count it too */
@@ -939,8 +957,11 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
939957
}
940958
elseif (!page->ischunk)
941959
{
960+
charoldstatus=page->status;
961+
942962
/* chunk header page was formerly non-lossy, make it lossy */
943963
MemSet(page,0,sizeof(PagetableEntry));
964+
page->status=oldstatus;
944965
page->blockno=chunk_pageno;
945966
page->ischunk= true;
946967
/* we assume it had some tuple bit(s) set, so mark it lossy */
@@ -962,7 +983,7 @@ tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
962983
staticvoid
963984
tbm_lossify(TIDBitmap*tbm)
964985
{
965-
HASH_SEQ_STATUSstatus;
986+
pagetable_iteratori;
966987
PagetableEntry*page;
967988

968989
/*
@@ -977,8 +998,8 @@ tbm_lossify(TIDBitmap *tbm)
977998
Assert(!tbm->iterating);
978999
Assert(tbm->status==TBM_HASH);
9791000

980-
hash_seq_init(&status,tbm->pagetable);
981-
while ((page=(PagetableEntry*)hash_seq_search(&status))!=NULL)
1001+
pagetable_start_iterate_at(tbm->pagetable,&i,tbm->lossify_start);
1002+
while ((page=pagetable_iterate(tbm->pagetable,&i))!=NULL)
9821003
{
9831004
if (page->ischunk)
9841005
continue;/* already a chunk header */
@@ -995,15 +1016,19 @@ tbm_lossify(TIDBitmap *tbm)
9951016

9961017
if (tbm->nentries <=tbm->maxentries /2)
9971018
{
998-
/* we have done enough */
999-
hash_seq_term(&status);
1019+
/*
1020+
* We have made enough room. Remember where to start lossifying
1021+
* next round, so we evenly iterate over the hashtable.
1022+
*/
1023+
tbm->lossify_start=i.cur;
10001024
break;
10011025
}
10021026

10031027
/*
10041028
* Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
1005-
* hashtable. We can continue the same seq_search scan since we do
1006-
* not care whether we visit lossy chunks or not.
1029+
* hashtable and may have deleted the non-lossy chunk. We can
1030+
* continue the same hash table scan, since failure to visit one
1031+
* element or visiting the newly inserted element, isn't fatal.
10071032
*/
10081033
}
10091034

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp