Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit5dfc198

Browse files
committed
Use more efficient hashtable for execGrouping.c to speed up hash aggregation.
The more efficient hashtable speeds up hash-aggregations with more thana few hundred groups significantly. Improvements of over 120% have beenmeasured.Due to the the different hash table queries that not fullydetermined (e.g. GROUP BY without ORDER BY) may change their resultorder.The conversion is largely straight-forward, except that, due to thestatic element types of simplehash.h type hashes, the additional datasome users store in elements (e.g. the per-group working data for hashaggregaters) is now stored in TupleHashEntryData->additional. Themeaning of BuildTupleHashTable's entrysize (renamed to additionalsize)has been changed to only be about the additionally stored size. Thatsize is only used for the initial sizing of the hash-table.Reviewed-By: Tomas VondraDiscussion: <20160727004333.r3e2k2y6fvk2ntup@alap3.anarazel.de>
1 parent75ae538 commit5dfc198

File tree

9 files changed

+144
-186
lines changed

9 files changed

+144
-186
lines changed

‎src/backend/executor/execGrouping.c

Lines changed: 60 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,25 @@
2323
#include"utils/lsyscache.h"
2424
#include"utils/memutils.h"
2525

26+
staticuint32TupleHashTableHash(structtuplehash_hash*tb,constMinimalTupletuple);
27+
staticintTupleHashTableMatch(structtuplehash_hash*tb,constMinimalTupletuple1,constMinimalTupletuple2);
2628

27-
staticTupleHashTableCurTupleHashTable=NULL;
28-
29-
staticuint32TupleHashTableHash(constvoid*key,Sizekeysize);
30-
staticintTupleHashTableMatch(constvoid*key1,constvoid*key2,
31-
Sizekeysize);
29+
/*
30+
* Define parameters for tuple hash table code generation. The interface is
31+
* *also* declared in execnodes.h (to generate the types, which are externally
32+
* visible).
33+
*/
34+
#defineSH_PREFIX tuplehash
35+
#defineSH_ELEMENT_TYPE TupleHashEntryData
36+
#defineSH_KEY_TYPE MinimalTuple
37+
#defineSH_KEY firstTuple
38+
#defineSH_HASH_KEY(tb,key) TupleHashTableHash(tb, key)
39+
#defineSH_EQUAL(tb,a,b) TupleHashTableMatch(tb, a, b) == 0
40+
#defineSH_SCOPE extern
41+
#defineSH_STORE_HASH
42+
#defineSH_GET_HASH(tb,a) a->hash
43+
#defineSH_DEFINE
44+
#include"lib/simplehash.h"
3245

3346

3447
/*****************************************************************************
@@ -260,7 +273,7 @@ execTuplesHashPrepare(int numCols,
260273
*eqfunctions: equality comparison functions to use
261274
*hashfunctions: datatype-specific hashing functions to use
262275
*nbuckets: initial estimate of hashtable size
263-
*entrysize: size ofeach entry (at least sizeof(TupleHashEntryData))
276+
*additionalsize: size ofdata stored in ->additional
264277
*tablecxt: memory context in which to store table and table entries
265278
*tempcxt: short-lived context for evaluation hash and comparison functions
266279
*
@@ -275,20 +288,19 @@ TupleHashTable
275288
BuildTupleHashTable(intnumCols,AttrNumber*keyColIdx,
276289
FmgrInfo*eqfunctions,
277290
FmgrInfo*hashfunctions,
278-
longnbuckets,Sizeentrysize,
291+
longnbuckets,Sizeadditionalsize,
279292
MemoryContexttablecxt,MemoryContexttempcxt)
280293
{
281294
TupleHashTablehashtable;
282-
HASHCTLhash_ctl;
295+
Sizeentrysize=sizeof(TupleHashEntryData)+additionalsize;
283296

284297
Assert(nbuckets>0);
285-
Assert(entrysize >=sizeof(TupleHashEntryData));
286298

287299
/* Limit initial table size request to not more than work_mem */
288300
nbuckets=Min(nbuckets, (long) ((work_mem*1024L) /entrysize));
289301

290-
hashtable= (TupleHashTable)MemoryContextAlloc(tablecxt,
291-
sizeof(TupleHashTableData));
302+
hashtable= (TupleHashTable)
303+
MemoryContextAlloc(tablecxt,sizeof(TupleHashTableData));
292304

293305
hashtable->numCols=numCols;
294306
hashtable->keyColIdx=keyColIdx;
@@ -302,15 +314,8 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
302314
hashtable->in_hash_funcs=NULL;
303315
hashtable->cur_eq_funcs=NULL;
304316

305-
MemSet(&hash_ctl,0,sizeof(hash_ctl));
306-
hash_ctl.keysize=sizeof(TupleHashEntryData);
307-
hash_ctl.entrysize=entrysize;
308-
hash_ctl.hash=TupleHashTableHash;
309-
hash_ctl.match=TupleHashTableMatch;
310-
hash_ctl.hcxt=tablecxt;
311-
hashtable->hashtab=hash_create("TupleHashTable",nbuckets,
312-
&hash_ctl,
313-
HASH_ELEM |HASH_FUNCTION |HASH_COMPARE |HASH_CONTEXT);
317+
hashtable->hashtab=tuplehash_create(tablecxt,nbuckets);
318+
hashtable->hashtab->private=hashtable;
314319

315320
returnhashtable;
316321
}
@@ -324,18 +329,17 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
324329
*
325330
* If isnew isn't NULL, then a new entry is created if no existing entry
326331
* matches. On return, *isnew is true if the entry is newly created,
327-
* false if it existed already.Any extra spaceina new entry has been
328-
* zeroed.
332+
* false if it existed already.->additional_datainthe new entry has
333+
*beenzeroed.
329334
*/
330335
TupleHashEntry
331336
LookupTupleHashEntry(TupleHashTablehashtable,TupleTableSlot*slot,
332337
bool*isnew)
333338
{
334-
TupleHashEntryentry;
339+
TupleHashEntryData*entry;
335340
MemoryContextoldContext;
336-
TupleHashTablesaveCurHT;
337-
TupleHashEntryDatadummy;
338341
boolfound;
342+
MinimalTuplekey;
339343

340344
/* If first time through, clone the input slot to make table slot */
341345
if (hashtable->tableslot==NULL)
@@ -356,53 +360,37 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
356360
/* Need to run the hash functions in short-lived context */
357361
oldContext=MemoryContextSwitchTo(hashtable->tempcxt);
358362

359-
/*
360-
* Set up data needed by hash and match functions
361-
*
362-
* We save and restore CurTupleHashTable just in case someone manages to
363-
* invoke this code re-entrantly.
364-
*/
363+
/* set up data needed by hash and match functions */
365364
hashtable->inputslot=slot;
366365
hashtable->in_hash_funcs=hashtable->tab_hash_funcs;
367366
hashtable->cur_eq_funcs=hashtable->tab_eq_funcs;
368367

369-
saveCurHT=CurTupleHashTable;
370-
CurTupleHashTable=hashtable;
371-
372-
/* Search the hash table */
373-
dummy.firstTuple=NULL;/* flag to reference inputslot */
374-
entry= (TupleHashEntry)hash_search(hashtable->hashtab,
375-
&dummy,
376-
isnew ?HASH_ENTER :HASH_FIND,
377-
&found);
368+
key=NULL;/* flag to reference inputslot */
378369

379370
if (isnew)
380371
{
372+
entry=tuplehash_insert(hashtable->hashtab,key,&found);
373+
381374
if (found)
382375
{
383376
/* found pre-existing entry */
384377
*isnew= false;
385378
}
386379
else
387380
{
388-
/*
389-
* created new entry
390-
*
391-
* Zero any caller-requested space in the entry. (This zaps the
392-
* "key data" dynahash.c copied into the new entry, but we don't
393-
* care since we're about to overwrite it anyway.)
394-
*/
395-
MemSet(entry,0,hashtable->entrysize);
396-
397-
/* Copy the first tuple into the table context */
381+
/* created new entry */
382+
*isnew= true;
383+
/* zero caller data */
384+
entry->additional=NULL;
398385
MemoryContextSwitchTo(hashtable->tablecxt);
386+
/* Copy the first tuple into the table context */
399387
entry->firstTuple=ExecCopySlotMinimalTuple(slot);
400-
401-
*isnew= true;
402388
}
403389
}
404-
405-
CurTupleHashTable=saveCurHT;
390+
else
391+
{
392+
entry=tuplehash_lookup(hashtable->hashtab,key);
393+
}
406394

407395
MemoryContextSwitchTo(oldContext);
408396

@@ -425,34 +413,19 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
425413
{
426414
TupleHashEntryentry;
427415
MemoryContextoldContext;
428-
TupleHashTablesaveCurHT;
429-
TupleHashEntryDatadummy;
416+
MinimalTuplekey;
430417

431418
/* Need to run the hash functions in short-lived context */
432419
oldContext=MemoryContextSwitchTo(hashtable->tempcxt);
433420

434-
/*
435-
* Set up data needed by hash and match functions
436-
*
437-
* We save and restore CurTupleHashTable just in case someone manages to
438-
* invoke this code re-entrantly.
439-
*/
421+
/* Set up data needed by hash and match functions */
440422
hashtable->inputslot=slot;
441423
hashtable->in_hash_funcs=hashfunctions;
442424
hashtable->cur_eq_funcs=eqfunctions;
443425

444-
saveCurHT=CurTupleHashTable;
445-
CurTupleHashTable=hashtable;
446-
447426
/* Search the hash table */
448-
dummy.firstTuple=NULL;/* flag to reference inputslot */
449-
entry= (TupleHashEntry)hash_search(hashtable->hashtab,
450-
&dummy,
451-
HASH_FIND,
452-
NULL);
453-
454-
CurTupleHashTable=saveCurHT;
455-
427+
key=NULL;/* flag to reference inputslot */
428+
entry=tuplehash_lookup(hashtable->hashtab,key);
456429
MemoryContextSwitchTo(oldContext);
457430

458431
returnentry;
@@ -468,22 +441,18 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
468441
* This convention avoids the need to materialize virtual input tuples unless
469442
* they actually need to get copied into the table.
470443
*
471-
* CurTupleHashTable must be set before calling this, since dynahash.c
472-
* doesn't provide any API that would let us get at the hashtable otherwise.
473-
*
474444
* Also, the caller must select an appropriate memory context for running
475445
* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
476446
*/
477447
staticuint32
478-
TupleHashTableHash(constvoid*key,Sizekeysize)
448+
TupleHashTableHash(structtuplehash_hash*tb,constMinimalTupletuple)
479449
{
480-
MinimalTupletuple= ((constTupleHashEntryData*)key)->firstTuple;
481-
TupleTableSlot*slot;
482-
TupleHashTablehashtable=CurTupleHashTable;
450+
TupleHashTablehashtable= (TupleHashTable)tb->private;
483451
intnumCols=hashtable->numCols;
484452
AttrNumber*keyColIdx=hashtable->keyColIdx;
485-
FmgrInfo*hashfunctions;
486453
uint32hashkey=0;
454+
TupleTableSlot*slot;
455+
FmgrInfo*hashfunctions;
487456
inti;
488457

489458
if (tuple==NULL)
@@ -494,8 +463,12 @@ TupleHashTableHash(const void *key, Size keysize)
494463
}
495464
else
496465
{
497-
/* Process a tuple already stored in the table */
498-
/* (this case never actually occurs in current dynahash.c code) */
466+
/*
467+
* Process a tuple already stored in the table.
468+
*
469+
* (this case never actually occurs due to the way simplehash.h is
470+
* used, as the hash-value is stored in the entries)
471+
*/
499472
slot=hashtable->tableslot;
500473
ExecStoreMinimalTuple(tuple,slot, false);
501474
hashfunctions=hashtable->tab_hash_funcs;
@@ -530,29 +503,21 @@ TupleHashTableHash(const void *key, Size keysize)
530503
*
531504
* As above, the passed pointers are pointers to TupleHashEntryData.
532505
*
533-
* CurTupleHashTable must be set before calling this, since dynahash.c
534-
* doesn't provide any API that would let us get at the hashtable otherwise.
535-
*
536506
* Also, the caller must select an appropriate memory context for running
537507
* the compare functions. (dynahash.c doesn't change CurrentMemoryContext.)
538508
*/
539509
staticint
540-
TupleHashTableMatch(constvoid*key1,constvoid*key2,Sizekeysize)
510+
TupleHashTableMatch(structtuplehash_hash*tb,constMinimalTupletuple1,constMinimalTupletuple2)
541511
{
542-
MinimalTupletuple1= ((constTupleHashEntryData*)key1)->firstTuple;
543-
544-
#ifdefUSE_ASSERT_CHECKING
545-
MinimalTupletuple2= ((constTupleHashEntryData*)key2)->firstTuple;
546-
#endif
547512
TupleTableSlot*slot1;
548513
TupleTableSlot*slot2;
549-
TupleHashTablehashtable=CurTupleHashTable;
514+
TupleHashTablehashtable=(TupleHashTable)tb->private;
550515

551516
/*
552-
* We assume thatdynahash.c will only ever call us with the first
517+
* We assume thatsimplehash.h will only ever call us with the first
553518
* argument being an actual table entry, and the second argument being
554519
* LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
555-
* could be supported too, but is not currentlyused by dynahash.c.
520+
* could be supported too, but is not currentlyrequired.
556521
*/
557522
Assert(tuple1!=NULL);
558523
slot1=hashtable->tableslot;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp