Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 45f6240

Browse files
committed
Pack tuples in a hash join batch densely, to save memory.
Instead of palloc'ing each HashJoinTuple individually, allocate 32kB chunks and pack the tuples densely in the chunks. This avoids the AllocChunk header overhead, and the space wasted by the standard allocator's habit of rounding sizes up to the nearest power of two.

This doesn't contain any planner changes, because the planner's estimate of memory usage ignores the palloc overhead. Now that the overhead is smaller, the planner's estimates are in fact more accurate.

Tomas Vondra, reviewed by Robert Haas.
1 parent 311da16 · commit 45f6240

File tree

2 files changed

+145
-28
lines changed

2 files changed

+145
-28
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static void ExecHashSkewTableInsert(HashJoinTable hashtable,
4747
intbucketNumber);
4848
staticvoidExecHashRemoveNextSkewBucket(HashJoinTablehashtable);
4949

50+
staticvoid*dense_alloc(HashJoinTablehashtable,Sizesize);
5051

5152
/* ----------------------------------------------------------------
5253
*ExecHash
@@ -293,6 +294,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
293294
hashtable->spaceUsedSkew=0;
294295
hashtable->spaceAllowedSkew=
295296
hashtable->spaceAllowed*SKEW_WORK_MEM_PERCENT /100;
297+
hashtable->chunks=NULL;
296298

297299
/*
298300
* Get info about the hash functions to be used for each hash key. Also
@@ -556,10 +558,10 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
556558
intoldnbatch=hashtable->nbatch;
557559
intcurbatch=hashtable->curbatch;
558560
intnbatch;
559-
inti;
560561
MemoryContextoldcxt;
561562
longninmemory;
562563
longnfreed;
564+
HashMemoryChunkoldchunks;
563565

564566
/* do nothing if we've decided to shut off growth */
565567
if (!hashtable->growEnabled)
@@ -612,51 +614,65 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
612614
*/
613615
ninmemory=nfreed=0;
614616

615-
for (i=0;i<hashtable->nbuckets;i++)
616-
{
617-
HashJoinTupleprevtuple;
618-
HashJoinTupletuple;
617+
/*
618+
* We will scan through the chunks directly, so that we can reset the
619+
* buckets now and not have to keep track which tuples in the buckets have
620+
* already been processed. We will free the old chunks as we go.
621+
*/
622+
memset(hashtable->buckets,0,sizeof(HashJoinTuple*)*hashtable->nbuckets);
623+
oldchunks=hashtable->chunks;
624+
hashtable->chunks=NULL;
619625

620-
prevtuple=NULL;
621-
tuple=hashtable->buckets[i];
626+
/* so, let's scan through the old chunks, and all tuples in each chunk */
627+
while (oldchunks!=NULL)
628+
{
629+
HashMemoryChunknextchunk=oldchunks->next;
630+
/* position within the buffer (up to oldchunks->used) */
631+
size_tidx=0;
622632

623-
while (tuple!=NULL)
633+
/* process all tuples stored in this chunk (and then free it) */
634+
while (idx<oldchunks->used)
624635
{
625-
/* save link in case we delete */
626-
HashJoinTuplenexttuple=tuple->next;
636+
HashJoinTuplehashTuple= (HashJoinTuple) (oldchunks->data+idx);
637+
MinimalTupletuple=HJTUPLE_MINTUPLE(hashTuple);
638+
inthashTupleSize= (HJTUPLE_OVERHEAD+tuple->t_len);
627639
intbucketno;
628640
intbatchno;
629641

630642
ninmemory++;
631-
ExecHashGetBucketAndBatch(hashtable,tuple->hashvalue,
643+
ExecHashGetBucketAndBatch(hashtable,hashTuple->hashvalue,
632644
&bucketno,&batchno);
633-
Assert(bucketno==i);
645+
634646
if (batchno==curbatch)
635647
{
636-
/* keep tuple */
637-
prevtuple=tuple;
648+
/* keep tuple in memory - copy it into the new chunk */
649+
HashJoinTuplecopyTuple=
650+
(HashJoinTuple)dense_alloc(hashtable,hashTupleSize);
651+
memcpy(copyTuple,hashTuple,hashTupleSize);
652+
653+
/* and add it back to the appropriate bucket */
654+
copyTuple->next=hashtable->buckets[bucketno];
655+
hashtable->buckets[bucketno]=copyTuple;
638656
}
639657
else
640658
{
641659
/* dump it out */
642660
Assert(batchno>curbatch);
643-
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
644-
tuple->hashvalue,
661+
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(hashTuple),
662+
hashTuple->hashvalue,
645663
&hashtable->innerBatchFile[batchno]);
646-
/* and remove from hash table */
647-
if (prevtuple)
648-
prevtuple->next=nexttuple;
649-
else
650-
hashtable->buckets[i]=nexttuple;
651-
/* prevtuple doesn't change */
652-
hashtable->spaceUsed-=
653-
HJTUPLE_OVERHEAD+HJTUPLE_MINTUPLE(tuple)->t_len;
654-
pfree(tuple);
664+
665+
hashtable->spaceUsed-=hashTupleSize;
655666
nfreed++;
656667
}
657668

658-
tuple=nexttuple;
669+
/* next tuple in this chunk */
670+
idx+=MAXALIGN(hashTupleSize);
659671
}
672+
673+
/* we're done with this chunk - free it and proceed to the next one */
674+
pfree(oldchunks);
675+
oldchunks=nextchunk;
660676
}
661677

662678
#ifdefHJDEBUG
@@ -717,8 +733,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
717733

718734
/* Create the HashJoinTuple */
719735
hashTupleSize=HJTUPLE_OVERHEAD+tuple->t_len;
720-
hashTuple= (HashJoinTuple)MemoryContextAlloc(hashtable->batchCxt,
721-
hashTupleSize);
736+
hashTuple= (HashJoinTuple)dense_alloc(hashtable,hashTupleSize);
737+
722738
hashTuple->hashvalue=hashvalue;
723739
memcpy(HJTUPLE_MINTUPLE(hashTuple),tuple,tuple->t_len);
724740

@@ -1068,6 +1084,9 @@ ExecHashTableReset(HashJoinTable hashtable)
10681084
hashtable->spaceUsed=0;
10691085

10701086
MemoryContextSwitchTo(oldcxt);
1087+
1088+
/* Forget the chunks (the memory was freed by the context reset above). */
1089+
hashtable->chunks=NULL;
10711090
}
10721091

10731092
/*
@@ -1462,3 +1481,79 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
14621481
hashtable->spaceUsedSkew=0;
14631482
}
14641483
}
1484+
1485+
/*
1486+
* Allocate 'size' bytes from the currently active HashMemoryChunk
1487+
*/
1488+
staticvoid*
1489+
dense_alloc(HashJoinTablehashtable,Sizesize)
1490+
{
1491+
HashMemoryChunknewChunk;
1492+
char*ptr;
1493+
1494+
/* just in case the size is not already aligned properly */
1495+
size=MAXALIGN(size);
1496+
1497+
/*
1498+
* If tuple size is larger than of 1/4 of chunk size, allocate a separate
1499+
* chunk.
1500+
*/
1501+
if (size>HASH_CHUNK_THRESHOLD)
1502+
{
1503+
/* allocate new chunk and put it at the beginning of the list */
1504+
newChunk= (HashMemoryChunk)MemoryContextAlloc(hashtable->batchCxt,
1505+
offsetof(HashMemoryChunkData,data)+size);
1506+
newChunk->maxlen=size;
1507+
newChunk->used=0;
1508+
newChunk->ntuples=0;
1509+
1510+
/*
1511+
* Add this chunk to the list after the first existing chunk, so that
1512+
* we don't lose the remaining space in the "current" chunk.
1513+
*/
1514+
if (hashtable->chunks!=NULL)
1515+
{
1516+
newChunk->next=hashtable->chunks->next;
1517+
hashtable->chunks->next=newChunk;
1518+
}
1519+
else
1520+
{
1521+
newChunk->next=hashtable->chunks;
1522+
hashtable->chunks=newChunk;
1523+
}
1524+
1525+
newChunk->used+=size;
1526+
newChunk->ntuples+=1;
1527+
1528+
returnnewChunk->data;
1529+
}
1530+
1531+
/*
1532+
* See if we have enough space for it in the current chunk (if any).
1533+
* If not, allocate a fresh chunk.
1534+
*/
1535+
if ((hashtable->chunks==NULL)||
1536+
(hashtable->chunks->maxlen-hashtable->chunks->used)<size)
1537+
{
1538+
/* allocate new chunk and put it at the beginning of the list */
1539+
newChunk= (HashMemoryChunk)MemoryContextAlloc(hashtable->batchCxt,
1540+
offsetof(HashMemoryChunkData,data)+HASH_CHUNK_SIZE);
1541+
1542+
newChunk->maxlen=HASH_CHUNK_SIZE;
1543+
newChunk->used=size;
1544+
newChunk->ntuples=1;
1545+
1546+
newChunk->next=hashtable->chunks;
1547+
hashtable->chunks=newChunk;
1548+
1549+
returnnewChunk->data;
1550+
}
1551+
1552+
/* There is enough space in the current chunk, let's add the tuple */
1553+
ptr=hashtable->chunks->data+hashtable->chunks->used;
1554+
hashtable->chunks->used+=size;
1555+
hashtable->chunks->ntuples+=1;
1556+
1557+
/* return pointer to the start of the tuple memory */
1558+
returnptr;
1559+
}

‎src/include/executor/hashjoin.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,25 @@ typedef struct HashSkewBucket
102102
#defineSKEW_WORK_MEM_PERCENT 2
103103
#defineSKEW_MIN_OUTER_FRACTION 0.01
104104

105+
/*
106+
* To reduce palloc overhead, the HashJoinTuples for the current batch are
107+
* packed in 32kB buffers instead of pallocing each tuple individually.
108+
*/
109+
typedefstructHashMemoryChunkData
110+
{
111+
intntuples;/* number of tuples stored in this chunk */
112+
size_tmaxlen;/* size of the buffer holding the tuples */
113+
size_tused;/* number of buffer bytes already used */
114+
115+
structHashMemoryChunkData*next;/* pointer to the next chunk (linked list) */
116+
117+
chardata[1];/* buffer allocated at the end */
118+
}HashMemoryChunkData;
119+
120+
typedefstructHashMemoryChunkData*HashMemoryChunk;
121+
122+
#defineHASH_CHUNK_SIZE(32 * 1024L)
123+
#defineHASH_CHUNK_THRESHOLD(HASH_CHUNK_SIZE / 4)
105124

106125
typedefstructHashJoinTableData
107126
{
@@ -157,6 +176,9 @@ typedef struct HashJoinTableData
157176

158177
MemoryContexthashCxt;/* context for whole-hash-join storage */
159178
MemoryContextbatchCxt;/* context for this-batch-only storage */
179+
180+
/* used for dense allocation of tuples (into linked chunks) */
181+
HashMemoryChunkchunks;/* one list for the whole batch */
160182
}HashJoinTableData;
161183

162184
#endif/* HASHJOIN_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp