Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 45f6240

Browse files
committed
Pack tuples in a hash join batch densely, to save memory.
Instead of palloc'ing each HashJoinTuple individually, allocate 32kB chunks and pack the tuples densely in the chunks. This avoids the AllocChunk header overhead, and the space wasted by the standard allocator's habit of rounding sizes up to the nearest power of two.

This doesn't contain any planner changes, because the planner's estimate of memory usage ignores the palloc overhead. Now that the overhead is smaller, the planner's estimates are in fact more accurate.

Tomas Vondra, reviewed by Robert Haas.
1 parent 311da16 · commit 45f6240

File tree

2 files changed

+145
-28
lines changed

2 files changed

+145
-28
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static void ExecHashSkewTableInsert(HashJoinTable hashtable,
4747
intbucketNumber);
4848
staticvoidExecHashRemoveNextSkewBucket(HashJoinTablehashtable);
4949

50+
staticvoid*dense_alloc(HashJoinTablehashtable,Sizesize);
5051

5152
/* ----------------------------------------------------------------
5253
*ExecHash
@@ -293,6 +294,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
293294
hashtable->spaceUsedSkew=0;
294295
hashtable->spaceAllowedSkew=
295296
hashtable->spaceAllowed*SKEW_WORK_MEM_PERCENT /100;
297+
hashtable->chunks=NULL;
296298

297299
/*
298300
* Get info about the hash functions to be used for each hash key. Also
@@ -556,10 +558,10 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
556558
intoldnbatch=hashtable->nbatch;
557559
intcurbatch=hashtable->curbatch;
558560
intnbatch;
559-
inti;
560561
MemoryContextoldcxt;
561562
longninmemory;
562563
longnfreed;
564+
HashMemoryChunkoldchunks;
563565

564566
/* do nothing if we've decided to shut off growth */
565567
if (!hashtable->growEnabled)
@@ -612,51 +614,65 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
612614
*/
613615
ninmemory=nfreed=0;
614616

615-
for (i=0;i<hashtable->nbuckets;i++)
616-
{
617-
HashJoinTupleprevtuple;
618-
HashJoinTupletuple;
617+
/*
618+
* We will scan through the chunks directly, so that we can reset the
619+
* buckets now and not have to keep track which tuples in the buckets have
620+
* already been processed. We will free the old chunks as we go.
621+
*/
622+
memset(hashtable->buckets,0,sizeof(HashJoinTuple*)*hashtable->nbuckets);
623+
oldchunks=hashtable->chunks;
624+
hashtable->chunks=NULL;
619625

620-
prevtuple=NULL;
621-
tuple=hashtable->buckets[i];
626+
/* so, let's scan through the old chunks, and all tuples in each chunk */
627+
while (oldchunks!=NULL)
628+
{
629+
HashMemoryChunknextchunk=oldchunks->next;
630+
/* position within the buffer (up to oldchunks->used) */
631+
size_tidx=0;
622632

623-
while (tuple!=NULL)
633+
/* process all tuples stored in this chunk (and then free it) */
634+
while (idx<oldchunks->used)
624635
{
625-
/* save link in case we delete */
626-
HashJoinTuplenexttuple=tuple->next;
636+
HashJoinTuplehashTuple= (HashJoinTuple) (oldchunks->data+idx);
637+
MinimalTupletuple=HJTUPLE_MINTUPLE(hashTuple);
638+
inthashTupleSize= (HJTUPLE_OVERHEAD+tuple->t_len);
627639
intbucketno;
628640
intbatchno;
629641

630642
ninmemory++;
631-
ExecHashGetBucketAndBatch(hashtable,tuple->hashvalue,
643+
ExecHashGetBucketAndBatch(hashtable,hashTuple->hashvalue,
632644
&bucketno,&batchno);
633-
Assert(bucketno==i);
645+
634646
if (batchno==curbatch)
635647
{
636-
/* keep tuple */
637-
prevtuple=tuple;
648+
/* keep tuple in memory - copy it into the new chunk */
649+
HashJoinTuplecopyTuple=
650+
(HashJoinTuple)dense_alloc(hashtable,hashTupleSize);
651+
memcpy(copyTuple,hashTuple,hashTupleSize);
652+
653+
/* and add it back to the appropriate bucket */
654+
copyTuple->next=hashtable->buckets[bucketno];
655+
hashtable->buckets[bucketno]=copyTuple;
638656
}
639657
else
640658
{
641659
/* dump it out */
642660
Assert(batchno>curbatch);
643-
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
644-
tuple->hashvalue,
661+
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(hashTuple),
662+
hashTuple->hashvalue,
645663
&hashtable->innerBatchFile[batchno]);
646-
/* and remove from hash table */
647-
if (prevtuple)
648-
prevtuple->next=nexttuple;
649-
else
650-
hashtable->buckets[i]=nexttuple;
651-
/* prevtuple doesn't change */
652-
hashtable->spaceUsed-=
653-
HJTUPLE_OVERHEAD+HJTUPLE_MINTUPLE(tuple)->t_len;
654-
pfree(tuple);
664+
665+
hashtable->spaceUsed-=hashTupleSize;
655666
nfreed++;
656667
}
657668

658-
tuple=nexttuple;
669+
/* next tuple in this chunk */
670+
idx+=MAXALIGN(hashTupleSize);
659671
}
672+
673+
/* we're done with this chunk - free it and proceed to the next one */
674+
pfree(oldchunks);
675+
oldchunks=nextchunk;
660676
}
661677

662678
#ifdefHJDEBUG
@@ -717,8 +733,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
717733

718734
/* Create the HashJoinTuple */
719735
hashTupleSize=HJTUPLE_OVERHEAD+tuple->t_len;
720-
hashTuple= (HashJoinTuple)MemoryContextAlloc(hashtable->batchCxt,
721-
hashTupleSize);
736+
hashTuple= (HashJoinTuple)dense_alloc(hashtable,hashTupleSize);
737+
722738
hashTuple->hashvalue=hashvalue;
723739
memcpy(HJTUPLE_MINTUPLE(hashTuple),tuple,tuple->t_len);
724740

@@ -1068,6 +1084,9 @@ ExecHashTableReset(HashJoinTable hashtable)
10681084
hashtable->spaceUsed=0;
10691085

10701086
MemoryContextSwitchTo(oldcxt);
1087+
1088+
/* Forget the chunks (the memory was freed by the context reset above). */
1089+
hashtable->chunks=NULL;
10711090
}
10721091

10731092
/*
@@ -1462,3 +1481,79 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
14621481
hashtable->spaceUsedSkew=0;
14631482
}
14641483
}
1484+
1485+
/*
1486+
* Allocate 'size' bytes from the currently active HashMemoryChunk
1487+
*/
1488+
staticvoid*
1489+
dense_alloc(HashJoinTablehashtable,Sizesize)
1490+
{
1491+
HashMemoryChunknewChunk;
1492+
char*ptr;
1493+
1494+
/* just in case the size is not already aligned properly */
1495+
size=MAXALIGN(size);
1496+
1497+
/*
1498+
* If tuple size is larger than of 1/4 of chunk size, allocate a separate
1499+
* chunk.
1500+
*/
1501+
if (size>HASH_CHUNK_THRESHOLD)
1502+
{
1503+
/* allocate new chunk and put it at the beginning of the list */
1504+
newChunk= (HashMemoryChunk)MemoryContextAlloc(hashtable->batchCxt,
1505+
offsetof(HashMemoryChunkData,data)+size);
1506+
newChunk->maxlen=size;
1507+
newChunk->used=0;
1508+
newChunk->ntuples=0;
1509+
1510+
/*
1511+
* Add this chunk to the list after the first existing chunk, so that
1512+
* we don't lose the remaining space in the "current" chunk.
1513+
*/
1514+
if (hashtable->chunks!=NULL)
1515+
{
1516+
newChunk->next=hashtable->chunks->next;
1517+
hashtable->chunks->next=newChunk;
1518+
}
1519+
else
1520+
{
1521+
newChunk->next=hashtable->chunks;
1522+
hashtable->chunks=newChunk;
1523+
}
1524+
1525+
newChunk->used+=size;
1526+
newChunk->ntuples+=1;
1527+
1528+
returnnewChunk->data;
1529+
}
1530+
1531+
/*
1532+
* See if we have enough space for it in the current chunk (if any).
1533+
* If not, allocate a fresh chunk.
1534+
*/
1535+
if ((hashtable->chunks==NULL)||
1536+
(hashtable->chunks->maxlen-hashtable->chunks->used)<size)
1537+
{
1538+
/* allocate new chunk and put it at the beginning of the list */
1539+
newChunk= (HashMemoryChunk)MemoryContextAlloc(hashtable->batchCxt,
1540+
offsetof(HashMemoryChunkData,data)+HASH_CHUNK_SIZE);
1541+
1542+
newChunk->maxlen=HASH_CHUNK_SIZE;
1543+
newChunk->used=size;
1544+
newChunk->ntuples=1;
1545+
1546+
newChunk->next=hashtable->chunks;
1547+
hashtable->chunks=newChunk;
1548+
1549+
returnnewChunk->data;
1550+
}
1551+
1552+
/* There is enough space in the current chunk, let's add the tuple */
1553+
ptr=hashtable->chunks->data+hashtable->chunks->used;
1554+
hashtable->chunks->used+=size;
1555+
hashtable->chunks->ntuples+=1;
1556+
1557+
/* return pointer to the start of the tuple memory */
1558+
returnptr;
1559+
}

‎src/include/executor/hashjoin.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,25 @@ typedef struct HashSkewBucket
102102
#defineSKEW_WORK_MEM_PERCENT 2
103103
#defineSKEW_MIN_OUTER_FRACTION 0.01
104104

105+
/*
106+
* To reduce palloc overhead, the HashJoinTuples for the current batch are
107+
* packed in 32kB buffers instead of pallocing each tuple individually.
108+
*/
109+
typedefstructHashMemoryChunkData
110+
{
111+
intntuples;/* number of tuples stored in this chunk */
112+
size_tmaxlen;/* size of the buffer holding the tuples */
113+
size_tused;/* number of buffer bytes already used */
114+
115+
structHashMemoryChunkData*next;/* pointer to the next chunk (linked list) */
116+
117+
chardata[1];/* buffer allocated at the end */
118+
}HashMemoryChunkData;
119+
120+
typedefstructHashMemoryChunkData*HashMemoryChunk;
121+
122+
#defineHASH_CHUNK_SIZE(32 * 1024L)
123+
#defineHASH_CHUNK_THRESHOLD(HASH_CHUNK_SIZE / 4)
105124

106125
typedefstructHashJoinTableData
107126
{
@@ -157,6 +176,9 @@ typedef struct HashJoinTableData
157176

158177
MemoryContexthashCxt;/* context for whole-hash-join storage */
159178
MemoryContextbatchCxt;/* context for this-batch-only storage */
179+
180+
/* used for dense allocation of tuples (into linked chunks) */
181+
HashMemoryChunkchunks;/* one list for the whole batch */
160182
}HashJoinTableData;
161183

162184
#endif/* HASHJOIN_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp