Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitcc721c4

Browse files
committed
HashAgg: use Bump allocator for hash TupleHashTable entries.
The entries aren't freed until the entire hash table is destroyed, souse the Bump allocator to improve allocation speed, avoid wastingspace on the chunk header, and avoid wasting space due to thepower-of-two allocations.Discussion:https://postgr.es/m/CAApHDvqv1aNB4cM36FzRwivXrEvBO_LsG_eQ3nqDXTjECaatOQ@mail.gmail.comReviewed-by: David Rowley
1 parentcc43316 commitcc721c4

File tree

3 files changed

+104
-29
lines changed

3 files changed

+104
-29
lines changed

‎src/backend/executor/execUtils.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -322,19 +322,18 @@ CreateExprContext(EState *estate)
322322
ExprContext*
323323
CreateWorkExprContext(EState*estate)
324324
{
325-
SizeminContextSize=ALLOCSET_DEFAULT_MINSIZE;
326-
SizeinitBlockSize=ALLOCSET_DEFAULT_INITSIZE;
327325
SizemaxBlockSize=ALLOCSET_DEFAULT_MAXSIZE;
328326

329-
/* choose the maxBlockSize to be no larger than 1/16 of work_mem */
330-
while (maxBlockSize>work_mem* (Size)1024 /16)
331-
maxBlockSize >>=1;
327+
maxBlockSize=pg_prevpower2_size_t(work_mem* (Size)1024 /16);
332328

333-
if (maxBlockSize<ALLOCSET_DEFAULT_INITSIZE)
334-
maxBlockSize=ALLOCSET_DEFAULT_INITSIZE;
329+
/* But no bigger than ALLOCSET_DEFAULT_MAXSIZE */
330+
maxBlockSize=Min(maxBlockSize,ALLOCSET_DEFAULT_MAXSIZE);
335331

336-
returnCreateExprContextInternal(estate,minContextSize,
337-
initBlockSize,maxBlockSize);
332+
/* and no smaller than ALLOCSET_DEFAULT_INITSIZE */
333+
maxBlockSize=Max(maxBlockSize,ALLOCSET_DEFAULT_INITSIZE);
334+
335+
returnCreateExprContextInternal(estate,ALLOCSET_DEFAULT_MINSIZE,
336+
ALLOCSET_DEFAULT_INITSIZE,maxBlockSize);
338337
}
339338

340339
/* ----------------

‎src/backend/executor/nodeAgg.c

Lines changed: 93 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ static void build_hash_tables(AggState *aggstate);
406406
staticvoidbuild_hash_table(AggState*aggstate,intsetno,longnbuckets);
407407
staticvoidhashagg_recompile_expressions(AggState*aggstate,boolminslot,
408408
boolnullcheck);
409+
staticvoidhash_create_memory(AggState*aggstate);
409410
staticlonghash_choose_num_buckets(doublehashentrysize,
410411
longngroups,Sizememory);
411412
staticinthash_choose_num_partitions(doubleinput_groups,
@@ -1509,7 +1510,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
15091510
{
15101511
AggStatePerHashperhash=&aggstate->perhash[setno];
15111512
MemoryContextmetacxt=aggstate->hash_metacxt;
1512-
MemoryContexthashcxt=aggstate->hashcontext->ecxt_per_tuple_memory;
1513+
MemoryContexttablecxt=aggstate->hash_tablecxt;
15131514
MemoryContexttmpcxt=aggstate->tmpcontext->ecxt_per_tuple_memory;
15141515
Sizeadditionalsize;
15151516

@@ -1535,7 +1536,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
15351536
nbuckets,
15361537
additionalsize,
15371538
metacxt,
1538-
hashcxt,
1539+
tablecxt,
15391540
tmpcxt,
15401541
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
15411542
}
@@ -1706,15 +1707,19 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
17061707
tupleWidth);
17071708
SizepergroupSize=numTrans*sizeof(AggStatePerGroupData);
17081709

1709-
tupleChunkSize=CHUNKHDRSZ+tupleSize;
1710-
1711-
if (pergroupSize>0)
1712-
pergroupChunkSize=CHUNKHDRSZ+pergroupSize;
1713-
else
1714-
pergroupChunkSize=0;
1710+
/*
1711+
* Entries use the Bump allocator, so the chunk sizes are the same as the
1712+
* requested sizes.
1713+
*/
1714+
tupleChunkSize=MAXALIGN(tupleSize);
1715+
pergroupChunkSize=pergroupSize;
17151716

1717+
/*
1718+
* Transition values use AllocSet, which has a chunk header and also uses
1719+
* power-of-two allocations.
1720+
*/
17161721
if (transitionSpace>0)
1717-
transitionChunkSize=CHUNKHDRSZ+transitionSpace;
1722+
transitionChunkSize=CHUNKHDRSZ+pg_nextpower2_size_t(transitionSpace);
17181723
else
17191724
transitionChunkSize=0;
17201725

@@ -1864,8 +1869,11 @@ hash_agg_check_limits(AggState *aggstate)
18641869
uint64ngroups=aggstate->hash_ngroups_current;
18651870
Sizemeta_mem=MemoryContextMemAllocated(aggstate->hash_metacxt,
18661871
true);
1867-
Sizehashkey_mem=MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
1868-
true);
1872+
Sizeentry_mem=MemoryContextMemAllocated(aggstate->hash_tablecxt,
1873+
true);
1874+
Sizetval_mem=MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
1875+
true);
1876+
Sizetotal_mem=meta_mem+entry_mem+tval_mem;
18691877
booldo_spill= false;
18701878

18711879
#ifdefUSE_INJECTION_POINTS
@@ -1884,7 +1892,7 @@ hash_agg_check_limits(AggState *aggstate)
18841892
* can be sure to make progress even in edge cases.
18851893
*/
18861894
if (aggstate->hash_ngroups_current>0&&
1887-
(meta_mem+hashkey_mem>aggstate->hash_mem_limit||
1895+
(total_mem>aggstate->hash_mem_limit||
18881896
ngroups>aggstate->hash_ngroups_limit))
18891897
{
18901898
do_spill= true;
@@ -1939,6 +1947,7 @@ static void
19391947
hash_agg_update_metrics(AggState*aggstate,boolfrom_tape,intnpartitions)
19401948
{
19411949
Sizemeta_mem;
1950+
Sizeentry_mem;
19421951
Sizehashkey_mem;
19431952
Sizebuffer_mem;
19441953
Sizetotal_mem;
@@ -1950,7 +1959,10 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
19501959
/* memory for the hash table itself */
19511960
meta_mem=MemoryContextMemAllocated(aggstate->hash_metacxt, true);
19521961

1953-
/* memory for the group keys and transition states */
1962+
/* memory for hash entries */
1963+
entry_mem=MemoryContextMemAllocated(aggstate->hash_tablecxt, true);
1964+
1965+
/* memory for byref transition states */
19541966
hashkey_mem=MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory, true);
19551967

19561968
/* memory for read/write tape buffers, if spilled */
@@ -1959,7 +1971,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
19591971
buffer_mem+=HASHAGG_READ_BUFFER_SIZE;
19601972

19611973
/* update peak mem */
1962-
total_mem=meta_mem+hashkey_mem+buffer_mem;
1974+
total_mem=meta_mem+entry_mem+hashkey_mem+buffer_mem;
19631975
if (total_mem>aggstate->hash_mem_peak)
19641976
aggstate->hash_mem_peak=total_mem;
19651977

@@ -1981,6 +1993,64 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
19811993
}
19821994
}
19831995

1996+
/*
1997+
* Create memory contexts used for hash aggregation.
1998+
*/
1999+
staticvoid
2000+
hash_create_memory(AggState*aggstate)
2001+
{
2002+
SizemaxBlockSize=ALLOCSET_DEFAULT_MAXSIZE;
2003+
2004+
/*
2005+
* The hashcontext's per-tuple memory will be used for byref transition
2006+
* values and returned by AggCheckCallContext().
2007+
*/
2008+
aggstate->hashcontext=CreateWorkExprContext(aggstate->ss.ps.state);
2009+
2010+
/*
2011+
* The meta context will be used for the bucket array of
2012+
* TupleHashEntryData (or arrays, in the case of grouping sets). As the
2013+
* hash table grows, the bucket array will double in size and the old one
2014+
* will be freed, so an AllocSet is appropriate. For large bucket arrays,
2015+
* the large allocation path will be used, so it's not worth worrying
2016+
* about wasting space due to power-of-two allocations.
2017+
*/
2018+
aggstate->hash_metacxt=AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
2019+
"HashAgg meta context",
2020+
ALLOCSET_DEFAULT_SIZES);
2021+
2022+
/*
2023+
* The hash entries themselves, which include the grouping key
2024+
* (firstTuple) and pergroup data, are stored in the table context. The
2025+
* bump allocator can be used because the entries are not freed until the
2026+
* entire hash table is reset. The bump allocator is faster for
2027+
* allocations and avoids wasting space on the chunk header or
2028+
* power-of-two allocations.
2029+
*
2030+
* Like CreateWorkExprContext(), use smaller sizings for smaller work_mem,
2031+
* to avoid large jumps in memory usage.
2032+
*/
2033+
2034+
/*
2035+
* Like CreateWorkExprContext(), use smaller sizings for smaller work_mem,
2036+
* to avoid large jumps in memory usage.
2037+
*/
2038+
maxBlockSize=pg_prevpower2_size_t(work_mem* (Size)1024 /16);
2039+
2040+
/* But no bigger than ALLOCSET_DEFAULT_MAXSIZE */
2041+
maxBlockSize=Min(maxBlockSize,ALLOCSET_DEFAULT_MAXSIZE);
2042+
2043+
/* and no smaller than ALLOCSET_DEFAULT_INITSIZE */
2044+
maxBlockSize=Max(maxBlockSize,ALLOCSET_DEFAULT_INITSIZE);
2045+
2046+
aggstate->hash_tablecxt=BumpContextCreate(aggstate->ss.ps.state->es_query_cxt,
2047+
"HashAgg table context",
2048+
ALLOCSET_DEFAULT_MINSIZE,
2049+
ALLOCSET_DEFAULT_INITSIZE,
2050+
maxBlockSize);
2051+
2052+
}
2053+
19842054
/*
19852055
* Choose a reasonable number of buckets for the initial hash table size.
19862056
*/
@@ -2642,6 +2712,7 @@ agg_refill_hash_table(AggState *aggstate)
26422712

26432713
/* free memory and reset hash tables */
26442714
ReScanExprContext(aggstate->hashcontext);
2715+
MemoryContextReset(aggstate->hash_tablecxt);
26452716
for (intsetno=0;setno<aggstate->num_hashes;setno++)
26462717
ResetTupleHashTable(aggstate->perhash[setno].hashtable);
26472718

@@ -3326,7 +3397,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
33263397
}
33273398

33283399
if (use_hashing)
3329-
aggstate->hashcontext=CreateWorkExprContext(estate);
3400+
hash_create_memory(aggstate);
33303401

33313402
ExecAssignExprContext(estate,&aggstate->ss.ps);
33323403

@@ -3621,9 +3692,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
36213692
Plan*outerplan=outerPlan(node);
36223693
uint64totalGroups=0;
36233694

3624-
aggstate->hash_metacxt=AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
3625-
"HashAgg meta context",
3626-
ALLOCSET_DEFAULT_SIZES);
36273695
aggstate->hash_spill_rslot=ExecInitExtraTupleSlot(estate,scanDesc,
36283696
&TTSOpsMinimalTuple);
36293697
aggstate->hash_spill_wslot=ExecInitExtraTupleSlot(estate,scanDesc,
@@ -4368,6 +4436,12 @@ ExecEndAgg(AggState *node)
43684436
MemoryContextDelete(node->hash_metacxt);
43694437
node->hash_metacxt=NULL;
43704438
}
4439+
if (node->hash_tablecxt!=NULL)
4440+
{
4441+
MemoryContextDelete(node->hash_tablecxt);
4442+
node->hash_tablecxt=NULL;
4443+
}
4444+
43714445

43724446
for (transno=0;transno<node->numtrans;transno++)
43734447
{
@@ -4484,6 +4558,7 @@ ExecReScanAgg(AggState *node)
44844558
node->hash_ngroups_current=0;
44854559

44864560
ReScanExprContext(node->hashcontext);
4561+
MemoryContextReset(node->hash_tablecxt);
44874562
/* Rebuild an empty hash table */
44884563
build_hash_tables(node);
44894564
node->table_filled= false;

‎src/include/nodes/execnodes.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2560,7 +2560,8 @@ typedef struct AggState
25602560
/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
25612561
booltable_filled;/* hash table filled yet? */
25622562
intnum_hashes;
2563-
MemoryContexthash_metacxt;/* memory for hash table itself */
2563+
MemoryContexthash_metacxt;/* memory for hash table bucket array */
2564+
MemoryContexthash_tablecxt;/* memory for hash table entries */
25642565
structLogicalTapeSet*hash_tapeset;/* tape set for hash spill tapes */
25652566
structHashAggSpill*hash_spills;/* HashAggSpill for each grouping set,
25662567
* exists only during first pass */
@@ -2586,7 +2587,7 @@ typedef struct AggState
25862587
* per-group pointers */
25872588

25882589
/* support for evaluation of agg input expressions: */
2589-
#defineFIELDNO_AGGSTATE_ALL_PERGROUPS53
2590+
#defineFIELDNO_AGGSTATE_ALL_PERGROUPS54
25902591
AggStatePerGroup*all_pergroups;/* array of first ->pergroups, than
25912592
* ->hash_pergroup */
25922593
SharedAggInfo*shared_info;/* one entry per worker */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp