Commit a4ccc1c

Generational memory allocator

Add new style of memory allocator, known as Generational, appropriate
for use in cases where memory is allocated and then freed in roughly
oldest first order (FIFO).

Use new allocator for logical decoding's reorderbuffer to significantly
reduce memory usage and improve performance.

Author: Tomas Vondra
Reviewed-by: Simon Riggs

1 parent: 3bae43c
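
To illustrate the allocation pattern this allocator targets, here is a minimal, hypothetical sketch (not part of the commit): it creates a Generation context with the three-argument constructor visible in the diff below, allocates a batch of variable-length chunks, and frees them oldest-first. The function name, loop bound, and chunk sizes are invented for the example.

#include "postgres.h"
#include "utils/memutils.h"

/*
 * Hypothetical sketch of the FIFO pattern the Generation allocator is
 * designed for: chunks are allocated in rough "generations" and freed
 * oldest-first, so whole blocks drain and can be given back.
 */
static void
fifo_allocation_sketch(MemoryContext parent)
{
    MemoryContext gen;
    char       *chunks[1000];
    int         i;

    /* parent context, name, block size -- the form used by this commit */
    gen = GenerationContextCreate(parent, "example", SLAB_LARGE_BLOCK_SIZE);

    /* allocate variable-length chunks, oldest first */
    for (i = 0; i < 1000; i++)
        chunks[i] = MemoryContextAlloc(gen, 64 + i % 256);

    /* free in the same (FIFO) order; blocks empty out front-to-back */
    for (i = 0; i < 1000; i++)
        pfree(chunks[i]);

    MemoryContextDelete(gen);
}

When the pattern holds, the context needs no per-chunk freelists, which is where the memory and speed savings over aset.c come from.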

File tree

8 files changed: +819 −79 lines changed


src/backend/replication/logical/reorderbuffer.c
Lines changed: 17 additions & 63 deletions

@@ -43,6 +43,12 @@
  * transaction there will be no other data carrying records between a row's
  * toast chunks and the row data itself. See ReorderBufferToast* for
  * details.
+ *
+ * ReorderBuffer uses two special memory context types - SlabContext for
+ * allocations of fixed-length structures (changes and transactions), and
+ * GenerationContext for the variable-length transaction data (allocated
+ * and freed in groups with similar lifespan).
+ *
  * -------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -150,15 +156,6 @@ typedef struct ReorderBufferDiskChange
  */
 static const Size max_changes_in_memory = 4096;
 
-/*
- * We use a very simple form of a slab allocator for frequently allocated
- * objects, simply keeping a fixed number in a linked list when unused,
- * instead pfree()ing them. Without that in many workloads aset.c becomes a
- * major bottleneck, especially when spilling to disk while decoding batch
- * workloads.
- */
-static const Size max_cached_tuplebufs = 4096 * 2;    /* ~8MB */
-
 /* ---------------------------------------
  * primary reorderbuffer support routines
  * ---------------------------------------
@@ -248,6 +245,10 @@ ReorderBufferAllocate(void)
                                         SLAB_DEFAULT_BLOCK_SIZE,
                                         sizeof(ReorderBufferTXN));
 
+    buffer->tup_context = GenerationContextCreate(new_ctx,
+                                                  "Tuples",
+                                                  SLAB_LARGE_BLOCK_SIZE);
+
     hash_ctl.keysize = sizeof(TransactionId);
     hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
     hash_ctl.hcxt = buffer->context;
@@ -258,15 +259,12 @@ ReorderBufferAllocate(void)
     buffer->by_txn_last_xid = InvalidTransactionId;
     buffer->by_txn_last_txn = NULL;
 
-    buffer->nr_cached_tuplebufs = 0;
-
     buffer->outbuf = NULL;
     buffer->outbufsize = 0;
 
     buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
     dlist_init(&buffer->toplevel_by_lsn);
-    slist_init(&buffer->cached_tuplebufs);
 
     return buffer;
 }
@@ -419,42 +417,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
     alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-    /*
-     * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for
-     * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples
-     * generated for oldtuples can be bigger, as they don't have out-of-line
-     * toast columns.
-     */
-    if (alloc_len < MaxHeapTupleSize)
-        alloc_len = MaxHeapTupleSize;
-
-
-    /* if small enough, check the slab cache */
-    if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs)
-    {
-        rb->nr_cached_tuplebufs--;
-        tuple = slist_container(ReorderBufferTupleBuf, node,
-                                slist_pop_head_node(&rb->cached_tuplebufs));
-        Assert(tuple->alloc_tuple_size == MaxHeapTupleSize);
-#ifdef USE_ASSERT_CHECKING
-        memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
-        VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData));
-#endif
-        tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-#ifdef USE_ASSERT_CHECKING
-        memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-#endif
-    }
-    else
-    {
-        tuple = (ReorderBufferTupleBuf *)
-            MemoryContextAlloc(rb->context,
-                               sizeof(ReorderBufferTupleBuf) +
-                               MAXIMUM_ALIGNOF + alloc_len);
-        tuple->alloc_tuple_size = alloc_len;
-        tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-    }
+    tuple = (ReorderBufferTupleBuf *)
+        MemoryContextAlloc(rb->tup_context,
+                           sizeof(ReorderBufferTupleBuf) +
+                           MAXIMUM_ALIGNOF + alloc_len);
+    tuple->alloc_tuple_size = alloc_len;
+    tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
     return tuple;
 }
@@ -468,21 +436,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
-    /* check whether to put into the slab cache, oversized tuples never are */
-    if (tuple->alloc_tuple_size == MaxHeapTupleSize &&
-        rb->nr_cached_tuplebufs < max_cached_tuplebufs)
-    {
-        rb->nr_cached_tuplebufs++;
-        slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
-        VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
-        VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
-    }
-    else
-    {
-        pfree(tuple);
-    }
+    pfree(tuple);
 }
 
 /*

src/backend/utils/mmgr/Makefile
Lines changed: 1 addition & 1 deletion

@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
+OBJS = aset.o dsa.o freepage.o generation.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk

src/backend/utils/mmgr/README
Lines changed: 23 additions & 0 deletions

@@ -431,3 +431,26 @@ will not allocate very much space per tuple cycle. To make this usage
 pattern cheap, the first block allocated in a context is not given
 back to malloc() during reset, but just cleared. This avoids malloc
 thrashing.
+
+
+Alternative Memory Context Implementations
+------------------------------------------
+
+aset.c is our default general-purpose implementation, working fine
+in most situations. We also have two implementations optimized for
+special use cases, providing either better performance or lower memory
+usage compared to aset.c (or both).
+
+* slab.c (SlabContext) is designed for allocations of fixed-length
+  chunks, and does not allow allocations of chunks with different size.
+
+* generation.c (GenerationContext) is designed for cases when chunks
+  are allocated in groups with similar lifespan (generations), or
+  roughly in FIFO order.
+
+Both memory contexts aim to free memory back to the operating system
+(unlike aset.c, which keeps the freed chunks in a freelist, and only
+returns the memory when reset/deleted).
+
+These memory contexts were initially developed for ReorderBuffer, but
+may be useful elsewhere as long as the allocation patterns match.
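
As a quick illustration of the distinction the README draws, the following hypothetical sketch (not from the commit) creates one context of each type, using the constructor forms visible in this commit's diffs; the MyRecord struct, context names, and sizes are invented for the example.

#include "postgres.h"
#include "utils/memutils.h"

/* invented fixed-length record type for the SlabContext */
typedef struct MyRecord
{
    int     id;
    char    payload[60];
} MyRecord;

static void
context_comparison_sketch(MemoryContext parent)
{
    MemoryContext slab;
    MemoryContext gen;
    MyRecord   *rec;
    char       *buf;

    /* SlabContext: every chunk must be exactly sizeof(MyRecord) */
    slab = SlabContextCreate(parent, "MyFixed",
                             SLAB_DEFAULT_BLOCK_SIZE, sizeof(MyRecord));

    /* GenerationContext: chunk sizes may vary, freed in roughly FIFO order */
    gen = GenerationContextCreate(parent, "MyTuples", SLAB_LARGE_BLOCK_SIZE);

    rec = (MyRecord *) MemoryContextAlloc(slab, sizeof(MyRecord));
    buf = (char *) MemoryContextAlloc(gen, 123);    /* any size is fine here */

    pfree(rec);
    pfree(buf);

    MemoryContextDelete(slab);
    MemoryContextDelete(gen);
}

This mirrors how the commit pairs the two: fixed-length bookkeeping structures go in slab contexts, while the variable-length tuple data goes in a generation context.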
