Commit a4ccc1c

Generational memory allocator

Add new style of memory allocator, known as Generational, appropriate
for use in cases where memory is allocated and then freed in roughly
oldest first order (FIFO).

Use new allocator for logical decoding's reorderbuffer to significantly
reduce memory usage and improve performance.

Author: Tomas Vondra
Reviewed-by: Simon Riggs

1 parent: 3bae43c
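
To illustrate the allocation pattern this allocator targets, here is a minimal, hypothetical sketch (not part of the commit): it creates a Generation context with the three-argument constructor visible in the diff below, allocates a batch of variable-length chunks, and frees them oldest-first. The function name, loop bound, and chunk sizes are invented for the example.

#include "postgres.h"
#include "utils/memutils.h"

/*
 * Hypothetical sketch of the FIFO pattern the Generation allocator is
 * designed for: chunks are allocated in rough "generations" and freed
 * oldest-first, so whole blocks drain and can be given back.
 */
static void
fifo_allocation_sketch(MemoryContext parent)
{
    MemoryContext gen;
    char       *chunks[1000];
    int         i;

    /* parent context, name, block size -- the form used by this commit */
    gen = GenerationContextCreate(parent, "example", SLAB_LARGE_BLOCK_SIZE);

    /* allocate variable-length chunks, oldest first */
    for (i = 0; i < 1000; i++)
        chunks[i] = MemoryContextAlloc(gen, 64 + i % 256);

    /* free in the same (FIFO) order; blocks empty out front-to-back */
    for (i = 0; i < 1000; i++)
        pfree(chunks[i]);

    MemoryContextDelete(gen);
}

When the pattern holds, the context needs no per-chunk freelists, which is where the memory and speed savings over aset.c come from.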

File tree

8 files changed: +819 −79 lines changed


src/backend/replication/logical/reorderbuffer.c
Lines changed: 17 additions & 63 deletions

@@ -43,6 +43,12 @@
  * transaction there will be no other data carrying records between a row's
  * toast chunks and the row data itself. See ReorderBufferToast* for
  * details.
+ *
+ * ReorderBuffer uses two special memory context types - SlabContext for
+ * allocations of fixed-length structures (changes and transactions), and
+ * GenerationContext for the variable-length transaction data (allocated
+ * and freed in groups with similar lifespan).
+ *
  * -------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -150,15 +156,6 @@ typedef struct ReorderBufferDiskChange
  */
 static const Size max_changes_in_memory = 4096;
 
-/*
- * We use a very simple form of a slab allocator for frequently allocated
- * objects, simply keeping a fixed number in a linked list when unused,
- * instead pfree()ing them. Without that in many workloads aset.c becomes a
- * major bottleneck, especially when spilling to disk while decoding batch
- * workloads.
- */
-static const Size max_cached_tuplebufs = 4096 * 2;    /* ~8MB */
-
 /* ---------------------------------------
  * primary reorderbuffer support routines
  * ---------------------------------------
@@ -248,6 +245,10 @@ ReorderBufferAllocate(void)
                                         SLAB_DEFAULT_BLOCK_SIZE,
                                         sizeof(ReorderBufferTXN));
 
+    buffer->tup_context = GenerationContextCreate(new_ctx,
+                                                  "Tuples",
+                                                  SLAB_LARGE_BLOCK_SIZE);
+
     hash_ctl.keysize = sizeof(TransactionId);
     hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
     hash_ctl.hcxt = buffer->context;
@@ -258,15 +259,12 @@ ReorderBufferAllocate(void)
     buffer->by_txn_last_xid = InvalidTransactionId;
     buffer->by_txn_last_txn = NULL;
 
-    buffer->nr_cached_tuplebufs = 0;
-
     buffer->outbuf = NULL;
     buffer->outbufsize = 0;
 
     buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
     dlist_init(&buffer->toplevel_by_lsn);
-    slist_init(&buffer->cached_tuplebufs);
 
     return buffer;
 }
@@ -419,42 +417,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
     alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-    /*
-     * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for
-     * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples
-     * generated for oldtuples can be bigger, as they don't have out-of-line
-     * toast columns.
-     */
-    if (alloc_len < MaxHeapTupleSize)
-        alloc_len = MaxHeapTupleSize;
-
-
-    /* if small enough, check the slab cache */
-    if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs)
-    {
-        rb->nr_cached_tuplebufs--;
-        tuple = slist_container(ReorderBufferTupleBuf, node,
-                                slist_pop_head_node(&rb->cached_tuplebufs));
-        Assert(tuple->alloc_tuple_size == MaxHeapTupleSize);
-#ifdef USE_ASSERT_CHECKING
-        memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
-        VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData));
-#endif
-        tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-#ifdef USE_ASSERT_CHECKING
-        memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-#endif
-    }
-    else
-    {
-        tuple = (ReorderBufferTupleBuf *)
-            MemoryContextAlloc(rb->context,
-                               sizeof(ReorderBufferTupleBuf) +
-                               MAXIMUM_ALIGNOF + alloc_len);
-        tuple->alloc_tuple_size = alloc_len;
-        tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-    }
+    tuple = (ReorderBufferTupleBuf *)
+        MemoryContextAlloc(rb->tup_context,
+                           sizeof(ReorderBufferTupleBuf) +
+                           MAXIMUM_ALIGNOF + alloc_len);
+    tuple->alloc_tuple_size = alloc_len;
+    tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
     return tuple;
 }
@@ -468,21 +436,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
-    /* check whether to put into the slab cache, oversized tuples never are */
-    if (tuple->alloc_tuple_size == MaxHeapTupleSize &&
-        rb->nr_cached_tuplebufs < max_cached_tuplebufs)
-    {
-        rb->nr_cached_tuplebufs++;
-        slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-        VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
-        VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
-        VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
-    }
-    else
-    {
-        pfree(tuple);
-    }
+    pfree(tuple);
 }
 
 /*

src/backend/utils/mmgr/Makefile
Lines changed: 1 addition & 1 deletion

@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
+OBJS = aset.o dsa.o freepage.o generation.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk

src/backend/utils/mmgr/README
Lines changed: 23 additions & 0 deletions

@@ -431,3 +431,26 @@ will not allocate very much space per tuple cycle. To make this usage
 pattern cheap, the first block allocated in a context is not given
 back to malloc() during reset, but just cleared. This avoids malloc
 thrashing.
+
+
+Alternative Memory Context Implementations
+------------------------------------------
+
+aset.c is our default general-purpose implementation, working fine
+in most situations. We also have two implementations optimized for
+special use cases, providing either better performance or lower memory
+usage compared to aset.c (or both).
+
+* slab.c (SlabContext) is designed for allocations of fixed-length
+  chunks, and does not allow allocations of chunks with different size.
+
+* generation.c (GenerationContext) is designed for cases when chunks
+  are allocated in groups with similar lifespan (generations), or
+  roughly in FIFO order.
+
+Both memory contexts aim to free memory back to the operating system
+(unlike aset.c, which keeps the freed chunks in a freelist, and only
+returns the memory when reset/deleted).
+
+These memory contexts were initially developed for ReorderBuffer, but
+may be useful elsewhere as long as the allocation patterns match.
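
As a quick illustration of the distinction the README draws, the following hypothetical sketch (not from the commit) creates one context of each type, using the constructor forms visible in this commit's diffs; the MyRecord struct, context names, and sizes are invented for the example.

#include "postgres.h"
#include "utils/memutils.h"

/* invented fixed-length record type for the SlabContext */
typedef struct MyRecord
{
    int     id;
    char    payload[60];
} MyRecord;

static void
context_comparison_sketch(MemoryContext parent)
{
    MemoryContext slab;
    MemoryContext gen;
    MyRecord   *rec;
    char       *buf;

    /* SlabContext: every chunk must be exactly sizeof(MyRecord) */
    slab = SlabContextCreate(parent, "MyFixed",
                             SLAB_DEFAULT_BLOCK_SIZE, sizeof(MyRecord));

    /* GenerationContext: chunk sizes may vary, freed in roughly FIFO order */
    gen = GenerationContextCreate(parent, "MyTuples", SLAB_LARGE_BLOCK_SIZE);

    rec = (MyRecord *) MemoryContextAlloc(slab, sizeof(MyRecord));
    buf = (char *) MemoryContextAlloc(gen, 123);    /* any size is fine here */

    pfree(rec);
    pfree(buf);

    MemoryContextDelete(slab);
    MemoryContextDelete(gen);
}

This mirrors how the commit pairs the two: fixed-length bookkeeping structures go in slab contexts, while the variable-length tuple data goes in a generation context.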
