Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0fe5f64

Browse files
committed
Teach radix tree to embed values at runtime
Previously, the decision to store values in leaves or within the childpointer was made at compile time, with variable length values usingleaves by necessity. This commit allows introspecting the length ofvariable length values at runtime for that decision. This requiresthe ability to tell whether the last-level child pointer is actuallya value, so we use a pointer tag in the lowest level bit.Use this in TID store. This entails adding a byte to the header toreserve space for the tag. Commitf35bd9b stores up to three offsetswithin the header with no bitmap, and now the header can be embeddedas above. This reduces worst-case memory usage when TIDs are sparse.Reviewed (in an earlier version) by Masahiko SawadaDiscussion:https://postgr.es/m/CANWCAZYw+_KAaUNruhJfE=h6WgtBKeDG32St8vBJBEY82bGVRQ@mail.gmail.comDiscussion:https://postgr.es/m/CAD21AoBci3Hujzijubomo1tdwH3XtQ9F89cTNQ4bsQijOmqnEw@mail.gmail.com
1 parentf35bd9b commit0fe5f64

File tree

2 files changed

+89
-24
lines changed

2 files changed

+89
-24
lines changed

‎src/backend/access/common/tidstore.c

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,58 @@
3535
#defineWORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
3636

3737
/* number of offsets we can store in the header of a BlocktableEntry */
38-
#defineNUM_FULL_OFFSETS ((sizeof(bitmapword) - sizeof(uint16)) / sizeof(OffsetNumber))
38+
#defineNUM_FULL_OFFSETS ((sizeof(uintptr_t) - sizeof(uint8) - sizeof(int8)) / sizeof(OffsetNumber))
3939

4040
/*
4141
* This is named similarly to PagetableEntry in tidbitmap.c
4242
* because the two have a similar function.
4343
*/
4444
typedefstructBlocktableEntry
4545
{
46-
uint16nwords;
47-
48-
/*
49-
* We can store a small number of offsets here to avoid wasting space with
50-
* a sparse bitmap.
51-
*/
52-
OffsetNumberfull_offsets[NUM_FULL_OFFSETS];
46+
union
47+
{
48+
struct
49+
{
50+
#ifndefWORDS_BIGENDIAN
51+
/*
52+
* We need to position this member so that the backing radix tree
53+
* can use the lowest bit for a pointer tag. In particular, it
54+
* must be placed within 'header' so that it corresponds to the
55+
* lowest byte in 'ptr'. We position 'nwords' along with it to
56+
* avoid struct padding.
57+
*/
58+
uint8flags;
59+
60+
int8nwords;
61+
#endif
62+
63+
/*
64+
* We can store a small number of offsets here to avoid wasting
65+
* space with a sparse bitmap.
66+
*/
67+
OffsetNumberfull_offsets[NUM_FULL_OFFSETS];
68+
69+
#ifdefWORDS_BIGENDIAN
70+
int8nwords;
71+
uint8flags;
72+
#endif
73+
};
74+
uintptr_tptr;
75+
}header;
5376

5477
bitmapwordwords[FLEXIBLE_ARRAY_MEMBER];
5578
}BlocktableEntry;
79+
80+
/*
81+
* The type of 'nwords' limits the max number of words in the 'words' array.
82+
* This computes the max offset we can actually store in the bitmap. In
83+
* practice, it's almost always the same as MaxOffsetNumber.
84+
*/
85+
#defineMAX_OFFSET_IN_BITMAP Min(BITS_PER_BITMAPWORD * PG_INT8_MAX - 1, MaxOffsetNumber)
86+
5687
#defineMaxBlocktableEntrySize \
5788
offsetof(BlocktableEntry, words) + \
58-
(sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber))
89+
(sizeof(bitmapword) * WORDS_PER_PAGE(MAX_OFFSET_IN_BITMAP))
5990

6091
#defineRT_PREFIX local_ts
6192
#defineRT_SCOPE static
@@ -64,7 +95,8 @@ typedef struct BlocktableEntry
6495
#defineRT_VALUE_TYPE BlocktableEntry
6596
#defineRT_VARLEN_VALUE_SIZE(page) \
6697
(offsetof(BlocktableEntry, words) + \
67-
sizeof(bitmapword) * (page)->nwords)
98+
sizeof(bitmapword) * (page)->header.nwords)
99+
#defineRT_RUNTIME_EMBEDDABLE_VALUE
68100
#include"lib/radixtree.h"
69101

70102
#defineRT_PREFIX shared_ts
@@ -75,7 +107,8 @@ typedef struct BlocktableEntry
75107
#defineRT_VALUE_TYPE BlocktableEntry
76108
#defineRT_VARLEN_VALUE_SIZE(page) \
77109
(offsetof(BlocktableEntry, words) + \
78-
sizeof(bitmapword) * (page)->nwords)
110+
sizeof(bitmapword) * (page)->header.nwords)
111+
#defineRT_RUNTIME_EMBEDDABLE_VALUE
79112
#include"lib/radixtree.h"
80113

81114
/* Per-backend state for a TidStore */
@@ -350,13 +383,13 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
350383
OffsetNumberoff=offsets[i];
351384

352385
/* safety check to ensure we don't overrun bit array bounds */
353-
if (!OffsetNumberIsValid(off))
386+
if (off==InvalidOffsetNumber||off>MAX_OFFSET_IN_BITMAP)
354387
elog(ERROR,"tuple offset out of range: %u",off);
355388

356-
page->full_offsets[i]=off;
389+
page->header.full_offsets[i]=off;
357390
}
358391

359-
page->nwords=0;
392+
page->header.nwords=0;
360393
}
361394
else
362395
{
@@ -371,7 +404,7 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
371404
OffsetNumberoff=offsets[idx];
372405

373406
/* safety check to ensure we don't overrun bit array bounds */
374-
if (!OffsetNumberIsValid(off))
407+
if (off==InvalidOffsetNumber||off>MAX_OFFSET_IN_BITMAP)
375408
elog(ERROR,"tuple offset out of range: %u",off);
376409

377410
if (off >=next_word_threshold)
@@ -385,8 +418,8 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
385418
page->words[wordnum]=word;
386419
}
387420

388-
page->nwords=wordnum;
389-
Assert(page->nwords==WORDS_PER_PAGE(offsets[num_offsets-1]));
421+
page->header.nwords=wordnum;
422+
Assert(page->header.nwords==WORDS_PER_PAGE(offsets[num_offsets-1]));
390423
}
391424

392425
if (TidStoreIsShared(ts))
@@ -414,12 +447,12 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
414447
if (page==NULL)
415448
return false;
416449

417-
if (page->nwords==0)
450+
if (page->header.nwords==0)
418451
{
419452
/* we have offsets in the header */
420453
for (inti=0;i<NUM_FULL_OFFSETS;i++)
421454
{
422-
if (page->full_offsets[i]==off)
455+
if (page->header.full_offsets[i]==off)
423456
return true;
424457
}
425458
return false;
@@ -430,7 +463,7 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
430463
bitnum=BITNUM(off);
431464

432465
/* no bitmap for the off */
433-
if (wordnum >=page->nwords)
466+
if (wordnum >=page->header.nwords)
434467
return false;
435468

436469
return (page->words[wordnum]& ((bitmapword)1 <<bitnum))!=0;
@@ -554,18 +587,18 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
554587
result->num_offsets=0;
555588
result->blkno=blkno;
556589

557-
if (page->nwords==0)
590+
if (page->header.nwords==0)
558591
{
559592
/* we have offsets in the header */
560593
for (inti=0;i<NUM_FULL_OFFSETS;i++)
561594
{
562-
if (page->full_offsets[i]!=InvalidOffsetNumber)
563-
result->offsets[result->num_offsets++]=page->full_offsets[i];
595+
if (page->header.full_offsets[i]!=InvalidOffsetNumber)
596+
result->offsets[result->num_offsets++]=page->header.full_offsets[i];
564597
}
565598
}
566599
else
567600
{
568-
for (wordnum=0;wordnum<page->nwords;wordnum++)
601+
for (wordnum=0;wordnum<page->header.nwords;wordnum++)
569602
{
570603
bitmapwordw=page->words[wordnum];
571604
intoff=wordnum*BITS_PER_BITMAPWORD;

‎src/include/lib/radixtree.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@
105105
* involving a pointer to the value type, to calculate size.
106106
* NOTE: implies that the value is in fact variable-length,
107107
* so do not set for fixed-length values.
108+
* - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
109+
* storing the value in a child pointer slot, rather than as a single-
110+
* value leaf, if small enough. This requires that the value, when
111+
* read as a child pointer, can be tagged in the lowest bit.
108112
*
109113
* Optional parameters:
110114
* - RT_SHMEM - if defined, the radix tree is created in the DSA area
@@ -437,7 +441,13 @@ static inline bool
437441
RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE*value_p)
438442
{
439443
#ifdefRT_VARLEN_VALUE_SIZE
444+
445+
#ifdefRT_RUNTIME_EMBEDDABLE_VALUE
446+
returnRT_GET_VALUE_SIZE(value_p) <=sizeof(RT_PTR_ALLOC);
447+
#else
440448
return false;
449+
#endif
450+
441451
#else
442452
returnRT_GET_VALUE_SIZE(value_p) <=sizeof(RT_PTR_ALLOC);
443453
#endif
@@ -451,7 +461,19 @@ static inline bool
451461
RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOCchild)
452462
{
453463
#ifdefRT_VARLEN_VALUE_SIZE
464+
465+
#ifdefRT_RUNTIME_EMBEDDABLE_VALUE
466+
/* check for pointer tag */
467+
#ifdefRT_SHMEM
468+
returnchild&1;
469+
#else
470+
return ((uintptr_t)child)&1;
471+
#endif
472+
473+
#else
454474
return false;
475+
#endif
476+
455477
#else
456478
returnsizeof(RT_VALUE_TYPE) <=sizeof(RT_PTR_ALLOC);
457479
#endif
@@ -1729,6 +1751,15 @@ RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p)
17291751
{
17301752
/* store value directly in child pointer slot */
17311753
memcpy(slot,value_p,value_sz);
1754+
1755+
#ifdefRT_RUNTIME_EMBEDDABLE_VALUE
1756+
/* tag child pointer */
1757+
#ifdefRT_SHMEM
1758+
*slot |=1;
1759+
#else
1760+
*((uintptr_t*)slot) |=1;
1761+
#endif
1762+
#endif
17321763
}
17331764
else
17341765
{
@@ -2888,6 +2919,7 @@ RT_DUMP_NODE(RT_NODE * node)
28882919
#undef RT_DEFINE
28892920
#undef RT_VALUE_TYPE
28902921
#undef RT_VARLEN_VALUE_SIZE
2922+
#undef RT_RUNTIME_EMBEDDABLE_VALUE
28912923
#undef RT_SHMEM
28922924
#undef RT_USE_DELETE
28932925
#undef RT_DEBUG

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp