Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b1328d7

Browse files
committed
Invent PageIndexTupleOverwrite, and teach BRIN and GiST to use it.
PageIndexTupleOverwrite performs approximately the same function as PageIndexTupleDelete (or PageIndexDeleteNoCompact) followed by PageAddItem targeting the same item pointer offset. But in the case where the new tuple is the same size as the old, it avoids shuffling other data around on the page, because the new tuple is placed where the old one was rather than being appended to the end of the page. This has been shown to provide a substantial speedup for some GiST use-cases.

Also, this change allows some API simplifications: we can get rid of the rather klugy and error-prone PAI_ALLOW_FAR_OFFSET flag for PageAddItemExtended, since that was used only to cover a corner case for BRIN that's better expressed by using PageIndexTupleOverwrite.

Note that this patch causes a rather subtle WAL incompatibility: the physical page content change represented by certain WAL records is now different than it was before, because while the tuples have the same itempointer line numbers, the tuples themselves are in different places. I have not bumped the WAL version number because I think it doesn't matter unless you are trying to do bitwise comparisons of original and replayed pages, and in any case we're early in a devel cycle and there will probably be more WAL changes before v10 gets out the door.

There is probably room to make use of PageIndexTupleOverwrite in SP-GiST and GIN too, but that is left for a future patch.

Andrey Borodin, reviewed by Anastasia Lubennikova, whacked around a bit by me

Discussion: <CAJEAwVGQjGGOj6mMSgMwGvtFd5Kwe6VFAxY=uEPZWMDjzbn4VQ@mail.gmail.com>
1 parent 5c609a7 commit b1328d7

File tree

6 files changed

+179
-40
lines changed

6 files changed

+179
-40
lines changed

‎src/backend/access/brin/brin_pageops.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,8 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
178178
}
179179

180180
START_CRIT_SECTION();
181-
PageIndexDeleteNoCompact(oldpage,&oldoff,1);
182-
if (PageAddItemExtended(oldpage, (Item)newtup,newsz,oldoff,
183-
PAI_OVERWRITE |PAI_ALLOW_FAR_OFFSET)==InvalidOffsetNumber)
184-
elog(ERROR,"failed to add BRIN tuple");
181+
if (!PageIndexTupleOverwrite(oldpage,oldoff, (Item)newtup,newsz))
182+
elog(ERROR,"failed to replace BRIN tuple");
185183
MarkBufferDirty(oldbuf);
186184

187185
/* XLOG stuff */

‎src/backend/access/brin/brin_xlog.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,9 @@ brin_xlog_samepage_update(XLogReaderState *record)
189189
page= (Page)BufferGetPage(buffer);
190190

191191
offnum=xlrec->offnum;
192-
if (PageGetMaxOffsetNumber(page)+1<offnum)
193-
elog(PANIC,"brin_xlog_samepage_update: invalid max offset number");
194192

195-
PageIndexDeleteNoCompact(page,&offnum,1);
196-
offnum=PageAddItemExtended(page, (Item)brintuple,tuplen,offnum,
197-
PAI_OVERWRITE |PAI_ALLOW_FAR_OFFSET);
198-
if (offnum==InvalidOffsetNumber)
199-
elog(PANIC,"brin_xlog_samepage_update: failed to add tuple");
193+
if (!PageIndexTupleOverwrite(page,offnum, (Item)brintuple,tuplen))
194+
elog(PANIC,"brin_xlog_samepage_update: failed to replace tuple");
200195

201196
PageSetLSN(page,lsn);
202197
MarkBufferDirty(buffer);

‎src/backend/access/gist/gist.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -493,18 +493,36 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
493493
else
494494
{
495495
/*
496-
* Enough space.  We also get here if ntuples==0.
496+
* Enough space.  We always get here if ntup==0.
497497
*/
498498
START_CRIT_SECTION();
499499

500500
/*
501-
* While we delete only one tuple at once we could mix calls
502-
* PageIndexTupleDelete() here and PageIndexMultiDelete() in
503-
* gistRedoPageUpdateRecord()
501+
* Delete old tuple if any, then insert new tuple(s) if any. If
502+
* possible, use the fast path of PageIndexTupleOverwrite.
504503
*/
505504
if (OffsetNumberIsValid(oldoffnum))
506-
PageIndexTupleDelete(page,oldoffnum);
507-
gistfillbuffer(page,itup,ntup,InvalidOffsetNumber);
505+
{
506+
if (ntup==1)
507+
{
508+
/* One-for-one replacement, so use PageIndexTupleOverwrite */
509+
if (!PageIndexTupleOverwrite(page,oldoffnum, (Item)*itup,
510+
IndexTupleSize(*itup)))
511+
elog(ERROR,"failed to add item to index page in \"%s\"",
512+
RelationGetRelationName(rel));
513+
}
514+
else
515+
{
516+
/* Delete old, then append new tuple(s) to page */
517+
PageIndexTupleDelete(page,oldoffnum);
518+
gistfillbuffer(page,itup,ntup,InvalidOffsetNumber);
519+
}
520+
}
521+
else
522+
{
523+
/* Just append new tuples at the end of the page */
524+
gistfillbuffer(page,itup,ntup,InvalidOffsetNumber);
525+
}
508526

509527
MarkBufferDirty(buffer);
510528

‎src/backend/access/gist/gistxlog.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,31 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
8080

8181
page= (Page)BufferGetPage(buffer);
8282

83-
/* Delete old tuples */
84-
if (xldata->ntodelete>0)
83+
if (xldata->ntodelete==1&&xldata->ntoinsert==1)
8584
{
85+
/*
86+
* When replacing one tuple with one other tuple, we must use
87+
* PageIndexTupleOverwrite for consistency with gistplacetopage.
88+
*/
89+
OffsetNumberoffnum=*((OffsetNumber*)data);
90+
IndexTupleitup;
91+
Sizeitupsize;
92+
93+
data+=sizeof(OffsetNumber);
94+
itup= (IndexTuple)data;
95+
itupsize=IndexTupleSize(itup);
96+
if (!PageIndexTupleOverwrite(page,offnum, (Item)itup,itupsize))
97+
elog(ERROR,"failed to add item to GiST index page, size %d bytes",
98+
(int)itupsize);
99+
data+=itupsize;
100+
/* should be nothing left after consuming 1 tuple */
101+
Assert(data-begin==datalen);
102+
/* update insertion count for assert check below */
103+
ninserted++;
104+
}
105+
elseif (xldata->ntodelete>0)
106+
{
107+
/* Otherwise, delete old tuples if any */
86108
OffsetNumber*todelete= (OffsetNumber*)data;
87109

88110
data+=sizeof(OffsetNumber)*xldata->ntodelete;
@@ -92,7 +114,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
92114
GistMarkTuplesDeleted(page);
93115
}
94116

95-
/* add tuples */
117+
/* Add new tuples if any */
96118
if (data-begin<datalen)
97119
{
98120
OffsetNumberoff= (PageIsEmpty(page)) ?FirstOffsetNumber :
@@ -115,6 +137,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
115137
}
116138
}
117139

140+
/* Check that XLOG record contained expected number of tuples */
118141
Assert(ninserted==xldata->ntoinsert);
119142

120143
PageSetLSN(page,lsn);

‎src/backend/storage/page/bufpage.c

Lines changed: 123 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -166,21 +166,24 @@ PageIsVerified(Page page, BlockNumber blkno)
166166
*inserted, or InvalidOffsetNumber if the item is not inserted for any
167167
*reason. A WARNING is issued indicating the reason for the refusal.
168168
*
169-
*If flag PAI_OVERWRITE is set, we just store the item at the specified
170-
*offsetNumber (which must be either a currently-unused item pointer,
171-
*or one past the last existing item). Otherwise,
172-
*if offsetNumber is valid and <= current max offset in the page,
173-
*insert item into the array at that position by shuffling ItemId's
174-
*down to make room.
175-
*If offsetNumber is not valid, then assign one by finding the first
169+
*offsetNumber must be either InvalidOffsetNumber to specify finding a
170+
*free item pointer, or a value between FirstOffsetNumber and one past
171+
*the last existing item, to specify using that particular item pointer.
172+
*
173+
*If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
174+
*the item at the specified offsetNumber, which must be either a
175+
*currently-unused item pointer, or one past the last existing item.
176+
*
177+
*If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
178+
*the item at the specified offsetNumber, moving existing items later
179+
*in the array to make room.
180+
*
181+
*If offsetNumber is not valid, then assign a slot by finding the first
176182
*one that is both unused and deallocated.
177183
*
178184
*If flag PAI_IS_HEAP is set, we enforce that there can't be more than
179185
*MaxHeapTuplesPerPage line pointers on the page.
180186
*
181-
*If flag PAI_ALLOW_FAR_OFFSET is not set, we disallow placing items
182-
*beyond one past the last existing item.
183-
*
184187
*!!! EREPORT(ERROR) IS DISALLOWED HERE !!!
185188
*/
186189
OffsetNumber
@@ -267,11 +270,8 @@ PageAddItemExtended(Page page,
267270
}
268271
}
269272

270-
/*
271-
* Reject placing items beyond the first unused line pointer, unless
272-
* caller asked for that behavior specifically.
273-
*/
274-
if ((flags&PAI_ALLOW_FAR_OFFSET)==0&&offsetNumber>limit)
273+
/* Reject placing items beyond the first unused line pointer */
274+
if (offsetNumber>limit)
275275
{
276276
elog(WARNING,"specified item offset is too large");
277277
returnInvalidOffsetNumber;
@@ -290,10 +290,7 @@ PageAddItemExtended(Page page,
290290
* Note: do arithmetic as signed ints, to avoid mistakes if, say,
291291
* alignedSize > pd_upper.
292292
*/
293-
if ((flags&PAI_ALLOW_FAR_OFFSET)!=0)
294-
lower=Max(phdr->pd_lower,
295-
SizeOfPageHeaderData+sizeof(ItemIdData)*offsetNumber);
296-
elseif (offsetNumber==limit||needshuffle)
293+
if (offsetNumber==limit||needshuffle)
297294
lower=phdr->pd_lower+sizeof(ItemIdData);
298295
else
299296
lower=phdr->pd_lower;
@@ -1093,6 +1090,113 @@ PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos, int nitems)
10931090
}
10941091
}
10951092

1093+
1094+
/*
1095+
* PageIndexTupleOverwrite
1096+
*
1097+
* Replace a specified tuple on an index page.
1098+
*
1099+
* The new tuple is placed exactly where the old one had been, shifting
1100+
* other tuples' data up or down as needed to keep the page compacted.
1101+
* This is better than deleting and reinserting the tuple, because it
1102+
* avoids any data shifting when the tuple size doesn't change; and
1103+
* even when it does, we avoid moving the item pointers around.
1104+
* Conceivably this could also be of use to an index AM that cares about
1105+
* the physical order of tuples as well as their ItemId order.
1106+
*
1107+
* If there's insufficient space for the new tuple, return false. Other
1108+
* errors represent data-corruption problems, so we just elog.
1109+
*/
1110+
bool
1111+
PageIndexTupleOverwrite(Pagepage,OffsetNumberoffnum,
1112+
Itemnewtup,Sizenewsize)
1113+
{
1114+
PageHeaderphdr= (PageHeader)page;
1115+
ItemIdtupid;
1116+
intoldsize;
1117+
unsignedoffset;
1118+
Sizealignednewsize;
1119+
intsize_diff;
1120+
intitemcount;
1121+
1122+
/*
1123+
* As with PageRepairFragmentation, paranoia seems justified.
1124+
*/
1125+
if (phdr->pd_lower<SizeOfPageHeaderData||
1126+
phdr->pd_lower>phdr->pd_upper||
1127+
phdr->pd_upper>phdr->pd_special||
1128+
phdr->pd_special>BLCKSZ||
1129+
phdr->pd_special!=MAXALIGN(phdr->pd_special))
1130+
ereport(ERROR,
1131+
(errcode(ERRCODE_DATA_CORRUPTED),
1132+
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1133+
phdr->pd_lower,phdr->pd_upper,phdr->pd_special)));
1134+
1135+
itemcount=PageGetMaxOffsetNumber(page);
1136+
if ((int)offnum <=0|| (int)offnum>itemcount)
1137+
elog(ERROR,"invalid index offnum: %u",offnum);
1138+
1139+
tupid=PageGetItemId(page,offnum);
1140+
Assert(ItemIdHasStorage(tupid));
1141+
oldsize=ItemIdGetLength(tupid);
1142+
offset=ItemIdGetOffset(tupid);
1143+
1144+
if (offset<phdr->pd_upper|| (offset+oldsize)>phdr->pd_special||
1145+
offset!=MAXALIGN(offset))
1146+
ereport(ERROR,
1147+
(errcode(ERRCODE_DATA_CORRUPTED),
1148+
errmsg("corrupted item pointer: offset = %u, size = %u",
1149+
offset, (unsignedint)oldsize)));
1150+
1151+
/*
1152+
* Determine actual change in space requirement, check for page overflow.
1153+
*/
1154+
oldsize=MAXALIGN(oldsize);
1155+
alignednewsize=MAXALIGN(newsize);
1156+
if (alignednewsize>oldsize+ (phdr->pd_upper-phdr->pd_lower))
1157+
return false;
1158+
1159+
/*
1160+
* Relocate existing data and update line pointers, unless the new tuple
1161+
* is the same size as the old (after alignment), in which case there's
1162+
* nothing to do. Notice that what we have to relocate is data before the
1163+
* target tuple, not data after, so it's convenient to express size_diff
1164+
* as the amount by which the tuple's size is decreasing, making it the
1165+
* delta to add to pd_upper and affected line pointers.
1166+
*/
1167+
size_diff=oldsize- (int)alignednewsize;
1168+
if (size_diff!=0)
1169+
{
1170+
char*addr= (char*)page+phdr->pd_upper;
1171+
inti;
1172+
1173+
/* relocate all tuple data before the target tuple */
1174+
memmove(addr+size_diff,addr,offset-phdr->pd_upper);
1175+
1176+
/* adjust free space boundary pointer */
1177+
phdr->pd_upper+=size_diff;
1178+
1179+
/* adjust affected line pointers too */
1180+
for (i=FirstOffsetNumber;i <=itemcount;i++)
1181+
{
1182+
ItemIdii=PageGetItemId(phdr,i);
1183+
1184+
/* Allow items without storage; currently only BRIN needs that */
1185+
if (ItemIdHasStorage(ii)&&ItemIdGetOffset(ii) <=offset)
1186+
ii->lp_off+=size_diff;
1187+
}
1188+
}
1189+
1190+
/* Update the item's tuple length (other fields shouldn't change) */
1191+
ItemIdSetNormal(tupid,offset+size_diff,newsize);
1192+
1193+
/* Copy new tuple data onto page */
1194+
memcpy(PageGetItem(page,tupid),newtup,newsize);
1195+
1196+
return true;
1197+
}
1198+
1199+
10961200
/*
10971201
* Set checksum for a page in shared buffers.
10981202
*

‎src/include/storage/bufpage.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,6 @@ do { \
409409
*/
410410
#definePAI_OVERWRITE(1 << 0)
411411
#definePAI_IS_HEAP(1 << 1)
412-
#definePAI_ALLOW_FAR_OFFSET(1 << 2)
413412

414413
externvoidPageInit(Pagepage,SizepageSize,SizespecialSize);
415414
externboolPageIsVerified(Pagepage,BlockNumberblkno);
@@ -429,6 +428,8 @@ extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
429428
externvoidPageIndexMultiDelete(Pagepage,OffsetNumber*itemnos,intnitems);
430429
externvoidPageIndexDeleteNoCompact(Pagepage,OffsetNumber*itemnos,
431430
intnitems);
431+
externboolPageIndexTupleOverwrite(Pagepage,OffsetNumberoffnum,
432+
Itemnewtup,Sizenewsize);
432433
externchar*PageSetChecksumCopy(Pagepage,BlockNumberblkno);
433434
externvoidPageSetChecksumInplace(Pagepage,BlockNumberblkno);
434435

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp