Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f16241b

Browse files
committed
Raise error when affecting tuple moved into different partition.
When an update moves a row between partitions (supported since 2f17844), our normal logic for following update chains in READ COMMITTED mode doesn't work anymore. Cross partition updates are modeled as a delete from the old and insert into the new partition. No ctid chain exists across partitions, and there's no convenient space to introduce that link.

Not throwing an error in a partitioned context when one would have been thrown without partitioning is obviously problematic. This commit introduces infrastructure to detect when a tuple has been moved, not just plainly deleted. That allows to throw an error when encountering a deletion that's actually a move, while attempting to follow a ctid chain.

The row deleted as part of a cross partition update is marked by pointing its t_ctid to an invalid block, instead of self as a normal update would. That was deemed to be the least invasive and most future proof way to represent the knowledge, given how few infomask bits are there to be recycled (there's also some locking issues with using infomask bits).

External code following ctid chains should be updated to check for moved tuples. The most likely consequence of not doing so is a missed error.

Author: Amul Sul, editorialized by me
Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas
Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
1 parent 8224de4 commit f16241b

23 files changed

+624
-22
lines changed

‎src/backend/access/heap/heapam.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2308,6 +2308,7 @@ heap_get_latest_tid(Relation relation,
23082308
*/
23092309
if ((tp.t_data->t_infomask&HEAP_XMAX_INVALID)||
23102310
HeapTupleHeaderIsOnlyLocked(tp.t_data)||
2311+
HeapTupleHeaderIndicatesMovedPartitions(tp.t_data)||
23112312
ItemPointerEquals(&tp.t_self,&tp.t_data->t_ctid))
23122313
{
23132314
UnlockReleaseBuffer(buffer);
@@ -3041,6 +3042,8 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
30413042
*crosscheck - if not InvalidSnapshot, also check tuple against this
30423043
*wait - true if should wait for any conflicting update to commit/abort
30433044
*hufd - output parameter, filled in failure cases (see below)
3045+
*changingPart - true iff the tuple is being moved to another partition
3046+
*table due to an update of the partition key. Otherwise, false.
30443047
*
30453048
* Normal, successful return value is HeapTupleMayBeUpdated, which
30463049
* actually means we did delete it. Failure return codes are
@@ -3056,7 +3059,7 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
30563059
HTSU_Result
30573060
heap_delete(Relationrelation,ItemPointertid,
30583061
CommandIdcid,Snapshotcrosscheck,boolwait,
3059-
HeapUpdateFailureData*hufd)
3062+
HeapUpdateFailureData*hufd,boolchangingPart)
30603063
{
30613064
HTSU_Resultresult;
30623065
TransactionIdxid=GetCurrentTransactionId();
@@ -3325,6 +3328,10 @@ heap_delete(Relation relation, ItemPointer tid,
33253328
/* Make sure there is no forward chain link in t_ctid */
33263329
tp.t_data->t_ctid=tp.t_self;
33273330

3331+
/* Signal that this is actually a move into another partition */
3332+
if (changingPart)
3333+
HeapTupleHeaderSetMovedPartitions(tp.t_data);
3334+
33283335
MarkBufferDirty(buffer);
33293336

33303337
/*
@@ -3342,7 +3349,11 @@ heap_delete(Relation relation, ItemPointer tid,
33423349
if (RelationIsAccessibleInLogicalDecoding(relation))
33433350
log_heap_new_cid(relation,&tp);
33443351

3345-
xlrec.flags=all_visible_cleared ?XLH_DELETE_ALL_VISIBLE_CLEARED :0;
3352+
xlrec.flags=0;
3353+
if (all_visible_cleared)
3354+
xlrec.flags |=XLH_DELETE_ALL_VISIBLE_CLEARED;
3355+
if (changingPart)
3356+
xlrec.flags |=XLH_DELETE_IS_PARTITION_MOVE;
33463357
xlrec.infobits_set=compute_infobits(tp.t_data->t_infomask,
33473358
tp.t_data->t_infomask2);
33483359
xlrec.offnum=ItemPointerGetOffsetNumber(&tp.t_self);
@@ -3450,7 +3461,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
34503461
result=heap_delete(relation,tid,
34513462
GetCurrentCommandId(true),InvalidSnapshot,
34523463
true/* wait for commit */ ,
3453-
&hufd);
3464+
&hufd, false/* changingPart */);
34543465
switch (result)
34553466
{
34563467
caseHeapTupleSelfUpdated:
@@ -6051,6 +6062,7 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
60516062
next:
60526063
/* if we find the end of update chain, we're done. */
60536064
if (mytup.t_data->t_infomask&HEAP_XMAX_INVALID||
6065+
HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data)||
60546066
ItemPointerEquals(&mytup.t_self,&mytup.t_data->t_ctid)||
60556067
HeapTupleHeaderIsOnlyLocked(mytup.t_data))
60566068
{
@@ -6102,7 +6114,12 @@ static HTSU_Result
61026114
heap_lock_updated_tuple(Relationrel,HeapTupletuple,ItemPointerctid,
61036115
TransactionIdxid,LockTupleModemode)
61046116
{
6105-
if (!ItemPointerEquals(&tuple->t_self,ctid))
6117+
/*
6118+
* If the tuple has not been updated, or has moved into another partition
6119+
* (effectively a delete) stop here.
6120+
*/
6121+
if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data)&&
6122+
!ItemPointerEquals(&tuple->t_self,ctid))
61066123
{
61076124
/*
61086125
* If this is the first possibly-multixact-able operation in the
@@ -8493,8 +8510,11 @@ heap_xlog_delete(XLogReaderState *record)
84938510
if (xlrec->flags&XLH_DELETE_ALL_VISIBLE_CLEARED)
84948511
PageClearAllVisible(page);
84958512

8496-
/* Make sure there is no forward chain link in t_ctid */
8497-
htup->t_ctid=target_tid;
8513+
/* Make sure t_ctid is set correctly */
8514+
if (xlrec->flags&XLH_DELETE_IS_PARTITION_MOVE)
8515+
HeapTupleHeaderSetMovedPartitions(htup);
8516+
else
8517+
htup->t_ctid=target_tid;
84988518
PageSetLSN(page,lsn);
84998519
MarkBufferDirty(buffer);
85008520
}
@@ -9422,6 +9442,13 @@ heap_mask(char *pagedata, BlockNumber blkno)
94229442
*/
94239443
if (HeapTupleHeaderIsSpeculative(page_htup))
94249444
ItemPointerSet(&page_htup->t_ctid,blkno,off);
9445+
9446+
/*
9447+
* NB: Not ignoring ctid changes due to the tuple having moved
9448+
* (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
9449+
* important information that needs to be in-sync between primary
9450+
* and standby, and thus is WAL logged.
9451+
*/
94259452
}
94269453

94279454
/*

‎src/backend/access/heap/pruneheap.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,9 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
552552
if (!HeapTupleHeaderIsHotUpdated(htup))
553553
break;
554554

555+
/* HOT implies it can't have moved to different partition */
556+
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
557+
555558
/*
556559
* Advance to next chain member.
557560
*/
@@ -823,6 +826,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
823826
if (!HeapTupleHeaderIsHotUpdated(htup))
824827
break;
825828

829+
/* HOT implies it can't have moved to different partition */
830+
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
831+
826832
nextoffnum=ItemPointerGetOffsetNumber(&htup->t_ctid);
827833
priorXmax=HeapTupleHeaderGetUpdateXid(htup);
828834
}

‎src/backend/access/heap/rewriteheap.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ rewrite_heap_tuple(RewriteState state,
424424
*/
425425
if (!((old_tuple->t_data->t_infomask&HEAP_XMAX_INVALID)||
426426
HeapTupleHeaderIsOnlyLocked(old_tuple->t_data))&&
427+
!HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data)&&
427428
!(ItemPointerEquals(&(old_tuple->t_self),
428429
&(old_tuple->t_data->t_ctid))))
429430
{

‎src/backend/commands/trigger.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,6 +3315,11 @@ ltrmark:;
33153315
ereport(ERROR,
33163316
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
33173317
errmsg("could not serialize access due to concurrent update")));
3318+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
3319+
ereport(ERROR,
3320+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
3321+
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
3322+
33183323
if (!ItemPointerEquals(&hufd.ctid,&tuple.t_self))
33193324
{
33203325
/* it was updated, so look at the updated version */

‎src/backend/executor/execMain.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2733,6 +2733,10 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
27332733
ereport(ERROR,
27342734
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
27352735
errmsg("could not serialize access due to concurrent update")));
2736+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
2737+
ereport(ERROR,
2738+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2739+
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
27362740

27372741
/* Should not encounter speculative tuple on recheck */
27382742
Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
@@ -2801,6 +2805,14 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
28012805
* As above, it should be safe to examine xmax and t_ctid without the
28022806
* buffer content lock, because they can't be changing.
28032807
*/
2808+
2809+
/* check whether next version would be in a different partition */
2810+
if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
2811+
ereport(ERROR,
2812+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2813+
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
2814+
2815+
/* check whether tuple has been deleted */
28042816
if (ItemPointerEquals(&tuple.t_self,&tuple.t_data->t_ctid))
28052817
{
28062818
/* deleted, so forget about it */

‎src/backend/executor/execMerge.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,8 @@ lmerge_matched:;
324324
slot=ExecDelete(mtstate,tupleid,NULL,
325325
slot,epqstate,estate,
326326
&tuple_deleted, false,&hufd,action,
327-
mtstate->canSetTag);
327+
mtstate->canSetTag,
328+
false/* changingPart */);
328329

329330
break;
330331

‎src/backend/executor/execReplication.c

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,14 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
191191
break;
192192
caseHeapTupleUpdated:
193193
/* XXX: Improve handling here */
194-
ereport(LOG,
195-
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
196-
errmsg("concurrent update, retrying")));
194+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
195+
ereport(LOG,
196+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
197+
errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
198+
else
199+
ereport(LOG,
200+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
201+
errmsg("concurrent update, retrying")));
197202
gotoretry;
198203
caseHeapTupleInvisible:
199204
elog(ERROR,"attempted to lock invisible tuple");
@@ -349,9 +354,14 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
349354
break;
350355
caseHeapTupleUpdated:
351356
/* XXX: Improve handling here */
352-
ereport(LOG,
353-
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
354-
errmsg("concurrent update, retrying")));
357+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
358+
ereport(LOG,
359+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
360+
errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
361+
else
362+
ereport(LOG,
363+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
364+
errmsg("concurrent update, retrying")));
355365
gotoretry;
356366
caseHeapTupleInvisible:
357367
elog(ERROR,"attempted to lock invisible tuple");

‎src/backend/executor/nodeLockRows.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,11 @@ ExecLockRows(PlanState *pstate)
218218
ereport(ERROR,
219219
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
220220
errmsg("could not serialize access due to concurrent update")));
221+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
222+
ereport(ERROR,
223+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
224+
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
225+
221226
if (ItemPointerEquals(&hufd.ctid,&tuple.t_self))
222227
{
223228
/* Tuple was deleted, so don't return it */

‎src/backend/executor/nodeModifyTable.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,8 @@ ExecDelete(ModifyTableState *mtstate,
645645
boolprocessReturning,
646646
HeapUpdateFailureData*hufdp,
647647
MergeActionState*actionState,
648-
boolcanSetTag)
648+
boolcanSetTag,
649+
boolchangingPart)
649650
{
650651
ResultRelInfo*resultRelInfo;
651652
RelationresultRelationDesc;
@@ -744,7 +745,8 @@ ldelete:;
744745
estate->es_output_cid,
745746
estate->es_crosscheck_snapshot,
746747
true/* wait for commit */ ,
747-
&hufd);
748+
&hufd,
749+
changingPart);
748750

749751
/*
750752
* Copy the necessary information, if the caller has asked for it. We
@@ -803,6 +805,10 @@ ldelete:;
803805
ereport(ERROR,
804806
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
805807
errmsg("could not serialize access due to concurrent update")));
808+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
809+
ereport(ERROR,
810+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
811+
errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
806812

807813
if (!ItemPointerEquals(tupleid,&hufd.ctid))
808814
{
@@ -1157,7 +1163,7 @@ lreplace:;
11571163
*/
11581164
ExecDelete(mtstate,tupleid,oldtuple,planSlot,epqstate,
11591165
estate,&tuple_deleted, false,hufdp,NULL,
1160-
false);
1166+
false/* canSetTag */, true/* changingPart */);
11611167

11621168
/*
11631169
* For some reason if DELETE didn't happen (e.g. trigger prevented
@@ -1333,6 +1339,10 @@ lreplace:;
13331339
ereport(ERROR,
13341340
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
13351341
errmsg("could not serialize access due to concurrent update")));
1342+
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
1343+
ereport(ERROR,
1344+
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1345+
errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
13361346

13371347
if (!ItemPointerEquals(tupleid,&hufd.ctid))
13381348
{
@@ -1522,6 +1532,14 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
15221532
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
15231533
errmsg("could not serialize access due to concurrent update")));
15241534

1535+
/*
1536+
* As long as we don't support an UPDATE of INSERT ON CONFLICT for
1537+
* a partitioned table we shouldn't reach to a case where tuple to
1538+
* be lock is moved to another partition due to concurrent update
1539+
* of the partition key.
1540+
*/
1541+
Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
1542+
15251543
/*
15261544
* Tell caller to try again from the very start.
15271545
*
@@ -2274,7 +2292,8 @@ ExecModifyTable(PlanState *pstate)
22742292
caseCMD_DELETE:
22752293
slot=ExecDelete(node,tupleid,oldtuple,planSlot,
22762294
&node->mt_epqstate,estate,
2277-
NULL, true,NULL,NULL,node->canSetTag);
2295+
NULL, true,NULL,NULL,node->canSetTag,
2296+
false/* changingPart */);
22782297
break;
22792298
default:
22802299
elog(ERROR,"unknown operation");

‎src/include/access/heapam.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
167167
CommandIdcid,intoptions,BulkInsertStatebistate);
168168
externHTSU_Resultheap_delete(Relationrelation,ItemPointertid,
169169
CommandIdcid,Snapshotcrosscheck,boolwait,
170-
HeapUpdateFailureData*hufd);
170+
HeapUpdateFailureData*hufd,boolchangingPart);
171171
externvoidheap_finish_speculative(Relationrelation,HeapTupletuple);
172172
externvoidheap_abort_speculative(Relationrelation,HeapTupletuple);
173173
externHTSU_Resultheap_update(Relationrelation,ItemPointerotid,

‎src/include/access/heapam_xlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
#defineXLH_DELETE_CONTAINS_OLD_TUPLE(1<<1)
9494
#defineXLH_DELETE_CONTAINS_OLD_KEY(1<<2)
9595
#defineXLH_DELETE_IS_SUPER(1<<3)
96+
#defineXLH_DELETE_IS_PARTITION_MOVE(1<<4)
9697

9798
/* convenience macro for checking whether any form of old tuple was logged */
9899
#defineXLH_DELETE_CONTAINS_OLD\

‎src/include/access/htup_details.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,10 @@
8383
*
8484
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
8585
* is initialized with its own TID (location). If the tuple is ever updated,
86-
* its t_ctid is changed to point to the replacement version of the tuple.
87-
* Thus, a tuple is the latest version of its row iff XMAX is invalid or
86+
* its t_ctid is changed to point to the replacement version of the tuple or
87+
* the block number (ip_blkid) is invalidated if the tuple is moved from one
88+
* partition to another partition relation due to an update of the partition
89+
* key. Thus, a tuple is the latest version of its row iff XMAX is invalid or
8890
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
8991
* either locked or deleted). One can follow the chain of t_ctid links
9092
* to find the newest version of the row. Beware however that VACUUM might
@@ -445,6 +447,12 @@ do { \
445447
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
446448
)
447449

450+
#defineHeapTupleHeaderSetMovedPartitions(tup) \
451+
ItemPointerSetMovedPartitions(&(tup)->t_ctid)
452+
453+
#defineHeapTupleHeaderIndicatesMovedPartitions(tup) \
454+
ItemPointerIndicatesMovedPartitions(&tup->t_ctid)
455+
448456
#defineHeapTupleHeaderGetDatumLength(tup) \
449457
VARSIZE(tup)
450458

‎src/include/executor/nodeModifyTable.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ extern TupleTableSlot *ExecDelete(ModifyTableState *mtstate,
2727
ItemPointertupleid,HeapTupleoldtuple,TupleTableSlot*planSlot,
2828
EPQState*epqstate,EState*estate,bool*tupleDeleted,
2929
boolprocessReturning,HeapUpdateFailureData*hufdp,
30-
MergeActionState*actionState,boolcanSetTag);
30+
MergeActionState*actionState,boolcanSetTag,
31+
boolchangingPart);
3132
externTupleTableSlot*ExecUpdate(ModifyTableState*mtstate,
3233
ItemPointertupleid,HeapTupleoldtuple,TupleTableSlot*slot,
3334
TupleTableSlot*planSlot,EPQState*epqstate,EState*estate,

‎src/include/storage/itemptr.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,22 @@ typedef ItemPointerData *ItemPointer;
154154
(pointer)->ip_posid = InvalidOffsetNumber \
155155
)
156156

157+
/*
158+
* ItemPointerIndicatesMovedPartitions
159+
*True iff the block number indicates the tuple has moved to another
160+
*partition.
161+
*/
162+
#defineItemPointerIndicatesMovedPartitions(pointer) \
163+
!BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(pointer))
164+
165+
/*
166+
* ItemPointerSetMovedPartitions
167+
*Indicate that the item referenced by the itempointer has moved into a
168+
*different partition.
169+
*/
170+
#defineItemPointerSetMovedPartitions(pointer) \
171+
ItemPointerSetBlockNumber((pointer), InvalidBlockNumber)
172+
157173
/* ----------------
158174
*externs
159175
* ----------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp