Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 40ca70e

Browse files
author
Amit Kapila
committed
Allow using the updated tuple while moving it to a different partition.
An update that causes the tuple to be moved to a different partition was
missing out on re-constructing the to-be-updated tuple, based on the latest
tuple in the update chain. Instead, it's simply deleting the latest tuple
and inserting a new tuple in the new partition based on the old tuple.
Commit 2f17844 didn't consider this case, so some of the updates were
getting lost.

In passing, change the argument order for output parameter in ExecDelete
and add some commentary about it.

Reported-by: Pavan Deolasee
Author: Amit Khandekar, with minor changes by me
Reviewed-by: Dilip Kumar, Amit Kapila and Alvaro Herrera
Backpatch-through: 11
Discussion: https://postgr.es/m/CAJ3gD9fRbEzDqdeDq1jxqZUb47kJn+tQ7=Bcgjc8quqKsDViKQ@mail.gmail.com
1 parent edc6b41 commit 40ca70e

File tree

7 files changed

+214
-23
lines changed

7 files changed

+214
-23
lines changed

‎src/backend/commands/trigger.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2726,11 +2726,19 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
27262726
false,NULL,NULL,NIL,NULL,transition_capture);
27272727
}
27282728

2729+
/*
2730+
* Execute BEFORE ROW DELETE triggers.
2731+
*
2732+
* True indicates caller can proceed with the delete. False indicates caller
2733+
* needs to suppress the delete and additionally if requested, we need to pass
2734+
* back the concurrently updated tuple if any.
2735+
*/
27292736
bool
27302737
ExecBRDeleteTriggers(EState*estate,EPQState*epqstate,
27312738
ResultRelInfo*relinfo,
27322739
ItemPointertupleid,
2733-
HeapTuplefdw_trigtuple)
2740+
HeapTuplefdw_trigtuple,
2741+
TupleTableSlot**epqslot)
27342742
{
27352743
TriggerDesc*trigdesc=relinfo->ri_TrigDesc;
27362744
boolresult= true;
@@ -2747,6 +2755,18 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
27472755
LockTupleExclusive,&newSlot);
27482756
if (trigtuple==NULL)
27492757
return false;
2758+
2759+
/*
2760+
* If the tuple was concurrently updated and the caller of this
2761+
* function requested for the updated tuple, skip the trigger
2762+
* execution.
2763+
*/
2764+
if (newSlot!=NULL&&epqslot!=NULL)
2765+
{
2766+
*epqslot=newSlot;
2767+
heap_freetuple(trigtuple);
2768+
return false;
2769+
}
27502770
}
27512771
else
27522772
trigtuple=fdw_trigtuple;

‎src/backend/executor/execReplication.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
531531
{
532532
skip_tuple= !ExecBRDeleteTriggers(estate,epqstate,resultRelInfo,
533533
&searchslot->tts_tuple->t_self,
534-
NULL);
534+
NULL,NULL);
535535
}
536536

537537
if (!skip_tuple)

‎src/backend/executor/nodeModifyTable.c

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,11 @@ ExecInsert(ModifyTableState *mtstate,
609609
*foreign table, tupleid is invalid; the FDW has to figure out
610610
*which row to delete using data from the planSlot. oldtuple is
611611
*passed to foreign table triggers; it is NULL when the foreign
612-
*table has no relevant triggers.
612+
*table has no relevant triggers. We use tupleDeleted to indicate
613+
*whether the tuple is actually deleted, callers can use it to
614+
*decide whether to continue the operation. When this DELETE is a
615+
*part of an UPDATE of partition-key, then the slot returned by
616+
*EvalPlanQual() is passed back using output parameter epqslot.
613617
*
614618
*Returns RETURNING result if any, otherwise NULL.
615619
* ----------------------------------------------------------------
@@ -621,10 +625,11 @@ ExecDelete(ModifyTableState *mtstate,
621625
TupleTableSlot*planSlot,
622626
EPQState*epqstate,
623627
EState*estate,
624-
bool*tupleDeleted,
625628
boolprocessReturning,
626629
boolcanSetTag,
627-
boolchangingPart)
630+
boolchangingPart,
631+
bool*tupleDeleted,
632+
TupleTableSlot**epqslot)
628633
{
629634
ResultRelInfo*resultRelInfo;
630635
RelationresultRelationDesc;
@@ -649,7 +654,7 @@ ExecDelete(ModifyTableState *mtstate,
649654
booldodelete;
650655

651656
dodelete=ExecBRDeleteTriggers(estate,epqstate,resultRelInfo,
652-
tupleid,oldtuple);
657+
tupleid,oldtuple,epqslot);
653658

654659
if (!dodelete)/* "do nothing" */
655660
returnNULL;
@@ -769,19 +774,30 @@ ldelete:;
769774

770775
if (!ItemPointerEquals(tupleid,&hufd.ctid))
771776
{
772-
TupleTableSlot*epqslot;
773-
774-
epqslot=EvalPlanQual(estate,
775-
epqstate,
776-
resultRelationDesc,
777-
resultRelInfo->ri_RangeTableIndex,
778-
LockTupleExclusive,
779-
&hufd.ctid,
780-
hufd.xmax);
781-
if (!TupIsNull(epqslot))
777+
TupleTableSlot*my_epqslot;
778+
779+
my_epqslot=EvalPlanQual(estate,
780+
epqstate,
781+
resultRelationDesc,
782+
resultRelInfo->ri_RangeTableIndex,
783+
LockTupleExclusive,
784+
&hufd.ctid,
785+
hufd.xmax);
786+
if (!TupIsNull(my_epqslot))
782787
{
783788
*tupleid=hufd.ctid;
784-
gotoldelete;
789+
790+
/*
791+
* If requested, skip delete and pass back the updated
792+
* row.
793+
*/
794+
if (epqslot)
795+
{
796+
*epqslot=my_epqslot;
797+
returnNULL;
798+
}
799+
else
800+
gotoldelete;
785801
}
786802
}
787803
/* tuple already deleted; nothing to do */
@@ -1052,6 +1068,7 @@ lreplace:;
10521068
{
10531069
booltuple_deleted;
10541070
TupleTableSlot*ret_slot;
1071+
TupleTableSlot*epqslot=NULL;
10551072
PartitionTupleRouting*proute=mtstate->mt_partition_tuple_routing;
10561073
intmap_index;
10571074
TupleConversionMap*tupconv_map;
@@ -1081,8 +1098,8 @@ lreplace:;
10811098
* processing. We want to return rows from INSERT.
10821099
*/
10831100
ExecDelete(mtstate,tupleid,oldtuple,planSlot,epqstate,
1084-
estate,&tuple_deleted, false,
1085-
false/*canSetTag */ ,true/* changingPart */);
1101+
estate,false, false/* canSetTag */,
1102+
true/*changingPart */ ,&tuple_deleted,&epqslot);
10861103

10871104
/*
10881105
* For some reason if DELETE didn't happen (e.g. trigger prevented
@@ -1105,7 +1122,23 @@ lreplace:;
11051122
* resurrect it.
11061123
*/
11071124
if (!tuple_deleted)
1108-
returnNULL;
1125+
{
1126+
/*
1127+
* epqslot will be typically NULL. But when ExecDelete()
1128+
* finds that another transaction has concurrently updated the
1129+
* same row, it re-fetches the row, skips the delete, and
1130+
* epqslot is set to the re-fetched tuple slot. In that case,
1131+
* we need to do all the checks again.
1132+
*/
1133+
if (TupIsNull(epqslot))
1134+
returnNULL;
1135+
else
1136+
{
1137+
slot=ExecFilterJunk(resultRelInfo->ri_junkFilter,epqslot);
1138+
tuple=ExecMaterializeSlot(slot);
1139+
gotolreplace;
1140+
}
1141+
}
11091142

11101143
/*
11111144
* Updates set the transition capture map only when a new subplan
@@ -2136,8 +2169,8 @@ ExecModifyTable(PlanState *pstate)
21362169
caseCMD_DELETE:
21372170
slot=ExecDelete(node,tupleid,oldtuple,planSlot,
21382171
&node->mt_epqstate,estate,
2139-
NULL,true,node->canSetTag,
2140-
false/* changingPart */ );
2172+
true,node->canSetTag,
2173+
false/* changingPart */,NULL,NULL);
21412174
break;
21422175
default:
21432176
elog(ERROR,"unknown operation");

‎src/include/commands/trigger.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ extern bool ExecBRDeleteTriggers(EState *estate,
206206
EPQState*epqstate,
207207
ResultRelInfo*relinfo,
208208
ItemPointertupleid,
209-
HeapTuplefdw_trigtuple);
209+
HeapTuplefdw_trigtuple,
210+
TupleTableSlot**epqslot);
210211
externvoidExecARDeleteTriggers(EState*estate,
211212
ResultRelInfo*relinfo,
212213
ItemPointertupleid,
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
Parsed test spec with 2 sessions
2+
3+
starting permutation: s1b s2b s2u1 s1u s2c s1c s1s
4+
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
5+
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
6+
step s2u1: UPDATE foo SET b = b || ' update2' WHERE a = 1;
7+
step s1u: UPDATE foo SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
8+
step s2c: COMMIT;
9+
step s1u: <... completed>
10+
step s1c: COMMIT;
11+
step s1s: SELECT tableoid::regclass, * FROM foo ORDER BY a;
12+
tableoid a b
13+
14+
foo2 2 ABC update2 update1
15+
16+
starting permutation: s1b s2b s2ut1 s1ut s2c s1c s1st s1stl
17+
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
18+
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
19+
step s2ut1: UPDATE footrg SET b = b || ' update2' WHERE a = 1;
20+
step s1ut: UPDATE footrg SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
21+
step s2c: COMMIT;
22+
step s1ut: <... completed>
23+
step s1c: COMMIT;
24+
step s1st: SELECT tableoid::regclass, * FROM footrg ORDER BY a;
25+
tableoid a b
26+
27+
footrg2 2 ABC update2 update1
28+
step s1stl: SELECT * FROM triglog ORDER BY a;
29+
a b
30+
31+
1 ABC update2 trigger
32+
33+
starting permutation: s1b s2b s2u2 s1u s2c s1c s1s
34+
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
35+
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
36+
step s2u2: UPDATE foo SET b = 'EFG' WHERE a = 1;
37+
step s1u: UPDATE foo SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
38+
step s2c: COMMIT;
39+
step s1u: <... completed>
40+
step s1c: COMMIT;
41+
step s1s: SELECT tableoid::regclass, * FROM foo ORDER BY a;
42+
tableoid a b
43+
44+
foo1 1 EFG
45+
46+
starting permutation: s1b s2b s2ut2 s1ut s2c s1c s1st s1stl
47+
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
48+
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
49+
step s2ut2: UPDATE footrg SET b = 'EFG' WHERE a = 1;
50+
step s1ut: UPDATE footrg SET a = a + 1, b = b || ' update1' WHERE b like '%ABC%'; <waiting ...>
51+
step s2c: COMMIT;
52+
step s1ut: <... completed>
53+
step s1c: COMMIT;
54+
step s1st: SELECT tableoid::regclass, * FROM footrg ORDER BY a;
55+
tableoid a b
56+
57+
footrg1 1 EFG
58+
step s1stl: SELECT * FROM triglog ORDER BY a;
59+
a b
60+

‎src/test/isolation/isolation_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,5 @@ test: predicate-gin-nomatch
7474
test: partition-key-update-1
7575
test: partition-key-update-2
7676
test: partition-key-update-3
77+
test: partition-key-update-4
7778
test: plpgsql-toast
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Test that a row that ends up in a new partition contains changes made by
2+
# a concurrent transaction.
3+
4+
setup
5+
{
6+
--
7+
-- Setup to test concurrent handling of ExecDelete().
8+
--
9+
CREATETABLEfoo (aint,btext)PARTITIONBYLIST(a);
10+
CREATETABLEfoo1PARTITIONOFfooFORVALUESIN (1);
11+
CREATETABLEfoo2PARTITIONOFfooFORVALUESIN (2);
12+
INSERTINTOfooVALUES (1,'ABC');
13+
14+
--
15+
-- Setup to test concurrent handling of GetTupleForTrigger().
16+
--
17+
CREATETABLEfootrg (aint,btext)PARTITIONBYLIST(a);
18+
CREATETABLEtriglogasselect*fromfootrg;
19+
CREATETABLEfootrg1PARTITIONOFfootrgFORVALUESIN (1);
20+
CREATETABLEfootrg2PARTITIONOFfootrgFORVALUESIN (2);
21+
INSERTINTOfootrgVALUES (1,'ABC');
22+
CREATEFUNCTIONfunc_footrg()RETURNSTRIGGERAS $$
23+
BEGIN
24+
OLD.b=OLD.b||' trigger';
25+
26+
-- This will verify that the trigger is not run *before* the row is
27+
-- refetched by EvalPlanQual. The OLD row should contain the changes made
28+
-- by the concurrent session.
29+
INSERTINTOtriglogselectOLD.*;
30+
31+
RETURNOLD;
32+
END $$LANGUAGEPLPGSQL;
33+
CREATETRIGGERfootrg_ondelBEFOREDELETEONfootrg1
34+
FOREACHROWEXECUTEPROCEDUREfunc_footrg();
35+
36+
}
37+
38+
teardown
39+
{
40+
DROPTABLEfoo;
41+
DROPTRIGGERfootrg_ondelONfootrg1;
42+
DROPFUNCTIONfunc_footrg();
43+
DROPTABLEfootrg;
44+
DROPTABLEtriglog;
45+
}
46+
47+
session"s1"
48+
step"s1b" {BEGINISOLATIONLEVELREADCOMMITTED; }
49+
step"s1u" {UPDATEfooSETa=a+1,b=b||' update1'WHEREblike'%ABC%'; }
50+
step"s1ut" {UPDATEfootrgSETa=a+1,b=b||' update1'WHEREblike'%ABC%'; }
51+
step"s1s" {SELECTtableoid::regclass,*FROMfooORDERBYa; }
52+
step"s1st" {SELECTtableoid::regclass,*FROMfootrgORDERBYa; }
53+
step"s1stl" {SELECT*FROMtriglogORDERBYa; }
54+
step"s1c" {COMMIT; }
55+
56+
session"s2"
57+
step"s2b" {BEGINISOLATIONLEVELREADCOMMITTED; }
58+
step"s2u1" {UPDATEfooSETb=b||' update2'WHEREa=1; }
59+
step"s2u2" {UPDATEfooSETb='EFG'WHEREa=1; }
60+
step"s2ut1" {UPDATEfootrgSETb=b||' update2'WHEREa=1; }
61+
step"s2ut2" {UPDATEfootrgSETb='EFG'WHEREa=1; }
62+
step"s2c" {COMMIT; }
63+
64+
65+
# Session s1 is moving a row into another partition, but is waiting for
66+
# another session s2 that is updating the original row. The row that ends up
67+
# in the new partition should contain the changes made by session s2.
68+
permutation"s1b""s2b""s2u1""s1u""s2c""s1c""s1s"
69+
70+
# Same as above, except that session s1 is waiting in GetTupleForTrigger().
71+
permutation"s1b""s2b""s2ut1""s1ut""s2c""s1c""s1st""s1stl"
72+
73+
# Below two cases are similar to the above two; except that the session s1
74+
# fails EvalPlanQual() test, so partition key update does not happen.
75+
permutation"s1b""s2b""s2u2""s1u""s2c""s1c""s1s"
76+
permutation"s1b""s2b""s2ut2""s1ut""s2c""s1c""s1st""s1stl"

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp