Commit 9f2ee8f

Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases.  We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries.  If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row.  The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.

Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested.  To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param.  Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.

This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE.  This is needed to avoid the
duplicate-output-tuple problem.  It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
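
The Param-driven rescan is the heart of the patch.  As a rough illustration
only --- this is a toy model, not PostgreSQL code, and every name in it is
invented --- the following C program shows the shape of the idea: a scan node
that, once a test tuple has been jammed in and a rescan has been signaled,
emits exactly that one row, with no executor init/shutdown cycle per recheck:

    /*
     * Toy model of the Param-based rescan described above.  The real
     * executor propagates the Param change through its own rescan
     * machinery; here a direct rescan() call stands in for that signal.
     */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct ScanNode
    {
        const char *relname;
        const char *epq_tuple;  /* test row jammed in by the EPQ machinery */
        bool        epq_done;   /* has the single test row been emitted? */
    } ScanNode;

    /* reacts to the (simulated) Param change: reset, but do not rebuild */
    static void
    rescan(ScanNode *node)
    {
        node->epq_done = false;
    }

    /* in EPQ mode, return the stashed row exactly once */
    static const char *
    next(ScanNode *node)
    {
        if (node->epq_tuple == NULL || node->epq_done)
            return NULL;        /* normal scan path omitted in this model */
        node->epq_done = true;
        return node->epq_tuple;
    }

    int
    main(void)
    {
        ScanNode    scan = {"orders", NULL, false};
        const char *row;

        /* first recheck: stash the current row and "signal the Param" */
        scan.epq_tuple = "(id=42, qty=7)";
        rescan(&scan);
        while ((row = next(&scan)) != NULL)
            printf("recheck sees %s from %s\n", row, scan.relname);

        /* a later recheck reuses the same already-built node */
        scan.epq_tuple = "(id=42, qty=8)";
        rescan(&scan);
        while ((row = next(&scan)) != NULL)
            printf("recheck sees %s from %s\n", row, scan.relname);
        return 0;
    }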
1 parent 76d8883 · commit 9f2ee8f

File tree

50 files changed: +1550, -1021 lines


src/backend/commands/trigger.c

Lines changed: 20 additions & 12 deletions
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.254 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.255 2009/10/26 02:26:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,7 +61,7 @@ int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void InsertTrigger(TriggerDesc *trigdesc, Trigger *trigger, int indx);
 static HeapTuple GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot);
@@ -1828,7 +1828,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 bool
-ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
+ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid)
 {
@@ -1842,7 +1842,7 @@ ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
 	TupleTableSlot *newSlot;
 	int			i;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return false;
@@ -1964,7 +1964,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 HeapTuple
-ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
+ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid, HeapTuple newtuple)
 {
@@ -1979,7 +1979,7 @@ ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
 	int			i;
 	Bitmapset  *modifiedCols;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return NULL;
@@ -2107,7 +2107,7 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 
 static HeapTuple
 GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot)
@@ -2125,8 +2125,8 @@ GetTupleForTrigger(EState *estate,
 
 		*newSlot = NULL;
 
-		/* caller must pass a subplanstate if EvalPlanQual is possible */
-		Assert(subplanstate != NULL);
+		/* caller must pass an epqstate if EvalPlanQual is possible */
+		Assert(epqstate != NULL);
 
 		/*
 		 * lock tuple for update
@@ -2153,27 +2153,35 @@ ltrmark:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+				if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
 				{
 					/* it was updated, so look at the updated version */
 					TupleTableSlot *epqslot;
 
 					epqslot = EvalPlanQual(estate,
+										   epqstate,
+										   relation,
 										   relinfo->ri_RangeTableIndex,
-										   subplanstate,
 										   &update_ctid,
 										   update_xmax);
 					if (!TupIsNull(epqslot))
 					{
 						*tid = update_ctid;
 						*newSlot = epqslot;
+
+						/*
+						 * EvalPlanQual already locked the tuple, but we
+						 * re-call heap_lock_tuple anyway as an easy way
+						 * of re-fetching the correct tuple.  Speed is
+						 * hardly a criterion in this path anyhow.
+						 */
 						goto ltrmark;
 					}
 				}
 
 				/*
 				 * if tuple was deleted or PlanQual failed for updated tuple -
-				 * we have not process this tuple!
+				 * we must not process this tuple!
 				 */
 				return NULL;
 

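The new trigger-side call pattern is visible in the last hunk: on a concurrent
update, EvalPlanQual re-evaluates the quals against the updated row, and on
success GetTupleForTrigger loops back to ltrmark to re-lock that row.  A
self-contained toy of just that control flow (invented names; only the
goto-retry shape mirrors the real code):

    /* Toy model of the lock/recheck/retry loop --- not PostgreSQL code. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int id; int version; bool updated; } Row;

    /* stands in for heap_lock_tuple: fails once due to a concurrent update */
    static bool
    lock_row(Row *row)
    {
        if (row->updated)
        {
            row->version++;     /* chase the chain to the updated version */
            row->updated = false;
            return false;       /* "HeapTupleUpdated": caller must recheck */
        }
        return true;
    }

    /* stands in for EvalPlanQual: do the quals still pass for the new row? */
    static bool
    recheck_quals(const Row *row)
    {
        return row->id == 42;   /* trivially "qual passes" for this demo */
    }

    int
    main(void)
    {
        Row row = {42, 1, true};

    retry:                      /* plays the role of the ltrmark label */
        if (!lock_row(&row))
        {
            if (recheck_quals(&row))
                goto retry;     /* re-lock the updated version */
            printf("qual failed; skip tuple\n");
            return 0;
        }
        printf("locked id=%d version=%d\n", row.id, row.version);
        return 0;
    }
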
src/backend/commands/vacuum.c

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.393 2009/09/01 04:46:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.394 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,7 +102,7 @@ typedef VacPageListData *VacPageList;
  * Note: because t_ctid links can be stale (this would only occur if a prior
  * VACUUM crashed partway through), it is possible that new_tid points to an
  * empty slot or unrelated tuple.  We have to check the linkage as we follow
- * it, just as is done in EvalPlanQual.
+ * it, just as is done in EvalPlanQualFetch.
  */
 typedef struct VTupleLinkData
 {
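
The renamed reference points at the chain-following logic: both VACUUM's
repair code and EvalPlanQualFetch must verify each t_ctid link before
trusting it, since the slot it points at may have been recycled.  The usual
validity test is that the successor tuple's xmin matches the xmax recorded
on the prior version.  A toy illustration (invented types; not the actual
HeapTupleHeader layout):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t TransactionId;

    typedef struct { TransactionId xmin; TransactionId xmax; } TupleHeader;

    /* a recycled or unrelated slot will not carry the expected xmin */
    static bool
    chain_link_valid(TransactionId prior_xmax, const TupleHeader *next)
    {
        return next->xmin == prior_xmax;
    }

    int
    main(void)
    {
        TupleHeader old       = { .xmin = 100, .xmax = 200 };
        TupleHeader new_good  = { .xmin = 200, .xmax = 0 };
        TupleHeader new_stale = { .xmin = 300, .xmax = 0 };

        printf("good link:  %d\n", chain_link_valid(old.xmax, &new_good));
        printf("stale link: %d\n", chain_link_valid(old.xmax, &new_stale));
        return 0;
    }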

src/backend/executor/README

Lines changed: 36 additions & 39 deletions
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/executor/README,v 1.10 2009/10/12 18:10:41 tgl Exp $
+$PostgreSQL: pgsql/src/backend/executor/README,v 1.11 2009/10/26 02:26:29 tgl Exp $
 
 The Postgres Executor
 =====================
@@ -160,41 +160,38 @@ modified tuple.  SELECT FOR UPDATE/SHARE behaves similarly, except that its
 action is just to lock the modified tuple and return results based on that
 version of the tuple.
 
-To implement this checking, we actually re-run the entire query from scratch
-for each modified tuple, but with the scan node that sourced the original
-tuple set to return only the modified tuple, not the original tuple or any
-of the rest of the relation.  If this query returns a tuple, then the
-modified tuple passes the quals (and the query output is the suitably
-modified update tuple, if we're doing UPDATE).  If no tuple is returned,
-then the modified tuple fails the quals, so we ignore it and continue the
-original query.  (This is reasonably efficient for simple queries, but may
-be horribly slow for joins.  A better design would be nice; one thought for
-future investigation is to treat the tuple substitution like a parameter,
-so that we can avoid rescanning unrelated nodes.)
-
-Note a fundamental bogosity of this approach: if the relation containing
-the original tuple is being used in a self-join, the other instance(s) of
-the relation will be treated as still containing the original tuple, whereas
-logical consistency would demand that the modified tuple appear in them too.
-But we'd have to actually substitute the modified tuple for the original,
-while still returning all the rest of the relation, to ensure consistent
-answers.  Implementing this correctly is a task for future work.
-
-In UPDATE/DELETE, only the target relation needs to be handled this way,
-so only one special recheck query needs to execute at a time.  In SELECT FOR
-UPDATE, there may be multiple relations flagged FOR UPDATE, so it's possible
-that while we are executing a recheck query for one modified tuple, we will
-hit another modified tuple in another relation.  In this case we "stack up"
-recheck queries: a sub-recheck query is spawned in which both the first and
-second modified tuples will be returned as the only components of their
-relations.  (In event of success, all these modified tuples will be locked.)
-Again, this isn't necessarily quite the right thing ... but in simple cases
-it works.  Potentially, recheck queries could get nested to the depth of the
-number of FOR UPDATE/SHARE relations in the query.
-
-It should be noted also that UPDATE/DELETE expect at most one tuple to
-result from the modified query, whereas in the FOR UPDATE case it's possible
-for multiple tuples to result (since we could be dealing with a join in
-which multiple tuples join to the modified tuple).  We want FOR UPDATE to
-lock all relevant tuples, so we process all tuples output by all the stacked
-recheck queries.
+To implement this checking, we actually re-run the query from scratch for
+each modified tuple (or set of tuples, for SELECT FOR UPDATE), with the
+relation scan nodes tweaked to return only the current tuples --- either
+the original ones, or the updated (and now locked) versions of the modified
+tuple(s).  If this query returns a tuple, then the modified tuple(s) pass
+the quals (and the query output is the suitably modified update tuple, if
+we're doing UPDATE).  If no tuple is returned, then the modified tuple(s)
+fail the quals, so we ignore the current result tuple and continue the
+original query.
+
+In UPDATE/DELETE, only the target relation needs to be handled this way.
+In SELECT FOR UPDATE, there may be multiple relations flagged FOR UPDATE,
+so we obtain lock on the current tuple version in each such relation before
+executing the recheck.
+
+It is also possible that there are relations in the query that are not
+to be locked (they are neither the UPDATE/DELETE target nor specified to
+be locked in SELECT FOR UPDATE/SHARE).  When re-running the test query
+we want to use the same rows from these relations that were joined to
+the locked rows.  For ordinary relations this can be implemented relatively
+cheaply by including the row TID in the join outputs and re-fetching that
+TID.  (The re-fetch is expensive, but we're trying to optimize the normal
+case where no re-test is needed.)  We have also to consider non-table
+relations, such as a ValuesScan or FunctionScan.  For these, since there
+is no equivalent of TID, the only practical solution seems to be to include
+the entire row value in the join output row.
+
+We disallow set-returning functions in the targetlist of SELECT FOR UPDATE,
+so as to ensure that at most one tuple can be returned for any particular
+set of scan tuples.  Otherwise we'd get duplicates due to the original
+query returning the same set of scan tuples multiple times.  (Note: there
+is no explicit prohibition on SRFs in UPDATE, but the net effect will be
+that only the first result row of an SRF counts, because all subsequent
+rows will result in attempts to re-update an already updated target row.
+This is historical behavior and seems not worth changing.)
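
One way to picture the two strategies the README now describes for
not-locked relations --- TID re-fetch for ordinary tables, whole-row copies
for TID-less scans such as ValuesScan or FunctionScan --- is the following
toy sketch (all names invented):

    #include <stdio.h>

    typedef struct { int block; int offset; } Tid;

    typedef struct
    {
        int  has_tid;        /* ordinary table: re-fetch by TID */
        Tid  tid;
        char whole_row[64];  /* non-table relation: copy of the whole row */
    } JoinOutputExtra;

    /* stands in for a heap fetch by TID at recheck time */
    static const char *
    fetch_by_tid(Tid tid)
    {
        static char buf[64];
        snprintf(buf, sizeof(buf), "row@(%d,%d)", tid.block, tid.offset);
        return buf;
    }

    static const char *
    recheck_row(const JoinOutputExtra *extra)
    {
        return extra->has_tid ? fetch_by_tid(extra->tid) : extra->whole_row;
    }

    int
    main(void)
    {
        JoinOutputExtra table_side  = { 1, {7, 3}, "" };
        JoinOutputExtra values_side = { 0, {0, 0}, "('a', 42)" };

        printf("table relation:     %s\n", recheck_row(&table_side));
        printf("non-table relation: %s\n", recheck_row(&values_side));
        return 0;
    }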

src/backend/executor/execCurrent.c

Lines changed: 4 additions & 1 deletion
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.11 2009/10/12 18:10:41 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.12 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,6 +102,9 @@ execCurrentOf(CurrentOfExpr *cexpr,
 	{
 		ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc);
 
+		if (!RowMarkRequiresRowShareLock(thiserm->markType))
+			continue;			/* ignore non-FOR UPDATE/SHARE items */
+
 		if (RelationGetRelid(thiserm->relation) == table_oid)
 		{
 			if (erm)
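
The guard added here reflects the new design: the ExecRowMark list can now
also carry relations that are merely tracked for EPQ re-fetch rather than
locked, and WHERE CURRENT OF must ignore those.  A toy sketch of the
distinction (the enum values are illustrative, not necessarily the real
ones):

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum
    {
        ROW_MARK_EXCLUSIVE,   /* FOR UPDATE */
        ROW_MARK_SHARE,       /* FOR SHARE */
        ROW_MARK_REFERENCE,   /* unlocked; re-fetched by TID at EPQ time */
        ROW_MARK_COPY         /* unlocked; whole row copied at EPQ time */
    } RowMarkType;

    static bool
    requires_row_share_lock(RowMarkType t)
    {
        return t == ROW_MARK_EXCLUSIVE || t == ROW_MARK_SHARE;
    }

    int
    main(void)
    {
        RowMarkType marks[] = {ROW_MARK_EXCLUSIVE, ROW_MARK_REFERENCE};

        for (int i = 0; i < 2; i++)
            printf("mark %d: %s\n", i,
                   requires_row_share_lock(marks[i])
                   ? "considered by execCurrentOf"
                   : "skipped");
        return 0;
    }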

