Commit 9f2ee8f

Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases.  We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries.  If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row.  The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.

Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested.  To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param.  Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.

This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE.  This is needed to avoid the
duplicate-output-tuple problem.  It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
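
The Param-driven rescan is the heart of the patch.  As a rough illustration
only --- this is a toy model, not PostgreSQL code, and every name in it is
invented --- the following C program shows the shape of the idea: a scan node
that, once a test tuple has been jammed in and a rescan has been signaled,
emits exactly that one row, with no executor init/shutdown cycle per recheck:

    /*
     * Toy model of the Param-based rescan described above.  The real
     * executor propagates the Param change through its own rescan
     * machinery; here a direct rescan() call stands in for that signal.
     */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct ScanNode
    {
        const char *relname;
        const char *epq_tuple;  /* test row jammed in by the EPQ machinery */
        bool        epq_done;   /* has the single test row been emitted? */
    } ScanNode;

    /* reacts to the (simulated) Param change: reset, but do not rebuild */
    static void
    rescan(ScanNode *node)
    {
        node->epq_done = false;
    }

    /* in EPQ mode, return the stashed row exactly once */
    static const char *
    next(ScanNode *node)
    {
        if (node->epq_tuple == NULL || node->epq_done)
            return NULL;        /* normal scan path omitted in this model */
        node->epq_done = true;
        return node->epq_tuple;
    }

    int
    main(void)
    {
        ScanNode    scan = {"orders", NULL, false};
        const char *row;

        /* first recheck: stash the current row and "signal the Param" */
        scan.epq_tuple = "(id=42, qty=7)";
        rescan(&scan);
        while ((row = next(&scan)) != NULL)
            printf("recheck sees %s from %s\n", row, scan.relname);

        /* a later recheck reuses the same already-built node */
        scan.epq_tuple = "(id=42, qty=8)";
        rescan(&scan);
        while ((row = next(&scan)) != NULL)
            printf("recheck sees %s from %s\n", row, scan.relname);
        return 0;
    }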
1 parent 76d8883 · commit 9f2ee8f

File tree

50 files changed: +1550, -1021 lines


src/backend/commands/trigger.c

Lines changed: 20 additions & 12 deletions
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.254 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.255 2009/10/26 02:26:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,7 +61,7 @@ int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void InsertTrigger(TriggerDesc *trigdesc, Trigger *trigger, int indx);
 static HeapTuple GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot);
@@ -1828,7 +1828,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 bool
-ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
+ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid)
 {
@@ -1842,7 +1842,7 @@ ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
 	TupleTableSlot *newSlot;
 	int			i;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return false;
@@ -1964,7 +1964,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 HeapTuple
-ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
+ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid, HeapTuple newtuple)
 {
@@ -1979,7 +1979,7 @@ ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
 	int			i;
 	Bitmapset  *modifiedCols;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return NULL;
@@ -2107,7 +2107,7 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 
 static HeapTuple
 GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot)
@@ -2125,8 +2125,8 @@ GetTupleForTrigger(EState *estate,
 
 		*newSlot = NULL;
 
-		/* caller must pass a subplanstate if EvalPlanQual is possible */
-		Assert(subplanstate != NULL);
+		/* caller must pass an epqstate if EvalPlanQual is possible */
+		Assert(epqstate != NULL);
 
 		/*
 		 * lock tuple for update
@@ -2153,27 +2153,35 @@ ltrmark:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
-				else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+				if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
 				{
 					/* it was updated, so look at the updated version */
 					TupleTableSlot *epqslot;
 
 					epqslot = EvalPlanQual(estate,
+										   epqstate,
+										   relation,
 										   relinfo->ri_RangeTableIndex,
-										   subplanstate,
 										   &update_ctid,
 										   update_xmax);
 					if (!TupIsNull(epqslot))
 					{
 						*tid = update_ctid;
 						*newSlot = epqslot;
+
+						/*
+						 * EvalPlanQual already locked the tuple, but we
+						 * re-call heap_lock_tuple anyway as an easy way
+						 * of re-fetching the correct tuple.  Speed is
+						 * hardly a criterion in this path anyhow.
+						 */
 						goto ltrmark;
 					}
 				}
 
 				/*
 				 * if tuple was deleted or PlanQual failed for updated tuple -
-				 * we have not process this tuple!
+				 * we must not process this tuple!
 				 */
 				return NULL;
 

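The new trigger-side call pattern is visible in the last hunk: on a concurrent
update, EvalPlanQual re-evaluates the quals against the updated row, and on
success GetTupleForTrigger loops back to ltrmark to re-lock that row.  A
self-contained toy of just that control flow (invented names; only the
goto-retry shape mirrors the real code):

    /* Toy model of the lock/recheck/retry loop --- not PostgreSQL code. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int id; int version; bool updated; } Row;

    /* stands in for heap_lock_tuple: fails once due to a concurrent update */
    static bool
    lock_row(Row *row)
    {
        if (row->updated)
        {
            row->version++;     /* chase the chain to the updated version */
            row->updated = false;
            return false;       /* "HeapTupleUpdated": caller must recheck */
        }
        return true;
    }

    /* stands in for EvalPlanQual: do the quals still pass for the new row? */
    static bool
    recheck_quals(const Row *row)
    {
        return row->id == 42;   /* trivially "qual passes" for this demo */
    }

    int
    main(void)
    {
        Row row = {42, 1, true};

    retry:                      /* plays the role of the ltrmark label */
        if (!lock_row(&row))
        {
            if (recheck_quals(&row))
                goto retry;     /* re-lock the updated version */
            printf("qual failed; skip tuple\n");
            return 0;
        }
        printf("locked id=%d version=%d\n", row.id, row.version);
        return 0;
    }
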
src/backend/commands/vacuum.c

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.393 2009/09/01 04:46:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.394 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,7 +102,7 @@ typedef VacPageListData *VacPageList;
  * Note: because t_ctid links can be stale (this would only occur if a prior
  * VACUUM crashed partway through), it is possible that new_tid points to an
  * empty slot or unrelated tuple.  We have to check the linkage as we follow
- * it, just as is done in EvalPlanQual.
+ * it, just as is done in EvalPlanQualFetch.
  */
 typedef struct VTupleLinkData
 {
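
The renamed reference points at the chain-following logic: both VACUUM's
repair code and EvalPlanQualFetch must verify each t_ctid link before
trusting it, since the slot it points at may have been recycled.  The usual
validity test is that the successor tuple's xmin matches the xmax recorded
on the prior version.  A toy illustration (invented types; not the actual
HeapTupleHeader layout):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t TransactionId;

    typedef struct { TransactionId xmin; TransactionId xmax; } TupleHeader;

    /* a recycled or unrelated slot will not carry the expected xmin */
    static bool
    chain_link_valid(TransactionId prior_xmax, const TupleHeader *next)
    {
        return next->xmin == prior_xmax;
    }

    int
    main(void)
    {
        TupleHeader old       = { .xmin = 100, .xmax = 200 };
        TupleHeader new_good  = { .xmin = 200, .xmax = 0 };
        TupleHeader new_stale = { .xmin = 300, .xmax = 0 };

        printf("good link:  %d\n", chain_link_valid(old.xmax, &new_good));
        printf("stale link: %d\n", chain_link_valid(old.xmax, &new_stale));
        return 0;
    }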

src/backend/executor/README

Lines changed: 36 additions & 39 deletions
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/executor/README,v 1.10 2009/10/12 18:10:41 tgl Exp $
+$PostgreSQL: pgsql/src/backend/executor/README,v 1.11 2009/10/26 02:26:29 tgl Exp $
 
 The Postgres Executor
 =====================
@@ -160,41 +160,38 @@ modified tuple.  SELECT FOR UPDATE/SHARE behaves similarly, except that its
 action is just to lock the modified tuple and return results based on that
 version of the tuple.
 
-To implement this checking, we actually re-run the entire query from scratch
-for each modified tuple, but with the scan node that sourced the original
-tuple set to return only the modified tuple, not the original tuple or any
-of the rest of the relation.  If this query returns a tuple, then the
-modified tuple passes the quals (and the query output is the suitably
-modified update tuple, if we're doing UPDATE).  If no tuple is returned,
-then the modified tuple fails the quals, so we ignore it and continue the
-original query.  (This is reasonably efficient for simple queries, but may
-be horribly slow for joins.  A better design would be nice; one thought for
-future investigation is to treat the tuple substitution like a parameter,
-so that we can avoid rescanning unrelated nodes.)
-
-Note a fundamental bogosity of this approach: if the relation containing
-the original tuple is being used in a self-join, the other instance(s) of
-the relation will be treated as still containing the original tuple, whereas
-logical consistency would demand that the modified tuple appear in them too.
-But we'd have to actually substitute the modified tuple for the original,
-while still returning all the rest of the relation, to ensure consistent
-answers.  Implementing this correctly is a task for future work.
-
-In UPDATE/DELETE, only the target relation needs to be handled this way,
-so only one special recheck query needs to execute at a time.  In SELECT FOR
-UPDATE, there may be multiple relations flagged FOR UPDATE, so it's possible
-that while we are executing a recheck query for one modified tuple, we will
-hit another modified tuple in another relation.  In this case we "stack up"
-recheck queries: a sub-recheck query is spawned in which both the first and
-second modified tuples will be returned as the only components of their
-relations.  (In event of success, all these modified tuples will be locked.)
-Again, this isn't necessarily quite the right thing ... but in simple cases
-it works.  Potentially, recheck queries could get nested to the depth of the
-number of FOR UPDATE/SHARE relations in the query.
-
-It should be noted also that UPDATE/DELETE expect at most one tuple to
-result from the modified query, whereas in the FOR UPDATE case it's possible
-for multiple tuples to result (since we could be dealing with a join in
-which multiple tuples join to the modified tuple).  We want FOR UPDATE to
-lock all relevant tuples, so we process all tuples output by all the stacked
-recheck queries.
+To implement this checking, we actually re-run the query from scratch for
+each modified tuple (or set of tuples, for SELECT FOR UPDATE), with the
+relation scan nodes tweaked to return only the current tuples --- either
+the original ones, or the updated (and now locked) versions of the modified
+tuple(s).  If this query returns a tuple, then the modified tuple(s) pass
+the quals (and the query output is the suitably modified update tuple, if
+we're doing UPDATE).  If no tuple is returned, then the modified tuple(s)
+fail the quals, so we ignore the current result tuple and continue the
+original query.
+
+In UPDATE/DELETE, only the target relation needs to be handled this way.
+In SELECT FOR UPDATE, there may be multiple relations flagged FOR UPDATE,
+so we obtain lock on the current tuple version in each such relation before
+executing the recheck.
+
+It is also possible that there are relations in the query that are not
+to be locked (they are neither the UPDATE/DELETE target nor specified to
+be locked in SELECT FOR UPDATE/SHARE).  When re-running the test query
+we want to use the same rows from these relations that were joined to
+the locked rows.  For ordinary relations this can be implemented relatively
+cheaply by including the row TID in the join outputs and re-fetching that
+TID.  (The re-fetch is expensive, but we're trying to optimize the normal
+case where no re-test is needed.)  We have also to consider non-table
+relations, such as a ValuesScan or FunctionScan.  For these, since there
+is no equivalent of TID, the only practical solution seems to be to include
+the entire row value in the join output row.
+
+We disallow set-returning functions in the targetlist of SELECT FOR UPDATE,
+so as to ensure that at most one tuple can be returned for any particular
+set of scan tuples.  Otherwise we'd get duplicates due to the original
+query returning the same set of scan tuples multiple times.  (Note: there
+is no explicit prohibition on SRFs in UPDATE, but the net effect will be
+that only the first result row of an SRF counts, because all subsequent
+rows will result in attempts to re-update an already updated target row.
+This is historical behavior and seems not worth changing.)
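
One way to picture the two strategies the README now describes for
not-locked relations --- TID re-fetch for ordinary tables, whole-row copies
for TID-less scans such as ValuesScan or FunctionScan --- is the following
toy sketch (all names invented):

    #include <stdio.h>

    typedef struct { int block; int offset; } Tid;

    typedef struct
    {
        int  has_tid;        /* ordinary table: re-fetch by TID */
        Tid  tid;
        char whole_row[64];  /* non-table relation: copy of the whole row */
    } JoinOutputExtra;

    /* stands in for a heap fetch by TID at recheck time */
    static const char *
    fetch_by_tid(Tid tid)
    {
        static char buf[64];
        snprintf(buf, sizeof(buf), "row@(%d,%d)", tid.block, tid.offset);
        return buf;
    }

    static const char *
    recheck_row(const JoinOutputExtra *extra)
    {
        return extra->has_tid ? fetch_by_tid(extra->tid) : extra->whole_row;
    }

    int
    main(void)
    {
        JoinOutputExtra table_side  = { 1, {7, 3}, "" };
        JoinOutputExtra values_side = { 0, {0, 0}, "('a', 42)" };

        printf("table relation:     %s\n", recheck_row(&table_side));
        printf("non-table relation: %s\n", recheck_row(&values_side));
        return 0;
    }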

src/backend/executor/execCurrent.c

Lines changed: 4 additions & 1 deletion
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.11 2009/10/12 18:10:41 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.12 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,6 +102,9 @@ execCurrentOf(CurrentOfExpr *cexpr,
 	{
 		ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc);
 
+		if (!RowMarkRequiresRowShareLock(thiserm->markType))
+			continue;			/* ignore non-FOR UPDATE/SHARE items */
+
 		if (RelationGetRelid(thiserm->relation) == table_oid)
 		{
 			if (erm)
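
The guard added here reflects the new design: the ExecRowMark list can now
also carry relations that are merely tracked for EPQ re-fetch rather than
locked, and WHERE CURRENT OF must ignore those.  A toy sketch of the
distinction (the enum values are illustrative, not necessarily the real
ones):

    #include <stdbool.h>
    #include <stdio.h>

    typedef enum
    {
        ROW_MARK_EXCLUSIVE,   /* FOR UPDATE */
        ROW_MARK_SHARE,       /* FOR SHARE */
        ROW_MARK_REFERENCE,   /* unlocked; re-fetched by TID at EPQ time */
        ROW_MARK_COPY         /* unlocked; whole row copied at EPQ time */
    } RowMarkType;

    static bool
    requires_row_share_lock(RowMarkType t)
    {
        return t == ROW_MARK_EXCLUSIVE || t == ROW_MARK_SHARE;
    }

    int
    main(void)
    {
        RowMarkType marks[] = {ROW_MARK_EXCLUSIVE, ROW_MARK_REFERENCE};

        for (int i = 0; i < 2; i++)
            printf("mark %d: %s\n", i,
                   requires_row_share_lock(marks[i])
                   ? "considered by execCurrentOf"
                   : "skipped");
        return 0;
    }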

