NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit31c7c64

committed

Account for SRFs in targetlists in planner rowcount estimates.

We made use of the ROWS estimate for set-returning functions used in FROM,but not for those used in SELECT targetlists; which is a bit of anoversight considering there are common usages that require the latterapproach. Improve that. (I had initially thought it might be worthfolding this into cost_qual_eval, but after investigation concluded thatthat wouldn't be very helpful, so just do it separately.) Per complaintfrom David Johnston.Back-patch to 9.2, but not further, for fear of destabilizing plan choicesin existing releases.

1 parented0af33 commit31c7c64Copy full SHA for 31c7c64

File tree

7 files changed

+120

-48

lines changed

src
- backend/optimizer
  - path
    - costsize.c
  - plan
  - util
    - clauses.c
- include/optimizer
  - clauses.h
  - planner.h

7 files changed

+120

-48

lines changed

`‎src/backend/optimizer/path/costsize.c`

Lines changed: 8 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -2958,6 +2958,13 @@ cost_qual_eval_walker(Node node, cost_qual_eval_context context)`
`2958`	`2958`	`* to expect that the current ordering of the clauses is the one that's`
`2959`	`2959`	`* going to end up being used.The above per-RestrictInfo caching would`
`2960`	`2960`	`* not mix well with trying to re-order clauses anyway.`
	`2961`	`+ *`
	`2962`	`+ * Another issue that is entirely ignored here is that if a set-returning`
	`2963`	`+ * function is below top level in the tree, the functions/operators above`
	`2964`	`+ * it will need to be evaluated multiple times. In practical use, such`
	`2965`	`+ * cases arise so seldom as to not be worth the added complexity needed;`
	`2966`	`+ * moreover, since our rowcount estimates for functions tend to be pretty`
	`2967`	`+ * phony, the results would also be pretty phony.`
`2961`	`2968`	`*/`
`2962`	`2969`	`if (IsA(node,FuncExpr))`
`2963`	`2970`	`{`
`@@ -3742,7 +3749,7 @@ set_function_size_estimates(PlannerInfo root, RelOptInfo rel)`
`3742`	`3749`	`Assert(rte->rtekind==RTE_FUNCTION);`
`3743`	`3750`
`3744`	`3751`	`/* Estimate number of rows the function itself will return */`
`3745`		`-rel->tuples=clamp_row_est(expression_returns_set_rows(rte->funcexpr));`
	`3752`	`+rel->tuples=expression_returns_set_rows(rte->funcexpr);`
`3746`	`3753`
`3747`	`3754`	`/* Now estimate number of output rows, etc */`
`3748`	`3755`	`set_baserel_size_estimates(root,rel);`

`‎src/backend/optimizer/plan/createplan.c`

Lines changed: 10 additions & 19 deletions

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,7 @@`
`30`	`30`	`#include"optimizer/placeholder.h"`
`31`	`31`	`#include"optimizer/plancat.h"`
`32`	`32`	`#include"optimizer/planmain.h"`
	`33`	`+#include"optimizer/planner.h"`
`33`	`34`	`#include"optimizer/predtest.h"`
`34`	`35`	`#include"optimizer/restrictinfo.h"`
`35`	`36`	`#include"optimizer/subselect.h"`
`@@ -4126,8 +4127,8 @@ make_agg(PlannerInfo root, List tlist, List *qual,`
`4126`	`4127`	`* anything for Aggref nodes; this is okay since they are really`
`4127`	`4128`	`* comparable to Vars.`
`4128`	`4129`	`*`
`4129`		`- * See notes ingrouping_planner about why only make_agg, make_windowagg`
`4130`		`- * and make_group worry about tlist eval cost.`
	`4130`	`+ * See notes inadd_tlist_costs_to_plan about why only make_agg,`
	`4131`	`+ *make_windowaggand make_group worry about tlist eval cost.`
`4131`	`4132`	`*/`
`4132`	`4133`	`if (qual)`
`4133`	`4134`	`{`
`@@ -4136,10 +4137,7 @@ make_agg(PlannerInfo root, List tlist, List *qual,`
`4136`	`4137`	`plan->total_cost+=qual_cost.startup;`
`4137`	`4138`	`plan->total_cost+=qual_cost.per_tuple*plan->plan_rows;`
`4138`	`4139`	`}`
`4139`		`-cost_qual_eval(&qual_cost,tlist,root);`
`4140`		`-plan->startup_cost+=qual_cost.startup;`
`4141`		`-plan->total_cost+=qual_cost.startup;`
`4142`		`-plan->total_cost+=qual_cost.per_tuple*plan->plan_rows;`
	`4140`	`+add_tlist_costs_to_plan(root,plan,tlist);`
`4143`	`4141`
`4144`	`4142`	`plan->qual=qual;`
`4145`	`4143`	`plan->targetlist=tlist;`
`@@ -4160,7 +4158,6 @@ make_windowagg(PlannerInfo root, List tlist,`
`4160`	`4158`	`WindowAgg*node=makeNode(WindowAgg);`
`4161`	`4159`	`Plan*plan=&node->plan;`
`4162`	`4160`	`Pathwindowagg_path;/* dummy for result of cost_windowagg */`
`4163`		`-QualCostqual_cost;`
`4164`	`4161`
`4165`	`4162`	`node->winref=winref;`
`4166`	`4163`	`node->partNumCols=partNumCols;`
`@@ -4185,13 +4182,10 @@ make_windowagg(PlannerInfo root, List tlist,`
`4185`	`4182`	`/*`
`4186`	`4183`	`* We also need to account for the cost of evaluation of the tlist.`
`4187`	`4184`	`*`
`4188`		`- * See notes ingrouping_planner about why only make_agg, make_windowagg`
`4189`		`- * and make_group worry about tlist eval cost.`
	`4185`	`+ * See notes inadd_tlist_costs_to_plan about why only make_agg,`
	`4186`	`+ *make_windowaggand make_group worry about tlist eval cost.`
`4190`	`4187`	`*/`
`4191`		`-cost_qual_eval(&qual_cost,tlist,root);`
`4192`		`-plan->startup_cost+=qual_cost.startup;`
`4193`		`-plan->total_cost+=qual_cost.startup;`
`4194`		`-plan->total_cost+=qual_cost.per_tuple*plan->plan_rows;`
	`4188`	`+add_tlist_costs_to_plan(root,plan,tlist);`
`4195`	`4189`
`4196`	`4190`	`plan->targetlist=tlist;`
`4197`	`4191`	`plan->lefttree=lefttree;`
`@@ -4242,8 +4236,8 @@ make_group(PlannerInfo *root,`
`4242`	`4236`	`* lower plan level and will only be copied by the Group node. Worth`
`4243`	`4237`	`* fixing?`
`4244`	`4238`	`*`
`4245`		`- * See notes ingrouping_planner about why only make_agg, make_windowagg`
`4246`		`- * and make_group worry about tlist eval cost.`
	`4239`	`+ * See notes inadd_tlist_costs_to_plan about why only make_agg,`
	`4240`	`+ *make_windowaggand make_group worry about tlist eval cost.`
`4247`	`4241`	`*/`
`4248`	`4242`	`if (qual)`
`4249`	`4243`	`{`
`@@ -4252,10 +4246,7 @@ make_group(PlannerInfo *root,`
`4252`	`4246`	`plan->total_cost+=qual_cost.startup;`
`4253`	`4247`	`plan->total_cost+=qual_cost.per_tuple*plan->plan_rows;`
`4254`	`4248`	`}`
`4255`		`-cost_qual_eval(&qual_cost,tlist,root);`
`4256`		`-plan->startup_cost+=qual_cost.startup;`
`4257`		`-plan->total_cost+=qual_cost.startup;`
`4258`		`-plan->total_cost+=qual_cost.per_tuple*plan->plan_rows;`
	`4249`	`+add_tlist_costs_to_plan(root,plan,tlist);`
`4259`	`4250`
`4260`	`4251`	`plan->qual=qual;`
`4261`	`4252`	`plan->targetlist=tlist;`

`‎src/backend/optimizer/plan/planagg.c`

Lines changed: 2 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -36,6 +36,7 @@`
`36`	`36`	`#include"optimizer/cost.h"`
`37`	`37`	`#include"optimizer/paths.h"`
`38`	`38`	`#include"optimizer/planmain.h"`
	`39`	`+#include"optimizer/planner.h"`
`39`	`40`	`#include"optimizer/subselect.h"`
`40`	`41`	`#include"parser/parsetree.h"`
`41`	`42`	`#include"parser/parse_clause.h"`
`@@ -205,7 +206,6 @@ optimize_minmax_aggregates(PlannerInfo root, List tlist,`
`205`	`206`	`Pathagg_p;`
`206`	`207`	`Plan*plan;`
`207`	`208`	`Node*hqual;`
`208`		`-QualCosttlist_cost;`
`209`	`209`	`ListCell*lc;`
`210`	`210`
`211`	`211`	`/* Nothing to do if preprocess_minmax_aggs rejected the query */`
`@@ -272,9 +272,7 @@ optimize_minmax_aggregates(PlannerInfo root, List tlist,`
`272`	`272`	`plan= (Plan*)make_result(root,tlist,hqual,NULL);`
`273`	`273`
`274`	`274`	`/* Account for evaluation cost of the tlist (make_result did the rest) */`
`275`		`-cost_qual_eval(&tlist_cost,tlist,root);`
`276`		`-plan->startup_cost+=tlist_cost.startup;`
`277`		`-plan->total_cost+=tlist_cost.startup+tlist_cost.per_tuple;`
	`275`	`+add_tlist_costs_to_plan(root,plan,tlist);`
`278`	`276`
`279`	`277`	`returnplan;`
`280`	`278`	`}`

`‎src/backend/optimizer/plan/planner.c`

Lines changed: 57 additions & 21 deletions

Original file line number	Diff line number	Diff line change
`@@ -1045,7 +1045,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)`
`1045`	`1045`	`doublesub_limit_tuples;`
`1046`	`1046`	`AttrNumber*groupColIdx=NULL;`
`1047`	`1047`	`boolneed_tlist_eval= true;`
`1048`		`-QualCosttlist_cost;`
`1049`	`1048`	`Path*cheapest_path;`
`1050`	`1049`	`Path*sorted_path;`
`1051`	`1050`	`Path*best_path;`
`@@ -1355,27 +1354,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)`
`1355`	`1354`
`1356`	`1355`	`/*`
`1357`	`1356`	`* Also, account for the cost of evaluation of the sub_tlist.`
`1358`		`- *`
`1359`		`- * Up to now, we have only been dealing with "flat" tlists,`
`1360`		`- * containing just Vars. So their evaluation cost is zero`
`1361`		`- * according to the model used by cost_qual_eval() (or if you`
`1362`		`- * prefer, the cost is factored into cpu_tuple_cost). Thus we`
`1363`		`- * can avoid accounting for tlist cost throughout`
`1364`		`- * query_planner() and subroutines. But now we've inserted a`
`1365`		`- * tlist that might contain actual operators, sub-selects, etc`
`1366`		`- * --- so we'd better account for its cost.`
`1367`		`- *`
`1368`		`- * Below this point, any tlist eval cost for added-on nodes`
`1369`		`- * should be accounted for as we create those nodes.`
`1370`		`- * Presently, of the node types we can add on, only Agg,`
`1371`		`- * WindowAgg, and Group project new tlists (the rest just copy`
`1372`		`- * their input tuples) --- so make_agg(), make_windowagg() and`
`1373`		`- * make_group() are responsible for computing the added cost.`
	`1357`	`+ * See comments for add_tlist_costs_to_plan() for more info.`
`1374`	`1358`	`*/`
`1375`		`-cost_qual_eval(&tlist_cost,sub_tlist,root);`
`1376`		`-result_plan->startup_cost+=tlist_cost.startup;`
`1377`		`-result_plan->total_cost+=tlist_cost.startup+`
`1378`		`-tlist_cost.per_tuple*result_plan->plan_rows;`
	`1359`	`+add_tlist_costs_to_plan(root,result_plan,sub_tlist);`
`1379`	`1360`	`}`
`1380`	`1361`	`else`
`1381`	`1362`	`{`
`@@ -1815,6 +1796,61 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)`
`1815`	`1796`	`returnresult_plan;`
`1816`	`1797`	`}`
`1817`	`1798`
	`1799`	`+/*`
	`1800`	`+ * add_tlist_costs_to_plan`
	`1801`	`+ *`
	`1802`	`+ * Estimate the execution costs associated with evaluating the targetlist`
	`1803`	`+ * expressions, and add them to the cost estimates for the Plan node.`
	`1804`	`+ *`
	`1805`	`+ * If the tlist contains set-returning functions, also inflate the Plan's cost`
	`1806`	`+ * and plan_rows estimates accordingly. (Hence, this must be called after`
	`1807`	`+ * any logic that uses plan_rows to, eg, estimate qual evaluation costs.)`
	`1808`	`+ *`
	`1809`	`+ * Note: during initial stages of planning, we mostly consider plan nodes with`
	`1810`	`+ * "flat" tlists, containing just Vars. So their evaluation cost is zero`
	`1811`	`+ * according to the model used by cost_qual_eval() (or if you prefer, the cost`
	`1812`	`+ * is factored into cpu_tuple_cost). Thus we can avoid accounting for tlist`
	`1813`	`+ * cost throughout query_planner() and subroutines. But once we apply a`
	`1814`	`+ * tlist that might contain actual operators, sub-selects, etc, we'd better`
	`1815`	`+ * account for its cost. Any set-returning functions in the tlist must also`
	`1816`	`+ * affect the estimated rowcount.`
	`1817`	`+ *`
	`1818`	`+ * Once grouping_planner() has applied a general tlist to the topmost`
	`1819`	`+ * scan/join plan node, any tlist eval cost for added-on nodes should be`
	`1820`	`+ * accounted for as we create those nodes. Presently, of the node types we`
	`1821`	`+ * can add on later, only Agg, WindowAgg, and Group project new tlists (the`
	`1822`	`+ * rest just copy their input tuples) --- so make_agg(), make_windowagg() and`
	`1823`	`+ * make_group() are responsible for calling this function to account for their`
	`1824`	`+ * tlist costs.`
	`1825`	`+ */`
	`1826`	`+void`
	`1827`	`+add_tlist_costs_to_plan(PlannerInforoot,Planplan,List*tlist)`
	`1828`	`+{`
	`1829`	`+QualCosttlist_cost;`
	`1830`	`+doubletlist_rows;`
	`1831`	`+`
	`1832`	`+cost_qual_eval(&tlist_cost,tlist,root);`
	`1833`	`+plan->startup_cost+=tlist_cost.startup;`
	`1834`	`+plan->total_cost+=tlist_cost.startup+`
	`1835`	`+tlist_cost.per_tuple*plan->plan_rows;`
	`1836`	`+`
	`1837`	`+tlist_rows=tlist_returns_set_rows(tlist);`
	`1838`	`+if (tlist_rows>1)`
	`1839`	`+{`
	`1840`	`+/*`
	`1841`	`+ * We assume that execution costs of the tlist proper were all`
	`1842`	`+ * accounted for by cost_qual_eval. However, it still seems`
	`1843`	`+ * appropriate to charge something more for the executor's general`
	`1844`	`+ * costs of processing the added tuples. The cost is probably less`
	`1845`	`+ * than cpu_tuple_cost, though, so we arbitrarily use half of that.`
	`1846`	`+ */`
	`1847`	`+plan->total_cost+=plan->plan_rows* (tlist_rows-1)*`
	`1848`	`+cpu_tuple_cost /2;`
	`1849`	`+`
	`1850`	`+plan->plan_rows *=tlist_rows;`
	`1851`	`+}`
	`1852`	`+}`
	`1853`	`+`
`1818`	`1854`	`/*`
`1819`	`1855`	`* Detect whether a plan node is a "dummy" plan created when a relation`
`1820`	`1856`	`* is deemed not to need scanning due to constraint exclusion.`

`‎src/backend/optimizer/util/clauses.c`

Lines changed: 39 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -661,10 +661,12 @@ find_window_functions_walker(Node node, WindowFuncLists lists)`
`661`	`661`
`662`	`662`	`/*`
`663`	`663`	`* expression_returns_set_rows`
`664`		`- * Estimate the number of rows in a set result.`
	`664`	`+ * Estimate the number of rows returned by a set-returning expression.`
	`665`	`+ * The result is 1 if there are no set-returning functions.`
`665`	`666`	`*`
`666`	`667`	`* We use the product of the rowcount estimates of all the functions in`
`667`		`- * the given tree.The result is 1 if there are no set-returning functions.`
	`668`	`+ * the given tree (this corresponds to the behavior of ExecMakeFunctionResult`
	`669`	`+ * for nested set-returning functions).`
`668`	`670`	`*`
`669`	`671`	`* Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.`
`670`	`672`	`*/`
`@@ -674,7 +676,7 @@ expression_returns_set_rows(Node *clause)`
`674`	`676`	`doubleresult=1;`
`675`	`677`
`676`	`678`	`(void)expression_returns_set_rows_walker(clause,&result);`
`677`		`-returnresult;`
	`679`	`+returnclamp_row_est(result);`
`678`	`680`	`}`
`679`	`681`
`680`	`682`	`staticbool`
`@@ -736,6 +738,40 @@ expression_returns_set_rows_walker(Node node, double count)`
`736`	`738`	`(void*)count);`
`737`	`739`	`}`
`738`	`740`
	`741`	`+/*`
	`742`	`+ * tlist_returns_set_rows`
	`743`	`+ * Estimate the number of rows returned by a set-returning targetlist.`
	`744`	`+ * The result is 1 if there are no set-returning functions.`
	`745`	`+ *`
	`746`	`+ * Here, the result is the largest rowcount estimate of any of the tlist's`
	`747`	`+ * expressions, not the product as you would get from naively applying`
	`748`	`+ * expression_returns_set_rows() to the whole tlist. The behavior actually`
	`749`	`+ * implemented by ExecTargetList produces a number of rows equal to the least`
	`750`	`+ * common multiple of the expression rowcounts, so that the product would be`
	`751`	`+ * a worst-case estimate that is typically not realistic. Taking the max as`
	`752`	`+ * we do here is a best-case estimate that might not be realistic either,`
	`753`	`+ * but it's probably closer for typical usages. We don't try to compute the`
	`754`	`+ * actual LCM because we're working with very approximate estimates, so their`
	`755`	`+ * LCM would be unduly noisy.`
	`756`	`+ */`
	`757`	`+double`
	`758`	`+tlist_returns_set_rows(List*tlist)`
	`759`	`+{`
	`760`	`+doubleresult=1;`
	`761`	`+ListCell*lc;`
	`762`	`+`
	`763`	`+foreach(lc,tlist)`
	`764`	`+{`
	`765`	`+TargetEntrytle= (TargetEntry)lfirst(lc);`
	`766`	`+doublecolresult;`
	`767`	`+`
	`768`	`+colresult=expression_returns_set_rows((Node*)tle->expr);`
	`769`	`+if (result<colresult)`
	`770`	`+result=colresult;`
	`771`	`+}`
	`772`	`+returnresult;`
	`773`	`+}`
	`774`	`+`
`739`	`775`
`740`	`776`	`/*****************************************************************************`
`741`	`777`	`*Subplan clause manipulation`

`‎src/include/optimizer/clauses.h`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,7 @@ extern bool contain_window_function(Node *clause);`
`55`	`55`	`externWindowFuncListsfind_window_functions(Nodeclause,IndexmaxWinRef);`
`56`	`56`
`57`	`57`	`externdoubleexpression_returns_set_rows(Node*clause);`
	`58`	`+externdoubletlist_returns_set_rows(List*tlist);`
`58`	`59`
`59`	`60`	`externboolcontain_subplans(Node*clause);`
`60`	`61`

`‎src/include/optimizer/planner.h`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,9 @@ extern Plan subquery_planner(PlannerGlobal glob, Query *parse,`
`35`	`35`	`boolhasRecursion,doubletuple_fraction,`
`36`	`36`	`PlannerInfo**subroot);`
`37`	`37`
	`38`	`+externvoidadd_tlist_costs_to_plan(PlannerInforoot,Planplan,`
	`39`	`+List*tlist);`
	`40`	`+`
`38`	`41`	`externboolis_dummy_plan(Plan*plan);`
`39`	`42`
`40`	`43`	`externExprexpression_planner(Exprexpr);`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit31c7c64

File tree

7 files changed

7 files changed

`‎src/backend/optimizer/path/costsize.c`

`‎src/backend/optimizer/plan/createplan.c`

`‎src/backend/optimizer/plan/planagg.c`

`‎src/backend/optimizer/plan/planner.c`

`‎src/backend/optimizer/util/clauses.c`

`‎src/include/optimizer/clauses.h`

`‎src/include/optimizer/planner.h`

0 commit comments