  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.210 2009/07/11 04:09:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,6 +63,7 @@
 
 #include <math.h>
 
+#include "executor/executor.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -119,6 +120,8 @@ typedef struct
 static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 				RestrictInfo *rinfo,
 				PathKey *pathkey);
+static void cost_rescan(PlannerInfo *root, Path *path,
+				Cost *rescan_startup_cost, Cost *rescan_total_cost);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
 static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
 				SpecialJoinInfo *sjinfo,
@@ -895,15 +898,26 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
 	rte = planner_rt_fetch(baserel->relid, root);
 	Assert(rte->rtekind == RTE_FUNCTION);
 
-	/* Estimate costs of executing the function expression */
+	/*
+	 * Estimate costs of executing the function expression.
+	 *
+	 * Currently, nodeFunctionscan.c always executes the function to
+	 * completion before returning any rows, and caches the results in a
+	 * tuplestore.  So the function eval cost is all startup cost, and
+	 * per-row costs are minimal.
+	 *
+	 * XXX in principle we ought to charge tuplestore spill costs if the
+	 * number of rows is large.  However, given how phony our rowcount
+	 * estimates for functions tend to be, there's not a lot of point
+	 * in that refinement right now.
+	 */
 	cost_qual_eval_node(&exprcost, rte->funcexpr, root);
 
-	startup_cost += exprcost.startup;
-	cpu_per_tuple = exprcost.per_tuple;
+	startup_cost += exprcost.startup + exprcost.per_tuple;
 
 	/* Add scanning CPU costs */
 	startup_cost += baserel->baserestrictcost.startup;
-	cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 	run_cost += cpu_per_tuple * baserel->tuples;
 
 	path->startup_cost = startup_cost;
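The revised function-scan math is easy to check by hand. Below is a standalone C sketch of it; every constant and row count is an illustrative assumption (cpu_tuple_cost uses its default GUC value), not a value taken from this patch:

    #include <stdio.h>

    /* Standalone sketch of the revised cost_functionscan arithmetic;
     * all numbers here are assumptions, not PostgreSQL code. */
    int
    main(void)
    {
        double cpu_tuple_cost = 0.01;     /* default GUC value */
        double expr_startup = 0.0;        /* assumed qual-eval startup */
        double expr_per_tuple = 2.5;      /* assumed one-time eval cost */
        double restrict_per_tuple = 0.0;  /* no restriction quals */
        double tuples = 1000.0;           /* default rowcount guess */

        /* The function now runs to completion into a tuplestore before
         * returning any row, so its eval cost is charged wholly at
         * startup; the run phase pays only per-tuple scanning costs. */
        double startup_cost = expr_startup + expr_per_tuple;
        double run_cost = (cpu_tuple_cost + restrict_per_tuple) * tuples;

        printf("startup=%g total=%g\n", startup_cost, startup_cost + run_cost);
        return 0;
    }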
@@ -1176,41 +1190,44 @@ sort_exceeds_work_mem(Sort *sort)
  *
  * If the total volume of data to materialize exceeds work_mem, we will need
  * to write it to disk, so the cost is much higher in that case.
+ *
+ * Note that here we are estimating the costs for the first scan of the
+ * relation, so the materialization is all overhead --- any savings will
+ * occur only on rescan, which is estimated in cost_rescan.
  */
 void
 cost_material(Path *path,
-			  Cost input_cost, double tuples, int width)
+			  Cost input_startup_cost, Cost input_total_cost,
+			  double tuples, int width)
 {
-	Cost		startup_cost = input_cost;
-	Cost		run_cost = 0;
+	Cost		startup_cost = input_startup_cost;
+	Cost		run_cost = input_total_cost - input_startup_cost;
 	double		nbytes = relation_byte_size(tuples, width);
 	long		work_mem_bytes = work_mem * 1024L;
 
-	/* disk costs */
+	/*
+	 * Whether spilling or not, charge 2x cpu_tuple_cost per tuple to reflect
+	 * bookkeeping overhead.  (This rate must be more than cpu_tuple_cost;
+	 * if it is exactly the same then there will be a cost tie between
+	 * nestloop with A outer, materialized B inner and nestloop with B outer,
+	 * materialized A inner.  The extra cost ensures we'll prefer
+	 * materializing the smaller rel.)
+	 */
+	run_cost += 2 * cpu_tuple_cost * tuples;
+
+	/*
+	 * If we will spill to disk, charge at the rate of seq_page_cost per page.
+	 * This cost is assumed to be evenly spread through the plan run phase,
+	 * which isn't exactly accurate but our cost model doesn't allow for
+	 * nonuniform costs within the run phase.
+	 */
 	if (nbytes > work_mem_bytes)
 	{
 		double		npages = ceil(nbytes / BLCKSZ);
 
-		/* We'll write during startup and read during retrieval */
-		startup_cost += seq_page_cost * npages;
 		run_cost += seq_page_cost * npages;
 	}
 
-	/*
-	 * Charge a very small amount per inserted tuple, to reflect bookkeeping
-	 * costs.  We use cpu_tuple_cost/10 for this.  This is needed to break the
-	 * tie that would otherwise exist between nestloop with A outer,
-	 * materialized B inner and nestloop with B outer, materialized A inner.
-	 * The extra cost ensures we'll prefer materializing the smaller rel.
-	 */
-	startup_cost += cpu_tuple_cost * 0.1 * tuples;
-
-	/*
-	 * Also charge a small amount per extracted tuple.  We use cpu_tuple_cost
-	 * so that it doesn't appear worthwhile to materialize a bare seqscan.
-	 */
-	run_cost += cpu_tuple_cost * tuples;
-
 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
 }
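A worked example of the new cost_material charging, as a standalone sketch (assumed defaults: cpu_tuple_cost = 0.01, seq_page_cost = 1.0, work_mem = 1MB; the input costs, row count, and width are made up, and nbytes is a crude stand-in for relation_byte_size). The point to notice is that the spill charge now lands entirely in the run phase rather than in startup:

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double cpu_tuple_cost = 0.01, seq_page_cost = 1.0;
        long   work_mem_bytes = 1024 * 1024L;
        double input_startup_cost = 0.0, input_total_cost = 100.0;
        double tuples = 10000.0;
        double nbytes = tuples * 200.0;   /* ~200 bytes/row, assumed */

        double startup_cost = input_startup_cost;
        double run_cost = input_total_cost - input_startup_cost;

        run_cost += 2 * cpu_tuple_cost * tuples;   /* bookkeeping: 200 */
        if (nbytes > work_mem_bytes)               /* ~2MB > 1MB: spills */
            run_cost += seq_page_cost * ceil(nbytes / 8192.0);  /* 245 pages */

        printf("startup=%g total=%g\n", startup_cost, startup_cost + run_cost);
        return 0;
    }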
@@ -1400,7 +1417,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Path	   *inner_path = path->innerjoinpath;
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
+	Cost		inner_rescan_start_cost;
+	Cost		inner_rescan_total_cost;
 	Cost		inner_run_cost;
+	Cost		inner_rescan_run_cost;
 	Cost		cpu_per_tuple;
 	QualCost	restrict_qual_cost;
 	double		outer_path_rows = PATH_ROWS(outer_path);
@@ -1413,32 +1433,26 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	if (!enable_nestloop)
 		startup_cost += disable_cost;
 
+	/* estimate costs to rescan the inner relation */
+	cost_rescan(root, inner_path,
+				&inner_rescan_start_cost,
+				&inner_rescan_total_cost);
+
 	/* cost of source data */
 
 	/*
 	 * NOTE: clearly, we must pay both outer and inner paths' startup_cost
 	 * before we can start returning tuples, so the join's startup cost is
-	 * their sum.  What's not so clear is whether the inner path's
-	 * startup_cost must be paid again on each rescan of the inner path. This
-	 * is not true if the inner path is materialized or is a hashjoin, but
-	 * probably is true otherwise.
+	 * their sum.  We'll also pay the inner path's rescan startup cost
+	 * multiple times.
 	 */
 	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 	run_cost += outer_path->total_cost - outer_path->startup_cost;
-	if (IsA(inner_path, MaterialPath) ||
-		IsA(inner_path, HashPath))
-	{
-		/* charge only run cost for each iteration of inner path */
-	}
-	else
-	{
-		/*
-		 * charge startup cost for each iteration of inner path, except we
-		 * already charged the first startup_cost in our own startup
-		 */
-		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
-	}
+	if (outer_path_rows > 1)
+		run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
+
 	inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
+	inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
 
 	if (adjust_semi_join(root, path, sjinfo,
 						 &outer_match_frac,
@@ -1458,12 +1472,22 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		 * that fraction.  (If we used a larger fuzz factor, we'd have to
 		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
 		 * least 1, no such clamp is needed now.)
+		 *
+		 * A complicating factor is that rescans may be cheaper than first
+		 * scans.  If we never scan all the way to the end of the inner rel,
+		 * it might be (depending on the plan type) that we'd never pay the
+		 * whole inner first-scan run cost.  However it is difficult to
+		 * estimate whether that will happen, so be conservative and always
+		 * charge the whole first-scan cost once.
 		 */
+		run_cost += inner_run_cost;
+
 		outer_matched_rows = rint(outer_path_rows * outer_match_frac);
 		inner_scan_frac = 2.0 / (match_count + 1.0);
 
-		/* Add inner run cost for outer tuples having matches */
-		run_cost += outer_matched_rows * inner_run_cost * inner_scan_frac;
+		/* Add inner run cost for additional outer tuples having matches */
+		if (outer_matched_rows > 1)
+			run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
 
 		/* Compute number of tuples processed (not number emitted!) */
 		ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
@@ -1479,21 +1503,26 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		if (indexed_join_quals)
 		{
 			run_cost += (outer_path_rows - outer_matched_rows) *
-				inner_run_cost / inner_path_rows;
-			/* We won't be evaluating any quals at all for these rows */
+				inner_rescan_run_cost / inner_path_rows;
+			/*
+			 * We won't be evaluating any quals at all for these rows,
+			 * so don't add them to ntuples.
+			 */
 		}
 		else
 		{
 			run_cost += (outer_path_rows - outer_matched_rows) *
-				inner_run_cost;
+				inner_rescan_run_cost;
 			ntuples += (outer_path_rows - outer_matched_rows) *
 				inner_path_rows;
 		}
 	}
 	else
 	{
 		/* Normal case; we'll scan whole input rel for each outer row */
-		run_cost += outer_path_rows * inner_run_cost;
+		run_cost += inner_run_cost;
+		if (outer_path_rows > 1)
+			run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
 
 		/* Compute number of tuples processed (not number emitted!) */
 		ntuples = outer_path_rows * inner_path_rows;
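The effect on the normal (non-semijoin) case is easiest to see with made-up numbers; the sketch below assumes an inner Material whose first scan costs 120 but whose rescans cost only 25 (all values illustrative):

    #include <stdio.h>

    int
    main(void)
    {
        double outer_path_rows = 50.0;
        double inner_run_cost = 120.0;        /* first scan, assumed */
        double inner_rescan_run_cost = 25.0;  /* cheaper rescan, assumed */
        double run_cost = inner_run_cost;     /* full first-scan cost, once */

        if (outer_path_rows > 1)
            run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;

        /* new model: 120 + 49 * 25 = 1345; old model: 50 * 120 = 6000 */
        printf("inner contribution = %g\n", run_cost);
        return 0;
    }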
@@ -2190,13 +2219,13 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 
 	/*
 	 * Also account for subplan's startup cost.  If the subplan is
-	 * uncorrelated or undirect correlated, AND its topmost node is a Sort
-	 * or Material node, assume that we'll only need to pay its startup
-	 * cost once; otherwise assume we pay the startup cost every time.
+	 * uncorrelated or undirect correlated, AND its topmost node is one
+	 * that materializes its output, assume that we'll only need to pay
+	 * its startup cost once; otherwise assume we pay the startup cost
+	 * every time.
 	 */
 	if (subplan->parParam == NIL &&
-		(IsA(plan, Sort) ||
-		 IsA(plan, Material)))
+		ExecMaterializesOutput(nodeTag(plan)))
 		sp_cost.startup += plan->startup_cost;
 	else
 		sp_cost.per_tuple += plan->startup_cost;
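ExecMaterializesOutput() is the executor entry point this hunk starts using (hence the new executor/executor.h include): a per-node-tag test replacing the hard-wired Sort/Material check. The sketch below shows the style of dispatch; the exact tag list is an assumption inferred from the plan types cost_rescan treats as materializing, not something this diff shows:

    #include "postgres.h"
    #include "nodes/nodes.h"

    /* Sketch only: a node-tag property test in the style of
     * ExecMaterializesOutput(); the tag list here is assumed. */
    static bool
    materializes_output_sketch(NodeTag plantype)
    {
        switch (plantype)
        {
            case T_Material:
            case T_FunctionScan:
            case T_CteScan:
            case T_WorkTableScan:
            case T_Sort:
                return true;
            default:
                return false;
        }
    }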
@@ -2207,6 +2236,81 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 }
 
 
+/*
+ * cost_rescan
+ *		Given a finished Path, estimate the costs of rescanning it after
+ *		having done so the first time.  For some Path types a rescan is
+ *		cheaper than an original scan (if no parameters change), and this
+ *		function embodies knowledge about that.  The default is to return
+ *		the same costs stored in the Path.  (Note that the cost estimates
+ *		actually stored in Paths are always for first scans.)
+ *
+ * This function is not currently intended to model effects such as rescans
+ * being cheaper due to disk block caching; what we are concerned with is
+ * plan types wherein the executor caches results explicitly, or doesn't
+ * redo startup calculations, etc.
+ */
+static void
+cost_rescan(PlannerInfo *root, Path *path,
+			Cost *rescan_startup_cost,	/* output parameters */
+			Cost *rescan_total_cost)
+{
+	switch (path->pathtype)
+	{
+		case T_FunctionScan:
+
+			/*
+			 * Currently, nodeFunctionscan.c always executes the function
+			 * to completion before returning any rows, and caches the
+			 * results in a tuplestore.  So the function eval cost is
+			 * all startup cost and isn't paid over again on rescans.
+			 * However, all run costs will be paid over again.
+			 */
+			*rescan_startup_cost = 0;
+			*rescan_total_cost = path->total_cost - path->startup_cost;
+			break;
+		case T_HashJoin:
+
+			/*
+			 * Assume that all of the startup cost represents hash table
+			 * building, which we won't have to do over.
+			 */
+			*rescan_startup_cost = 0;
+			*rescan_total_cost = path->total_cost - path->startup_cost;
+			break;
+		case T_Material:
+		case T_CteScan:
+		case T_WorkTableScan:
+		case T_Sort:
+			{
+				/*
+				 * These plan types materialize their final result in a
+				 * tuplestore or tuplesort object.  So the rescan cost is only
+				 * cpu_tuple_cost per tuple, unless the result is large enough
+				 * to spill to disk.
+				 */
+				Cost		run_cost = cpu_tuple_cost * path->parent->rows;
+				double		nbytes = relation_byte_size(path->parent->rows,
+														path->parent->width);
+				long		work_mem_bytes = work_mem * 1024L;
+
+				if (nbytes > work_mem_bytes)
+				{
+					/* It will spill, so account for re-read cost */
+					double		npages = ceil(nbytes / BLCKSZ);
+
+					run_cost += seq_page_cost * npages;
+				}
+				*rescan_startup_cost = 0;
+				*rescan_total_cost = run_cost;
+			}
+			break;
+		default:
+			*rescan_startup_cost = path->startup_cost;
+			*rescan_total_cost = path->total_cost;
+			break;
+	}
+}
+
+
 /*
  * cost_qual_eval
  *		Estimate the CPU costs of evaluating a WHERE clause.
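Finally, a worked example of the Material/Sort branch of cost_rescan, again as a standalone sketch with assumed defaults (cpu_tuple_cost = 0.01, seq_page_cost = 1.0, work_mem = 1MB; nbytes stands in for relation_byte_size). A 100,000-row, 64-byte-wide materialized result overflows work_mem, so a rescan pays the per-tuple charge plus a re-read of the spilled pages, but no startup cost at all:

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        double cpu_tuple_cost = 0.01, seq_page_cost = 1.0;
        long   work_mem_bytes = 1024 * 1024L;
        double rows = 100000.0;
        double nbytes = rows * 64.0;               /* assumed row width */

        double rescan_startup_cost = 0;            /* nothing to redo */
        double rescan_total_cost = cpu_tuple_cost * rows;   /* 1000 */

        if (nbytes > work_mem_bytes)               /* 6.4MB > 1MB: spilled */
            rescan_total_cost += seq_page_cost * ceil(nbytes / 8192.0); /* 782 */

        printf("rescan startup=%g total=%g\n",
               rescan_startup_cost, rescan_total_cost);
        return 0;
    }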