Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2415ad9

Browse files
committed
Teach tuplestore.c to throw away data before the "mark" point when the caller
is using mark/restore but not rewind or backward-scan capability. Insert amaterialize plan node between a mergejoin and its inner child if the innerchild is a sort that is expected to spill to disk. The materialize shieldsthe sort from the need to do mark/restore and thereby allows it to performits final merge pass on-the-fly; while the materialize itself is normallycheap since it won't spill to disk unless the number of tuples with equalkey values exceeds work_mem.Greg Stark, with some kibitzing from Tom Lane.
1 parent3963574 commit2415ad9

File tree

8 files changed

+236
-40
lines changed

8 files changed

+236
-40
lines changed

‎src/backend/executor/nodeMaterial.c

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.58 2007/01/05 22:19:28 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeMaterial.c,v 1.59 2007/05/21 17:57:33 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -56,10 +56,10 @@ ExecMaterial(MaterialState *node)
5656
/*
5757
* If first time through, and we need a tuplestore, initialize it.
5858
*/
59-
if (tuplestorestate==NULL&&node->randomAccess)
59+
if (tuplestorestate==NULL&&node->eflags!=0)
6060
{
6161
tuplestorestate=tuplestore_begin_heap(true, false,work_mem);
62-
62+
tuplestore_set_eflags(tuplestorestate,node->eflags);
6363
node->tuplestorestate= (void*)tuplestorestate;
6464
}
6565

@@ -162,14 +162,14 @@ ExecInitMaterial(Material *node, EState *estate, int eflags)
162162
matstate->ss.ps.state=estate;
163163

164164
/*
165-
* We must haverandom access to the subplan output to do backward scan or
166-
* mark/restore. We also prefer to materialize the subplan output if we
167-
* might be called on to rewind and replay it many times. However, if none
168-
* of these cases apply, we can skip storing the data.
165+
* We must havea tuplestore buffering the subplan output to do backward
166+
*scan ormark/restore. We also prefer to materialize the subplan output
167+
*if wemight be called on to rewind and replay it many times. However,
168+
*if noneof these cases apply, we can skip storing the data.
169169
*/
170-
matstate->randomAccess= (eflags& (EXEC_FLAG_REWIND |
171-
EXEC_FLAG_BACKWARD |
172-
EXEC_FLAG_MARK))!=0;
170+
matstate->eflags= (eflags& (EXEC_FLAG_REWIND |
171+
EXEC_FLAG_BACKWARD |
172+
EXEC_FLAG_MARK));
173173

174174
matstate->eof_underlying= false;
175175
matstate->tuplestorestate=NULL;
@@ -255,7 +255,7 @@ ExecEndMaterial(MaterialState *node)
255255
void
256256
ExecMaterialMarkPos(MaterialState*node)
257257
{
258-
Assert(node->randomAccess);
258+
Assert(node->eflags&EXEC_FLAG_MARK);
259259

260260
/*
261261
* if we haven't materialized yet, just return.
@@ -275,7 +275,7 @@ ExecMaterialMarkPos(MaterialState *node)
275275
void
276276
ExecMaterialRestrPos(MaterialState*node)
277277
{
278-
Assert(node->randomAccess);
278+
Assert(node->eflags&EXEC_FLAG_MARK);
279279

280280
/*
281281
* if we haven't materialized yet, just return.
@@ -300,7 +300,7 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
300300
{
301301
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
302302

303-
if (node->randomAccess)
303+
if (node->eflags!=0)
304304
{
305305
/*
306306
* If we haven't materialized yet, just return. If outerplan' chgParam
@@ -312,15 +312,21 @@ ExecMaterialReScan(MaterialState *node, ExprContext *exprCtxt)
312312

313313
/*
314314
* If subnode is to be rescanned then we forget previous stored
315-
* results; we have to re-read the subplan and re-store.
315+
* results; we have to re-read the subplan and re-store. Also,
316+
* if we told tuplestore it needn't support rescan, we lose and
317+
* must re-read. (This last should not happen in common cases;
318+
* else our caller lied by not passing EXEC_FLAG_REWIND to us.)
316319
*
317320
* Otherwise we can just rewind and rescan the stored output. The
318321
* state of the subnode does not change.
319322
*/
320-
if (((PlanState*)node)->lefttree->chgParam!=NULL)
323+
if (((PlanState*)node)->lefttree->chgParam!=NULL||
324+
(node->eflags&EXEC_FLAG_REWIND)==0)
321325
{
322326
tuplestore_end((Tuplestorestate*)node->tuplestorestate);
323327
node->tuplestorestate=NULL;
328+
if (((PlanState*)node)->lefttree->chgParam==NULL)
329+
ExecReScan(((PlanState*)node)->lefttree,exprCtxt);
324330
node->eof_underlying= false;
325331
}
326332
else

‎src/backend/executor/nodeMergejoin.c

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.87 2007/02/02 00:07:03 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.88 2007/05/21 17:57:33 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -706,6 +706,9 @@ ExecMergeJoin(MergeJoinState *node)
706706
}
707707
else
708708
{
709+
/* Mark before advancing, if wanted */
710+
if (node->mj_ExtraMarks)
711+
ExecMarkPos(innerPlan);
709712
/* Stay in same state to fetch next inner tuple */
710713
if (doFillInner)
711714
{
@@ -830,6 +833,9 @@ ExecMergeJoin(MergeJoinState *node)
830833
* now we get the next inner tuple, if any. If there's none,
831834
* advance to next outer tuple (which may be able to join to
832835
* previously marked tuples).
836+
*
837+
* NB: must NOT do "extraMarks" here, since we may need to
838+
* return to previously marked tuples.
833839
*/
834840
innerTupleSlot=ExecProcNode(innerPlan);
835841
node->mj_InnerTupleSlot=innerTupleSlot;
@@ -1140,6 +1146,9 @@ ExecMergeJoin(MergeJoinState *node)
11401146
break;
11411147

11421148
/*
1149+
* SKIPOUTER_ADVANCE: advance over an outer tuple that is
1150+
* known not to join to any inner tuple.
1151+
*
11431152
* Before advancing, we check to see if we must emit an
11441153
* outer-join fill tuple for this outer tuple.
11451154
*/
@@ -1204,6 +1213,9 @@ ExecMergeJoin(MergeJoinState *node)
12041213
break;
12051214

12061215
/*
1216+
* SKIPINNER_ADVANCE: advance over an inner tuple that is
1217+
* known not to join to any outer tuple.
1218+
*
12071219
* Before advancing, we check to see if we must emit an
12081220
* outer-join fill tuple for this inner tuple.
12091221
*/
@@ -1225,6 +1237,10 @@ ExecMergeJoin(MergeJoinState *node)
12251237
returnresult;
12261238
}
12271239

1240+
/* Mark before advancing, if wanted */
1241+
if (node->mj_ExtraMarks)
1242+
ExecMarkPos(innerPlan);
1243+
12281244
/*
12291245
* now we get the next inner tuple, if any
12301246
*/
@@ -1295,6 +1311,10 @@ ExecMergeJoin(MergeJoinState *node)
12951311
returnresult;
12961312
}
12971313

1314+
/* Mark before advancing, if wanted */
1315+
if (node->mj_ExtraMarks)
1316+
ExecMarkPos(innerPlan);
1317+
12981318
/*
12991319
* now we get the next inner tuple, if any
13001320
*/
@@ -1425,6 +1445,22 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
14251445
innerPlanState(mergestate)=ExecInitNode(innerPlan(node),estate,
14261446
eflags |EXEC_FLAG_MARK);
14271447

1448+
/*
1449+
* For certain types of inner child nodes, it is advantageous to issue
1450+
* MARK every time we advance past an inner tuple we will never return
1451+
* to. For other types, MARK on a tuple we cannot return to is a waste
1452+
* of cycles. Detect which case applies and set mj_ExtraMarks if we
1453+
* want to issue "unnecessary" MARK calls.
1454+
*
1455+
* Currently, only Material wants the extra MARKs, and it will be helpful
1456+
* only if eflags doesn't specify REWIND.
1457+
*/
1458+
if (IsA(innerPlan(node),Material)&&
1459+
(eflags&EXEC_FLAG_REWIND)==0)
1460+
mergestate->mj_ExtraMarks= true;
1461+
else
1462+
mergestate->mj_ExtraMarks= false;
1463+
14281464
#defineMERGEJOIN_NSLOTS 4
14291465

14301466
/*

‎src/backend/optimizer/path/costsize.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
* Portions Copyright (c) 1994, Regents of the University of California
5555
*
5656
* IDENTIFICATION
57-
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
57+
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.183 2007/05/21 17:57:33 tgl Exp $
5858
*
5959
*-------------------------------------------------------------------------
6060
*/
@@ -1038,6 +1038,23 @@ cost_sort(Path *path, PlannerInfo *root,
10381038
path->total_cost=startup_cost+run_cost;
10391039
}
10401040

1041+
/*
1042+
* sort_exceeds_work_mem
1043+
* Given a finished Sort plan node, detect whether it is expected to
1044+
* spill to disk (ie, will need more than work_mem workspace)
1045+
*
1046+
* This assumes there will be no available LIMIT.
1047+
*/
1048+
bool
1049+
sort_exceeds_work_mem(Sort*sort)
1050+
{
1051+
doubleinput_bytes=relation_byte_size(sort->plan.plan_rows,
1052+
sort->plan.plan_width);
1053+
longwork_mem_bytes=work_mem*1024L;
1054+
1055+
return (input_bytes>work_mem_bytes);
1056+
}
1057+
10411058
/*
10421059
* cost_material
10431060
* Determines and returns the cost of materializing a relation, including

‎src/backend/optimizer/plan/createplan.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.230 2007/05/04 01:13:44 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.231 2007/05/21 17:57:34 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1600,6 +1600,30 @@ create_mergejoin_plan(PlannerInfo *root,
16001600
else
16011601
innerpathkeys=best_path->jpath.innerjoinpath->pathkeys;
16021602

1603+
/*
1604+
* If inner plan is a sort that is expected to spill to disk, add a
1605+
* materialize node to shield it from the need to handle mark/restore.
1606+
* This will allow it to perform the last merge pass on-the-fly, while
1607+
* in most cases not requiring the materialize to spill to disk.
1608+
*
1609+
* XXX really, Sort oughta do this for itself, probably, to avoid the
1610+
* overhead of a separate plan node.
1611+
*/
1612+
if (IsA(inner_plan,Sort)&&
1613+
sort_exceeds_work_mem((Sort*)inner_plan))
1614+
{
1615+
Plan*matplan= (Plan*)make_material(inner_plan);
1616+
1617+
/*
1618+
* We assume the materialize will not spill to disk, and therefore
1619+
* charge just cpu_tuple_cost per tuple.
1620+
*/
1621+
copy_plan_costsize(matplan,inner_plan);
1622+
matplan->total_cost+=cpu_tuple_cost*matplan->plan_rows;
1623+
1624+
inner_plan=matplan;
1625+
}
1626+
16031627
/*
16041628
* Compute the opfamily/strategy/nullsfirst arrays needed by the executor.
16051629
* The information is in the pathkeys for the two inputs, but we need to

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp