Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitaf95d7a

Browse files
committed
Improve INTERSECT/EXCEPT hashing by realizing that we don't need to make any
hashtable entries for tuples that are found only in the second input: theycan never contribute to the output. Furthermore, this implies that theplanner should endeavor to put first the smaller (in number of groups) inputrelation for an INTERSECT. Implement that, and upgrade prepunion's estimationof the number of rows returned by setops so that there's some amount of sanityin the estimate of which one is smaller.
1 parent368df30 commitaf95d7a

File tree

9 files changed

+278
-159
lines changed

9 files changed

+278
-159
lines changed

‎src/backend/executor/nodeSetOp.c

Lines changed: 84 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@
1212
* relation. Then it is a simple matter to emit the output demanded by the
1313
* SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
1414
*
15-
* In SETOP_HASHED mode, the input is delivered in no particular order.
16-
* We build a hash table in memory with one entry for each group of
17-
* identical tuples, and count the number of tuples in the group from
18-
* each relation. After seeing all the input, we scan the hashtable and
19-
* generate the correct output using those counts.
15+
* In SETOP_HASHED mode, the input is delivered in no particular order,
16+
* except that we know all the tuples from one input relation will come before
17+
* all the tuples of the other. The planner guarantees that the first input
18+
* relation is the left-hand one for EXCEPT, and tries to make the smaller
19+
* input relation come first for INTERSECT. We build a hash table in memory
20+
* with one entry for each group of identical tuples, and count the number of
21+
* tuples in the group from each relation. After seeing all the input, we
22+
* scan the hashtable and generate the correct output using those counts.
23+
* We can avoid making hashtable entries for any tuples appearing only in the
24+
* second input relation, since they cannot result in any output.
2025
*
2126
* This node type is not used for UNION or UNION ALL, since those can be
2227
* implemented more cheaply (there's no need for the junk attribute to
@@ -32,7 +37,7 @@
3237
*
3338
*
3439
* IDENTIFICATION
35-
* $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.26 2008/08/0703:04:03 tgl Exp $
40+
* $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.27 2008/08/0719:35:02 tgl Exp $
3641
*
3742
*-------------------------------------------------------------------------
3843
*/
@@ -82,18 +87,30 @@ static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
8287
/*
8388
* Initialize state for a new group of input values.
8489
*/
85-
staticvoid
86-
initialize_counts(SetOpState*setopstate,SetOpStatePerGrouppergroup)
90+
staticinlinevoid
91+
initialize_counts(SetOpStatePerGrouppergroup)
8792
{
8893
pergroup->numLeft=pergroup->numRight=0;
8994
}
9095

9196
/*
9297
* Advance the appropriate counter for one input tuple.
9398
*/
94-
staticvoid
95-
advance_counts(SetOpState*setopstate,SetOpStatePerGrouppergroup,
96-
TupleTableSlot*inputslot)
99+
staticinlinevoid
100+
advance_counts(SetOpStatePerGrouppergroup,intflag)
101+
{
102+
if (flag)
103+
pergroup->numRight++;
104+
else
105+
pergroup->numLeft++;
106+
}
107+
108+
/*
109+
* Fetch the "flag" column from an input tuple.
110+
* This is an integer column with value 0 for left side, 1 for right side.
111+
*/
112+
staticint
113+
fetch_tuple_flag(SetOpState*setopstate,TupleTableSlot*inputslot)
97114
{
98115
SetOp*node= (SetOp*)setopstate->ps.plan;
99116
intflag;
@@ -103,10 +120,8 @@ advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup,
103120
node->flagColIdx,
104121
&isNull));
105122
Assert(!isNull);
106-
if (flag)
107-
pergroup->numRight++;
108-
else
109-
pergroup->numLeft++;
123+
Assert(flag==0||flag==1);
124+
returnflag;
110125
}
111126

112127
/*
@@ -130,33 +145,6 @@ build_hash_table(SetOpState *setopstate)
130145
setopstate->tempContext);
131146
}
132147

133-
/*
134-
* Find or create a hashtable entry for the tuple group containing the
135-
* given tuple.
136-
*/
137-
staticSetOpHashEntry
138-
lookup_hash_entry(SetOpState*setopstate,TupleTableSlot*inputslot)
139-
{
140-
SetOpHashEntryentry;
141-
boolisnew;
142-
143-
/* find or create the hashtable entry */
144-
entry= (SetOpHashEntry)LookupTupleHashEntry(setopstate->hashtable,
145-
inputslot,
146-
&isnew);
147-
148-
if (isnew)
149-
{
150-
/* initialize counts for new tuple group */
151-
initialize_counts(setopstate,&entry->pergroup);
152-
}
153-
154-
/* Must reset temp context after each hashtable lookup */
155-
MemoryContextReset(setopstate->tempContext);
156-
157-
returnentry;
158-
}
159-
160148
/*
161149
* We've completed processing a tuple group. Decide how many copies (if any)
162150
* of its representative row to emit, and store the count into numOutput.
@@ -289,10 +277,11 @@ setop_retrieve_direct(SetOpState *setopstate)
289277
setopstate->grp_firstTuple=NULL;/* don't keep two pointers */
290278

291279
/* Initialize working state for a new input tuple group */
292-
initialize_counts(setopstate,pergroup);
280+
initialize_counts(pergroup);
293281

294282
/* Count the first input tuple */
295-
advance_counts(setopstate,pergroup,resultTupleSlot);
283+
advance_counts(pergroup,
284+
fetch_tuple_flag(setopstate,resultTupleSlot));
296285

297286
/*
298287
* Scan the outer plan until we exhaust it or cross a group boundary.
@@ -324,7 +313,8 @@ setop_retrieve_direct(SetOpState *setopstate)
324313
}
325314

326315
/* Still in same group, so count this tuple */
327-
advance_counts(setopstate,pergroup,outerslot);
316+
advance_counts(pergroup,
317+
fetch_tuple_flag(setopstate,outerslot));
328318
}
329319

330320
/*
@@ -351,30 +341,73 @@ setop_retrieve_direct(SetOpState *setopstate)
351341
staticvoid
352342
setop_fill_hash_table(SetOpState*setopstate)
353343
{
344+
SetOp*node= (SetOp*)setopstate->ps.plan;
354345
PlanState*outerPlan;
355-
SetOpHashEntryentry;
356-
TupleTableSlot*outerslot;
346+
intfirstFlag;
347+
boolin_first_rel;
357348

358349
/*
359350
* get state info from node
360351
*/
361352
outerPlan=outerPlanState(setopstate);
353+
firstFlag=node->firstFlag;
354+
/* verify planner didn't mess up */
355+
Assert(firstFlag==0||
356+
(firstFlag==1&&
357+
(node->cmd==SETOPCMD_INTERSECT||
358+
node->cmd==SETOPCMD_INTERSECT_ALL)));
362359

363360
/*
364361
* Process each outer-plan tuple, and then fetch the next one, until we
365362
* exhaust the outer plan.
366363
*/
364+
in_first_rel= true;
367365
for (;;)
368366
{
367+
TupleTableSlot*outerslot;
368+
intflag;
369+
SetOpHashEntryentry;
370+
boolisnew;
371+
369372
outerslot=ExecProcNode(outerPlan);
370373
if (TupIsNull(outerslot))
371374
break;
372375

373-
/* Find or build hashtable entry for this tuple's group */
374-
entry=lookup_hash_entry(setopstate,outerslot);
376+
/* Identify whether it's left or right input */
377+
flag=fetch_tuple_flag(setopstate,outerslot);
378+
379+
if (flag==firstFlag)
380+
{
381+
/* (still) in first input relation */
382+
Assert(in_first_rel);
383+
384+
/* Find or build hashtable entry for this tuple's group */
385+
entry= (SetOpHashEntry)
386+
LookupTupleHashEntry(setopstate->hashtable,outerslot,&isnew);
387+
388+
/* If new tuple group, initialize counts */
389+
if (isnew)
390+
initialize_counts(&entry->pergroup);
391+
392+
/* Advance the counts */
393+
advance_counts(&entry->pergroup,flag);
394+
}
395+
else
396+
{
397+
/* reached second relation */
398+
in_first_rel= false;
399+
400+
/* For tuples not seen previously, do not make hashtable entry */
401+
entry= (SetOpHashEntry)
402+
LookupTupleHashEntry(setopstate->hashtable,outerslot,NULL);
403+
404+
/* Advance the counts if entry is already present */
405+
if (entry)
406+
advance_counts(&entry->pergroup,flag);
407+
}
375408

376-
/*Advance the counts */
377-
advance_counts(setopstate,&entry->pergroup,outerslot);
409+
/*Must reset temp context after each hashtable lookup */
410+
MemoryContextReset(setopstate->tempContext);
378411
}
379412

380413
setopstate->table_filled= true;

‎src/backend/nodes/copyfuncs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Portions Copyright (c) 1994, Regents of the University of California
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.398 2008/08/0703:04:03 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.399 2008/08/0719:35:02 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -657,6 +657,7 @@ _copySetOp(SetOp *from)
657657
COPY_POINTER_FIELD(dupColIdx,from->numCols*sizeof(AttrNumber));
658658
COPY_POINTER_FIELD(dupOperators,from->numCols*sizeof(Oid));
659659
COPY_SCALAR_FIELD(flagColIdx);
660+
COPY_SCALAR_FIELD(firstFlag);
660661
COPY_SCALAR_FIELD(numGroups);
661662

662663
returnnewnode;

‎src/backend/nodes/outfuncs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.332 2008/08/0703:04:03 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.333 2008/08/0719:35:02 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -611,6 +611,7 @@ _outSetOp(StringInfo str, SetOp *node)
611611
appendStringInfo(str," %u",node->dupOperators[i]);
612612

613613
WRITE_INT_FIELD(flagColIdx);
614+
WRITE_INT_FIELD(firstFlag);
614615
WRITE_LONG_FIELD(numGroups);
615616
}
616617

‎src/backend/optimizer/plan/createplan.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.243 2008/08/0703:04:03 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.244 2008/08/0719:35:02 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -3109,8 +3109,8 @@ make_unique(Plan *lefttree, List *distinctList)
31093109
*/
31103110
SetOp*
31113111
make_setop(SetOpCmdcmd,SetOpStrategystrategy,Plan*lefttree,
3112-
List*distinctList,AttrNumberflagColIdx,longnumGroups,
3113-
doubleoutputRows)
3112+
List*distinctList,AttrNumberflagColIdx,intfirstFlag,
3113+
longnumGroups,doubleoutputRows)
31143114
{
31153115
SetOp*node=makeNode(SetOp);
31163116
Plan*plan=&node->plan;
@@ -3159,6 +3159,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
31593159
node->dupColIdx=dupColIdx;
31603160
node->dupOperators=dupOperators;
31613161
node->flagColIdx=flagColIdx;
3162+
node->firstFlag=firstFlag;
31623163
node->numGroups=numGroups;
31633164

31643165
returnnode;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp