Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit92ee252

Browse files
committed
Tweak processing of multiple-index-scan plans to reduce overhead when
handling many-way scans: instead of re-evaluating all prior indexscanquals to see if a tuple has been fetched more than once, use a hash tableindexed by tuple CTID. But fall back to the old way if the hash tablegrows to exceed SortMem.
1 parent38e2bf6 commit92ee252

File tree

2 files changed

+153
-31
lines changed

2 files changed

+153
-31
lines changed

‎src/backend/executor/nodeIndexscan.c

Lines changed: 148 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.82 2003/08/04 02:39:59 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -28,19 +28,51 @@
2828
#include"access/heapam.h"
2929
#include"executor/execdebug.h"
3030
#include"executor/nodeIndexscan.h"
31+
#include"miscadmin.h"
3132
#include"nodes/nodeFuncs.h"
3233
#include"optimizer/clauses.h"
3334
#include"parser/parsetree.h"
3435

35-
/* ----------------
36-
*Misc stuff to move to executor.h soon -cim 6/5/90
37-
* ----------------
38-
*/
36+
3937
#defineNO_OP0
4038
#defineLEFT_OP1
4139
#defineRIGHT_OP2
4240

41+
/*
42+
* In a multiple-index plan, we must take care to return any given tuple
43+
* only once, even if it matches conditions of several index scans. Our
44+
* preferred way to do this is to record already-returned tuples in a hash
45+
* table (using the TID as unique identifier). However, in a very large
46+
* scan this could conceivably run out of memory. We limit the hash table
47+
* to no more than SortMem KB; if it grows past that, we fall back to the
48+
* pre-7.4 technique: evaluate the prior-scan index quals again for each
49+
* tuple (which is space-efficient, but slow).
50+
*
51+
* When scanning backwards, we use scannum to determine when to emit the
52+
* tuple --- we have to re-emit a tuple in the same scan as it was first
53+
* encountered.
54+
*
55+
* Note: this code would break if the planner were ever to create a multiple
56+
* index plan with overall backwards direction, because the hashtable code
57+
* will emit a tuple the first time it is encountered (which would be the
58+
* highest scan in which it matches the index), but the evaluate-the-quals
59+
* code will emit a tuple in the lowest-numbered scan in which it's valid.
60+
* This could be fixed at need by making the evaluate-the-quals case more
61+
* complex. Currently the planner will never create such a plan (since it
62+
* considers multi-index plans unordered anyway), so there's no need for
63+
* more complexity.
64+
*/
65+
typedefstruct
66+
{
67+
/* tid is the hash key and so must be first! */
68+
ItemPointerDatatid;/* TID of a tuple we've returned */
69+
intscannum;/* number of scan we returned it in */
70+
}DupHashTabEntry;
71+
72+
4373
staticTupleTableSlot*IndexNext(IndexScanState*node);
74+
staticvoidcreate_duphash(IndexScanState*node);
75+
4476

4577
/* ----------------------------------------------------------------
4678
*IndexNext
@@ -163,7 +195,7 @@ IndexNext(IndexScanState *node)
163195
while ((tuple=index_getnext(scandesc,direction))!=NULL)
164196
{
165197
/*
166-
*store the scanned tuple in the scan tuple slot of the scan
198+
*Store the scanned tuple in the scan tuple slot of the scan
167199
* state. Note: we pass 'false' because tuples returned by
168200
* amgetnext are pointers onto disk pages and must not be
169201
* pfree()'d.
@@ -174,36 +206,80 @@ IndexNext(IndexScanState *node)
174206
false);/* don't pfree */
175207

176208
/*
177-
* We must check to see if the current tuple was already
178-
* matched by an earlier index, so we don't double-report it.
179-
* We do this by passing the tuple through ExecQual and
180-
* checking for failure with all previous qualifications.
209+
* If it's a multiple-index scan, make sure not to double-report
210+
* a tuple matched by more than one index. (See notes above.)
181211
*/
182-
if (node->iss_IndexPtr>0)
212+
if (numIndices>1)
183213
{
184-
boolprev_matches= false;
185-
intprev_index;
186-
List*qual;
187-
188-
econtext->ecxt_scantuple=slot;
189-
ResetExprContext(econtext);
190-
qual=node->indxqualorig;
191-
for (prev_index=0;
192-
prev_index<node->iss_IndexPtr;
193-
prev_index++)
214+
/* First try the hash table */
215+
if (node->iss_DupHash)
194216
{
195-
if (ExecQual((List*)lfirst(qual),econtext, false))
217+
DupHashTabEntry*entry;
218+
boolfound;
219+
220+
entry= (DupHashTabEntry*)
221+
hash_search(node->iss_DupHash,
222+
&tuple->t_data->t_ctid,
223+
HASH_ENTER,
224+
&found);
225+
if (entry==NULL||
226+
node->iss_DupHash->hctl->nentries>node->iss_MaxHash)
227+
{
228+
/* out of memory (either hard or soft limit) */
229+
/* release hash table and fall thru to old code */
230+
hash_destroy(node->iss_DupHash);
231+
node->iss_DupHash=NULL;
232+
}
233+
elseif (found)
196234
{
197-
prev_matches= true;
198-
break;
235+
/* pre-existing entry */
236+
237+
/*
238+
* It's duplicate if first emitted in a different
239+
* scan. If same scan, we must be backing up, so
240+
* okay to emit again.
241+
*/
242+
if (entry->scannum!=node->iss_IndexPtr)
243+
{
244+
/* Dup, so drop it and loop back for another */
245+
ExecClearTuple(slot);
246+
continue;
247+
}
248+
}
249+
else
250+
{
251+
/* new entry, finish filling it in */
252+
entry->scannum=node->iss_IndexPtr;
199253
}
200-
qual=lnext(qual);
201254
}
202-
if (prev_matches)
255+
/* If hash table has overflowed, do it the hard way */
256+
if (node->iss_DupHash==NULL&&
257+
node->iss_IndexPtr>0)
203258
{
204-
/* Duplicate, so drop it and loop back for another */
205-
ExecClearTuple(slot);
206-
continue;
259+
boolprev_matches= false;
260+
intprev_index;
261+
List*qual;
262+
263+
econtext->ecxt_scantuple=slot;
264+
ResetExprContext(econtext);
265+
qual=node->indxqualorig;
266+
for (prev_index=0;
267+
prev_index<node->iss_IndexPtr;
268+
prev_index++)
269+
{
270+
if (ExecQual((List*)lfirst(qual),econtext, false))
271+
{
272+
prev_matches= true;
273+
break;
274+
}
275+
qual=lnext(qual);
276+
}
277+
if (prev_matches)
278+
{
279+
/* Dup, so drop it and loop back for another */
280+
ExecClearTuple(slot);
281+
continue;
282+
}
207283
}
208284
}
209285

@@ -383,6 +459,14 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
383459
return;
384460
}
385461

462+
/* reset hash table */
463+
if (numIndices>1)
464+
{
465+
if (node->iss_DupHash)
466+
hash_destroy(node->iss_DupHash);
467+
create_duphash(node);
468+
}
469+
386470
/* reset index scans */
387471
if (ScanDirectionIsBackward(((IndexScan*)node->ss.ps.plan)->indxorderdir))
388472
node->iss_IndexPtr=numIndices;
@@ -432,6 +516,10 @@ ExecEndIndexScan(IndexScanState *node)
432516
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
433517
ExecClearTuple(node->ss.ss_ScanTupleSlot);
434518

519+
/* drop hash table */
520+
if (node->iss_DupHash)
521+
hash_destroy(node->iss_DupHash);
522+
435523
/*
436524
* close the index relations
437525
*/
@@ -507,7 +595,7 @@ ExecIndexRestrPos(IndexScanState *node)
507595

508596
/* ----------------------------------------------------------------
509597
*ExecInitIndexScan
510-
*
598+
*
511599
*Initializes the index scan's state information, creates
512600
*scan keys, and opens the base and index relations.
513601
*
@@ -919,12 +1007,42 @@ ExecInitIndexScan(IndexScan *node, EState *estate)
9191007
ExecAssignResultTypeFromTL(&indexstate->ss.ps);
9201008
ExecAssignScanProjectionInfo(&indexstate->ss);
9211009

1010+
/*
1011+
* Initialize hash table if needed.
1012+
*/
1013+
if (numIndices>1)
1014+
create_duphash(indexstate);
1015+
else
1016+
indexstate->iss_DupHash=NULL;
1017+
9221018
/*
9231019
* all done.
9241020
*/
9251021
returnindexstate;
9261022
}
9271023

1024+
staticvoid
1025+
create_duphash(IndexScanState*node)
1026+
{
1027+
HASHCTLhash_ctl;
1028+
1029+
MemSet(&hash_ctl,0,sizeof(hash_ctl));
1030+
hash_ctl.keysize=SizeOfIptrData;
1031+
hash_ctl.entrysize=sizeof(DupHashTabEntry);
1032+
hash_ctl.hash=tag_hash;
1033+
hash_ctl.hcxt=CurrentMemoryContext;
1034+
node->iss_DupHash=hash_create("DupHashTable",
1035+
(long)ceil(node->ss.ps.plan->plan_rows),
1036+
&hash_ctl,
1037+
HASH_ELEM |HASH_FUNCTION |HASH_CONTEXT);
1038+
if (node->iss_DupHash==NULL)
1039+
ereport(ERROR,
1040+
(errcode(ERRCODE_OUT_OF_MEMORY),
1041+
errmsg("out of memory")));
1042+
node->iss_MaxHash= (SortMem*1024L) /
1043+
(MAXALIGN(sizeof(HASHELEMENT))+MAXALIGN(sizeof(DupHashTabEntry)));
1044+
}
1045+
9281046
int
9291047
ExecCountSlotsIndexScan(IndexScan*node)
9301048
{

‎src/include/nodes/execnodes.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
10+
* $Id: execnodes.h,v 1.105 2003/08/22 20:26:43 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -768,6 +768,8 @@ typedef ScanState SeqScanState;
768768
*RuntimeKeysReady true if runtime Skeys have been computed
769769
*RelationDescs ptr to array of relation descriptors
770770
*ScanDescs ptr to array of scan descriptors
771+
*DupHash hashtable for recognizing dups in multiple scan
772+
*MaxHash max # entries we will allow in hashtable
771773
* ----------------
772774
*/
773775
typedefstructIndexScanState
@@ -785,6 +787,8 @@ typedef struct IndexScanState
785787
booliss_RuntimeKeysReady;
786788
RelationPtriss_RelationDescs;
787789
IndexScanDescPtriss_ScanDescs;
790+
HTAB*iss_DupHash;
791+
longiss_MaxHash;
788792
}IndexScanState;
789793

790794
/* ----------------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp