88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.82 2003/08/04 02:39:59 momjian Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
2828#include "access/heapam.h"
2929#include "executor/execdebug.h"
3030#include "executor/nodeIndexscan.h"
31+ #include "miscadmin.h"
3132#include "nodes/nodeFuncs.h"
3233#include "optimizer/clauses.h"
3334#include "parser/parsetree.h"
3435
35- /* ----------------
36- *Misc stuff to move to executor.h soon -cim 6/5/90
37- * ----------------
38- */
36+
3937#define NO_OP 0
4038#define LEFT_OP 1
4139#define RIGHT_OP 2
4240
41+ /*
42+ * In a multiple-index plan, we must take care to return any given tuple
43+ * only once, even if it matches conditions of several index scans. Our
44+ * preferred way to do this is to record already-returned tuples in a hash
45+ * table (using the TID as unique identifier). However, in a very large
46+ * scan this could conceivably run out of memory. We limit the hash table
47+ * to no more than SortMem KB; if it grows past that, we fall back to the
48+ * pre-7.4 technique: evaluate the prior-scan index quals again for each
49+ * tuple (which is space-efficient, but slow).
50+ *
51+ * When scanning backwards, we use scannum to determine when to emit the
52+ * tuple --- we have to re-emit a tuple in the same scan as it was first
53+ * encountered.
54+ *
55+ * Note: this code would break if the planner were ever to create a multiple
56+ * index plan with overall backwards direction, because the hashtable code
57+ * will emit a tuple the first time it is encountered (which would be the
58+ * highest scan in which it matches the index), but the evaluate-the-quals
59+ * code will emit a tuple in the lowest-numbered scan in which it's valid.
60+ * This could be fixed at need by making the evaluate-the-quals case more
61+ * complex. Currently the planner will never create such a plan (since it
62+ * considers multi-index plans unordered anyway), so there's no need for
63+ * more complexity.
64+ */
65+ typedef struct
66+ {
67+ /* tid is the hash key and so must be the first field in the struct! */
68+ ItemPointerData tid ;/* heap TID of a tuple this plan node has already returned */
69+ int scannum ;/* index of the component scan that first returned the tuple */
70+ }DupHashTabEntry ;
71+
72+
4373static TupleTableSlot * IndexNext (IndexScanState * node );
74+ static void create_duphash (IndexScanState * node );
75+
4476
4577/* ----------------------------------------------------------------
4678 *IndexNext
@@ -163,7 +195,7 @@ IndexNext(IndexScanState *node)
163195while ((tuple = index_getnext (scandesc ,direction ))!= NULL )
164196{
165197/*
166- *store the scanned tuple in the scan tuple slot of the scan
198+ *Store the scanned tuple in the scan tuple slot of the scan
167199 * state. Note: we pass 'false' because tuples returned by
168200 * amgetnext are pointers onto disk pages and must not be
169201 * pfree()'d.
@@ -174,36 +206,80 @@ IndexNext(IndexScanState *node)
174206 false);/* don't pfree */
175207
176208/*
177- * We must check to see if the current tuple was already
178- * matched by an earlier index, so we don't double-report it.
179- * We do this by passing the tuple through ExecQual and
180- * checking for failure with all previous qualifications.
209+ * If it's a multiple-index scan, make sure not to double-report
210+ * a tuple matched by more than one index. (See notes above.)
181211 */
182- if (node -> iss_IndexPtr > 0 )
212+ if (numIndices > 1 )
183213{
184- bool prev_matches = false;
185- int prev_index ;
186- List * qual ;
187-
188- econtext -> ecxt_scantuple = slot ;
189- ResetExprContext (econtext );
190- qual = node -> indxqualorig ;
191- for (prev_index = 0 ;
192- prev_index < node -> iss_IndexPtr ;
193- prev_index ++ )
214+ /* First try the hash table */
215+ if (node -> iss_DupHash )
194216{
195- if (ExecQual ((List * )lfirst (qual ),econtext , false))
217+ DupHashTabEntry * entry ;
218+ bool found ;
219+
220+ entry = (DupHashTabEntry * )
221+ hash_search (node -> iss_DupHash ,
222+ & tuple -> t_data -> t_ctid ,
223+ HASH_ENTER ,
224+ & found );
225+ if (entry == NULL ||
226+ node -> iss_DupHash -> hctl -> nentries > node -> iss_MaxHash )
227+ {
228+ /* out of memory (either hard or soft limit) */
229+ /* release hash table and fall thru to old code */
230+ hash_destroy (node -> iss_DupHash );
231+ node -> iss_DupHash = NULL ;
232+ }
233+ else if (found )
196234{
197- prev_matches = true;
198- break ;
235+ /* pre-existing entry */
236+
237+ /*
238+ * It's duplicate if first emitted in a different
239+ * scan. If same scan, we must be backing up, so
240+ * okay to emit again.
241+ */
242+ if (entry -> scannum != node -> iss_IndexPtr )
243+ {
244+ /* Dup, so drop it and loop back for another */
245+ ExecClearTuple (slot );
246+ continue ;
247+ }
248+ }
249+ else
250+ {
251+ /* new entry, finish filling it in */
252+ entry -> scannum = node -> iss_IndexPtr ;
199253}
200- qual = lnext (qual );
201254}
202- if (prev_matches )
255+ /* If hash table has overflowed, do it the hard way */
256+ if (node -> iss_DupHash == NULL &&
257+ node -> iss_IndexPtr > 0 )
203258{
204- /* Duplicate, so drop it and loop back for another */
205- ExecClearTuple (slot );
206- continue ;
259+ bool prev_matches = false;
260+ int prev_index ;
261+ List * qual ;
262+
263+ econtext -> ecxt_scantuple = slot ;
264+ ResetExprContext (econtext );
265+ qual = node -> indxqualorig ;
266+ for (prev_index = 0 ;
267+ prev_index < node -> iss_IndexPtr ;
268+ prev_index ++ )
269+ {
270+ if (ExecQual ((List * )lfirst (qual ),econtext , false))
271+ {
272+ prev_matches = true;
273+ break ;
274+ }
275+ qual = lnext (qual );
276+ }
277+ if (prev_matches )
278+ {
279+ /* Dup, so drop it and loop back for another */
280+ ExecClearTuple (slot );
281+ continue ;
282+ }
207283}
208284}
209285
@@ -383,6 +459,14 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
383459return ;
384460}
385461
462+ /* reset hash table */
463+ if (numIndices > 1 )
464+ {
465+ if (node -> iss_DupHash )
466+ hash_destroy (node -> iss_DupHash );
467+ create_duphash (node );
468+ }
469+
386470/* reset index scans */
387471if (ScanDirectionIsBackward (((IndexScan * )node -> ss .ps .plan )-> indxorderdir ))
388472node -> iss_IndexPtr = numIndices ;
@@ -432,6 +516,10 @@ ExecEndIndexScan(IndexScanState *node)
432516ExecClearTuple (node -> ss .ps .ps_ResultTupleSlot );
433517ExecClearTuple (node -> ss .ss_ScanTupleSlot );
434518
519+ /* drop hash table */
520+ if (node -> iss_DupHash )
521+ hash_destroy (node -> iss_DupHash );
522+
435523/*
436524 * close the index relations
437525 */
@@ -507,7 +595,7 @@ ExecIndexRestrPos(IndexScanState *node)
507595
508596/* ----------------------------------------------------------------
509597 *ExecInitIndexScan
510- *
598+ *
511599 *Initializes the index scan's state information, creates
512600 *scan keys, and opens the base and index relations.
513601 *
@@ -919,12 +1007,42 @@ ExecInitIndexScan(IndexScan *node, EState *estate)
9191007ExecAssignResultTypeFromTL (& indexstate -> ss .ps );
9201008ExecAssignScanProjectionInfo (& indexstate -> ss );
9211009
1010+ /*
1011+ * Initialize hash table if needed.
1012+ */
1013+ if (numIndices > 1 )
1014+ create_duphash (indexstate );
1015+ else
1016+ indexstate -> iss_DupHash = NULL ;
1017+
9221018/*
9231019 * all done.
9241020 */
9251021return indexstate ;
9261022}
9271023
1024+ static void
1025+ create_duphash (IndexScanState * node )/* build the TID hash table used to suppress duplicate tuples in a multiple-index scan */
1026+ {
1027+ HASHCTL hash_ctl ;
1028+ /* key is the tuple's heap TID (SizeOfIptrData bytes); one DupHashTabEntry per returned tuple */
1029+ MemSet (& hash_ctl ,0 ,sizeof (hash_ctl ));
1030+ hash_ctl .keysize = SizeOfIptrData ;
1031+ hash_ctl .entrysize = sizeof (DupHashTabEntry );
1032+ hash_ctl .hash = tag_hash ;
1033+ hash_ctl .hcxt = CurrentMemoryContext ;/* NOTE(review): table lives in whatever context is current here — confirm callers run in the per-query context */
1034+ node -> iss_DupHash = hash_create ("DupHashTable" ,
1035+ (long )ceil (node -> ss .ps .plan -> plan_rows ),/* initial size taken from the planner's row estimate */
1036+ & hash_ctl ,
1037+ HASH_ELEM |HASH_FUNCTION |HASH_CONTEXT );
1038+ if (node -> iss_DupHash == NULL )
1039+ ereport (ERROR ,
1040+ (errcode (ERRCODE_OUT_OF_MEMORY ),
1041+ errmsg ("out of memory" )));
1042+ node -> iss_MaxHash = (SortMem * 1024L ) /
1043+ (MAXALIGN (sizeof (HASHELEMENT ))+ MAXALIGN (sizeof (DupHashTabEntry )));/* cap entry count so the table stays within SortMem kilobytes; on overflow IndexNext falls back to re-evaluating prior quals */
1044+ }
1045+
9281046int
9291047ExecCountSlotsIndexScan (IndexScan * node )
9301048{