Commit a0fa011

Better solution to integer overflow problem in hash batch-number
computation: reduce the bucket number mod nbatch.  This changes the
association between original bucket numbers and batches, but that
doesn't matter.  Minor other cleanups in hashjoin code to help
centralize decisions.

1 parent e533e7d, commit a0fa011
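To see the overflow being fixed: the old sizing code had to cap nbatch at INT_MAX / totalbuckets, because the proportional batch formula in the old ExecHashJoinGetBatch builds the intermediate product nbatch * (bucketno - nbuckets) in int arithmetic. Here is a standalone sketch of that failure mode (the bucket and batch counts are illustrative, not taken from the commit); the modulo form that replaces it never builds a large intermediate:

#include <limits.h>
#include <stdio.h>

int
main(void)
{
    int     nbuckets = 1024;
    int     totalbuckets = 1 << 20;     /* illustrative sizes only */
    int     nbatch = 1 << 12;
    int     bucketno = totalbuckets - 1;

    /* the old formula's intermediate product, computed wide to show its range */
    long long   product = (long long) nbatch * (bucketno - nbuckets);

    printf("nbatch * (bucketno - nbuckets) = %lld (INT_MAX = %d)\n",
           product, INT_MAX);

    /* the replacement computation never leaves int range */
    printf("(bucketno - nbuckets) %% nbatch = %d\n",
           (bucketno - nbuckets) % nbatch);
    return 0;
}

With the mod-based mapping in place, ExecChooseHashTableSize (first diff below) only needs to clamp dtmp against INT_MAX itself.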

File tree: 4 files changed (+60, -91 lines)

src/backend/executor/nodeHash.c (31 additions, 41 deletions)

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,9 +59,6 @@ ExecHash(HashState *node)
     outerNode = outerPlanState(node);
 
     hashtable = node->hashtable;
-    if (hashtable == NULL)
-        elog(ERROR, "ExecHash: hash table is NULL.");
-
     nbatch = hashtable->nbatch;
 
     if (nbatch > 0)
@@ -284,20 +281,13 @@ ExecHashTableCreate(Hash *node)
      * allocate and initialize the file arrays in hashCxt
      */
     hashtable->innerBatchFile = (BufFile **)
-        palloc(nbatch * sizeof(BufFile *));
+        palloc0(nbatch * sizeof(BufFile *));
     hashtable->outerBatchFile = (BufFile **)
-        palloc(nbatch * sizeof(BufFile *));
+        palloc0(nbatch * sizeof(BufFile *));
     hashtable->innerBatchSize = (long *)
-        palloc(nbatch * sizeof(long));
+        palloc0(nbatch * sizeof(long));
     hashtable->outerBatchSize = (long *)
-        palloc(nbatch * sizeof(long));
-    for (i = 0; i < nbatch; i++)
-    {
-        hashtable->innerBatchFile[i] = NULL;
-        hashtable->outerBatchFile[i] = NULL;
-        hashtable->innerBatchSize[i] = 0;
-        hashtable->outerBatchSize[i] = 0;
-    }
+        palloc0(nbatch * sizeof(long));
     /* The files will not be opened until later... */
 }
 
@@ -308,13 +298,7 @@ ExecHashTableCreate(Hash *node)
     MemoryContextSwitchTo(hashtable->batchCxt);
 
     hashtable->buckets = (HashJoinTuple *)
-        palloc(nbuckets * sizeof(HashJoinTuple));
-
-    if (hashtable->buckets == NULL)
-        elog(ERROR, "Insufficient memory for hash table.");
-
-    for (i = 0; i < nbuckets; i++)
-        hashtable->buckets[i] = NULL;
+        palloc0(nbuckets * sizeof(HashJoinTuple));
 
     MemoryContextSwitchTo(oldcxt);
 
@@ -414,15 +398,14 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
      * totalbuckets/nbuckets; in fact, it is the number of groups we
      * will use for the part of the data that doesn't fall into the
      * first nbuckets hash buckets.  We try to set it to make all the
-     * batches the same size.  But we have to keep nbatch small
-     * enough to avoid integer overflow in ExecHashJoinGetBatch().
+     * batches the same size.
      */
     dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
                 hash_table_bytes);
-    if (dtmp < INT_MAX / totalbuckets)
+    if (dtmp < INT_MAX)
         nbatch = (int) dtmp;
     else
-        nbatch = INT_MAX / totalbuckets;
+        nbatch = INT_MAX;
     if (nbatch <= 0)
         nbatch = 1;
 }
@@ -481,13 +464,14 @@ ExecHashTableInsert(HashJoinTable hashtable,
                     List *hashkeys)
 {
     int         bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
+    int         batchno = ExecHashGetBatch(bucketno, hashtable);
     TupleTableSlot *slot = econtext->ecxt_innertuple;
     HeapTuple   heapTuple = slot->val;
 
     /*
      * decide whether to put the tuple in the hash table or a tmp file
      */
-    if (bucketno < hashtable->nbuckets)
+    if (batchno < 0)
     {
         /*
          * put the tuple in hash table
@@ -498,8 +482,6 @@ ExecHashTableInsert(HashJoinTable hashtable,
         hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
         hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
                                                        hashTupleSize);
-        if (hashTuple == NULL)
-            elog(ERROR, "Insufficient memory for hash table.");
         memcpy((char *) &hashTuple->htup,
                (char *) heapTuple,
                sizeof(hashTuple->htup));
@@ -515,11 +497,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
     else
     {
         /*
-         * put the tuple into a tmp file for other batches
+         * put the tuple into a tmp file for later batches
          */
-        int         batchno = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-            (hashtable->totalbuckets - hashtable->nbuckets);
-
         hashtable->innerBatchSize[batchno]++;
         ExecHashJoinSaveTuple(heapTuple,
                               hashtable->innerBatchFile[batchno]);
@@ -592,6 +571,24 @@ ExecHashGetBucket(HashJoinTable hashtable,
     return bucketno;
 }
 
+/* ----------------------------------------------------------------
+ *      ExecHashGetBatch
+ *
+ *      determine the batch number for a bucketno
+ *
+ * Returns -1 if bucket belongs to initial (or current) batch,
+ * else 0..nbatch-1 corresponding to external batch file number for bucket.
+ * ----------------------------------------------------------------
+ */
+int
+ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
+{
+    if (bucketno < hashtable->nbuckets)
+        return -1;
+
+    return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
+}
+
 /* ----------------------------------------------------------------
  *      ExecScanHashBucket
  *
@@ -727,7 +724,6 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 {
     MemoryContext oldcxt;
     int         nbuckets = hashtable->nbuckets;
-    int         i;
 
     /*
      * Release all the hash buckets and tuples acquired in the prior pass,
@@ -750,13 +746,7 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 
     /* Reallocate and reinitialize the hash bucket headers. */
     hashtable->buckets = (HashJoinTuple *)
-        palloc(nbuckets * sizeof(HashJoinTuple));
-
-    if (hashtable->buckets == NULL)
-        elog(ERROR, "Insufficient memory for hash table.");
-
-    for (i = 0; i < nbuckets; i++)
-        hashtable->buckets[i] = NULL;
+        palloc0(nbuckets * sizeof(HashJoinTuple));
 
     MemoryContextSwitchTo(oldcxt);
 }
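For a concrete view of the new ExecHashGetBatch above, here is a standalone sketch comparing the old proportional assignment (the deleted ExecHashJoinGetBatch, rewritten here to the same -1/file-index convention so the two are comparable) with the new round-robin modulo. Plain ints stand in for the HashJoinTable fields and the sizes are made up. Both rules agree on which buckets stay in memory, but they pair overflow buckets with different batch files, the harmless association change the commit message notes:

#include <stdio.h>

/* old rule: spread overflow buckets across batches proportionally */
static int
old_batch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
{
    if (bucketno < nbuckets)
        return -1;              /* stays in the in-memory batch */
    return (nbatch * (bucketno - nbuckets)) /
        (totalbuckets - nbuckets);
}

/* new rule (as in ExecHashGetBatch): assign them round-robin by modulo */
static int
new_batch(int bucketno, int nbuckets, int nbatch)
{
    if (bucketno < nbuckets)
        return -1;
    return (bucketno - nbuckets) % nbatch;
}

int
main(void)
{
    int     nbuckets = 4,
            totalbuckets = 16,
            nbatch = 3;         /* made-up sizes for illustration */

    for (int bucketno = 0; bucketno < totalbuckets; bucketno++)
        printf("bucket %2d: old batch %2d, new batch %2d\n",
               bucketno,
               old_batch(bucketno, nbuckets, totalbuckets, nbatch),
               new_batch(bucketno, nbuckets, nbatch));
    return 0;
}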

src/backend/executor/nodeHashjoin.c (4 additions, 29 deletions)

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,7 +27,6 @@ static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
                          BufFile *file,
                          TupleTableSlot *tupleSlot);
-static int  ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
 static int  ExecHashJoinNewBatch(HashJoinState *hjstate);
 
 
@@ -179,17 +178,15 @@ ExecHashJoin(HashJoinState *node)
              */
             if (hashtable->curbatch == 0)
             {
-                int         batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
-                                                         hashtable);
+                int         batchno = ExecHashGetBatch(node->hj_CurBucketNo,
+                                                       hashtable);
 
-                if (batch > 0)
+                if (batchno >= 0)
                 {
                     /*
                      * Need to postpone this outer tuple to a later batch.
                      * Save it in the corresponding outer-batch file.
                      */
-                    int         batchno = batch - 1;
-
                     hashtable->outerBatchSize[batchno]++;
                     ExecHashJoinSaveTuple(outerTupleSlot->val,
                                           hashtable->outerBatchFile[batchno]);
@@ -640,28 +637,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
     return newbatch;
 }
 
-/* ----------------------------------------------------------------
- *      ExecHashJoinGetBatch
- *
- *      determine the batch number for a bucketno
- *      +----------------+-------+-------+ ... +-------+
- *      0          nbuckets                 totalbuckets
- * batch         0           1       2     ...
- * ----------------------------------------------------------------
- */
-static int
-ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
-{
-    int         b;
-
-    if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
-        return 0;
-
-    b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-        (hashtable->totalbuckets - hashtable->nbuckets);
-    return b + 1;
-}
-
 /* ----------------------------------------------------------------
  *      ExecHashJoinSaveTuple
  *
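The caller change above also retires an off-by-one convention: the deleted helper returned 0 for "keep in the current batch" and file index plus one otherwise, so the caller had to subtract one, while ExecHashGetBatch returns -1 or the file index directly. A minimal model of the two caller patterns; save_to_batch_file is a hypothetical stand-in for ExecHashJoinSaveTuple:

#include <stdio.h>

/* hypothetical stand-in for ExecHashJoinSaveTuple */
static void
save_to_batch_file(int batchno)
{
    printf("postponed to outer-batch file %d\n", batchno);
}

/* old convention: 0 means "current batch", file index is batch - 1 */
static void
old_caller(int batch)
{
    if (batch > 0)
    {
        int     batchno = batch - 1;

        save_to_batch_file(batchno);
    }
}

/* new convention: -1 means "current batch", return value is the file index */
static void
new_caller(int batchno)
{
    if (batchno >= 0)
        save_to_batch_file(batchno);
}

int
main(void)
{
    old_caller(0);              /* current batch, nothing written */
    old_caller(3);              /* writes to file 2 */
    new_caller(-1);             /* current batch, nothing written */
    new_caller(2);              /* writes to file 2 */
    return 0;
}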

src/backend/optimizer/path/costsize.c (23 additions, 20 deletions)

@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,7 +85,8 @@ bool       enable_mergejoin = true;
 bool        enable_hashjoin = true;
 
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+                         int nbuckets);
 static bool cost_qual_eval_walker(Node *node, Cost *total);
 static Selectivity approx_selectivity(Query *root, List *quals);
 static void set_rel_width(Query *root, RelOptInfo *rel);
@@ -882,7 +883,9 @@ cost_hashjoin(Path *path, Query *root,
                                     outer_path->parent->width);
     double      innerbytes = relation_byte_size(inner_path->parent->rows,
                                     inner_path->parent->width);
-    long        hashtablebytes = SortMem * 1024L;
+    int         virtualbuckets;
+    int         physicalbuckets;
+    int         numbatches;
     Selectivity innerbucketsize;
     List       *hcl;
 
@@ -898,6 +901,13 @@ cost_hashjoin(Path *path, Query *root,
     startup_cost += cpu_operator_cost * inner_path->parent->rows;
     run_cost += cpu_operator_cost * outer_path->parent->rows;
 
+    /* Get hash table size that executor would use for inner relation */
+    ExecChooseHashTableSize(inner_path->parent->rows,
+                            inner_path->parent->width,
+                            &virtualbuckets,
+                            &physicalbuckets,
+                            &numbatches);
+
     /*
      * Determine bucketsize fraction for inner relation.  We use the
      * smallest bucketsize estimated for any individual hashclause;
@@ -931,7 +941,8 @@ cost_hashjoin(Path *path, Query *root,
             if (thisbucketsize < 0)
             {
                 /* not cached yet */
-                thisbucketsize = estimate_hash_bucketsize(root, right);
+                thisbucketsize = estimate_hash_bucketsize(root, right,
+                                                          virtualbuckets);
                 restrictinfo->right_bucketsize = thisbucketsize;
             }
         }
@@ -943,7 +954,8 @@ cost_hashjoin(Path *path, Query *root,
             if (thisbucketsize < 0)
             {
                 /* not cached yet */
-                thisbucketsize = estimate_hash_bucketsize(root, left);
+                thisbucketsize = estimate_hash_bucketsize(root, left,
+                                                          virtualbuckets);
                 restrictinfo->left_bucketsize = thisbucketsize;
             }
         }
@@ -982,7 +994,7 @@ cost_hashjoin(Path *path, Query *root,
      * should be nice and sequential...).  Writing the inner rel counts as
      * startup cost, all the rest as run cost.
      */
-    if (innerbytes > hashtablebytes)
+    if (numbatches)
     {
         double      outerpages = page_size(outer_path->parent->rows,
                                            outer_path->parent->width);
@@ -1019,7 +1031,7 @@ cost_hashjoin(Path *path, Query *root,
  * smart enough to figure out how the restrict clauses might change the
  * distribution, so this will have to do for now.
  *
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
  * input relation.  If the data were perfectly distributed, with the same
  * number of tuples going into each available bucket, then the bucketsize
 * fraction would be 1/nbuckets.  But this happy state of affairs will occur
@@ -1039,13 +1051,10 @@ cost_hashjoin(Path *path, Query *root,
 * inner rel is well-dispersed (or the alternatives seem much worse).
 */
static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
{
     Oid         relid;
     RelOptInfo *rel;
-    int         virtualbuckets;
-    int         physicalbuckets;
-    int         numbatches;
     HeapTuple   tuple;
     Form_pg_statistic stats;
     double      estfract,
@@ -1071,12 +1080,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
     if (rel->tuples <= 0.0 || rel->rows <= 0.0)
         return 0.1;             /* ensure we can divide below */
 
-    /* Get hash table size that executor would use for this relation */
-    ExecChooseHashTableSize(rel->rows, rel->width,
-                            &virtualbuckets,
-                            &physicalbuckets,
-                            &numbatches);
-
     tuple = SearchSysCache(STATRELATT,
                            ObjectIdGetDatum(relid),
                            Int16GetDatum(var->varattno),
@@ -1093,7 +1096,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
         case ObjectIdAttributeNumber:
         case SelfItemPointerAttributeNumber:
             /* these are unique, so buckets should be well-distributed */
-            return 1.0 / (double) virtualbuckets;
+            return 1.0 / (double) nbuckets;
         case TableOidAttributeNumber:
             /* hashing this is a terrible idea... */
             return 1.0;
@@ -1134,8 +1137,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
      * the number of buckets is less than the expected number of distinct
      * values; otherwise it is 1/ndistinct.
      */
-    if (ndistinct > (double) virtualbuckets)
-        estfract = 1.0 / (double) virtualbuckets;
+    if (ndistinct > (double) nbuckets)
+        estfract = 1.0 / (double) nbuckets;
     else
         estfract = 1.0 / ndistinct;
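cost_hashjoin previously re-derived the batching decision on its own, comparing innerbytes against SortMem * 1024L; after this commit it asks ExecChooseHashTableSize and tests numbatches, so the planner charges temp-file I/O exactly when the executor would batch. A toy sketch of that flow under a made-up sizing rule; choose_hash_table_size is a stand-in, and the real routine also reports virtual and physical bucket counts:

#include <math.h>
#include <stdio.h>

/*
 * Toy stand-in for ExecChooseHashTableSize: only the numbatches output is
 * modeled, with an invented sizing rule (one extra batch per memory-sized
 * chunk of inner relation beyond the first).
 */
static void
choose_hash_table_size(double ntuples, int tupwidth,
                       double hash_table_bytes, int *numbatches)
{
    double      inner_rel_bytes = ntuples * tupwidth;

    if (inner_rel_bytes <= hash_table_bytes)
        *numbatches = 0;        /* fits in memory: single batch */
    else
        *numbatches = (int) ceil((inner_rel_bytes - hash_table_bytes) /
                                 hash_table_bytes);
}

int
main(void)
{
    int         numbatches;

    choose_hash_table_size(1e6, 100, 1024.0 * 1024.0, &numbatches);

    /* mirrors the new "if (numbatches)" test in cost_hashjoin */
    if (numbatches)
        printf("planner charges temp-file I/O for %d batches\n", numbatches);
    else
        printf("single-batch join: no temp-file I/O charged\n");
    return 0;
}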

src/include/executor/nodeHash.h (2 additions, 1 deletion)

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeHash.h,v 1.27 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,7 @@ extern void ExecHashTableInsert(HashJoinTable hashtable,
 extern int  ExecHashGetBucket(HashJoinTable hashtable,
                   ExprContext *econtext,
                   List *hashkeys);
+extern int  ExecHashGetBatch(int bucketno, HashJoinTable hashtable);
 extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
                     ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);

0 commit comments
