Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 69d0a15

Browse files
committed
Convert hash join code to use MinimalTuple format in tuple hash table
and batch files. Should reduce memory and I/O demands for such joins.
1 parent 665c5e8 commit 69d0a15

File tree

7 files changed

+121
-72
lines changed

7 files changed

+121
-72
lines changed

‎src/backend/executor/execTuples.c

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -718,6 +718,55 @@ ExecFetchSlotTuple(TupleTableSlot *slot)
718718
returnExecMaterializeSlot(slot);
719719
}
720720

721+
/* --------------------------------
722+
*ExecFetchSlotMinimalTuple
723+
*Fetch the slot's minimal physical tuple.
724+
*
725+
*If the slot contains a virtual tuple, we convert it to minimal
726+
*physical form. The slot retains ownership of the physical tuple.
727+
*Likewise, if it contains a regular tuple we convert to minimal form.
728+
*
729+
* As above, the result must be treated as read-only.
730+
* --------------------------------
731+
*/
732+
MinimalTuple
733+
ExecFetchSlotMinimalTuple(TupleTableSlot*slot)
734+
{
735+
MinimalTuplenewTuple;
736+
MemoryContextoldContext;
737+
738+
/*
739+
* sanity checks
740+
*/
741+
Assert(slot!=NULL);
742+
Assert(!slot->tts_isempty);
743+
744+
/*
745+
* If we have a minimal physical tuple then just return it.
746+
*/
747+
if (slot->tts_mintuple)
748+
returnslot->tts_mintuple;
749+
750+
/*
751+
* Otherwise, build a minimal tuple, and then store it as the new slot
752+
* value. (Note: tts_nvalid will be reset to zero here. There are cases
753+
* in which this could be optimized but it's probably not worth worrying
754+
* about.)
755+
*
756+
* We may be called in a context that is shorter-lived than the tuple
757+
* slot, but we have to ensure that the materialized tuple will survive
758+
* anyway.
759+
*/
760+
oldContext=MemoryContextSwitchTo(slot->tts_mcxt);
761+
newTuple=ExecCopySlotMinimalTuple(slot);
762+
MemoryContextSwitchTo(oldContext);
763+
764+
ExecStoreMinimalTuple(newTuple,slot, true);
765+
766+
Assert(slot->tts_mintuple);
767+
returnslot->tts_mintuple;
768+
}
769+
721770
/* --------------------------------
722771
*ExecMaterializeSlot
723772
*Force a slot into the "materialized" state.

‎src/backend/executor/nodeHash.c

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -92,7 +92,7 @@ MultiExecHash(HashState *node)
9292
/* We have to compute the hash value */
9393
econtext->ecxt_innertuple=slot;
9494
hashvalue=ExecHashGetHashValue(hashtable,econtext,hashkeys);
95-
ExecHashTableInsert(hashtable,ExecFetchSlotTuple(slot),hashvalue);
95+
ExecHashTableInsert(hashtable,slot,hashvalue);
9696
}
9797

9898
/* must provide our own instrumentation support */
@@ -358,8 +358,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
358358
* does not allow for any palloc overhead. The manipulations of spaceUsed
359359
* don't count palloc overhead either.
360360
*/
361-
tupsize=MAXALIGN(sizeof(HashJoinTupleData))+
362-
MAXALIGN(sizeof(HeapTupleHeaderData))+
361+
tupsize=HJTUPLE_OVERHEAD+
362+
MAXALIGN(sizeof(MinimalTupleData))+
363363
MAXALIGN(tupwidth);
364364
inner_rel_bytes=ntuples*tupsize;
365365

@@ -548,7 +548,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
548548
{
549549
/* dump it out */
550550
Assert(batchno>curbatch);
551-
ExecHashJoinSaveTuple(&tuple->htup,tuple->hashvalue,
551+
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
552+
tuple->hashvalue,
552553
&hashtable->innerBatchFile[batchno]);
553554
/* and remove from hash table */
554555
if (prevtuple)
@@ -557,7 +558,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
557558
hashtable->buckets[i]=nexttuple;
558559
/* prevtuple doesn't change */
559560
hashtable->spaceUsed-=
560-
MAXALIGN(sizeof(HashJoinTupleData))+tuple->htup.t_len;
561+
HJTUPLE_OVERHEAD+HJTUPLE_MINTUPLE(tuple)->t_len;
561562
pfree(tuple);
562563
nfreed++;
563564
}
@@ -592,12 +593,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
592593
* ExecHashTableInsert
593594
*insert a tuple into the hash table depending on the hash value
594595
*it may just go to a temp file for later batches
596+
*
597+
* Note: the passed TupleTableSlot may contain a regular, minimal, or virtual
598+
* tuple; the minimal case in particular is certain to happen while reloading
599+
* tuples from batch files. We could save some cycles in the regular-tuple
600+
* case by not forcing the slot contents into minimal form; not clear if it's
601+
* worth the messiness required.
595602
*/
596603
void
597604
ExecHashTableInsert(HashJoinTablehashtable,
598-
HeapTupletuple,
605+
TupleTableSlot*slot,
599606
uint32hashvalue)
600607
{
608+
MinimalTupletuple=ExecFetchSlotMinimalTuple(slot);
601609
intbucketno;
602610
intbatchno;
603611

@@ -615,18 +623,11 @@ ExecHashTableInsert(HashJoinTable hashtable,
615623
HashJoinTuplehashTuple;
616624
inthashTupleSize;
617625

618-
hashTupleSize=MAXALIGN(sizeof(HashJoinTupleData))+tuple->t_len;
626+
hashTupleSize=HJTUPLE_OVERHEAD+tuple->t_len;
619627
hashTuple= (HashJoinTuple)MemoryContextAlloc(hashtable->batchCxt,
620628
hashTupleSize);
621629
hashTuple->hashvalue=hashvalue;
622-
memcpy((char*)&hashTuple->htup,
623-
(char*)tuple,
624-
sizeof(hashTuple->htup));
625-
hashTuple->htup.t_data= (HeapTupleHeader)
626-
(((char*)hashTuple)+MAXALIGN(sizeof(HashJoinTupleData)));
627-
memcpy((char*)hashTuple->htup.t_data,
628-
(char*)tuple->t_data,
629-
tuple->t_len);
630+
memcpy(HJTUPLE_MINTUPLE(hashTuple),tuple,tuple->t_len);
630631
hashTuple->next=hashtable->buckets[bucketno];
631632
hashtable->buckets[bucketno]=hashTuple;
632633
hashtable->spaceUsed+=hashTupleSize;
@@ -639,7 +640,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
639640
* put the tuple into a temp file for later batches
640641
*/
641642
Assert(batchno>hashtable->curbatch);
642-
ExecHashJoinSaveTuple(tuple,hashvalue,
643+
ExecHashJoinSaveTuple(tuple,
644+
hashvalue,
643645
&hashtable->innerBatchFile[batchno]);
644646
}
645647
}
@@ -749,7 +751,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
749751
*
750752
* The current outer tuple must be stored in econtext->ecxt_outertuple.
751753
*/
752-
HeapTuple
754+
HashJoinTuple
753755
ExecScanHashBucket(HashJoinState*hjstate,
754756
ExprContext*econtext)
755757
{
@@ -771,14 +773,12 @@ ExecScanHashBucket(HashJoinState *hjstate,
771773
{
772774
if (hashTuple->hashvalue==hashvalue)
773775
{
774-
HeapTupleheapTuple=&hashTuple->htup;
775776
TupleTableSlot*inntuple;
776777

777778
/* insert hashtable's tuple into exec slot so ExecQual sees it */
778-
inntuple=ExecStoreTuple(heapTuple,
779-
hjstate->hj_HashTupleSlot,
780-
InvalidBuffer,
781-
false);/* do not pfree */
779+
inntuple=ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
780+
hjstate->hj_HashTupleSlot,
781+
false);/* do not pfree */
782782
econtext->ecxt_innertuple=inntuple;
783783

784784
/* reset temp memory each time to avoid leaks from qual expr */
@@ -787,7 +787,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
787787
if (ExecQual(hjclauses,econtext, false))
788788
{
789789
hjstate->hj_CurTuple=hashTuple;
790-
returnheapTuple;
790+
returnhashTuple;
791791
}
792792
}
793793

‎src/backend/executor/nodeHashjoin.c

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -54,7 +54,7 @@ ExecHashJoin(HashJoinState *node)
5454
ExprContext*econtext;
5555
ExprDoneCondisDone;
5656
HashJoinTablehashtable;
57-
HeapTuplecurtuple;
57+
HashJoinTuplecurtuple;
5858
TupleTableSlot*outerTupleSlot;
5959
uint32hashvalue;
6060
intbatchno;
@@ -224,7 +224,7 @@ ExecHashJoin(HashJoinState *node)
224224
* in the corresponding outer-batch file.
225225
*/
226226
Assert(batchno>hashtable->curbatch);
227-
ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
227+
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
228228
hashvalue,
229229
&hashtable->outerBatchFile[batchno]);
230230
node->hj_NeedNewOuter= true;
@@ -244,10 +244,9 @@ ExecHashJoin(HashJoinState *node)
244244
/*
245245
* we've got a match, but still need to test non-hashed quals
246246
*/
247-
inntuple=ExecStoreTuple(curtuple,
248-
node->hj_HashTupleSlot,
249-
InvalidBuffer,
250-
false);/* don't pfree this tuple */
247+
inntuple=ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
248+
node->hj_HashTupleSlot,
249+
false);/* don't pfree */
251250
econtext->ecxt_innertuple=inntuple;
252251

253252
/* reset temp memory each time to avoid leaks from qual expr */
@@ -706,9 +705,7 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
706705
* NOTE: some tuples may be sent to future batches. Also, it is
707706
* possible for hashtable->nbatch to be increased here!
708707
*/
709-
ExecHashTableInsert(hashtable,
710-
ExecFetchSlotTuple(slot),
711-
hashvalue);
708+
ExecHashTableInsert(hashtable,slot,hashvalue);
712709
}
713710

714711
/*
@@ -741,15 +738,14 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
741738
*save a tuple to a batch file.
742739
*
743740
* The data recorded in the file for each tuple is its hash value,
744-
* then an image of its HeapTupleData (with meaningless t_data pointer)
745-
* followed by the HeapTupleHeader and tuple data.
741+
* then the tuple in MinimalTuple format.
746742
*
747743
* Note: it is important always to call this in the regular executor
748744
* context, not in a shorter-lived context; else the temp file buffers
749745
* will get messed up.
750746
*/
751747
void
752-
ExecHashJoinSaveTuple(HeapTupleheapTuple,uint32hashvalue,
748+
ExecHashJoinSaveTuple(MinimalTupletuple,uint32hashvalue,
753749
BufFile**fileptr)
754750
{
755751
BufFile*file=*fileptr;
@@ -768,14 +764,8 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
768764
(errcode_for_file_access(),
769765
errmsg("could not write to hash-join temporary file: %m")));
770766

771-
written=BufFileWrite(file, (void*)heapTuple,sizeof(HeapTupleData));
772-
if (written!=sizeof(HeapTupleData))
773-
ereport(ERROR,
774-
(errcode_for_file_access(),
775-
errmsg("could not write to hash-join temporary file: %m")));
776-
777-
written=BufFileWrite(file, (void*)heapTuple->t_data,heapTuple->t_len);
778-
if (written!= (size_t)heapTuple->t_len)
767+
written=BufFileWrite(file, (void*)tuple,tuple->t_len);
768+
if (written!=tuple->t_len)
779769
ereport(ERROR,
780770
(errcode_for_file_access(),
781771
errmsg("could not write to hash-join temporary file: %m")));
@@ -794,32 +784,36 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
794784
uint32*hashvalue,
795785
TupleTableSlot*tupleSlot)
796786
{
797-
HeapTupleDatahtup;
787+
uint32header[2];
798788
size_tnread;
799-
HeapTupleheapTuple;
789+
MinimalTupletuple;
800790

801-
nread=BufFileRead(file, (void*)hashvalue,sizeof(uint32));
802-
if (nread==0)
803-
returnNULL;/* end of file */
804-
if (nread!=sizeof(uint32))
805-
ereport(ERROR,
806-
(errcode_for_file_access(),
807-
errmsg("could not read from hash-join temporary file: %m")));
808-
nread=BufFileRead(file, (void*)&htup,sizeof(HeapTupleData));
809-
if (nread!=sizeof(HeapTupleData))
791+
/*
792+
* Since both the hash value and the MinimalTuple length word are
793+
* uint32, we can read them both in one BufFileRead() call without
794+
* any type cheating.
795+
*/
796+
nread=BufFileRead(file, (void*)header,sizeof(header));
797+
if (nread==0)/* end of file */
798+
{
799+
ExecClearTuple(tupleSlot);
800+
returnNULL;
801+
}
802+
if (nread!=sizeof(header))
810803
ereport(ERROR,
811804
(errcode_for_file_access(),
812805
errmsg("could not read from hash-join temporary file: %m")));
813-
heapTuple=palloc(HEAPTUPLESIZE+htup.t_len);
814-
memcpy((char*)heapTuple, (char*)&htup,sizeof(HeapTupleData));
815-
heapTuple->t_data= (HeapTupleHeader)
816-
((char*)heapTuple+HEAPTUPLESIZE);
817-
nread=BufFileRead(file, (void*)heapTuple->t_data,htup.t_len);
818-
if (nread!= (size_t)htup.t_len)
806+
*hashvalue=header[0];
807+
tuple= (MinimalTuple)palloc(header[1]);
808+
tuple->t_len=header[1];
809+
nread=BufFileRead(file,
810+
(void*) ((char*)tuple+sizeof(uint32)),
811+
header[1]-sizeof(uint32));
812+
if (nread!=header[1]-sizeof(uint32))
819813
ereport(ERROR,
820814
(errcode_for_file_access(),
821815
errmsg("could not read from hash-join temporary file: %m")));
822-
returnExecStoreTuple(heapTuple,tupleSlot,InvalidBuffer, true);
816+
returnExecStoreMinimalTuple(tuple,tupleSlot, true);
823817
}
824818

825819

‎src/include/executor/hashjoin.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -65,9 +65,14 @@ typedef struct HashJoinTupleData
6565
{
6666
structHashJoinTupleData*next;/* link to next tuple in same bucket */
6767
uint32hashvalue;/* tuple's hash code */
68-
HeapTupleDatahtup;/*tuple header */
68+
/*Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
6969
}HashJoinTupleData;
7070

71+
#defineHJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
72+
#defineHJTUPLE_MINTUPLE(hjtup) \
73+
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
74+
75+
7176
typedefstructHashJoinTableData
7277
{
7378
intnbuckets;/* # buckets in the in-memory hash table */

‎src/include/executor/nodeHash.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -26,7 +26,7 @@ extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt);
2626
externHashJoinTableExecHashTableCreate(Hash*node,List*hashOperators);
2727
externvoidExecHashTableDestroy(HashJoinTablehashtable);
2828
externvoidExecHashTableInsert(HashJoinTablehashtable,
29-
HeapTupletuple,
29+
TupleTableSlot*slot,
3030
uint32hashvalue);
3131
externuint32ExecHashGetHashValue(HashJoinTablehashtable,
3232
ExprContext*econtext,
@@ -35,7 +35,7 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
3535
uint32hashvalue,
3636
int*bucketno,
3737
int*batchno);
38-
externHeapTupleExecScanHashBucket(HashJoinState*hjstate,
38+
externHashJoinTupleExecScanHashBucket(HashJoinState*hjstate,
3939
ExprContext*econtext);
4040
externvoidExecHashTableReset(HashJoinTablehashtable);
4141
externvoidExecChooseHashTableSize(doublentuples,inttupwidth,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp