Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b0f18cb

Browse files
committed
hash: Refactor bucket squeeze code.
In preparation for adding write-ahead logging to hash indexes, refactor _hash_freeovflpage and _hash_squeezebucket so that all related page modifications happen in a single section of code. The previous coding assumed that it would be fine to move tuples one at a time, and also that the various operations involved in freeing an overflow page didn't necessarily all need to be done together, all of which is true if you don't care about write-ahead logging. Amit Kapila, with slight changes by me.
1 parent 817f2a5 commit b0f18cb

File tree

6 files changed

+196
-70
lines changed

6 files changed

+196
-70
lines changed

‎src/backend/access/hash/hashinsert.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,44 @@ _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
228228

229229
returnitup_off;
230230
}
231+
232+
/*
233+
*_hash_pgaddmultitup() -- add a tuple vector to a particular page in the
234+
* index.
235+
*
236+
* This routine has same requirements for locking and tuple ordering as
237+
* _hash_pgaddtup().
238+
*
239+
* Returns the offset number array at which the tuples were inserted.
240+
*/
241+
void
242+
_hash_pgaddmultitup(Relationrel,Bufferbuf,IndexTuple*itups,
243+
OffsetNumber*itup_offsets,uint16nitups)
244+
{
245+
OffsetNumberitup_off;
246+
Pagepage;
247+
uint32hashkey;
248+
inti;
249+
250+
_hash_checkpage(rel,buf,LH_BUCKET_PAGE |LH_OVERFLOW_PAGE);
251+
page=BufferGetPage(buf);
252+
253+
for (i=0;i<nitups;i++)
254+
{
255+
Sizeitemsize;
256+
257+
itemsize=IndexTupleDSize(*itups[i]);
258+
itemsize=MAXALIGN(itemsize);
259+
260+
/* Find where to insert the tuple (preserving page's hashkey ordering) */
261+
hashkey=_hash_get_indextuple_hashkey(itups[i]);
262+
itup_off=_hash_binsearch(page,hashkey);
263+
264+
itup_offsets[i]=itup_off;
265+
266+
if (PageAddItem(page, (Item)itups[i],itemsize,itup_off, false, false)
267+
==InvalidOffsetNumber)
268+
elog(ERROR,"failed to add index item to \"%s\"",
269+
RelationGetRelationName(rel));
270+
}
271+
}

‎src/backend/access/hash/hashovfl.c

Lines changed: 122 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,8 @@ _hash_firstfreebit(uint32 map)
391391
*Remove this overflow page from its bucket's chain, and mark the page as
392392
*free. On entry, ovflbuf is write-locked; it is released before exiting.
393393
*
394+
*Add the tuples (itups) to wbuf.
395+
*
394396
*Since this function is invoked in VACUUM, we provide an access strategy
395397
*parameter that controls fetches of the bucket pages.
396398
*
@@ -403,13 +405,16 @@ _hash_firstfreebit(uint32 map)
403405
*has a lock on same.
404406
*/
405407
BlockNumber
406-
_hash_freeovflpage(Relationrel,Bufferovflbuf,Bufferwbuf,
408+
_hash_freeovflpage(Relationrel,Bufferbucketbuf,Bufferovflbuf,
409+
Bufferwbuf,IndexTuple*itups,OffsetNumber*itup_offsets,
410+
Size*tups_size,uint16nitups,
407411
BufferAccessStrategybstrategy)
408412
{
409413
HashMetaPagemetap;
410414
Buffermetabuf;
411415
Buffermapbuf;
412416
Bufferprevbuf=InvalidBuffer;
417+
Buffernextbuf=InvalidBuffer;
413418
BlockNumberovflblkno;
414419
BlockNumberprevblkno;
415420
BlockNumberblkno;
@@ -434,15 +439,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
434439
writeblkno=BufferGetBlockNumber(wbuf);
435440
bucket=ovflopaque->hasho_bucket;
436441

437-
/*
438-
* Zero the page for debugging's sake; then write and release it. (Note:
439-
* if we failed to zero the page here, we'd have problems with the Assert
440-
* in _hash_pageinit() when the page is reused.)
441-
*/
442-
MemSet(ovflpage,0,BufferGetPageSize(ovflbuf));
443-
MarkBufferDirty(ovflbuf);
444-
_hash_relbuf(rel,ovflbuf);
445-
446442
/*
447443
* Fix up the bucket chain. this is a doubly-linked list, so we must fix
448444
* up the bucket chain members behind and ahead of the overflow page being
@@ -451,9 +447,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
451447
*/
452448
if (BlockNumberIsValid(prevblkno))
453449
{
454-
Pageprevpage;
455-
HashPageOpaqueprevopaque;
456-
457450
if (prevblkno==writeblkno)
458451
prevbuf=wbuf;
459452
else
@@ -462,32 +455,13 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
462455
HASH_WRITE,
463456
LH_BUCKET_PAGE |LH_OVERFLOW_PAGE,
464457
bstrategy);
465-
466-
prevpage=BufferGetPage(prevbuf);
467-
prevopaque= (HashPageOpaque)PageGetSpecialPointer(prevpage);
468-
469-
Assert(prevopaque->hasho_bucket==bucket);
470-
prevopaque->hasho_nextblkno=nextblkno;
471-
472-
MarkBufferDirty(prevbuf);
473-
if (prevblkno!=writeblkno)
474-
_hash_relbuf(rel,prevbuf);
475458
}
476459
if (BlockNumberIsValid(nextblkno))
477-
{
478-
Buffernextbuf=_hash_getbuf_with_strategy(rel,
479-
nextblkno,
480-
HASH_WRITE,
481-
LH_OVERFLOW_PAGE,
482-
bstrategy);
483-
Pagenextpage=BufferGetPage(nextbuf);
484-
HashPageOpaquenextopaque= (HashPageOpaque)PageGetSpecialPointer(nextpage);
485-
486-
Assert(nextopaque->hasho_bucket==bucket);
487-
nextopaque->hasho_prevblkno=prevblkno;
488-
MarkBufferDirty(nextbuf);
489-
_hash_relbuf(rel,nextbuf);
490-
}
460+
nextbuf=_hash_getbuf_with_strategy(rel,
461+
nextblkno,
462+
HASH_WRITE,
463+
LH_OVERFLOW_PAGE,
464+
bstrategy);
491465

492466
/* Note: bstrategy is intentionally not used for metapage and bitmap */
493467

@@ -508,24 +482,71 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
508482
/* Release metapage lock while we access the bitmap page */
509483
LockBuffer(metabuf,BUFFER_LOCK_UNLOCK);
510484

511-
/*Clear the bitmapbit toindicate that this overflow page is free */
485+
/*read the bitmappage toclear the bitmap bit */
512486
mapbuf=_hash_getbuf(rel,blkno,HASH_WRITE,LH_BITMAP_PAGE);
513487
mappage=BufferGetPage(mapbuf);
514488
freep=HashPageGetBitmap(mappage);
515489
Assert(ISSET(freep,bitmapbit));
516-
CLRBIT(freep,bitmapbit);
517-
MarkBufferDirty(mapbuf);
518-
_hash_relbuf(rel,mapbuf);
519490

520491
/* Get write-lock on metapage to update firstfree */
521492
LockBuffer(metabuf,BUFFER_LOCK_EXCLUSIVE);
522493

494+
/*
495+
* we have to insert tuples on the "write" page, being careful to preserve
496+
* hashkey ordering. (If we insert many tuples into the same "write" page
497+
* it would be worth qsort'ing them).
498+
*/
499+
if (nitups>0)
500+
{
501+
_hash_pgaddmultitup(rel,wbuf,itups,itup_offsets,nitups);
502+
MarkBufferDirty(wbuf);
503+
}
504+
505+
/* Initialize the freed overflow page. */
506+
_hash_pageinit(ovflpage,BufferGetPageSize(ovflbuf));
507+
MarkBufferDirty(ovflbuf);
508+
509+
if (BufferIsValid(prevbuf))
510+
{
511+
Pageprevpage=BufferGetPage(prevbuf);
512+
HashPageOpaqueprevopaque= (HashPageOpaque)PageGetSpecialPointer(prevpage);
513+
514+
Assert(prevopaque->hasho_bucket==bucket);
515+
prevopaque->hasho_nextblkno=nextblkno;
516+
MarkBufferDirty(prevbuf);
517+
}
518+
if (BufferIsValid(nextbuf))
519+
{
520+
Pagenextpage=BufferGetPage(nextbuf);
521+
HashPageOpaquenextopaque= (HashPageOpaque)PageGetSpecialPointer(nextpage);
522+
523+
Assert(nextopaque->hasho_bucket==bucket);
524+
nextopaque->hasho_prevblkno=prevblkno;
525+
MarkBufferDirty(nextbuf);
526+
}
527+
528+
/* Clear the bitmap bit to indicate that this overflow page is free */
529+
CLRBIT(freep,bitmapbit);
530+
MarkBufferDirty(mapbuf);
531+
523532
/* if this is now the first free page, update hashm_firstfree */
524533
if (ovflbitno<metap->hashm_firstfree)
525534
{
526535
metap->hashm_firstfree=ovflbitno;
527536
MarkBufferDirty(metabuf);
528537
}
538+
539+
/* release previous bucket if it is not same as write bucket */
540+
if (BufferIsValid(prevbuf)&&prevblkno!=writeblkno)
541+
_hash_relbuf(rel,prevbuf);
542+
543+
if (BufferIsValid(ovflbuf))
544+
_hash_relbuf(rel,ovflbuf);
545+
546+
if (BufferIsValid(nextbuf))
547+
_hash_relbuf(rel,nextbuf);
548+
549+
_hash_relbuf(rel,mapbuf);
529550
_hash_relbuf(rel,metabuf);
530551

531552
returnnextblkno;
@@ -640,7 +661,6 @@ _hash_squeezebucket(Relation rel,
640661
Pagerpage;
641662
HashPageOpaquewopaque;
642663
HashPageOpaqueropaque;
643-
boolwbuf_dirty;
644664

645665
/*
646666
* start squeezing into the primary bucket page.
@@ -686,15 +706,21 @@ _hash_squeezebucket(Relation rel,
686706
/*
687707
* squeeze the tuples.
688708
*/
689-
wbuf_dirty= false;
690709
for (;;)
691710
{
692711
OffsetNumberroffnum;
693712
OffsetNumbermaxroffnum;
694713
OffsetNumberdeletable[MaxOffsetNumber];
695-
intndeletable=0;
714+
IndexTupleitups[MaxIndexTuplesPerPage];
715+
Sizetups_size[MaxIndexTuplesPerPage];
716+
OffsetNumberitup_offsets[MaxIndexTuplesPerPage];
717+
uint16ndeletable=0;
718+
uint16nitups=0;
719+
Sizeall_tups_size=0;
720+
inti;
696721
boolretain_pin= false;
697722

723+
readpage:
698724
/* Scan each tuple in "read" page */
699725
maxroffnum=PageGetMaxOffsetNumber(rpage);
700726
for (roffnum=FirstOffsetNumber;
@@ -715,11 +741,13 @@ _hash_squeezebucket(Relation rel,
715741

716742
/*
717743
* Walk up the bucket chain, looking for a page big enough for
718-
* this item. Exit if we reach the read page.
744+
* this item and all other accumulated items. Exit if we reach
745+
* the read page.
719746
*/
720-
while (PageGetFreeSpace(wpage)<itemsz)
747+
while (PageGetFreeSpaceForMultipleTuples(wpage,nitups+1)<(all_tups_size+itemsz))
721748
{
722749
Buffernext_wbuf=InvalidBuffer;
750+
booltups_moved= false;
723751

724752
Assert(!PageIsEmpty(wpage));
725753

@@ -737,12 +765,30 @@ _hash_squeezebucket(Relation rel,
737765
LH_OVERFLOW_PAGE,
738766
bstrategy);
739767

768+
if (nitups>0)
769+
{
770+
Assert(nitups==ndeletable);
771+
772+
/*
773+
* we have to insert tuples on the "write" page, being
774+
* careful to preserve hashkey ordering. (If we insert
775+
* many tuples into the same "write" page it would be
776+
* worth qsort'ing them).
777+
*/
778+
_hash_pgaddmultitup(rel,wbuf,itups,itup_offsets,nitups);
779+
MarkBufferDirty(wbuf);
780+
781+
/* Delete tuples we already moved off read page */
782+
PageIndexMultiDelete(rpage,deletable,ndeletable);
783+
MarkBufferDirty(rbuf);
784+
785+
tups_moved= true;
786+
}
787+
740788
/*
741789
* release the lock on previous page after acquiring the lock
742790
* on next page
743791
*/
744-
if (wbuf_dirty)
745-
MarkBufferDirty(wbuf);
746792
if (retain_pin)
747793
LockBuffer(wbuf,BUFFER_LOCK_UNLOCK);
748794
else
@@ -751,12 +797,6 @@ _hash_squeezebucket(Relation rel,
751797
/* nothing more to do if we reached the read page */
752798
if (rblkno==wblkno)
753799
{
754-
if (ndeletable>0)
755-
{
756-
/* Delete tuples we already moved off read page */
757-
PageIndexMultiDelete(rpage,deletable,ndeletable);
758-
MarkBufferDirty(rbuf);
759-
}
760800
_hash_relbuf(rel,rbuf);
761801
return;
762802
}
@@ -765,21 +805,34 @@ _hash_squeezebucket(Relation rel,
765805
wpage=BufferGetPage(wbuf);
766806
wopaque= (HashPageOpaque)PageGetSpecialPointer(wpage);
767807
Assert(wopaque->hasho_bucket==bucket);
768-
wbuf_dirty= false;
769808
retain_pin= false;
770-
}
771809

772-
/*
773-
* we have found room so insert on the "write" page, being careful
774-
* to preserve hashkey ordering. (If we insert many tuples into
775-
* the same "write" page it would be worth qsort'ing instead of
776-
* doing repeated _hash_pgaddtup.)
777-
*/
778-
(void)_hash_pgaddtup(rel,wbuf,itemsz,itup);
779-
wbuf_dirty= true;
810+
/* be tidy */
811+
for (i=0;i<nitups;i++)
812+
pfree(itups[i]);
813+
nitups=0;
814+
all_tups_size=0;
815+
ndeletable=0;
816+
817+
/*
818+
* after moving the tuples, rpage would have been compacted,
819+
* so we need to rescan it.
820+
*/
821+
if (tups_moved)
822+
gotoreadpage;
823+
}
780824

781825
/* remember tuple for deletion from "read" page */
782826
deletable[ndeletable++]=roffnum;
827+
828+
/*
829+
* we need a copy of index tuples as they can be freed as part of
830+
* overflow page, however we need them to write a WAL record in
831+
* _hash_freeovflpage.
832+
*/
833+
itups[nitups]=CopyIndexTuple(itup);
834+
tups_size[nitups++]=itemsz;
835+
all_tups_size+=itemsz;
783836
}
784837

785838
/*
@@ -797,10 +850,12 @@ _hash_squeezebucket(Relation rel,
797850
Assert(BlockNumberIsValid(rblkno));
798851

799852
/* free this overflow page (releases rbuf) */
800-
_hash_freeovflpage(rel,rbuf,wbuf,bstrategy);
853+
_hash_freeovflpage(rel,bucket_buf,rbuf,wbuf,itups,itup_offsets,
854+
tups_size,nitups,bstrategy);
801855

802-
if (wbuf_dirty)
803-
MarkBufferDirty(wbuf);
856+
/* be tidy */
857+
for (i=0;i<nitups;i++)
858+
pfree(itups[i]);
804859

805860
/* are we freeing the page adjacent to wbuf? */
806861
if (rblkno==wblkno)

‎src/backend/access/hash/hashpage.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
470470
void
471471
_hash_pageinit(Pagepage,Sizesize)
472472
{
473-
Assert(PageIsNew(page));
474473
PageInit(page,size,sizeof(HashPageOpaqueData));
475474
}
476475

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp