Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b0f18cb

Browse files
committed
hash: Refactor bucket squeeze code.
In preparation for adding write-ahead logging to hash indexes, refactor _hash_freeovflpage and _hash_squeezebucket so that all related page modifications happen in a single section of code. The previous coding assumed that it would be fine to move tuples one at a time, and also that the various operations involved in freeing an overflow page didn't necessarily all need to be done together, all of which is true if you don't care about write-ahead logging. Amit Kapila, with slight changes by me.
1 parent 817f2a5 commit b0f18cb

File tree

6 files changed

+196
-70
lines changed

6 files changed

+196
-70
lines changed

‎src/backend/access/hash/hashinsert.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,44 @@ _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
228228

229229
returnitup_off;
230230
}
231+
232+
/*
233+
*_hash_pgaddmultitup() -- add a tuple vector to a particular page in the
234+
* index.
235+
*
236+
* This routine has same requirements for locking and tuple ordering as
237+
* _hash_pgaddtup().
238+
*
239+
* Returns the offset number array at which the tuples were inserted.
240+
*/
241+
void
242+
_hash_pgaddmultitup(Relationrel,Bufferbuf,IndexTuple*itups,
243+
OffsetNumber*itup_offsets,uint16nitups)
244+
{
245+
OffsetNumberitup_off;
246+
Pagepage;
247+
uint32hashkey;
248+
inti;
249+
250+
_hash_checkpage(rel,buf,LH_BUCKET_PAGE |LH_OVERFLOW_PAGE);
251+
page=BufferGetPage(buf);
252+
253+
for (i=0;i<nitups;i++)
254+
{
255+
Sizeitemsize;
256+
257+
itemsize=IndexTupleDSize(*itups[i]);
258+
itemsize=MAXALIGN(itemsize);
259+
260+
/* Find where to insert the tuple (preserving page's hashkey ordering) */
261+
hashkey=_hash_get_indextuple_hashkey(itups[i]);
262+
itup_off=_hash_binsearch(page,hashkey);
263+
264+
itup_offsets[i]=itup_off;
265+
266+
if (PageAddItem(page, (Item)itups[i],itemsize,itup_off, false, false)
267+
==InvalidOffsetNumber)
268+
elog(ERROR,"failed to add index item to \"%s\"",
269+
RelationGetRelationName(rel));
270+
}
271+
}

‎src/backend/access/hash/hashovfl.c

Lines changed: 122 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,8 @@ _hash_firstfreebit(uint32 map)
391391
*Remove this overflow page from its bucket's chain, and mark the page as
392392
*free. On entry, ovflbuf is write-locked; it is released before exiting.
393393
*
394+
*Add the tuples (itups) to wbuf.
395+
*
394396
*Since this function is invoked in VACUUM, we provide an access strategy
395397
*parameter that controls fetches of the bucket pages.
396398
*
@@ -403,13 +405,16 @@ _hash_firstfreebit(uint32 map)
403405
*has a lock on same.
404406
*/
405407
BlockNumber
406-
_hash_freeovflpage(Relationrel,Bufferovflbuf,Bufferwbuf,
408+
_hash_freeovflpage(Relationrel,Bufferbucketbuf,Bufferovflbuf,
409+
Bufferwbuf,IndexTuple*itups,OffsetNumber*itup_offsets,
410+
Size*tups_size,uint16nitups,
407411
BufferAccessStrategybstrategy)
408412
{
409413
HashMetaPagemetap;
410414
Buffermetabuf;
411415
Buffermapbuf;
412416
Bufferprevbuf=InvalidBuffer;
417+
Buffernextbuf=InvalidBuffer;
413418
BlockNumberovflblkno;
414419
BlockNumberprevblkno;
415420
BlockNumberblkno;
@@ -434,15 +439,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
434439
writeblkno=BufferGetBlockNumber(wbuf);
435440
bucket=ovflopaque->hasho_bucket;
436441

437-
/*
438-
* Zero the page for debugging's sake; then write and release it. (Note:
439-
* if we failed to zero the page here, we'd have problems with the Assert
440-
* in _hash_pageinit() when the page is reused.)
441-
*/
442-
MemSet(ovflpage,0,BufferGetPageSize(ovflbuf));
443-
MarkBufferDirty(ovflbuf);
444-
_hash_relbuf(rel,ovflbuf);
445-
446442
/*
447443
* Fix up the bucket chain. this is a doubly-linked list, so we must fix
448444
* up the bucket chain members behind and ahead of the overflow page being
@@ -451,9 +447,6 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
451447
*/
452448
if (BlockNumberIsValid(prevblkno))
453449
{
454-
Pageprevpage;
455-
HashPageOpaqueprevopaque;
456-
457450
if (prevblkno==writeblkno)
458451
prevbuf=wbuf;
459452
else
@@ -462,32 +455,13 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
462455
HASH_WRITE,
463456
LH_BUCKET_PAGE |LH_OVERFLOW_PAGE,
464457
bstrategy);
465-
466-
prevpage=BufferGetPage(prevbuf);
467-
prevopaque= (HashPageOpaque)PageGetSpecialPointer(prevpage);
468-
469-
Assert(prevopaque->hasho_bucket==bucket);
470-
prevopaque->hasho_nextblkno=nextblkno;
471-
472-
MarkBufferDirty(prevbuf);
473-
if (prevblkno!=writeblkno)
474-
_hash_relbuf(rel,prevbuf);
475458
}
476459
if (BlockNumberIsValid(nextblkno))
477-
{
478-
Buffernextbuf=_hash_getbuf_with_strategy(rel,
479-
nextblkno,
480-
HASH_WRITE,
481-
LH_OVERFLOW_PAGE,
482-
bstrategy);
483-
Pagenextpage=BufferGetPage(nextbuf);
484-
HashPageOpaquenextopaque= (HashPageOpaque)PageGetSpecialPointer(nextpage);
485-
486-
Assert(nextopaque->hasho_bucket==bucket);
487-
nextopaque->hasho_prevblkno=prevblkno;
488-
MarkBufferDirty(nextbuf);
489-
_hash_relbuf(rel,nextbuf);
490-
}
460+
nextbuf=_hash_getbuf_with_strategy(rel,
461+
nextblkno,
462+
HASH_WRITE,
463+
LH_OVERFLOW_PAGE,
464+
bstrategy);
491465

492466
/* Note: bstrategy is intentionally not used for metapage and bitmap */
493467

@@ -508,24 +482,71 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
508482
/* Release metapage lock while we access the bitmap page */
509483
LockBuffer(metabuf,BUFFER_LOCK_UNLOCK);
510484

511-
/*Clear the bitmapbit toindicate that this overflow page is free */
485+
/*read the bitmappage toclear the bitmap bit */
512486
mapbuf=_hash_getbuf(rel,blkno,HASH_WRITE,LH_BITMAP_PAGE);
513487
mappage=BufferGetPage(mapbuf);
514488
freep=HashPageGetBitmap(mappage);
515489
Assert(ISSET(freep,bitmapbit));
516-
CLRBIT(freep,bitmapbit);
517-
MarkBufferDirty(mapbuf);
518-
_hash_relbuf(rel,mapbuf);
519490

520491
/* Get write-lock on metapage to update firstfree */
521492
LockBuffer(metabuf,BUFFER_LOCK_EXCLUSIVE);
522493

494+
/*
495+
* we have to insert tuples on the "write" page, being careful to preserve
496+
* hashkey ordering. (If we insert many tuples into the same "write" page
497+
* it would be worth qsort'ing them).
498+
*/
499+
if (nitups>0)
500+
{
501+
_hash_pgaddmultitup(rel,wbuf,itups,itup_offsets,nitups);
502+
MarkBufferDirty(wbuf);
503+
}
504+
505+
/* Initialize the freed overflow page. */
506+
_hash_pageinit(ovflpage,BufferGetPageSize(ovflbuf));
507+
MarkBufferDirty(ovflbuf);
508+
509+
if (BufferIsValid(prevbuf))
510+
{
511+
Pageprevpage=BufferGetPage(prevbuf);
512+
HashPageOpaqueprevopaque= (HashPageOpaque)PageGetSpecialPointer(prevpage);
513+
514+
Assert(prevopaque->hasho_bucket==bucket);
515+
prevopaque->hasho_nextblkno=nextblkno;
516+
MarkBufferDirty(prevbuf);
517+
}
518+
if (BufferIsValid(nextbuf))
519+
{
520+
Pagenextpage=BufferGetPage(nextbuf);
521+
HashPageOpaquenextopaque= (HashPageOpaque)PageGetSpecialPointer(nextpage);
522+
523+
Assert(nextopaque->hasho_bucket==bucket);
524+
nextopaque->hasho_prevblkno=prevblkno;
525+
MarkBufferDirty(nextbuf);
526+
}
527+
528+
/* Clear the bitmap bit to indicate that this overflow page is free */
529+
CLRBIT(freep,bitmapbit);
530+
MarkBufferDirty(mapbuf);
531+
523532
/* if this is now the first free page, update hashm_firstfree */
524533
if (ovflbitno<metap->hashm_firstfree)
525534
{
526535
metap->hashm_firstfree=ovflbitno;
527536
MarkBufferDirty(metabuf);
528537
}
538+
539+
/* release previous bucket if it is not same as write bucket */
540+
if (BufferIsValid(prevbuf)&&prevblkno!=writeblkno)
541+
_hash_relbuf(rel,prevbuf);
542+
543+
if (BufferIsValid(ovflbuf))
544+
_hash_relbuf(rel,ovflbuf);
545+
546+
if (BufferIsValid(nextbuf))
547+
_hash_relbuf(rel,nextbuf);
548+
549+
_hash_relbuf(rel,mapbuf);
529550
_hash_relbuf(rel,metabuf);
530551

531552
returnnextblkno;
@@ -640,7 +661,6 @@ _hash_squeezebucket(Relation rel,
640661
Pagerpage;
641662
HashPageOpaquewopaque;
642663
HashPageOpaqueropaque;
643-
boolwbuf_dirty;
644664

645665
/*
646666
* start squeezing into the primary bucket page.
@@ -686,15 +706,21 @@ _hash_squeezebucket(Relation rel,
686706
/*
687707
* squeeze the tuples.
688708
*/
689-
wbuf_dirty= false;
690709
for (;;)
691710
{
692711
OffsetNumberroffnum;
693712
OffsetNumbermaxroffnum;
694713
OffsetNumberdeletable[MaxOffsetNumber];
695-
intndeletable=0;
714+
IndexTupleitups[MaxIndexTuplesPerPage];
715+
Sizetups_size[MaxIndexTuplesPerPage];
716+
OffsetNumberitup_offsets[MaxIndexTuplesPerPage];
717+
uint16ndeletable=0;
718+
uint16nitups=0;
719+
Sizeall_tups_size=0;
720+
inti;
696721
boolretain_pin= false;
697722

723+
readpage:
698724
/* Scan each tuple in "read" page */
699725
maxroffnum=PageGetMaxOffsetNumber(rpage);
700726
for (roffnum=FirstOffsetNumber;
@@ -715,11 +741,13 @@ _hash_squeezebucket(Relation rel,
715741

716742
/*
717743
* Walk up the bucket chain, looking for a page big enough for
718-
* this item. Exit if we reach the read page.
744+
* this item and all other accumulated items. Exit if we reach
745+
* the read page.
719746
*/
720-
while (PageGetFreeSpace(wpage)<itemsz)
747+
while (PageGetFreeSpaceForMultipleTuples(wpage,nitups+1)<(all_tups_size+itemsz))
721748
{
722749
Buffernext_wbuf=InvalidBuffer;
750+
booltups_moved= false;
723751

724752
Assert(!PageIsEmpty(wpage));
725753

@@ -737,12 +765,30 @@ _hash_squeezebucket(Relation rel,
737765
LH_OVERFLOW_PAGE,
738766
bstrategy);
739767

768+
if (nitups>0)
769+
{
770+
Assert(nitups==ndeletable);
771+
772+
/*
773+
* we have to insert tuples on the "write" page, being
774+
* careful to preserve hashkey ordering. (If we insert
775+
* many tuples into the same "write" page it would be
776+
* worth qsort'ing them).
777+
*/
778+
_hash_pgaddmultitup(rel,wbuf,itups,itup_offsets,nitups);
779+
MarkBufferDirty(wbuf);
780+
781+
/* Delete tuples we already moved off read page */
782+
PageIndexMultiDelete(rpage,deletable,ndeletable);
783+
MarkBufferDirty(rbuf);
784+
785+
tups_moved= true;
786+
}
787+
740788
/*
741789
* release the lock on previous page after acquiring the lock
742790
* on next page
743791
*/
744-
if (wbuf_dirty)
745-
MarkBufferDirty(wbuf);
746792
if (retain_pin)
747793
LockBuffer(wbuf,BUFFER_LOCK_UNLOCK);
748794
else
@@ -751,12 +797,6 @@ _hash_squeezebucket(Relation rel,
751797
/* nothing more to do if we reached the read page */
752798
if (rblkno==wblkno)
753799
{
754-
if (ndeletable>0)
755-
{
756-
/* Delete tuples we already moved off read page */
757-
PageIndexMultiDelete(rpage,deletable,ndeletable);
758-
MarkBufferDirty(rbuf);
759-
}
760800
_hash_relbuf(rel,rbuf);
761801
return;
762802
}
@@ -765,21 +805,34 @@ _hash_squeezebucket(Relation rel,
765805
wpage=BufferGetPage(wbuf);
766806
wopaque= (HashPageOpaque)PageGetSpecialPointer(wpage);
767807
Assert(wopaque->hasho_bucket==bucket);
768-
wbuf_dirty= false;
769808
retain_pin= false;
770-
}
771809

772-
/*
773-
* we have found room so insert on the "write" page, being careful
774-
* to preserve hashkey ordering. (If we insert many tuples into
775-
* the same "write" page it would be worth qsort'ing instead of
776-
* doing repeated _hash_pgaddtup.)
777-
*/
778-
(void)_hash_pgaddtup(rel,wbuf,itemsz,itup);
779-
wbuf_dirty= true;
810+
/* be tidy */
811+
for (i=0;i<nitups;i++)
812+
pfree(itups[i]);
813+
nitups=0;
814+
all_tups_size=0;
815+
ndeletable=0;
816+
817+
/*
818+
* after moving the tuples, rpage would have been compacted,
819+
* so we need to rescan it.
820+
*/
821+
if (tups_moved)
822+
gotoreadpage;
823+
}
780824

781825
/* remember tuple for deletion from "read" page */
782826
deletable[ndeletable++]=roffnum;
827+
828+
/*
829+
* we need a copy of index tuples as they can be freed as part of
830+
* overflow page, however we need them to write a WAL record in
831+
* _hash_freeovflpage.
832+
*/
833+
itups[nitups]=CopyIndexTuple(itup);
834+
tups_size[nitups++]=itemsz;
835+
all_tups_size+=itemsz;
783836
}
784837

785838
/*
@@ -797,10 +850,12 @@ _hash_squeezebucket(Relation rel,
797850
Assert(BlockNumberIsValid(rblkno));
798851

799852
/* free this overflow page (releases rbuf) */
800-
_hash_freeovflpage(rel,rbuf,wbuf,bstrategy);
853+
_hash_freeovflpage(rel,bucket_buf,rbuf,wbuf,itups,itup_offsets,
854+
tups_size,nitups,bstrategy);
801855

802-
if (wbuf_dirty)
803-
MarkBufferDirty(wbuf);
856+
/* be tidy */
857+
for (i=0;i<nitups;i++)
858+
pfree(itups[i]);
804859

805860
/* are we freeing the page adjacent to wbuf? */
806861
if (rblkno==wblkno)

‎src/backend/access/hash/hashpage.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
470470
void
471471
_hash_pageinit(Pagepage,Sizesize)
472472
{
473-
Assert(PageIsNew(page));
474473
PageInit(page,size,sizeof(HashPageOpaqueData));
475474
}
476475

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp