Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit39673ca

Browse files
committed
Rewrite hashbulkdelete() to make it amenable to new bucket locking
scheme. A pleasant side effect is that it is *much* faster when deletinga large fraction of the indexed tuples, because of elimination ofredundant hash_step activity induced by hash_adjscans. Various othercontinuing code cleanup.
1 parent5f65345 commit39673ca

File tree

6 files changed

+231
-76
lines changed

6 files changed

+231
-76
lines changed

‎src/backend/access/hash/hash.c

Lines changed: 158 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.65 2003/08/04 02:39:57 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.66 2003/09/02 02:18:38 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -449,40 +449,178 @@ hashbulkdelete(PG_FUNCTION_ARGS)
449449
BlockNumbernum_pages;
450450
doubletuples_removed;
451451
doublenum_index_tuples;
452-
IndexScanDesciscan;
452+
uint32deleted_tuples;
453+
uint32tuples_remaining;
454+
uint32orig_ntuples;
455+
Bucketorig_maxbucket;
456+
Bucketcur_maxbucket;
457+
Bucketcur_bucket;
458+
Buffermetabuf;
459+
HashMetaPagemetap;
460+
HashMetaPageDatalocal_metapage;
453461

462+
/*
463+
* keep track of counts in both float form (to return) and integer form
464+
* (to update hashm_ntuples). It'd be better to make hashm_ntuples a
465+
* double, but that will have to wait for an initdb.
466+
*/
454467
tuples_removed=0;
455468
num_index_tuples=0;
469+
deleted_tuples=0;
470+
tuples_remaining=0;
456471

457472
/*
458-
* XXX generic implementation --- should be improved!
473+
* Read the metapage to fetch original bucket and tuple counts. Also,
474+
* we keep a copy of the last-seen metapage so that we can use its
475+
* hashm_spares[] values to compute bucket page addresses. This is a
476+
* bit hokey but perfectly safe, since the interesting entries in the
477+
* spares array cannot change under us; and it beats rereading the
478+
* metapage for each bucket.
459479
*/
480+
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_READ);
481+
metap= (HashMetaPage)BufferGetPage(metabuf);
482+
_hash_checkpage((Page)metap,LH_META_PAGE);
483+
orig_maxbucket=metap->hashm_maxbucket;
484+
orig_ntuples=metap->hashm_ntuples;
485+
memcpy(&local_metapage,metap,sizeof(local_metapage));
486+
_hash_relbuf(rel,metabuf,HASH_READ);
487+
488+
/* Scan the buckets that we know exist */
489+
cur_bucket=0;
490+
cur_maxbucket=orig_maxbucket;
491+
492+
loop_top:
493+
while (cur_bucket <=cur_maxbucket)
494+
{
495+
BlockNumberbucket_blkno;
496+
BlockNumberblkno;
497+
boolbucket_dirty= false;
460498

461-
/* walk through the entire index */
462-
iscan=index_beginscan(NULL,rel,SnapshotAny,0, (ScanKey)NULL);
463-
/* including killed tuples */
464-
iscan->ignore_killed_tuples= false;
499+
/* Get address of bucket's start page */
500+
bucket_blkno=BUCKET_TO_BLKNO(&local_metapage,cur_bucket);
465501

466-
while (index_getnext_indexitem(iscan,ForwardScanDirection))
467-
{
468-
if (callback(&iscan->xs_ctup.t_self,callback_state))
502+
/* XXX lock bucket here */
503+
504+
/* Scan each page in bucket */
505+
blkno=bucket_blkno;
506+
while (BlockNumberIsValid(blkno))
469507
{
470-
ItemPointerDataindextup=iscan->currentItemData;
508+
Bufferbuf;
509+
Pagepage;
510+
HashPageOpaqueopaque;
511+
OffsetNumberoffno;
512+
OffsetNumbermaxoffno;
513+
boolpage_dirty= false;
514+
515+
buf=_hash_getbuf(rel,blkno,HASH_WRITE);
516+
page=BufferGetPage(buf);
517+
_hash_checkpage(page,LH_BUCKET_PAGE |LH_OVERFLOW_PAGE);
518+
opaque= (HashPageOpaque)PageGetSpecialPointer(page);
519+
Assert(opaque->hasho_bucket==cur_bucket);
520+
521+
/* Scan each tuple in page */
522+
offno=FirstOffsetNumber;
523+
maxoffno=PageGetMaxOffsetNumber(page);
524+
while (offno <=maxoffno)
525+
{
526+
HashItemhitem;
527+
ItemPointerhtup;
528+
529+
hitem= (HashItem)PageGetItem(page,
530+
PageGetItemId(page,offno));
531+
htup=&(hitem->hash_itup.t_tid);
532+
if (callback(htup,callback_state))
533+
{
534+
ItemPointerDataindextup;
535+
536+
/* adjust any active scans that will be affected */
537+
/* (this should be unnecessary) */
538+
ItemPointerSet(&indextup,blkno,offno);
539+
_hash_adjscans(rel,&indextup);
540+
541+
/* delete the item from the page */
542+
PageIndexTupleDelete(page,offno);
543+
bucket_dirty=page_dirty= true;
544+
545+
/* don't increment offno, instead decrement maxoffno */
546+
maxoffno=OffsetNumberPrev(maxoffno);
547+
548+
tuples_removed+=1;
549+
deleted_tuples+=1;
550+
}
551+
else
552+
{
553+
offno=OffsetNumberNext(offno);
554+
555+
num_index_tuples+=1;
556+
tuples_remaining+=1;
557+
}
558+
}
471559

472-
/* adjust any active scans that will be affected by deletion */
473-
/* (namely, my own scan) */
474-
_hash_adjscans(rel,&indextup);
560+
/*
561+
* Write or free page if needed, advance to next page. We want
562+
* to preserve the invariant that overflow pages are nonempty.
563+
*/
564+
blkno=opaque->hasho_nextblkno;
565+
566+
if (PageIsEmpty(page)&& (opaque->hasho_flag&LH_OVERFLOW_PAGE))
567+
_hash_freeovflpage(rel,buf);
568+
elseif (page_dirty)
569+
_hash_wrtbuf(rel,buf);
570+
else
571+
_hash_relbuf(rel,buf,HASH_WRITE);
572+
}
475573

476-
/* delete the data from the page */
477-
_hash_pagedel(rel,&indextup);
574+
/* If we deleted anything, try to compact free space */
575+
if (bucket_dirty)
576+
_hash_squeezebucket(rel,cur_bucket,bucket_blkno);
478577

479-
tuples_removed+=1;
480-
}
578+
/* XXX unlock bucket here */
579+
580+
/* Advance to next bucket */
581+
cur_bucket++;
582+
}
583+
584+
/* Write-lock metapage and check for split since we started */
585+
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_WRITE);
586+
metap= (HashMetaPage)BufferGetPage(metabuf);
587+
_hash_checkpage((Page)metap,LH_META_PAGE);
588+
589+
if (cur_maxbucket!=metap->hashm_maxbucket)
590+
{
591+
/* There's been a split, so process the additional bucket(s) */
592+
cur_maxbucket=metap->hashm_maxbucket;
593+
memcpy(&local_metapage,metap,sizeof(local_metapage));
594+
_hash_relbuf(rel,metabuf,HASH_WRITE);
595+
gotoloop_top;
596+
}
597+
598+
/* Okay, we're really done. Update tuple count in metapage. */
599+
600+
if (orig_maxbucket==metap->hashm_maxbucket&&
601+
orig_ntuples==metap->hashm_ntuples)
602+
{
603+
/*
604+
* No one has split or inserted anything since start of scan,
605+
* so believe our count as gospel.
606+
*/
607+
metap->hashm_ntuples=tuples_remaining;
608+
}
609+
else
610+
{
611+
/*
612+
* Otherwise, our count is untrustworthy since we may have
613+
* double-scanned tuples in split buckets. Proceed by
614+
* dead-reckoning.
615+
*/
616+
if (metap->hashm_ntuples>deleted_tuples)
617+
metap->hashm_ntuples-=deleted_tuples;
481618
else
482-
num_index_tuples+=1;
619+
metap->hashm_ntuples=0;
620+
num_index_tuples=metap->hashm_ntuples;
483621
}
484622

485-
index_endscan(iscan);
623+
_hash_wrtbuf(rel,metabuf);
486624

487625
/* return statistics */
488626
num_pages=RelationGetNumberOfBlocks(rel);

‎src/backend/access/hash/hashovfl.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.38 2003/09/01 20:26:34 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.39 2003/09/02 02:18:38 tgl Exp $
1212
*
1313
* NOTES
1414
* Overflow pages look like ordinary relation pages.
@@ -444,11 +444,13 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
444444
*first page in the bucket chain. The read page works backward and
445445
*the write page works forward; the procedure terminates when the
446446
*read page and write page are the same page.
447+
*
448+
*Caller must hold exclusive lock on the target bucket.
447449
*/
448450
void
449451
_hash_squeezebucket(Relationrel,
450-
HashMetaPagemetap,
451-
Bucketbucket)
452+
Bucketbucket,
453+
BlockNumberbucket_blkno)
452454
{
453455
Bufferwbuf;
454456
Bufferrbuf=0;
@@ -466,7 +468,7 @@ _hash_squeezebucket(Relation rel,
466468
/*
467469
* start squeezing into the base bucket page.
468470
*/
469-
wblkno=BUCKET_TO_BLKNO(bucket);
471+
wblkno=bucket_blkno;
470472
wbuf=_hash_getbuf(rel,wblkno,HASH_WRITE);
471473
wpage=BufferGetPage(wbuf);
472474
_hash_checkpage(wpage,LH_BUCKET_PAGE);
@@ -484,11 +486,6 @@ _hash_squeezebucket(Relation rel,
484486
/*
485487
* find the last page in the bucket chain by starting at the base
486488
* bucket page and working forward.
487-
*
488-
* XXX if chains tend to be long, we should probably move forward using
489-
* HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach
490-
* the end. if they are short we probably don't care very much. if
491-
* the hash function is working at all, they had better be short..
492489
*/
493490
ropaque=wopaque;
494491
do

‎src/backend/access/hash/hashpage.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.39 2003/09/01 20:26:34 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.40 2003/09/02 02:18:38 tgl Exp $
1212
*
1313
* NOTES
1414
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -143,7 +143,7 @@ _hash_metapinit(Relation rel)
143143
*/
144144
for (i=0;i <=1;i++)
145145
{
146-
buf=_hash_getbuf(rel,BUCKET_TO_BLKNO(i),HASH_WRITE);
146+
buf=_hash_getbuf(rel,BUCKET_TO_BLKNO(metap,i),HASH_WRITE);
147147
pg=BufferGetPage(buf);
148148
_hash_pageinit(pg,BufferGetPageSize(buf));
149149
pageopaque= (HashPageOpaque)PageGetSpecialPointer(pg);
@@ -456,6 +456,8 @@ _hash_splitbucket(Relation rel,
456456
Bufferovflbuf;
457457
BlockNumberoblkno;
458458
BlockNumbernblkno;
459+
BlockNumberstart_oblkno;
460+
BlockNumberstart_nblkno;
459461
boolnull;
460462
Datumdatum;
461463
HashItemhitem;
@@ -475,8 +477,10 @@ _hash_splitbucket(Relation rel,
475477
_hash_checkpage((Page)metap,LH_META_PAGE);
476478

477479
/* get the buffers & pages */
478-
oblkno=BUCKET_TO_BLKNO(obucket);
479-
nblkno=BUCKET_TO_BLKNO(nbucket);
480+
start_oblkno=BUCKET_TO_BLKNO(metap,obucket);
481+
start_nblkno=BUCKET_TO_BLKNO(metap,nbucket);
482+
oblkno=start_oblkno;
483+
nblkno=start_nblkno;
480484
obuf=_hash_getbuf(rel,oblkno,HASH_WRITE);
481485
nbuf=_hash_getbuf(rel,nblkno,HASH_WRITE);
482486
opage=BufferGetPage(obuf);
@@ -571,7 +575,7 @@ _hash_splitbucket(Relation rel,
571575
*/
572576
_hash_wrtbuf(rel,obuf);
573577
_hash_wrtbuf(rel,nbuf);
574-
_hash_squeezebucket(rel,metap,obucket);
578+
_hash_squeezebucket(rel,obucket,start_oblkno);
575579
return;
576580
}
577581
}
@@ -639,7 +643,7 @@ _hash_splitbucket(Relation rel,
639643
if (!BlockNumberIsValid(oblkno))
640644
{
641645
_hash_wrtbuf(rel,nbuf);
642-
_hash_squeezebucket(rel,metap,obucket);
646+
_hash_squeezebucket(rel,obucket,start_oblkno);
643647
return;
644648
}
645649

‎src/backend/access/hash/hashsearch.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.31 2003/08/04 02:39:57 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.32 2003/09/02 02:18:38 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -19,8 +19,10 @@
1919

2020

2121
/*
22-
*_hash_search() -- Finds the page/bucket that the contains the
23-
*scankey and loads it into *bufP. the buffer has a read lock.
22+
*_hash_search() -- Find the bucket that contains the scankey
23+
*and fetch its primary bucket page into *bufP.
24+
*
25+
* the buffer has a read lock.
2426
*/
2527
void
2628
_hash_search(Relationrel,
@@ -30,22 +32,23 @@ _hash_search(Relation rel,
3032
HashMetaPagemetap)
3133
{
3234
BlockNumberblkno;
33-
DatumkeyDatum;
3435
Bucketbucket;
3536

36-
if (scankey== (ScanKey)NULL||
37-
(keyDatum=scankey[0].sk_argument)== (Datum)NULL)
37+
if (scankey==NULL)
3838
{
3939
/*
40-
* If the scankeyargumentisNULL, all tuples will satisfy the
40+
* If the scankey isempty, all tuples will satisfy the
4141
* scan so we start the scan at the first bucket (bucket 0).
4242
*/
4343
bucket=0;
4444
}
4545
else
46-
bucket=_hash_call(rel,metap,keyDatum);
46+
{
47+
Assert(!(scankey[0].sk_flags&SK_ISNULL));
48+
bucket=_hash_call(rel,metap,scankey[0].sk_argument);
49+
}
4750

48-
blkno=BUCKET_TO_BLKNO(bucket);
51+
blkno=BUCKET_TO_BLKNO(metap,bucket);
4952

5053
*bufP=_hash_getbuf(rel,blkno,HASH_READ);
5154
}
@@ -330,7 +333,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
330333
if (allbuckets&&bucket<metap->hashm_maxbucket)
331334
{
332335
++bucket;
333-
blkno=BUCKET_TO_BLKNO(bucket);
336+
blkno=BUCKET_TO_BLKNO(metap,bucket);
334337
buf=_hash_getbuf(rel,blkno,HASH_READ);
335338
page=BufferGetPage(buf);
336339
_hash_checkpage(page,LH_BUCKET_PAGE);
@@ -380,7 +383,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
380383
if (allbuckets&&bucket>0)
381384
{
382385
--bucket;
383-
blkno=BUCKET_TO_BLKNO(bucket);
386+
blkno=BUCKET_TO_BLKNO(metap,bucket);
384387
buf=_hash_getbuf(rel,blkno,HASH_READ);
385388
page=BufferGetPage(buf);
386389
_hash_checkpage(page,LH_BUCKET_PAGE);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp