Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4adc2f7

Browse files
committed
Change hash indexes to store only the hash code rather than the whole indexed
value. This means that hash index lookups are always lossy and have to be rechecked when the heap is visited; however, the gain in index compactness outweighs this when the indexed values are wide. Also, we only need to perform datatype comparisons when the hash codes match exactly, rather than for every entry in the hash bucket; so it could also win for datatypes that have expensive comparison functions. A small additional win is gained by keeping hash index pages sorted by hash code and using binary search to reduce the number of index tuples we have to look at.

Xiao Meng

This commit also incorporates Zdenek Kotala's patch to isolate hash metapages and hash bitmaps a bit better from the page header data structures.
1 parent 440b338 · commit 4adc2f7

File tree

13 files changed

+313
-129
lines changed

13 files changed

+313
-129
lines changed

‎doc/src/sgml/catalogs.sgml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.173 2008/09/10 18:09:19 alvherre Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.174 2008/09/15 18:43:41 tgl Exp $ -->
22
<!--
33
Documentation of the system catalogs, directed toward PostgreSQL developers
44
-->
@@ -451,6 +451,13 @@
451451
<entry>Can an index of this type be clustered on?</entry>
452452
</row>
453453

454+
<row>
455+
<entry><structfield>amkeytype</structfield></entry>
456+
<entry><type>oid</type></entry>
457+
<entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry>
458+
<entry>Type of data stored in index, or zero if not a fixed type</entry>
459+
</row>
460+
454461
<row>
455462
<entry><structfield>aminsert</structfield></entry>
456463
<entry><type>regproc</type></entry>
@@ -6424,7 +6431,7 @@
64246431
<row>
64256432
<entry><structfield>sourceline</structfield></entry>
64266433
<entry><type>text</type></entry>
6427-
<entry>Line number within the sourcefile the current value was set
6434+
<entry>Line number within the sourcefile the current value was set
64286435
from (NULL for values set in sources other than configuration files)
64296436
</entry>
64306437
</row>

‎src/backend/access/hash/hash.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.104 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.105 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -79,12 +79,12 @@ hashbuild(PG_FUNCTION_ARGS)
7979
* then we'll thrash horribly. To prevent that scenario, we can sort the
8080
* tuples by (expected) bucket number. However, such a sort is useless
8181
* overhead when the index does fit in RAM. We choose to sort if the
82-
* initial index size exceedseffective_cache_size.
82+
* initial index size exceedsNBuffers.
8383
*
8484
* NOTE: this test will need adjustment if a bucket is ever different
8585
* from one page.
8686
*/
87-
if (num_buckets >= (uint32)effective_cache_size)
87+
if (num_buckets >= (uint32)NBuffers)
8888
buildstate.spool=_h_spoolinit(index,num_buckets);
8989
else
9090
buildstate.spool=NULL;
@@ -129,7 +129,7 @@ hashbuildCallback(Relation index,
129129
IndexTupleitup;
130130

131131
/* form an index tuple and point it at the heap tuple */
132-
itup=index_form_tuple(RelationGetDescr(index),values,isnull);
132+
itup=_hash_form_tuple(index,values,isnull);
133133
itup->t_tid=htup->t_self;
134134

135135
/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -153,8 +153,8 @@ hashbuildCallback(Relation index,
153153
/*
154154
*hashinsert() -- insert an index tuple into a hash table.
155155
*
156-
*Hash on theindex tuple's key,find the appropriate location
157-
*for the new tuple, and put it there.
156+
*Hash on theheap tuple's key,form an index tuple with hash code.
157+
*Find the appropriate locationfor the new tuple, and put it there.
158158
*/
159159
Datum
160160
hashinsert(PG_FUNCTION_ARGS)
@@ -171,7 +171,7 @@ hashinsert(PG_FUNCTION_ARGS)
171171
IndexTupleitup;
172172

173173
/* generate an index tuple */
174-
itup=index_form_tuple(RelationGetDescr(rel),values,isnull);
174+
itup=_hash_form_tuple(rel,values,isnull);
175175
itup->t_tid=*ht_ctid;
176176

177177
/*
@@ -211,8 +211,8 @@ hashgettuple(PG_FUNCTION_ARGS)
211211
OffsetNumberoffnum;
212212
boolres;
213213

214-
/* Hash indexes arenever lossy(atthemoment anyway) */
215-
scan->xs_recheck=false;
214+
/* Hash indexes arealways lossysince we store onlythehash code */
215+
scan->xs_recheck=true;
216216

217217
/*
218218
* We hold pin but not lock on current buffer while outside the hash AM.
@@ -317,7 +317,8 @@ hashgetbitmap(PG_FUNCTION_ARGS)
317317
/* Save tuple ID, and continue scanning */
318318
if (add_tuple)
319319
{
320-
tbm_add_tuples(tbm,&scan->xs_ctup.t_self,1, false);
320+
/* Note we mark the tuple ID as requiring recheck */
321+
tbm_add_tuples(tbm,&scan->xs_ctup.t_self,1, true);
321322
ntids++;
322323
}
323324

@@ -527,7 +528,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
527528
* each bucket.
528529
*/
529530
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_READ,LH_META_PAGE);
530-
metap=(HashMetaPage)BufferGetPage(metabuf);
531+
metap=HashPageGetMeta(BufferGetPage(metabuf));
531532
orig_maxbucket=metap->hashm_maxbucket;
532533
orig_ntuples=metap->hashm_ntuples;
533534
memcpy(&local_metapage,metap,sizeof(local_metapage));
@@ -629,7 +630,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
629630

630631
/* Write-lock metapage and check for split since we started */
631632
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_WRITE,LH_META_PAGE);
632-
metap=(HashMetaPage)BufferGetPage(metabuf);
633+
metap=HashPageGetMeta(BufferGetPage(metabuf));
633634

634635
if (cur_maxbucket!=metap->hashm_maxbucket)
635636
{

‎src/backend/access/hash/hashinsert.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.50 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.51 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -43,18 +43,11 @@ _hash_doinsert(Relation rel, IndexTuple itup)
4343
booldo_expand;
4444
uint32hashkey;
4545
Bucketbucket;
46-
Datumdatum;
47-
boolisnull;
4846

4947
/*
50-
* Compute the hash key for the item. We do this first so as not to need
51-
* to hold any locks while running the hash function.
48+
* Get the hash key for the item (it's stored in the index tuple itself).
5249
*/
53-
if (rel->rd_rel->relnatts!=1)
54-
elog(ERROR,"hash indexes support only one index key");
55-
datum=index_getattr(itup,1,RelationGetDescr(rel),&isnull);
56-
Assert(!isnull);
57-
hashkey=_hash_datum2hashkey(rel,datum);
50+
hashkey=_hash_get_indextuple_hashkey(itup);
5851

5952
/* compute item size too */
6053
itemsz=IndexTupleDSize(*itup);
@@ -69,12 +62,14 @@ _hash_doinsert(Relation rel, IndexTuple itup)
6962

7063
/* Read the metapage */
7164
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_READ,LH_META_PAGE);
72-
metap=(HashMetaPage)BufferGetPage(metabuf);
65+
metap=HashPageGetMeta(BufferGetPage(metabuf));
7366

7467
/*
7568
* Check whether the item can fit on a hash page at all. (Eventually, we
7669
* ought to try to apply TOAST methods if not.) Note that at this point,
7770
* itemsz doesn't include the ItemId.
71+
*
72+
* XXX this is useless code if we are only storing hash keys.
7873
*/
7974
if (itemsz>HashMaxItemSize((Page)metap))
8075
ereport(ERROR,
@@ -197,11 +192,15 @@ _hash_pgaddtup(Relation rel,
197192
{
198193
OffsetNumberitup_off;
199194
Pagepage;
195+
uint32hashkey;
200196

201197
_hash_checkpage(rel,buf,LH_BUCKET_PAGE |LH_OVERFLOW_PAGE);
202198
page=BufferGetPage(buf);
203199

204-
itup_off=OffsetNumberNext(PageGetMaxOffsetNumber(page));
200+
/* Find where to insert the tuple (preserving page's hashkey ordering) */
201+
hashkey=_hash_get_indextuple_hashkey(itup);
202+
itup_off=_hash_binsearch(page,hashkey);
203+
205204
if (PageAddItem(page, (Item)itup,itemsize,itup_off, false, false)
206205
==InvalidOffsetNumber)
207206
elog(ERROR,"failed to add index item to \"%s\"",

‎src/backend/access/hash/hashovfl.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.64 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.65 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Overflow pages look like ordinary relation pages.
@@ -187,7 +187,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
187187
_hash_chgbufaccess(rel,metabuf,HASH_NOLOCK,HASH_WRITE);
188188

189189
_hash_checkpage(rel,metabuf,LH_META_PAGE);
190-
metap=(HashMetaPage)BufferGetPage(metabuf);
190+
metap=HashPageGetMeta(BufferGetPage(metabuf));
191191

192192
/* start search at hashm_firstfree */
193193
orig_firstfree=metap->hashm_firstfree;
@@ -450,7 +450,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
450450

451451
/* Read the metapage so we can determine which bitmap page to use */
452452
metabuf=_hash_getbuf(rel,HASH_METAPAGE,HASH_READ,LH_META_PAGE);
453-
metap=(HashMetaPage)BufferGetPage(metabuf);
453+
metap=HashPageGetMeta(BufferGetPage(metabuf));
454454

455455
/* Identify which bit to set */
456456
ovflbitno=blkno_to_bitno(metap,ovflblkno);

‎src/backend/access/hash/hashpage.c

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.77 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -348,11 +348,9 @@ _hash_metapinit(Relation rel, double num_tuples)
348348
* Determine the target fill factor (in tuples per bucket) for this index.
349349
* The idea is to make the fill factor correspond to pages about as full
350350
* as the user-settable fillfactor parameter says.We can compute it
351-
* exactly if the index datatype is fixed-width, but for var-width there's
352-
* some guessing involved.
351+
* exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
353352
*/
354-
data_width=get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
355-
RelationGetDescr(rel)->attrs[0]->atttypmod);
353+
data_width=sizeof(uint32);
356354
item_width=MAXALIGN(sizeof(IndexTupleData))+MAXALIGN(data_width)+
357355
sizeof(ItemIdData);/* include the line pointer */
358356
ffactor=RelationGetTargetPageUsage(rel,HASH_DEFAULT_FILLFACTOR) /item_width;
@@ -395,20 +393,18 @@ _hash_metapinit(Relation rel, double num_tuples)
395393
pageopaque->hasho_flag=LH_META_PAGE;
396394
pageopaque->hasho_page_id=HASHO_PAGE_ID;
397395

398-
metap=(HashMetaPage)pg;
396+
metap=HashPageGetMeta(pg);
399397

400398
metap->hashm_magic=HASH_MAGIC;
401399
metap->hashm_version=HASH_VERSION;
402400
metap->hashm_ntuples=0;
403401
metap->hashm_nmaps=0;
404402
metap->hashm_ffactor=ffactor;
405-
metap->hashm_bsize=BufferGetPageSize(metabuf);
403+
metap->hashm_bsize=HashGetMaxBitmapSize(pg);
406404
/* find largest bitmap array size that will fit in page size */
407405
for (i=_hash_log2(metap->hashm_bsize);i>0;--i)
408406
{
409-
if ((1 <<i) <= (metap->hashm_bsize-
410-
(MAXALIGN(sizeof(PageHeaderData))+
411-
MAXALIGN(sizeof(HashPageOpaqueData)))))
407+
if ((1 <<i) <=metap->hashm_bsize)
412408
break;
413409
}
414410
Assert(i>0);
@@ -532,7 +528,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
532528
_hash_chgbufaccess(rel,metabuf,HASH_NOLOCK,HASH_WRITE);
533529

534530
_hash_checkpage(rel,metabuf,LH_META_PAGE);
535-
metap=(HashMetaPage)BufferGetPage(metabuf);
531+
metap=HashPageGetMeta(BufferGetPage(metabuf));
536532

537533
/*
538534
* Check to see if split is still needed; someone else might have already
@@ -774,8 +770,6 @@ _hash_splitbucket(Relation rel,
774770
Buffernbuf;
775771
BlockNumberoblkno;
776772
BlockNumbernblkno;
777-
boolnull;
778-
Datumdatum;
779773
HashPageOpaqueoopaque;
780774
HashPageOpaquenopaque;
781775
IndexTupleitup;
@@ -785,7 +779,6 @@ _hash_splitbucket(Relation rel,
785779
OffsetNumberomaxoffnum;
786780
Pageopage;
787781
Pagenpage;
788-
TupleDescitupdesc=RelationGetDescr(rel);
789782

790783
/*
791784
* It should be okay to simultaneously write-lock pages from each bucket,
@@ -846,16 +839,11 @@ _hash_splitbucket(Relation rel,
846839
}
847840

848841
/*
849-
* Re-hash the tuple to determine which bucket it now belongs in.
850-
*
851-
* It is annoying to call the hash function while holding locks, but
852-
* releasing and relocking the page for each tuple is unappealing too.
842+
* Fetch the item's hash key (conveniently stored in the item)
843+
* and determine which bucket it now belongs in.
853844
*/
854845
itup= (IndexTuple)PageGetItem(opage,PageGetItemId(opage,ooffnum));
855-
datum=index_getattr(itup,1,itupdesc,&null);
856-
Assert(!null);
857-
858-
bucket=_hash_hashkey2bucket(_hash_datum2hashkey(rel,datum),
846+
bucket=_hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
859847
maxbucket,highmask,lowmask);
860848

861849
if (bucket==nbucket)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp