Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitee4af34

Browse files
committed
Measure Bloom index signature-length reloption in bits, not words.
Per discussion, this is a more understandable and future-proof way ofexposing the setting to users. On-disk, we can still store it in words,so as to not break on-disk compatibility with beta1.Along the way, clean up the code associated with Bloom reloptions.Provide explicit macros for default and maximum lengths rather thanhaving magic numbers buried in multiple places in the code. Dropthe adjustBloomOptions() code altogether: it was useless in view ofthe fact that reloptions.c already performed default-substitution andrange checking for the options. Rename a couple of macros and typesfor more clarity.Discussion: <23767.1464926580@sss.pgh.pa.us>
1 parentfdfaccf commitee4af34

File tree

4 files changed

+86
-93
lines changed

4 files changed

+86
-93
lines changed

‎contrib/bloom/bloom.h

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque;
7979
#defineBLOOM_HEAD_BLKNO(1)/* first data page */
8080

8181
/*
82-
* Maximum of bloom signature length in uint16. Actual value
83-
* is 512 bytes
82+
* We store Bloom signatures as arrays of uint16 words.
8483
*/
85-
#defineMAX_BLOOM_LENGTH(256)
84+
typedefuint16BloomSignatureWord;
85+
86+
#defineSIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
87+
88+
/*
89+
* Default and maximum Bloom signature length in bits.
90+
*/
91+
#defineDEFAULT_BLOOM_LENGTH(5 * SIGNWORDBITS)
92+
#defineMAX_BLOOM_LENGTH(256 * SIGNWORDBITS)
93+
94+
/*
95+
* Default and maximum signature bits generated per index key.
96+
*/
97+
#defineDEFAULT_BLOOM_BITS2
98+
#defineMAX_BLOOM_BITS(MAX_BLOOM_LENGTH - 1)
8699

87100
/* Bloom index options */
88101
typedefstructBloomOptions
89102
{
90103
int32vl_len_;/* varlena header (do not touch directly!) */
91-
intbloomLength;/* length of signature inuint16 */
92-
intbitSize[INDEX_MAX_KEYS];/*signaturebitsper index
93-
* key */
104+
intbloomLength;/* length of signature inwords (not bits!) */
105+
intbitSize[INDEX_MAX_KEYS];/*# ofbitsgenerated for each
106+
* index key */
94107
}BloomOptions;
95108

96109
/*
@@ -143,20 +156,18 @@ typedef struct BloomState
143156
/*
144157
* Tuples are very different from all other relations
145158
*/
146-
typedefuint16SignType;
147-
148159
typedefstructBloomTuple
149160
{
150161
ItemPointerDataheapPtr;
151-
SignTypesign[FLEXIBLE_ARRAY_MEMBER];
162+
BloomSignatureWordsign[FLEXIBLE_ARRAY_MEMBER];
152163
}BloomTuple;
153164

154165
#defineBLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
155166

156167
/* Opaque data structure for bloom index scan */
157168
typedefstructBloomScanOpaqueData
158169
{
159-
SignType*sign;/* Scan signature */
170+
BloomSignatureWord*sign;/* Scan signature */
160171
BloomStatestate;
161172
}BloomScanOpaqueData;
162173

@@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage);
170181
externvoidBloomInitMetapage(Relationindex);
171182
externvoidBloomInitPage(Pagepage,uint16flags);
172183
externBufferBloomNewBuffer(Relationindex);
173-
externvoidsignValue(BloomState*state,SignType*sign,Datumvalue,intattno);
184+
externvoidsignValue(BloomState*state,BloomSignatureWord*sign,Datumvalue,intattno);
174185
externBloomTuple*BloomFormTuple(BloomState*state,ItemPointeriptr,Datum*values,bool*isnull);
175186
externboolBloomPageAddItem(BloomState*state,Pagepage,BloomTuple*tuple);
176187

‎contrib/bloom/blscan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
9393
/* New search: have to calculate search signature */
9494
ScanKeyskey=scan->keyData;
9595

96-
so->sign=palloc0(sizeof(SignType)*so->state.opts.bloomLength);
96+
so->sign=palloc0(sizeof(BloomSignatureWord)*so->state.opts.bloomLength);
9797

9898
for (i=0;i<scan->numberOfKeys;i++)
9999
{

‎contrib/bloom/blutils.c

Lines changed: 53 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,26 @@
2727

2828
#include"bloom.h"
2929

30-
/* Signature dealing macros */
31-
#defineBITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
32-
#defineGETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
33-
#defineCLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
34-
#defineSETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
35-
#defineGETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
30+
/* Signature dealing macros - note i is assumed to be of type int */
31+
#defineGETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
32+
#defineCLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
33+
#defineSETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
34+
#defineGETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
3635

3736
PG_FUNCTION_INFO_V1(blhandler);
3837

39-
/* Kind of relationoptioms for bloom index */
38+
/* Kind of relationoptions for bloom index */
4039
staticrelopt_kindbl_relopt_kind;
40+
/* parse table for fillRelOptions */
41+
staticrelopt_parse_eltbl_relopt_tab[INDEX_MAX_KEYS+1];
4142

4243
staticint32myRand(void);
4344
staticvoidmySrand(uint32seed);
4445

4546
/*
46-
* Module initialize function: initilized relation options.
47+
* Module initialize function: initialize info about Bloom relation options.
48+
*
49+
* Note: keep this in sync with makeDefaultBloomOptions().
4750
*/
4851
void
4952
_PG_init(void)
@@ -53,17 +56,46 @@ _PG_init(void)
5356

5457
bl_relopt_kind=add_reloption_kind();
5558

59+
/* Option for length of signature */
5660
add_int_reloption(bl_relopt_kind,"length",
57-
"Length of signature in uint16 type",5,1,256);
61+
"Length of signature in bits",
62+
DEFAULT_BLOOM_LENGTH,1,MAX_BLOOM_LENGTH);
63+
bl_relopt_tab[0].optname="length";
64+
bl_relopt_tab[0].opttype=RELOPT_TYPE_INT;
65+
bl_relopt_tab[0].offset= offsetof(BloomOptions,bloomLength);
5866

67+
/* Number of bits for each possible index column: col1, col2, ... */
5968
for (i=0;i<INDEX_MAX_KEYS;i++)
6069
{
61-
snprintf(buf,16,"col%d",i+1);
70+
snprintf(buf,sizeof(buf),"col%d",i+1);
6271
add_int_reloption(bl_relopt_kind,buf,
63-
"Number of bits for corresponding column",2,1,2048);
72+
"Number of bits generated for each index column",
73+
DEFAULT_BLOOM_BITS,1,MAX_BLOOM_BITS);
74+
bl_relopt_tab[i+1].optname=MemoryContextStrdup(TopMemoryContext,
75+
buf);
76+
bl_relopt_tab[i+1].opttype=RELOPT_TYPE_INT;
77+
bl_relopt_tab[i+1].offset= offsetof(BloomOptions,bitSize[i]);
6478
}
6579
}
6680

81+
/*
82+
* Construct a default set of Bloom options.
83+
*/
84+
staticBloomOptions*
85+
makeDefaultBloomOptions(void)
86+
{
87+
BloomOptions*opts;
88+
inti;
89+
90+
opts= (BloomOptions*)palloc0(sizeof(BloomOptions));
91+
/* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
92+
opts->bloomLength= (DEFAULT_BLOOM_LENGTH+SIGNWORDBITS-1) /SIGNWORDBITS;
93+
for (i=0;i<INDEX_MAX_KEYS;i++)
94+
opts->bitSize[i]=DEFAULT_BLOOM_BITS;
95+
SET_VARSIZE(opts,sizeof(BloomOptions));
96+
returnopts;
97+
}
98+
6799
/*
68100
* Bloom handler function: return IndexAmRoutine with access method parameters
69101
* and callbacks.
@@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index)
157189

158190
memcpy(&state->opts,index->rd_amcache,sizeof(state->opts));
159191
state->sizeOfBloomTuple=BLOOMTUPLEHDRSZ+
160-
sizeof(SignType)*state->opts.bloomLength;
192+
sizeof(BloomSignatureWord)*state->opts.bloomLength;
161193
}
162194

163195
/*
@@ -208,7 +240,7 @@ mySrand(uint32 seed)
208240
* Add bits of given value to the signature.
209241
*/
210242
void
211-
signValue(BloomState*state,SignType*sign,Datumvalue,intattno)
243+
signValue(BloomState*state,BloomSignatureWord*sign,Datumvalue,intattno)
212244
{
213245
uint32hashVal;
214246
intnBit,
@@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno)
231263

232264
for (j=0;j<state->opts.bitSize[attno];j++)
233265
{
234-
/* preventmutiple evaluation */
235-
nBit=myRand() % (state->opts.bloomLength*BITSIGNTYPE);
266+
/* preventmultiple evaluation in SETBIT macro */
267+
nBit=myRand() % (state->opts.bloomLength*SIGNWORDBITS);
236268
SETBIT(sign,nBit);
237269
}
238270
}
@@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags)
361393
opaque->bloom_page_id=BLOOM_PAGE_ID;
362394
}
363395

364-
/*
365-
* Adjust options of bloom index.
366-
*
367-
* This must produce default options when *opts is initially all-zero.
368-
*/
369-
staticvoid
370-
adjustBloomOptions(BloomOptions*opts)
371-
{
372-
inti;
373-
374-
/* Default length of bloom filter is 5 of 16-bit integers */
375-
if (opts->bloomLength <=0)
376-
opts->bloomLength=5;
377-
elseif (opts->bloomLength>MAX_BLOOM_LENGTH)
378-
ereport(ERROR,
379-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
380-
errmsg("length of bloom signature (%d) is greater than maximum %d",
381-
opts->bloomLength,MAX_BLOOM_LENGTH)));
382-
383-
/* Check signature length */
384-
for (i=0;i<INDEX_MAX_KEYS;i++)
385-
{
386-
/*
387-
* Zero and negative number of bits is meaningless. Also setting
388-
* more bits than signature have seems useless. Replace both cases
389-
* with 2 bits default.
390-
*/
391-
if (opts->bitSize[i] <=0
392-
||opts->bitSize[i] >=opts->bloomLength*sizeof(SignType)*BITS_PER_BYTE)
393-
opts->bitSize[i]=2;
394-
}
395-
}
396-
397396
/*
398397
* Fill in metapage for bloom index.
399398
*/
@@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage)
405404

406405
/*
407406
* Choose the index's options. If reloptions have been assigned, use
408-
* those, otherwise create default options by applying adjustBloomOptions
409-
* to a zeroed chunk of memory. We apply adjustBloomOptions to existing
410-
* reloptions too, just out of paranoia; they should be valid already.
407+
* those, otherwise create default options.
411408
*/
412409
opts= (BloomOptions*)index->rd_options;
413410
if (!opts)
414-
opts= (BloomOptions*)palloc0(sizeof(BloomOptions));
415-
adjustBloomOptions(opts);
411+
opts=makeDefaultBloomOptions();
416412

417413
/*
418414
* Initialize contents of meta page, including a copy of the options,
@@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate)
462458
relopt_value*options;
463459
intnumoptions;
464460
BloomOptions*rdopts;
465-
relopt_parse_elttab[INDEX_MAX_KEYS+1];
466-
inti;
467-
charbuf[16];
468-
469-
/* Option for length of signature */
470-
tab[0].optname="length";
471-
tab[0].opttype=RELOPT_TYPE_INT;
472-
tab[0].offset= offsetof(BloomOptions,bloomLength);
473-
474-
/* Number of bits for each of possible columns: col1, col2, ... */
475-
for (i=0;i<INDEX_MAX_KEYS;i++)
476-
{
477-
snprintf(buf,sizeof(buf),"col%d",i+1);
478-
tab[i+1].optname=pstrdup(buf);
479-
tab[i+1].opttype=RELOPT_TYPE_INT;
480-
tab[i+1].offset= offsetof(BloomOptions,bitSize[i]);
481-
}
482461

462+
/* Parse the user-given reloptions */
483463
options=parseRelOptions(reloptions,validate,bl_relopt_kind,&numoptions);
484464
rdopts=allocateReloptStruct(sizeof(BloomOptions),options,numoptions);
485465
fillRelOptions((void*)rdopts,sizeof(BloomOptions),options,numoptions,
486-
validate,tab,INDEX_MAX_KEYS+1);
466+
validate,bl_relopt_tab,lengthof(bl_relopt_tab));
487467

488-
adjustBloomOptions(rdopts);
468+
/* Convert signature length from # of bits to # to words, rounding up */
469+
rdopts->bloomLength= (rdopts->bloomLength+SIGNWORDBITS-1) /SIGNWORDBITS;
489470

490471
return (bytea*)rdopts;
491472
}

‎doc/src/sgml/bloom.sgml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
</indexterm>
99

1010
<para>
11-
<literal>bloom</> is a modulewhich implements an index access method. It comes
12-
as an example of custom access methods and generic WALrecords usage. But it
11+
<literal>bloom</> is a modulethat implements an index access method. It comes
12+
as an example of custom access methods and generic WALrecord usage. But it
1313
is also useful in itself.
1414
</para>
1515

@@ -22,8 +22,9 @@
2222
allows fast exclusion of non-candidate tuples via signatures.
2323
Since a signature is a lossy representation of all indexed attributes,
2424
search results must be rechecked using heap information.
25-
The user can specify signature length (in uint16, default is 5) and the
26-
number of bits, which can be set per attribute (1 < colN < 2048).
25+
The user can specify signature length in bits (default 80, maximum 4096)
26+
and the number of bits generated for each index column (default 2,
27+
maximum 4095).
2728
</para>
2829

2930
<para>
@@ -51,17 +52,17 @@
5152
<term><literal>length</></term>
5253
<listitem>
5354
<para>
54-
Length of signature inuint16 type values
55+
Length of signature inbits
5556
</para>
5657
</listitem>
5758
</varlistentry>
5859
</variablelist>
5960
<variablelist>
6061
<varlistentry>
61-
<term><literal>col1 &mdash;col16</></term>
62+
<term><literal>col1 &mdash;col32</></term>
6263
<listitem>
6364
<para>
64-
Number of bits forcorresponding column
65+
Number of bitsgeneratedforeach index column
6566
</para>
6667
</listitem>
6768
</varlistentry>
@@ -77,12 +78,12 @@
7778

7879
<programlisting>
7980
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
80-
WITH (length=5, col1=2, col2=2, col3=4);
81+
WITH (length=80, col1=2, col2=2, col3=4);
8182
</programlisting>
8283

8384
<para>
8485
Here, we created a bloom index with a signature length of 80 bits,
85-
and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
86+
and attributes i1 and i2 mapped to 2 bits, and attribute i3mappedto 4 bits.
8687
</para>
8788

8889
<para>

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp