@@ -259,6 +259,48 @@ typedef struct BloomFilter
259
259
char data [FLEXIBLE_ARRAY_MEMBER ];
260
260
}BloomFilter ;
261
261
262
+ /*
263
+ * bloom_filter_size
264
+ *Calculate Bloom filter parameters (nbits, nbytes, nhashes).
265
+ *
266
+ * Given expected number of distinct values and desired false positive rate,
267
+ * calculates the optimal parameters of the Bloom filter.
268
+ *
269
+ * The resulting parameters are returned through nbytesp (number of bytes),
270
+ * nbitsp (number of bits) and nhashesp (number of hash functions). If a
271
+ * pointer is NULL, the parameter is not returned.
272
+ */
273
+ static void
274
+ bloom_filter_size (int ndistinct ,double false_positive_rate ,
275
+ int * nbytesp ,int * nbitsp ,int * nhashesp )
276
+ {
277
+ double k ;
278
+ int nbits ,
279
+ nbytes ;
280
+
281
+ /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
282
+ nbits = ceil (- (ndistinct * log (false_positive_rate )) /pow (log (2.0 ),2 ));
283
+
284
+ /* round m to whole bytes */
285
+ nbytes = ((nbits + 7 ) /8 );
286
+ nbits = nbytes * 8 ;
287
+
288
+ /*
289
+ * round(log(2.0) * m / ndistinct), but assume round() may not be
290
+ * available on Windows
291
+ */
292
+ k = log (2.0 )* nbits /ndistinct ;
293
+ k = (k - floor (k ) >=0.5 ) ?ceil (k ) :floor (k );
294
+
295
+ if (nbytesp )
296
+ * nbytesp = nbytes ;
297
+
298
+ if (nbitsp )
299
+ * nbitsp = nbits ;
300
+
301
+ if (nhashesp )
302
+ * nhashesp = (int )k ;
303
+ }
262
304
263
305
/*
264
306
* bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
275
317
276
318
int nbits ;/* size of filter / number of bits */
277
319
int nbytes ;/* size of filter / number of bytes */
278
-
279
- double k ;/* number of hash functions */
320
+ int nhashes ;/* number of hash functions */
280
321
281
322
Assert (ndistinct > 0 );
282
323
Assert ((false_positive_rate >=BLOOM_MIN_FALSE_POSITIVE_RATE )&&
283
324
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE ));
284
325
285
- /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
286
- nbits = ceil (- (ndistinct * log (false_positive_rate )) /pow (log (2.0 ),2 ));
287
-
288
- /* round m to whole bytes */
289
- nbytes = ((nbits + 7 ) /8 );
290
- nbits = nbytes * 8 ;
326
+ /* calculate bloom filter size / parameters */
327
+ bloom_filter_size (ndistinct ,false_positive_rate ,
328
+ & nbytes ,& nbits ,& nhashes );
291
329
292
330
/*
293
331
* Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
310
348
elog (ERROR ,"the bloom filter is too large (%d > %zu)" ,nbytes ,
311
349
BloomMaxFilterSize );
312
350
313
- /*
314
- * round(log(2.0) * m / ndistinct), but assume round() may not be
315
- * available on Windows
316
- */
317
- k = log (2.0 )* nbits /ndistinct ;
318
- k = (k - floor (k ) >=0.5 ) ?ceil (k ) :floor (k );
319
-
320
351
/*
321
352
* We allocate the whole filter. Most of it is going to be 0 bits, so the
322
353
* varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
326
357
filter = (BloomFilter * )palloc0 (len );
327
358
328
359
filter -> flags = 0 ;
329
- filter -> nhashes = ( int ) k ;
360
+ filter -> nhashes = nhashes ;
330
361
filter -> nbits = nbits ;
331
362
332
363
SET_VARSIZE (filter ,len );