Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7842c01

Browse files
committed
add BloomFilter
1 parentc2e2402 commit7842c01

File tree

2 files changed

+260
-0
lines changed

2 files changed

+260
-0
lines changed
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
packagesrc.main.java.com.search;
2+
3+
importjava.io.Serializable;
4+
importjava.util.ArrayList;
5+
importjava.util.Collections;
6+
importjava.util.List;
7+
importjava.util.function.Function;
8+
9+
/**
10+
* A simple implementation of Bloom filter.
11+
* <p>
12+
* Bloom filter have a chance of being wrong.
13+
* <p>
14+
* The Bloom filter assert that elements that do not exist must not exist,
15+
* if assert an element exists, but not necessarily.
16+
* <p>
17+
* The accuracy rate depends on capacity and hash functions.
18+
*
19+
* @author yangxf
20+
*/
21+
publicclassBloomFilterimplementsSerializable {
22+
privatestaticfinallongserialVersionUID = -4466610350741278658L;
23+
24+
privatestaticfinalintLONG_SHIFT =6;
25+
privatestaticfinalintLONG_MASK =63;
26+
27+
/**
28+
* hash functions
29+
*/
30+
privatefinalList<Function<String,Integer>>hashFunctions;
31+
32+
privatefinallong[]table;
33+
privatefinalinttableMask;
34+
privateintsize;
35+
36+
/**
37+
* @param capacity the filter capacity
38+
* @param hashFunctions hash functions
39+
* @see Builder must be build by {@link Builder}.
40+
*/
41+
privateBloomFilter(intcapacity,List<Function<String,Integer>>hashFunctions) {
42+
this.hashFunctions =hashFunctions;
43+
intcap =nextPowerOf2(capacity);
44+
tableMask = (cap <<LONG_SHIFT) -1;
45+
table =newlong[cap];
46+
}
47+
48+
publicstaticBuilderbuilder(intcapacity) {
49+
if (capacity <1) {
50+
thrownewIllegalStateException("capacity must be > 0");
51+
}
52+
53+
returnnewBuilder(capacity);
54+
}
55+
56+
/**
57+
* Add an element to the Bloom filter.
58+
*/
59+
publicvoidadd(Stringelement) {
60+
checkNotNull(element,"element");
61+
62+
for (Function<String,Integer>hashFunction :hashFunctions) {
63+
intkey =hashFunction.apply(element) &tableMask;
64+
table[key >>>LONG_SHIFT] |= (1 << (key &LONG_MASK));
65+
}
66+
size++;
67+
}
68+
69+
/**
70+
* @return true if the element exists, otherwise false.
71+
*/
72+
publicbooleancontains(Stringelement) {
73+
if (element ==null) {
74+
returnfalse;
75+
}
76+
77+
for (Function<String,Integer>hashFunction :hashFunctions) {
78+
intkey =hashFunction.apply(element) &tableMask;
79+
if ((table[key >>>LONG_SHIFT] & (1 << (key &LONG_MASK))) ==0) {
80+
returnfalse;
81+
}
82+
}
83+
returntrue;
84+
}
85+
86+
publicList<Function<String,Integer>>getHashFunctions() {
87+
returnhashFunctions;
88+
}
89+
90+
publicintsize() {
91+
returnsize;
92+
}
93+
94+
privatestaticvoidcheckNotNull(Stringelement,Stringmsg) {
95+
if (element ==null) {
96+
thrownewNullPointerException(msg +" must be not null");
97+
}
98+
}
99+
100+
privatestaticintnextPowerOf2(inti) {
101+
intn =i -1;
102+
n |=n >>>1;
103+
n |=n >>>2;
104+
n |=n >>>4;
105+
n |=n >>>8;
106+
n |=n >>>16;
107+
return (n <0) ?1 : (n >=0x40000000) ?0x40000000 :n +1;
108+
}
109+
110+
/**
111+
* We need a list of unmodifiable hash functions.
112+
*/
113+
publicstaticclassBuilder {
114+
privateintcapacity;
115+
privateList<Function<String,Integer>>hashFunctions =newArrayList<>();
116+
117+
privateBuilder(intcapacity) {
118+
this.capacity =capacity;
119+
}
120+
121+
publicBuilderaddHashFunction(Function<String,Integer>function) {
122+
hashFunctions.add(function);
123+
returnthis;
124+
}
125+
126+
publicBloomFilterbuild() {
127+
if (hashFunctions.isEmpty()) {
128+
addDefaultHashFunction();
129+
}
130+
returnnewBloomFilter(capacity,Collections.unmodifiableList(hashFunctions));
131+
}
132+
133+
/**
134+
* I provides several default hash functions
135+
*/
136+
privatevoidaddDefaultHashFunction() {
137+
// Java String Hash Function
138+
hashFunctions.add(String::hashCode);
139+
140+
// SDBM Hash Function
141+
hashFunctions.add(key -> {
142+
if (key ==null ||key.isEmpty()) {
143+
return0;
144+
}
145+
146+
inthash =0;
147+
for (inti =0;i <key.length();i++) {
148+
hash =key.charAt(i) + (hash <<6) + (hash <<16) -hash;
149+
}
150+
hash &=0x7ffffff;
151+
returnhash;
152+
});
153+
154+
// Robert Sedgwicks Hash Function
155+
hashFunctions.add(key -> {
156+
if (key ==null ||key.isEmpty()) {
157+
return0;
158+
}
159+
160+
inthash =0;
161+
intmagic =63689;
162+
for (inti =0;i <key.length();i++) {
163+
hash =hash *magic +key.charAt(i);
164+
magic *=378551;
165+
}
166+
returnhash;
167+
});
168+
169+
// Arash Partow Hash Function
170+
hashFunctions.add(key -> {
171+
if (key ==null ||key.isEmpty()) {
172+
return0;
173+
}
174+
175+
inthash =0;
176+
for (inti =0;i <key.length();i++) {
177+
charch =key.charAt(i);
178+
if ((i &1) ==0) {
179+
hash ^= ((hash <<7) ^ch ^ (hash >>3));
180+
}else {
181+
hash ^= (~((hash <<11) ^ch ^ (hash >>5)));
182+
}
183+
}
184+
returnhash;
185+
});
186+
}
187+
188+
}
189+
190+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
packagesrc.test.java.com.search;
2+
3+
importorg.junit.Test;
4+
importsrc.main.java.com.search.BloomFilter;
5+
6+
importjava.util.HashSet;
7+
importjava.util.Set;
8+
importjava.util.concurrent.ThreadLocalRandom;
9+
10+
importstaticorg.junit.Assert.*;
11+
12+
publicclassBloomFilterTest {
13+
14+
@Test
15+
publicvoidtest() {
16+
intcount =100000;
17+
intlow =50,up =100;
18+
BloomFilterfilter =BloomFilter.builder(10000).build();
19+
String[]data =newString[count];
20+
Set<String>dataSet =newHashSet<>();
21+
for (inti =0;i <count;i++) {
22+
Stringstr =randomString(low,up);
23+
data[i] =str;
24+
if (i %2 ==0) {
25+
dataSet.add(str);
26+
filter.add(str);
27+
}
28+
}
29+
30+
interror =0,total =0;
31+
for (inti =0;i <count;i++) {
32+
Stringstr =data[i];
33+
if (filter.contains(str)) {
34+
total++;
35+
if (!dataSet.contains(str)) {
36+
error++;
37+
}
38+
}else {
39+
assertFalse(dataSet.contains(str));
40+
}
41+
}
42+
System.out.println("error: " +error);
43+
System.out.println("total: " +total);
44+
System.out.println("error rate : " + (double)error /total);
45+
}
46+
47+
publicstaticStringrandomString(intminLength,intmaxLength) {
48+
ThreadLocalRandomr =ThreadLocalRandom.current();
49+
intchLen =r.nextInt(minLength,maxLength),
50+
poolSize =CHAR_POOL.length;
51+
char[]chars =newchar[chLen];
52+
for (inti =0;i <chLen;i++) {
53+
chars[i] =CHAR_POOL[r.nextInt(poolSize)];
54+
}
55+
56+
returnnewString(chars);
57+
}
58+
59+
privatestaticfinalchar[]CHAR_POOL;
60+
61+
static {
62+
CHAR_POOL =newchar[52];
63+
inti =0;
64+
for (charc ='a';c <='z';c++) {
65+
CHAR_POOL[i++] =c;
66+
CHAR_POOL[i++] = (char) (c -32);
67+
}
68+
}
69+
70+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp