tbloom.c - plan9port - [fork] Plan 9 from user space | |
git clone git://src.adamsgaard.dk/plan9port | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
tbloom.c (4669B) | |
--- | |
1 /* | |
2 * Bloom filter tracking which scores are present in our arenas | |
3 * and (more importantly) which are not. | |
4 */ | |
5 | |
6 #include "stdinc.h" | |
7 #include "dat.h" | |
8 #include "fns.h" | |
9 | |
10 int ignorebloom; | |
11 | |
12 int | |
13 bloominit(Bloom *b, vlong vsize, u8int *data) | |
14 { | |
15 ulong size; | |
16 | |
17 size = vsize; | |
18 if(size != vsize){ /* truncation */ | |
19 werrstr("bloom data too big"); | |
20 return -1; | |
21 } | |
22 | |
23 b->size = size; | |
24 b->nhash = 32; /* will be fixed by caller on initializati… | |
25 if(data != nil) | |
26 if(unpackbloomhead(b, data) < 0) | |
27 return -1; | |
28 | |
29 b->bitmask = (b->size<<3) - 1; | |
30 b->data = data; | |
31 return 0; | |
32 } | |
33 | |
34 void | |
35 wbbloomhead(Bloom *b) | |
36 { | |
37 packbloomhead(b, b->data); | |
38 } | |
39 | |
40 Bloom* | |
41 readbloom(Part *p) | |
42 { | |
43 uchar buf[512]; | |
44 Bloom *b; | |
45 | |
46 b = vtmallocz(sizeof *b); | |
47 if(readpart(p, 0, buf, sizeof buf) < 0) | |
48 return nil; | |
49 /* | |
50 * pass buf as b->data so that bloominit | |
51 * can parse header. won't be used for | |
52 * accessing bits (cleared below). | |
53 */ | |
54 if(bloominit(b, 0, buf) < 0){ | |
55 vtfree(b); | |
56 return nil; | |
57 }else{ | |
58 /* | |
59 * default block size is system page size. | |
60 * the bloom filter is usually very big. | |
61 * bump the block size up to speed i/o. | |
62 */ | |
63 if(p->blocksize < (1<<20)){ | |
64 p->blocksize = 1<<20; | |
65 if(p->blocksize > p->size) | |
66 p->blocksize = p->size; | |
67 } | |
68 } | |
69 b->part = p; | |
70 b->data = nil; | |
71 return b; | |
72 } | |
73 | |
74 int | |
75 resetbloom(Bloom *b) | |
76 { | |
77 uchar *data; | |
78 | |
79 data = vtmallocz(b->size); | |
80 b->data = data; | |
81 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ | |
82 addstat(StatBloomBits, b->size*8-1); | |
83 else | |
84 addstat(StatBloomBits, b->size*8); | |
85 return 0; | |
86 } | |
87 | |
88 int | |
89 loadbloom(Bloom *b) | |
90 { | |
91 int i, n; | |
92 uint ones; | |
93 uchar *data; | |
94 u32int *a; | |
95 | |
96 data = vtmallocz(b->size); | |
97 if(readpart(b->part, 0, data, b->size) < 0){ | |
98 vtfree(b); | |
99 vtfree(data); | |
100 return -1; | |
101 } | |
102 b->data = data; | |
103 | |
104 a = (u32int*)b->data; | |
105 n = b->size/4; | |
106 ones = 0; | |
107 for(i=0; i<n; i++) | |
108 ones += countbits(a[i]); | |
109 addstat(StatBloomOnes, ones); | |
110 | |
111 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ | |
112 addstat(StatBloomBits, b->size*8-1); | |
113 else | |
114 addstat(StatBloomBits, b->size*8); | |
115 | |
116 return 0; | |
117 } | |
118 | |
119 int | |
120 writebloom(Bloom *b) | |
121 { | |
122 wbbloomhead(b); | |
123 if(writepart(b->part, 0, b->data, b->size) < 0) | |
124 return -1; | |
125 if(flushpart(b->part) < 0) | |
126 return -1; | |
127 return 0; | |
128 } | |
129 | |
130 /* | |
131 * Derive two random 32-bit quantities a, b from the score | |
132 * and then use a+b*i as a sequence of bloom filter indices. | |
133 * Michael Mitzenmacher has a recent (2005) paper saying this is okay. | |
134 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header. | |
135 */ | |
136 static void | |
137 gethashes(u8int *score, ulong *h) | |
138 { | |
139 int i; | |
140 u32int a, b; | |
141 | |
142 a = 0; | |
143 b = 0; | |
144 for(i=4; i+8<=VtScoreSize; i+=8){ | |
145 a ^= *(u32int*)(score+i); | |
146 b ^= *(u32int*)(score+i+4); | |
147 } | |
148 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */ | |
149 a ^= *(u32int*)(score+i); | |
150 for(i=0; i<BloomMaxHash; i++, a+=b) | |
151 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a; | |
152 } | |
153 | |
154 static void | |
155 _markbloomfilter(Bloom *b, u8int *score) | |
156 { | |
157 int i, nnew; | |
158 ulong h[BloomMaxHash]; | |
159 u32int x, *y, z, *tab; | |
160 | |
161 trace("markbloomfilter", "markbloomfilter %V", score); | |
162 gethashes(score, h); | |
163 nnew = 0; | |
164 tab = (u32int*)b->data; | |
165 for(i=0; i<b->nhash; i++){ | |
166 x = h[i]; | |
167 y = &tab[(x&b->bitmask)>>5]; | |
168 z = 1<<(x&31); | |
169 if(!(*y&z)){ | |
170 nnew++; | |
171 *y |= z; | |
172 } | |
173 } | |
174 if(nnew) | |
175 addstat(StatBloomOnes, nnew); | |
176 | |
177 trace("markbloomfilter", "markbloomfilter exit"); | |
178 } | |
179 | |
180 static int | |
181 _inbloomfilter(Bloom *b, u8int *score) | |
182 { | |
183 int i; | |
184 ulong h[BloomMaxHash], x; | |
185 u32int *tab; | |
186 | |
187 gethashes(score, h); | |
188 tab = (u32int*)b->data; | |
189 for(i=0; i<b->nhash; i++){ | |
190 x = h[i]; | |
191 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31)))) | |
192 return 0; | |
193 } | |
194 return 1; | |
195 } | |
196 | |
197 int | |
198 inbloomfilter(Bloom *b, u8int *score) | |
199 { | |
200 int r; | |
201 | |
202 if(b == nil || b->data == nil) | |
203 return 1; | |
204 | |
205 if(ignorebloom) | |
206 return 1; | |
207 | |
208 rlock(&b->lk); | |
209 r = _inbloomfilter(b, score); | |
210 runlock(&b->lk); | |
211 addstat(StatBloomLookup, 1); | |
212 if(r) | |
213 addstat(StatBloomMiss, 1); | |
214 else | |
215 addstat(StatBloomHit, 1); | |
216 return r; | |
217 } | |
218 | |
219 void | |
220 markbloomfilter(Bloom *b, u8int *score) | |
221 { | |
222 if(b == nil || b->data == nil) | |
223 return; | |
224 | |
225 rlock(&b->lk); | |
226 qlock(&b->mod); | |
227 _markbloomfilter(b, score); | |
228 qunlock(&b->mod); | |
229 runlock(&b->lk); | |
230 } | |
231 | |
232 void | |
233 markbloomfiltern(Bloom *b, u8int score[][20], int n) | |
234 { | |
235 int i; | |
236 | |
237 if(b == nil || b->data == nil) | |
238 return; | |
239 | |
240 rlock(&b->lk); | |
241 qlock(&b->mod); | |
242 for(i=0; i<n; i++) | |
243 _markbloomfilter(b, score[i]); | |
244 qunlock(&b->mod); | |
245 runlock(&b->lk); | |
246 } | |
247 | |
248 static void | |
249 bloomwriteproc(void *v) | |
250 { | |
251 int ret; | |
252 Bloom *b; | |
253 | |
254 threadsetname("bloomwriteproc"); | |
255 b = v; | |
256 for(;;){ | |
257 recv(b->writechan, 0); | |
258 if((ret=writebloom(b)) < 0) | |
259 fprint(2, "oops! writing bloom: %r\n"); | |
260 else | |
261 ret = 0; | |
262 sendul(b->writedonechan, ret); | |
263 } | |
264 } | |
265 | |
266 void | |
267 startbloomproc(Bloom *b) | |
268 { | |
269 b->writechan = chancreate(sizeof(void*), 0); | |
270 b->writedonechan = chancreate(sizeof(ulong), 0); | |
271 vtproc(bloomwriteproc, b); | |
272 } |