|
7 | 7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | 8 | * |
9 | 9 | * |
10 | | - * $Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $ |
| 10 | + * $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $ |
11 | 11 | * |
12 | 12 | *------------------------------------------------------------------------- |
13 | 13 | */ |
|
16 | 16 | * ExecHash - generate an in-memory hash table of the relation |
17 | 17 | * ExecInitHash - initialize node and subnodes |
18 | 18 | * ExecEndHash - shutdown node and subnodes |
19 | | - * |
20 | 19 | */ |
| 20 | +#include"postgres.h" |
21 | 21 |
|
22 | 22 | #include<sys/types.h> |
23 | 23 | #include<math.h> |
24 | 24 |
|
25 | | -#include"postgres.h" |
26 | | - |
27 | 25 | #include"executor/execdebug.h" |
28 | 26 | #include"executor/nodeHash.h" |
29 | 27 | #include"executor/nodeHashjoin.h" |
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node) |
209 | 207 | * create a hashtable in shared memory for hashjoin. |
210 | 208 | * ---------------------------------------------------------------- |
211 | 209 | */ |
212 | | -#define FUDGE_FAC 2.0 |
213 | | - |
214 | 210 | HashJoinTable |
215 | 211 | ExecHashTableCreate(Hash *node) |
216 | 212 | { |
217 | | -Plan *outerNode; |
218 | | -double ntuples; |
219 | | -int tupsize; |
220 | | -double inner_rel_bytes; |
221 | | -double hash_table_bytes; |
222 | | -int nbatch; |
223 | 213 | HashJoinTable hashtable; |
224 | | -int nbuckets; |
| 214 | +Plan *outerNode; |
225 | 215 | int totalbuckets; |
226 | | -int bucketsize; |
| 216 | +int nbuckets; |
| 217 | +int nbatch; |
227 | 218 | int i; |
228 | 219 | MemoryContext oldcxt; |
229 | 220 |
|
230 | 221 | /* |
231 | 222 | * Get information about the size of the relation to be hashed (it's |
232 | 223 | * the "outer" subtree of this node, but the inner relation of the |
233 | | - * hashjoin). |
234 | | - * |
235 | | - * Caution: this is only the planner's estimates, and so can't be trusted |
236 | | - * too far. Apply a healthy fudge factor. |
| 224 | + * hashjoin). Compute the appropriate size of the hash table. |
237 | 225 | */ |
238 | 226 | outerNode = outerPlan(node); |
239 | | -ntuples = outerNode->plan_rows; |
240 | | -if (ntuples <= 0.0) /* force a plausible size if no info */ |
241 | | -ntuples = 1000.0; |
242 | | - |
243 | | -/* |
244 | | - * estimate tupsize based on footprint of tuple in hashtable... but |
245 | | - * what about palloc overhead? |
246 | | - */ |
247 | | -tupsize = MAXALIGN(outerNode->plan_width) + |
248 | | -MAXALIGN(sizeof(HashJoinTupleData)); |
249 | | -inner_rel_bytes = ntuples * tupsize * FUDGE_FAC; |
250 | | - |
251 | | -/* |
252 | | - * Target hashtable size is SortMem kilobytes, but not less than |
253 | | - * sqrt(estimated inner rel size), so as to avoid horrible |
254 | | - * performance. |
255 | | - */ |
256 | | -hash_table_bytes = sqrt(inner_rel_bytes); |
257 | | -if (hash_table_bytes < (SortMem * 1024L)) |
258 | | -hash_table_bytes = SortMem * 1024L; |
259 | | - |
260 | | -/* |
261 | | - * Count the number of hash buckets we want for the whole relation, |
262 | | - * for an average bucket load of NTUP_PER_BUCKET (per virtual |
263 | | - * bucket!). |
264 | | - */ |
265 | | -totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET); |
266 | | - |
267 | | -/* |
268 | | - * Count the number of buckets we think will actually fit in the |
269 | | - * target memory size, at a loading of NTUP_PER_BUCKET (physical |
270 | | - * buckets). NOTE: FUDGE_FAC here determines the fraction of the |
271 | | - * hashtable space reserved to allow for nonuniform distribution of |
272 | | - * hash values. Perhaps this should be a different number from the |
273 | | - * other uses of FUDGE_FAC, but since we have no real good way to pick |
274 | | - * either one... |
275 | | - */ |
276 | | -bucketsize = NTUP_PER_BUCKET * tupsize; |
277 | | -nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC)); |
278 | | -if (nbuckets <= 0) |
279 | | -nbuckets = 1; |
280 | 227 |
|
281 | | -if (totalbuckets <= nbuckets) |
282 | | -{ |
| 228 | +ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width, |
| 229 | +&totalbuckets, &nbuckets, &nbatch); |
283 | 230 |
|
284 | | -/* |
285 | | - * We have enough space, so no batching. In theory we could even |
286 | | - * reduce nbuckets, but since that could lead to poor behavior if |
287 | | - * estimated ntuples is much less than reality, it seems better to |
288 | | - * make more buckets instead of fewer. |
289 | | - */ |
290 | | -totalbuckets = nbuckets; |
291 | | -nbatch = 0; |
292 | | -} |
293 | | -else |
294 | | -{ |
295 | | - |
296 | | -/* |
297 | | - * Need to batch; compute how many batches we want to use. Note |
298 | | - * that nbatch doesn't have to have anything to do with the ratio |
299 | | - * totalbuckets/nbuckets; in fact, it is the number of groups we |
300 | | - * will use for the part of the data that doesn't fall into the |
301 | | - * first nbuckets hash buckets. |
302 | | - */ |
303 | | -nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) / |
304 | | -hash_table_bytes); |
305 | | -if (nbatch <= 0) |
306 | | -nbatch = 1; |
307 | | -} |
308 | | - |
309 | | -/* |
310 | | - * Now, totalbuckets is the number of (virtual) hashbuckets for the |
311 | | - * whole relation, and nbuckets is the number of physical hashbuckets |
312 | | - * we will use in the first pass. Data falling into the first |
313 | | - * nbuckets virtual hashbuckets gets handled in the first pass; |
314 | | - * everything else gets divided into nbatch batches to be processed in |
315 | | - * additional passes. |
316 | | - */ |
317 | 231 | #ifdef HJDEBUG |
318 | 232 | printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n", |
319 | 233 | nbatch, totalbuckets, nbuckets); |
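A note on the interface: the call shown above uses the new entry point defined in the next hunk. Its extern declaration, which would presumably be added to src/include/executor/nodeHash.h beside the other ExecHash* prototypes (that header change is not part of this hunk), mirrors the definition:

extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
                                    int *virtualbuckets,
                                    int *physicalbuckets,
                                    int *numbatches);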
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node) |
407 | 321 | return hashtable; |
408 | 322 | } |
409 | 323 |
|
| 324 | + |
| 325 | +/* |
| 326 | + * Compute appropriate size for hashtable given the estimated size of the |
| 327 | + * relation to be hashed (number of rows and average row width). |
| 328 | + * |
| 329 | + * Caution: the input is only the planner's estimates, and so can't be |
| 330 | + * trusted too far. Apply a healthy fudge factor. |
| 331 | + * |
| 332 | + * This is exported so that the planner's costsize.c can use it. |
| 333 | + */ |
| 334 | + |
| 335 | +/* Target bucket loading (tuples per bucket) */ |
| 336 | +#define NTUP_PER_BUCKET 10 |
| 337 | +/* Fudge factor to allow for inaccuracy of input estimates */ |
| 338 | +#define FUDGE_FAC 2.0 |
| 339 | + |
| 340 | +void |
| 341 | +ExecChooseHashTableSize(double ntuples, int tupwidth, |
| 342 | +int *virtualbuckets, |
| 343 | +int *physicalbuckets, |
| 344 | +int *numbatches) |
| 345 | +{ |
| 346 | +int tupsize; |
| 347 | +double inner_rel_bytes; |
| 348 | +double hash_table_bytes; |
| 349 | +int nbatch; |
| 350 | +int nbuckets; |
| 351 | +int totalbuckets; |
| 352 | +int bucketsize; |
| 353 | + |
| 354 | +/* Force a plausible relation size if no info */ |
| 355 | +if (ntuples <= 0.0) |
| 356 | +ntuples = 1000.0; |
| 357 | + |
| 358 | +/* |
| 359 | + * Estimate tupsize based on footprint of tuple in hashtable... but |
| 360 | + * what about palloc overhead? |
| 361 | + */ |
| 362 | +tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData)); |
| 363 | +inner_rel_bytes = ntuples * tupsize * FUDGE_FAC; |
| 364 | + |
| 365 | +/* |
| 366 | + * Target hashtable size is SortMem kilobytes, but not less than |
| 367 | + * sqrt(estimated inner rel size), so as to avoid horrible |
| 368 | + * performance. |
| 369 | + */ |
| 370 | +hash_table_bytes = sqrt(inner_rel_bytes); |
| 371 | +if (hash_table_bytes < (SortMem * 1024L)) |
| 372 | +hash_table_bytes = SortMem * 1024L; |
| 373 | + |
| 374 | +/* |
| 375 | + * Count the number of hash buckets we want for the whole relation, |
| 376 | + * for an average bucket load of NTUP_PER_BUCKET (per virtual |
| 377 | + * bucket!). |
| 378 | + */ |
| 379 | +totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET); |
| 380 | + |
| 381 | +/* |
| 382 | + * Count the number of buckets we think will actually fit in the |
| 383 | + * target memory size, at a loading of NTUP_PER_BUCKET (physical |
| 384 | + * buckets). NOTE: FUDGE_FAC here determines the fraction of the |
| 385 | + * hashtable space reserved to allow for nonuniform distribution of |
| 386 | + * hash values. Perhaps this should be a different number from the |
| 387 | + * other uses of FUDGE_FAC, but since we have no real good way to pick |
| 388 | + * either one... |
| 389 | + */ |
| 390 | +bucketsize = NTUP_PER_BUCKET * tupsize; |
| 391 | +nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC)); |
| 392 | +if (nbuckets <= 0) |
| 393 | +nbuckets = 1; |
| 394 | + |
| 395 | +if (totalbuckets <= nbuckets) |
| 396 | +{ |
| 397 | +/* |
| 398 | + * We have enough space, so no batching. In theory we could even |
| 399 | + * reduce nbuckets, but since that could lead to poor behavior if |
| 400 | + * estimated ntuples is much less than reality, it seems better to |
| 401 | + * make more buckets instead of fewer. |
| 402 | + */ |
| 403 | +totalbuckets = nbuckets; |
| 404 | +nbatch = 0; |
| 405 | +} |
| 406 | +else |
| 407 | +{ |
| 408 | +/* |
| 409 | + * Need to batch; compute how many batches we want to use. Note |
| 410 | + * that nbatch doesn't have to have anything to do with the ratio |
| 411 | + * totalbuckets/nbuckets; in fact, it is the number of groups we |
| 412 | + * will use for the part of the data that doesn't fall into the |
| 413 | + * first nbuckets hash buckets. |
| 414 | + */ |
| 415 | +nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) / |
| 416 | +hash_table_bytes); |
| 417 | +if (nbatch <= 0) |
| 418 | +nbatch = 1; |
| 419 | +} |
| 420 | + |
| 421 | +/* |
| 422 | + * Now, totalbuckets is the number of (virtual) hashbuckets for the |
| 423 | + * whole relation, and nbuckets is the number of physical hashbuckets |
| 424 | + * we will use in the first pass. Data falling into the first |
| 425 | + * nbuckets virtual hashbuckets gets handled in the first pass; |
| 426 | + * everything else gets divided into nbatch batches to be processed in |
| 427 | + * additional passes. |
| 428 | + */ |
| 429 | +*virtualbuckets = totalbuckets; |
| 430 | +*physicalbuckets = nbuckets; |
| 431 | +*numbatches = nbatch; |
| 432 | +} |
| 433 | + |
| 434 | + |
410 | 435 | /* ---------------------------------------------------------------- |
411 | 436 | * ExecHashTableDestroy |
412 | 437 | * |
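To make the sizing rules above concrete, here is a minimal standalone sketch of the same arithmetic, not the executor code itself. SORT_MEM_KB, TUPLE_OVERHEAD, and ALIGN_UP are assumed stand-ins for SortMem, sizeof(HashJoinTupleData), and MAXALIGN, which are configuration- and platform-dependent:

/*
 * Standalone sketch of the ExecChooseHashTableSize arithmetic.
 * SORT_MEM_KB, TUPLE_OVERHEAD, and ALIGN_UP are assumptions.
 */
#include <stdio.h>
#include <math.h>

#define NTUP_PER_BUCKET 10
#define FUDGE_FAC 2.0
#define SORT_MEM_KB 512                 /* assumed SortMem setting */
#define TUPLE_OVERHEAD 16               /* assumed per-tuple header footprint */
#define ALIGN_UP(x) (((x) + 7) & ~7)    /* assumed 8-byte MAXALIGN */

int
main(void)
{
    double      ntuples = 100000.0;     /* planner's row estimate */
    int         tupwidth = 40;          /* planner's width estimate */
    int         tupsize;
    double      inner_rel_bytes;
    double      hash_table_bytes;
    int         totalbuckets;
    int         nbuckets;
    int         bucketsize;
    int         nbatch;

    tupsize = ALIGN_UP(tupwidth) + ALIGN_UP(TUPLE_OVERHEAD);
    inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;

    /* target size is SORT_MEM_KB, but never less than sqrt(inner rel) */
    hash_table_bytes = sqrt(inner_rel_bytes);
    if (hash_table_bytes < SORT_MEM_KB * 1024.0)
        hash_table_bytes = SORT_MEM_KB * 1024.0;

    /* virtual buckets wanted for the whole relation */
    totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);

    /* physical buckets that fit in the target memory */
    bucketsize = NTUP_PER_BUCKET * tupsize;
    nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
    if (nbuckets <= 0)
        nbuckets = 1;

    if (totalbuckets <= nbuckets)
    {
        totalbuckets = nbuckets;        /* everything fits: one pass */
        nbatch = 0;
    }
    else
    {
        nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
                            hash_table_bytes);
        if (nbatch <= 0)
            nbatch = 1;
    }

    printf("totalbuckets = %d, nbuckets = %d, nbatch = %d\n",
           totalbuckets, nbuckets, nbatch);
    return 0;
}

With these assumed values, a 100,000-row, 40-byte-wide estimate gives tupsize = 56 and a fudged inner relation of 11.2 MB against a 512 KB target, so the sketch prints totalbuckets = 20000, nbuckets = 468, nbatch = 21: the data cannot fit in one pass, and everything outside the first 468 virtual buckets is split across 21 batches.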
|