7  | 7  |  * Portions Copyright (c) 1994, Regents of the University of California
8  | 8  |  *
9  | 9  |  *
10 |    | - * $Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
   | 10 | + * $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
11 | 11 |  *
12 | 12 |  *-------------------------------------------------------------------------
13 | 13 |  */
16 | 16 |  *		ExecHash		- generate an in-memory hash table of the relation
17 | 17 |  *		ExecInitHash	- initialize node and subnodes
18 | 18 |  *		ExecEndHash		- shutdown node and subnodes
19 |    | - *
20 | 19 |  */
   | 20 | +#include "postgres.h"
21 | 21 |
22 | 22 | #include <sys/types.h>
23 | 23 | #include <math.h>
24 | 24 |
25 |    | -#include "postgres.h"
26 |    | -
27 | 25 | #include "executor/execdebug.h"
28 | 26 | #include "executor/nodeHash.h"
29 | 27 | #include "executor/nodeHashjoin.h"
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
209 | 207 |  *		create a hashtable in shared memory for hashjoin.
210 | 208 |  * ----------------------------------------------------------------
211 | 209 |  */
212 |     | -#define FUDGE_FAC				2.0
213 |     | -
214 | 210 | HashJoinTable
215 | 211 | ExecHashTableCreate(Hash *node)
216 | 212 | {
217 |     | -	Plan	   *outerNode;
218 |     | -	double		ntuples;
219 |     | -	int			tupsize;
220 |     | -	double		inner_rel_bytes;
221 |     | -	double		hash_table_bytes;
222 |     | -	int			nbatch;
223 | 213 | 	HashJoinTable hashtable;
224 |     | -	int			nbuckets;
    | 214 | +	Plan	   *outerNode;
225 | 215 | 	int			totalbuckets;
226 |     | -	int			bucketsize;
    | 216 | +	int			nbuckets;
    | 217 | +	int			nbatch;
227 | 218 | 	int			i;
228 | 219 | 	MemoryContext oldcxt;
229 | 220 |
230 | 221 | 	/*
231 | 222 | 	 * Get information about the size of the relation to be hashed (it's
232 | 223 | 	 * the "outer" subtree of this node, but the inner relation of the
233 |     | -	 * hashjoin).
234 |     | -	 *
235 |     | -	 * Caution: this is only the planner's estimates, and so can't be trusted
236 |     | -	 * too far.  Apply a healthy fudge factor.
    | 224 | +	 * hashjoin).  Compute the appropriate size of the hash table.
237 | 225 | 	 */
238 | 226 | 	outerNode = outerPlan(node);
239 |     | -	ntuples = outerNode->plan_rows;
240 |     | -	if (ntuples <= 0.0)			/* force a plausible size if no info */
241 |     | -		ntuples = 1000.0;
242 |     | -
243 |     | -	/*
244 |     | -	 * estimate tupsize based on footprint of tuple in hashtable... but
245 |     | -	 * what about palloc overhead?
246 |     | -	 */
247 |     | -	tupsize = MAXALIGN(outerNode->plan_width) +
248 |     | -		MAXALIGN(sizeof(HashJoinTupleData));
249 |     | -	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
250 |     | -
251 |     | -	/*
252 |     | -	 * Target hashtable size is SortMem kilobytes, but not less than
253 |     | -	 * sqrt(estimated inner rel size), so as to avoid horrible
254 |     | -	 * performance.
255 |     | -	 */
256 |     | -	hash_table_bytes = sqrt(inner_rel_bytes);
257 |     | -	if (hash_table_bytes < (SortMem * 1024L))
258 |     | -		hash_table_bytes = SortMem * 1024L;
259 |     | -
260 |     | -	/*
261 |     | -	 * Count the number of hash buckets we want for the whole relation,
262 |     | -	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
263 |     | -	 * bucket!).
264 |     | -	 */
265 |     | -	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
266 |     | -
267 |     | -	/*
268 |     | -	 * Count the number of buckets we think will actually fit in the
269 |     | -	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
270 |     | -	 * buckets).  NOTE: FUDGE_FAC here determines the fraction of the
271 |     | -	 * hashtable space reserved to allow for nonuniform distribution of
272 |     | -	 * hash values.  Perhaps this should be a different number from the
273 |     | -	 * other uses of FUDGE_FAC, but since we have no real good way to pick
274 |     | -	 * either one...
275 |     | -	 */
276 |     | -	bucketsize = NTUP_PER_BUCKET * tupsize;
277 |     | -	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
278 |     | -	if (nbuckets <= 0)
279 |     | -		nbuckets = 1;
280 | 227 |
281 |     | -	if (totalbuckets <= nbuckets)
282 |     | -	{
    | 228 | +	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
    | 229 | +							&totalbuckets, &nbuckets, &nbatch);
283 | 230 |
284 |     | -		/*
285 |     | -		 * We have enough space, so no batching.  In theory we could even
286 |     | -		 * reduce nbuckets, but since that could lead to poor behavior if
287 |     | -		 * estimated ntuples is much less than reality, it seems better to
288 |     | -		 * make more buckets instead of fewer.
289 |     | -		 */
290 |     | -		totalbuckets = nbuckets;
291 |     | -		nbatch = 0;
292 |     | -	}
293 |     | -	else
294 |     | -	{
295 |     | -
296 |     | -		/*
297 |     | -		 * Need to batch; compute how many batches we want to use.  Note
298 |     | -		 * that nbatch doesn't have to have anything to do with the ratio
299 |     | -		 * totalbuckets/nbuckets; in fact, it is the number of groups we
300 |     | -		 * will use for the part of the data that doesn't fall into the
301 |     | -		 * first nbuckets hash buckets.
302 |     | -		 */
303 |     | -		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
304 |     | -							hash_table_bytes);
305 |     | -		if (nbatch <= 0)
306 |     | -			nbatch = 1;
307 |     | -	}
308 |     | -
309 |     | -	/*
310 |     | -	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
311 |     | -	 * whole relation, and nbuckets is the number of physical hashbuckets
312 |     | -	 * we will use in the first pass.  Data falling into the first
313 |     | -	 * nbuckets virtual hashbuckets gets handled in the first pass;
314 |     | -	 * everything else gets divided into nbatch batches to be processed in
315 |     | -	 * additional passes.
316 |     | -	 */
317 | 231 | #ifdef HJDEBUG
318 | 232 | 	printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
319 | 233 | 		   nbatch, totalbuckets, nbuckets);
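With the sizing logic factored out as above, the computation becomes reusable
outside the executor; the comment in the next hunk notes it is exported for the
planner's costsize.c. A minimal sketch of how such a caller might use it, where
inner_rows and inner_width are hypothetical stand-ins for the caller's planner
estimates (this call site is illustrative, not part of the patch):

	int			virtualbuckets;
	int			physicalbuckets;
	int			numbatches;

	/* inner_rows / inner_width: the caller's estimates for the relation
	 * to be hashed; any planner row/width estimate would do here */
	ExecChooseHashTableSize(inner_rows, inner_width,
							&virtualbuckets, &physicalbuckets, &numbatches);

	/* numbatches > 0 means the join must write and re-read batch files,
	 * so a cost model should charge extra disk I/O for those tuples */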
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
407 | 321 | 	return hashtable;
408 | 322 | }
409 | 323 |
    | 324 | +
    | 325 | +/*
    | 326 | + * Compute appropriate size for hashtable given the estimated size of the
    | 327 | + * relation to be hashed (number of rows and average row width).
    | 328 | + *
    | 329 | + * Caution: the input is only the planner's estimates, and so can't be
    | 330 | + * trusted too far.  Apply a healthy fudge factor.
    | 331 | + *
    | 332 | + * This is exported so that the planner's costsize.c can use it.
    | 333 | + */
    | 334 | +
    | 335 | +/* Target bucket loading (tuples per bucket) */
    | 336 | +#define NTUP_PER_BUCKET			10
    | 337 | +/* Fudge factor to allow for inaccuracy of input estimates */
    | 338 | +#define FUDGE_FAC				2.0
    | 339 | +
    | 340 | +void
    | 341 | +ExecChooseHashTableSize(double ntuples, int tupwidth,
    | 342 | +						int *virtualbuckets,
    | 343 | +						int *physicalbuckets,
    | 344 | +						int *numbatches)
    | 345 | +{
    | 346 | +	int			tupsize;
    | 347 | +	double		inner_rel_bytes;
    | 348 | +	double		hash_table_bytes;
    | 349 | +	int			nbatch;
    | 350 | +	int			nbuckets;
    | 351 | +	int			totalbuckets;
    | 352 | +	int			bucketsize;
    | 353 | +
    | 354 | +	/* Force a plausible relation size if no info */
    | 355 | +	if (ntuples <= 0.0)
    | 356 | +		ntuples = 1000.0;
    | 357 | +
    | 358 | +	/*
    | 359 | +	 * Estimate tupsize based on footprint of tuple in hashtable... but
    | 360 | +	 * what about palloc overhead?
    | 361 | +	 */
    | 362 | +	tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
    | 363 | +	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
    | 364 | +
    | 365 | +	/*
    | 366 | +	 * Target hashtable size is SortMem kilobytes, but not less than
    | 367 | +	 * sqrt(estimated inner rel size), so as to avoid horrible
    | 368 | +	 * performance.
    | 369 | +	 */
    | 370 | +	hash_table_bytes = sqrt(inner_rel_bytes);
    | 371 | +	if (hash_table_bytes < (SortMem * 1024L))
    | 372 | +		hash_table_bytes = SortMem * 1024L;
    | 373 | +
    | 374 | +	/*
    | 375 | +	 * Count the number of hash buckets we want for the whole relation,
    | 376 | +	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
    | 377 | +	 * bucket!).
    | 378 | +	 */
    | 379 | +	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
    | 380 | +
    | 381 | +	/*
    | 382 | +	 * Count the number of buckets we think will actually fit in the
    | 383 | +	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
    | 384 | +	 * buckets).  NOTE: FUDGE_FAC here determines the fraction of the
    | 385 | +	 * hashtable space reserved to allow for nonuniform distribution of
    | 386 | +	 * hash values.  Perhaps this should be a different number from the
    | 387 | +	 * other uses of FUDGE_FAC, but since we have no real good way to pick
    | 388 | +	 * either one...
    | 389 | +	 */
    | 390 | +	bucketsize = NTUP_PER_BUCKET * tupsize;
    | 391 | +	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
    | 392 | +	if (nbuckets <= 0)
    | 393 | +		nbuckets = 1;
    | 394 | +
    | 395 | +	if (totalbuckets <= nbuckets)
    | 396 | +	{
    | 397 | +		/*
    | 398 | +		 * We have enough space, so no batching.  In theory we could even
    | 399 | +		 * reduce nbuckets, but since that could lead to poor behavior if
    | 400 | +		 * estimated ntuples is much less than reality, it seems better to
    | 401 | +		 * make more buckets instead of fewer.
    | 402 | +		 */
    | 403 | +		totalbuckets = nbuckets;
    | 404 | +		nbatch = 0;
    | 405 | +	}
    | 406 | +	else
    | 407 | +	{
    | 408 | +		/*
    | 409 | +		 * Need to batch; compute how many batches we want to use.  Note
    | 410 | +		 * that nbatch doesn't have to have anything to do with the ratio
    | 411 | +		 * totalbuckets/nbuckets; in fact, it is the number of groups we
    | 412 | +		 * will use for the part of the data that doesn't fall into the
    | 413 | +		 * first nbuckets hash buckets.
    | 414 | +		 */
    | 415 | +		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
    | 416 | +							hash_table_bytes);
    | 417 | +		if (nbatch <= 0)
    | 418 | +			nbatch = 1;
    | 419 | +	}
    | 420 | +
    | 421 | +	/*
    | 422 | +	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
    | 423 | +	 * whole relation, and nbuckets is the number of physical hashbuckets
    | 424 | +	 * we will use in the first pass.  Data falling into the first
    | 425 | +	 * nbuckets virtual hashbuckets gets handled in the first pass;
    | 426 | +	 * everything else gets divided into nbatch batches to be processed in
    | 427 | +	 * additional passes.
    | 428 | +	 */
    | 429 | +	*virtualbuckets = totalbuckets;
    | 430 | +	*physicalbuckets = nbuckets;
    | 431 | +	*numbatches = nbatch;
    | 432 | +}
    | 433 | +
    | 434 | +
410 | 435 | /* ----------------------------------------------------------------
411 | 436 |  *		ExecHashTableDestroy
412 | 437 |  *
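To make the sizing arithmetic concrete, here is a self-contained sketch that
replays the computation in ExecChooseHashTableSize for one made-up input. The
alignment macro, the SortMem setting, and the per-tuple overhead are
assumptions chosen for illustration; the real values come from the server
headers and configuration.

	#include <stdio.h>
	#include <math.h>

	/* Assumed values for illustration only */
	#define MAXALIGN(LEN)	(((LEN) + 7) & ~7)	/* assume 8-byte alignment */
	#define SORT_MEM		1024				/* assume SortMem = 1024 kB */
	#define TUP_OVERHEAD	16	/* stand-in for MAXALIGN(sizeof(HashJoinTupleData)) */
	#define NTUP_PER_BUCKET	10
	#define FUDGE_FAC		2.0

	int
	main(void)
	{
		double		ntuples = 100000.0;	/* planner's row estimate */
		int			tupwidth = 40;		/* planner's width estimate */
		int			tupsize = MAXALIGN(tupwidth) + TUP_OVERHEAD;	/* 56 */
		double		inner_rel_bytes = ntuples * tupsize * FUDGE_FAC; /* 11.2 MB */
		double		hash_table_bytes;
		int			totalbuckets;
		int			nbuckets;
		int			nbatch;

		/* target size: SortMem kilobytes, but at least sqrt(inner rel size) */
		hash_table_bytes = sqrt(inner_rel_bytes);
		if (hash_table_bytes < (SORT_MEM * 1024L))
			hash_table_bytes = SORT_MEM * 1024L;	/* 1 MB wins here */

		/* virtual buckets for the whole relation: ceil(200000 / 10) = 20000 */
		totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);

		/* physical buckets that fit in memory: 1048576 / (560 * 2.0) = 936 */
		nbuckets = (int) (hash_table_bytes /
						  (NTUP_PER_BUCKET * tupsize * FUDGE_FAC));
		if (nbuckets <= 0)
			nbuckets = 1;

		if (totalbuckets <= nbuckets)
		{
			totalbuckets = nbuckets;	/* everything fits: single pass */
			nbatch = 0;
		}
		else
		{
			/* ceil((11200000 - 1048576) / 1048576) = 10 extra batches */
			nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
								hash_table_bytes);
			if (nbatch <= 0)
				nbatch = 1;
		}

		/* prints: virtual = 20000, physical = 936, batches = 10 */
		printf("virtual = %d, physical = %d, batches = %d\n",
			   totalbuckets, nbuckets, nbatch);
		return 0;
	}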