Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite365a58

Browse files
committed
Speed-up build of MCV lists with many distinct values
When building multi-column MCV lists, we compute base frequency for eachitem, i.e. a product of per-column frequencies for values from the item.As a value may be in multiple groups, the code was scanning the wholearray of groups while adding items to the MCV list. This works fine aslong as the number of distinct groups is small, but it's easy to triggertrigger O(N^2) behavior, especially after increasing statistics target.This commit precomputes frequencies for values in all columns, so thatwhen computing the base frequency it's enough to make a simple bsearchlookup in the array.Backpatch to 12, where multi-column MCV lists were introduced.Discussion:https://postgr.es/m/20190618205920.qtlzcu73whfpfqne@development
1 parentd5ab9df commite365a58

File tree

1 file changed

+122
-8
lines changed
  • src/backend/statistics

1 file changed

+122
-8
lines changed

‎src/backend/statistics/mcv.c

Lines changed: 122 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
7878
staticSortItem*build_distinct_groups(intnumrows,SortItem*items,
7979
MultiSortSupportmss,int*ndistinct);
8080

81+
staticSortItem**build_column_frequencies(SortItem*groups,intngroups,
82+
MultiSortSupportmss,int*ncounts);
83+
8184
staticintcount_distinct_groups(intnumrows,SortItem*items,
8285
MultiSortSupportmss);
8386

@@ -242,6 +245,20 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
242245
if (nitems>0)
243246
{
244247
intj;
248+
SortItemkey;
249+
MultiSortSupporttmp;
250+
251+
/* frequencies for values in each attribute */
252+
SortItem**freqs;
253+
int*nfreqs;
254+
255+
/* used to search values */
256+
tmp= (MultiSortSupport)palloc(offsetof(MultiSortSupportData,ssup)
257+
+sizeof(SortSupportData));
258+
259+
/* compute frequencies for values in each column */
260+
nfreqs= (int*)palloc0(sizeof(int)*numattrs);
261+
freqs=build_column_frequencies(groups,ngroups,mss,nfreqs);
245262

246263
/*
247264
* Allocate the MCV list structure, set the global parameters.
@@ -281,18 +298,26 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
281298
item->base_frequency=1.0;
282299
for (j=0;j<numattrs;j++)
283300
{
284-
intcount=0;
285-
intk;
301+
SortItem*freq;
286302

287-
for (k=0;k<ngroups;k++)
288-
{
289-
if (multi_sort_compare_dim(j,&groups[i],&groups[k],mss)==0)
290-
count+=groups[k].count;
291-
}
303+
/* single dimension */
304+
tmp->ndims=1;
305+
tmp->ssup[0]=mss->ssup[j];
292306

293-
item->base_frequency *= (double)count /numrows;
307+
/* fill search key */
308+
key.values=&groups[i].values[j];
309+
key.isnull=&groups[i].isnull[j];
310+
311+
freq= (SortItem*)bsearch_arg(&key,freqs[j],nfreqs[j],
312+
sizeof(SortItem),
313+
multi_sort_compare,tmp);
314+
315+
item->base_frequency *= ((double)freq->count) /numrows;
294316
}
295317
}
318+
319+
pfree(nfreqs);
320+
pfree(freqs);
296321
}
297322

298323
pfree(items);
@@ -419,6 +444,95 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
419444
returngroups;
420445
}
421446

447+
/* compare sort items (single dimension) */
448+
staticint
449+
sort_item_compare(constvoid*a,constvoid*b,void*arg)
450+
{
451+
SortSupportssup= (SortSupport)arg;
452+
SortItem*ia= (SortItem*)a;
453+
SortItem*ib= (SortItem*)b;
454+
455+
returnApplySortComparator(ia->values[0],ia->isnull[0],
456+
ib->values[0],ib->isnull[0],
457+
ssup);
458+
}
459+
460+
/*
461+
* build_column_frequencies
462+
*compute frequencies of values in each column
463+
*
464+
* This returns an array of SortItems for each attibute the MCV is built
465+
* on, with a frequency (number of occurrences) for each value. This is
466+
* then used to compute "base" frequency of MCV items.
467+
*
468+
* All the memory is allocated in a single chunk, so that a single pfree
469+
* is enough to release it. We do not allocate space for values/isnull
470+
* arrays in the SortItems, because we can simply point into the input
471+
* groups directly.
472+
*/
473+
staticSortItem**
474+
build_column_frequencies(SortItem*groups,intngroups,
475+
MultiSortSupportmss,int*ncounts)
476+
{
477+
inti,
478+
dim;
479+
SortItem**result;
480+
char*ptr;
481+
482+
Assert(groups);
483+
Assert(ncounts);
484+
485+
/* allocate arrays for all columns as a single chunk */
486+
ptr=palloc(MAXALIGN(sizeof(SortItem*)*mss->ndims)+
487+
mss->ndims*MAXALIGN(sizeof(SortItem)*ngroups));
488+
489+
/* initial array of pointers */
490+
result= (SortItem**)ptr;
491+
ptr+=MAXALIGN(sizeof(SortItem*)*mss->ndims);
492+
493+
for (dim=0;dim<mss->ndims;dim++)
494+
{
495+
SortSupportssup=&mss->ssup[dim];
496+
497+
/* array of values for a single column */
498+
result[dim]= (SortItem*)ptr;
499+
ptr+=MAXALIGN(sizeof(SortItem)*ngroups);
500+
501+
/* extract data for the dimension */
502+
for (i=0;i<ngroups;i++)
503+
{
504+
/* point into the input groups */
505+
result[dim][i].values=&groups[i].values[dim];
506+
result[dim][i].isnull=&groups[i].isnull[dim];
507+
result[dim][i].count=groups[i].count;
508+
}
509+
510+
/* sort the values, deduplicate */
511+
qsort_arg((void*)result[dim],ngroups,sizeof(SortItem),
512+
sort_item_compare,ssup);
513+
514+
/*
515+
* Identify distinct values, compute frequency (there might be
516+
* multiple MCV items containing this value, so we need to sum
517+
* counts from all of them.
518+
*/
519+
ncounts[dim]=1;
520+
for (i=1;i<ngroups;i++)
521+
{
522+
if (sort_item_compare(&result[dim][i-1],&result[dim][i],ssup)==0)
523+
{
524+
result[dim][ncounts[dim]-1].count+=result[dim][i].count;
525+
continue;
526+
}
527+
528+
result[dim][ncounts[dim]]=result[dim][i];
529+
530+
ncounts[dim]++;
531+
}
532+
}
533+
534+
returnresult;
535+
}
422536

423537
/*
424538
* statext_mcv_load

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp