@@ -78,6 +78,9 @@ static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
7878static SortItem * build_distinct_groups (int numrows ,SortItem * items ,
7979MultiSortSupport mss ,int * ndistinct );
8080
81+ static SortItem * * build_column_frequencies (SortItem * groups ,int ngroups ,
82+ MultiSortSupport mss ,int * ncounts );
83+
8184static int count_distinct_groups (int numrows ,SortItem * items ,
8285MultiSortSupport mss );
8386
@@ -242,6 +245,20 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
242245if (nitems > 0 )
243246{
244247int j ;
248+ SortItem key ;
249+ MultiSortSupport tmp ;
250+
251+ /* frequencies for values in each attribute */
252+ SortItem * * freqs ;
253+ int * nfreqs ;
254+
255+ /* used to search values */
256+ tmp = (MultiSortSupport )palloc (offsetof(MultiSortSupportData ,ssup )
257+ + sizeof (SortSupportData ));
258+
259+ /* compute frequencies for values in each column */
260+ nfreqs = (int * )palloc0 (sizeof (int )* numattrs );
261+ freqs = build_column_frequencies (groups ,ngroups ,mss ,nfreqs );
245262
246263/*
247264 * Allocate the MCV list structure, set the global parameters.
@@ -281,18 +298,26 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
281298item -> base_frequency = 1.0 ;
282299for (j = 0 ;j < numattrs ;j ++ )
283300{
284- int count = 0 ;
285- int k ;
301+ SortItem * freq ;
286302
287- for (k = 0 ;k < ngroups ;k ++ )
288- {
289- if (multi_sort_compare_dim (j ,& groups [i ],& groups [k ],mss )== 0 )
290- count += groups [k ].count ;
291- }
303+ /* single dimension */
304+ tmp -> ndims = 1 ;
305+ tmp -> ssup [0 ]= mss -> ssup [j ];
292306
293- item -> base_frequency *= (double )count /numrows ;
307+ /* fill search key */
308+ key .values = & groups [i ].values [j ];
309+ key .isnull = & groups [i ].isnull [j ];
310+
311+ freq = (SortItem * )bsearch_arg (& key ,freqs [j ],nfreqs [j ],
312+ sizeof (SortItem ),
313+ multi_sort_compare ,tmp );
314+
315+ item -> base_frequency *= ((double )freq -> count ) /numrows ;
294316}
295317}
318+
319+ pfree (nfreqs );
320+ pfree (freqs );
296321}
297322
298323pfree (items );
@@ -419,6 +444,95 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
419444return groups ;
420445}
421446
447+ /* compare sort items (single dimension) */
448+ static int
449+ sort_item_compare (const void * a ,const void * b ,void * arg )
450+ {
451+ SortSupport ssup = (SortSupport )arg ;
452+ SortItem * ia = (SortItem * )a ;
453+ SortItem * ib = (SortItem * )b ;
454+
455+ return ApplySortComparator (ia -> values [0 ],ia -> isnull [0 ],
456+ ib -> values [0 ],ib -> isnull [0 ],
457+ ssup );
458+ }
459+
460+ /*
461+ * build_column_frequencies
462+ *compute frequencies of values in each column
463+ *
464+ * This returns an array of SortItems for each attibute the MCV is built
465+ * on, with a frequency (number of occurrences) for each value. This is
466+ * then used to compute "base" frequency of MCV items.
467+ *
468+ * All the memory is allocated in a single chunk, so that a single pfree
469+ * is enough to release it. We do not allocate space for values/isnull
470+ * arrays in the SortItems, because we can simply point into the input
471+ * groups directly.
472+ */
473+ static SortItem * *
474+ build_column_frequencies (SortItem * groups ,int ngroups ,
475+ MultiSortSupport mss ,int * ncounts )
476+ {
477+ int i ,
478+ dim ;
479+ SortItem * * result ;
480+ char * ptr ;
481+
482+ Assert (groups );
483+ Assert (ncounts );
484+
485+ /* allocate arrays for all columns as a single chunk */
486+ ptr = palloc (MAXALIGN (sizeof (SortItem * )* mss -> ndims )+
487+ mss -> ndims * MAXALIGN (sizeof (SortItem )* ngroups ));
488+
489+ /* initial array of pointers */
490+ result = (SortItem * * )ptr ;
491+ ptr += MAXALIGN (sizeof (SortItem * )* mss -> ndims );
492+
493+ for (dim = 0 ;dim < mss -> ndims ;dim ++ )
494+ {
495+ SortSupport ssup = & mss -> ssup [dim ];
496+
497+ /* array of values for a single column */
498+ result [dim ]= (SortItem * )ptr ;
499+ ptr += MAXALIGN (sizeof (SortItem )* ngroups );
500+
501+ /* extract data for the dimension */
502+ for (i = 0 ;i < ngroups ;i ++ )
503+ {
504+ /* point into the input groups */
505+ result [dim ][i ].values = & groups [i ].values [dim ];
506+ result [dim ][i ].isnull = & groups [i ].isnull [dim ];
507+ result [dim ][i ].count = groups [i ].count ;
508+ }
509+
510+ /* sort the values, deduplicate */
511+ qsort_arg ((void * )result [dim ],ngroups ,sizeof (SortItem ),
512+ sort_item_compare ,ssup );
513+
514+ /*
515+ * Identify distinct values, compute frequency (there might be
516+ * multiple MCV items containing this value, so we need to sum
517+ * counts from all of them.
518+ */
519+ ncounts [dim ]= 1 ;
520+ for (i = 1 ;i < ngroups ;i ++ )
521+ {
522+ if (sort_item_compare (& result [dim ][i - 1 ],& result [dim ][i ],ssup )== 0 )
523+ {
524+ result [dim ][ncounts [dim ]- 1 ].count += result [dim ][i ].count ;
525+ continue ;
526+ }
527+
528+ result [dim ][ncounts [dim ]]= result [dim ][i ];
529+
530+ ncounts [dim ]++ ;
531+ }
532+ }
533+
534+ return result ;
535+ }
422536
423537/*
424538 * statext_mcv_load