@@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root,
242242}
243243
244244/*
245- * arraycontsel -- restriction selectivity for "arraycolumn @> const",
246- * "arraycolumn && const" or "arraycolumn <@ const"
245+ * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
247246 */
248247Datum
249248arraycontsel (PG_FUNCTION_ARGS )
@@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS)
323322}
324323
325324/*
326- * arraycontjoinsel -- join selectivity for "arraycolumn @> const",
327- * "arraycolumn && const" or "arraycolumn <@ const"
325+ * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
328326 */
329327Datum
330328arraycontjoinsel (PG_FUNCTION_ARGS )
@@ -744,18 +742,18 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
744742if (numbers == NULL || nnumbers != nmcelem + 3 )
745743return DEFAULT_CONTAIN_SEL ;
746744
745+ /* Can't do much without a count histogram, either */
746+ if (hist == NULL || nhist < 3 )
747+ return DEFAULT_CONTAIN_SEL ;
748+
747749/*
748750 * Grab some of the summary statistics that compute_array_stats() stores:
749751 * lowest frequency, frequency of null elements, and average distinct
750752 * element count.
751753 */
752754minfreq = numbers [nmcelem ];
753755nullelem_freq = numbers [nmcelem + 2 ];
754-
755- if (hist && nhist > 0 )
756- avg_count = hist [nhist - 1 ];
757- else
758- avg_count = 10.0f ;/* default assumption */
756+ avg_count = hist [nhist - 1 ];
759757
760758/*
761759 * "rest" will be the sum of the frequencies of all elements not
@@ -853,83 +851,71 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
853851 */
854852mult *=exp (- rest );
855853
856- /* Check we have nonempty distinct element count histogram */
857- if (hist && nhist >=3 )
858- {
859- /*----------
860- * Using the distinct element count histogram requires
861- *O(unique_nitems * (nmcelem + unique_nitems))
862- * operations. Beyond a certain computational cost threshold, it's
863- * reasonable to sacrifice accuracy for decreased planning time.
864- * We limit the number of operations to EFFORT * nmcelem; since
865- * nmcelem is limited by the column's statistics target, the work
866- * done is user-controllable.
867- *
868- * If the number of operations would be too large, we can reduce it
869- * without losing all accuracy by reducing unique_nitems and
870- * considering only the most-common elements of the constant array.
871- * To make the results exactly match what we would have gotten with
872- * only those elements to start with, we'd have to remove any
873- * discarded elements' frequencies from "mult", but since this is only
874- * an approximation anyway, we don't bother with that. Therefore it's
875- * sufficient to qsort elem_selec[] and take the largest elements.
876- * (They will no longer match up with the elements of array_data[],
877- * but we don't care.)
878- *----------
879- */
854+ /*----------
855+ * Using the distinct element count histogram requires
856+ *O(unique_nitems * (nmcelem + unique_nitems))
857+ * operations. Beyond a certain computational cost threshold, it's
858+ * reasonable to sacrifice accuracy for decreased planning time. We limit
859+ * the number of operations to EFFORT * nmcelem; since nmcelem is limited
860+ * by the column's statistics target, the work done is user-controllable.
861+ *
862+ * If the number of operations would be too large, we can reduce it
863+ * without losing all accuracy by reducing unique_nitems and considering
864+ * only the most-common elements of the constant array. To make the
865+ * results exactly match what we would have gotten with only those
866+ * elements to start with, we'd have to remove any discarded elements'
867+ * frequencies from "mult", but since this is only an approximation
868+ * anyway, we don't bother with that. Therefore it's sufficient to qsort
869+ * elem_selec[] and take the largest elements. (They will no longer match
870+ * up with the elements of array_data[], but we don't care.)
871+ *----------
872+ */
880873#define EFFORT 100
881874
882- if ((nmcelem + unique_nitems )> 0 &&
883- unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems ))
884- {
885- /*
886- * Use the quadratic formula to solve for largest allowable N;
887- * we have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
888- */
889- double b = (double )nmcelem ;
890- int n ;
891-
892- n = (int ) ((sqrt (b * b + 4 * EFFORT * b )- b ) /2 );
893-
894- /* Sort, then take just the first n elements */
895- qsort (elem_selec ,unique_nitems ,sizeof (float ),
896- float_compare_desc );
897- unique_nitems = n ;
898- }
899-
875+ if ((nmcelem + unique_nitems )> 0 &&
876+ unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems ))
877+ {
900878/*
901- * Calculate probabilities of each distinct element count for both
902- * mcelems and constant elements. At this point, assume independent
903- * element occurrence.
879+ * Use the quadratic formula to solve for largest allowable N. We
880+ * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
904881 */
905- dist = calc_distr ( elem_selec , unique_nitems , unique_nitems , 0.0f ) ;
906- mcelem_dist = calc_distr ( numbers , nmcelem , unique_nitems , rest ) ;
882+ double b = ( double ) nmcelem ;
883+ int n ;
907884
908- /* ignore hist[nhist-1], which is the avg not a histogram member */
909- hist_part = calc_hist (hist ,nhist - 1 ,unique_nitems );
885+ n = (int ) ((sqrt (b * b + 4 * EFFORT * b )- b ) /2 );
910886
911- selec = 0.0f ;
912- for (i = 0 ;i <=unique_nitems ;i ++ )
913- {
914- /*
915- * mult * dist[i] / mcelem_dist[i] gives us probability of qual
916- * matching from assumption of independent element occurrence with
917- * the condition that distinct element count = i.
918- */
919- if (mcelem_dist [i ]> 0 )
920- selec += hist_part [i ]* mult * dist [i ] /mcelem_dist [i ];
921- }
922-
923- pfree (dist );
924- pfree (mcelem_dist );
925- pfree (hist_part );
887+ /* Sort, then take just the first n elements */
888+ qsort (elem_selec ,unique_nitems ,sizeof (float ),
889+ float_compare_desc );
890+ unique_nitems = n ;
926891}
927- else
892+
893+ /*
894+ * Calculate probabilities of each distinct element count for both
895+ * mcelems and constant elements. At this point, assume independent
896+ * element occurrence.
897+ */
898+ dist = calc_distr (elem_selec ,unique_nitems ,unique_nitems ,0.0f );
899+ mcelem_dist = calc_distr (numbers ,nmcelem ,unique_nitems ,rest );
900+
901+ /* ignore hist[nhist-1], which is the average not a histogram member */
902+ hist_part = calc_hist (hist ,nhist - 1 ,unique_nitems );
903+
904+ selec = 0.0f ;
905+ for (i = 0 ;i <=unique_nitems ;i ++ )
928906{
929- /* We don't have histogram. Use a rough estimate. */
930- selec = mult ;
907+ /*
908+ * mult * dist[i] / mcelem_dist[i] gives us probability of qual
909+ * matching from assumption of independent element occurrence with
910+ * the condition that distinct element count = i.
911+ */
912+ if (mcelem_dist [i ]> 0 )
913+ selec += hist_part [i ]* mult * dist [i ] /mcelem_dist [i ];
931914}
932915
916+ pfree (dist );
917+ pfree (mcelem_dist );
918+ pfree (hist_part );
933919pfree (elem_selec );
934920
935921/* Take into account occurrence of NULL element. */