1717 */
1818#include "postgres.h"
1919
20+ #include <math.h>
21+
2022#include "access/htup_details.h"
2123#include "catalog/pg_operator.h"
2224#include "catalog/pg_statistic.h"
@@ -397,6 +399,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
397399ATTSTATSSLOT_VALUES )))
398400return -1.0 ;
399401
402+ /* check that it's a histogram, not just a dummy entry */
403+ if (hslot .nvalues < 2 )
404+ {
405+ free_attstatsslot (& hslot );
406+ return -1.0 ;
407+ }
408+
400409/*
401410 * Convert histogram of ranges into histograms of its lower and upper
402411 * bounds.
@@ -683,7 +692,8 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
683692/*
684693 * Both bounds are finite. Assuming the subtype's comparison function
685694 * works sanely, the value must be finite, too, because it lies
686- * somewhere between the bounds. If it doesn't, just return something.
695+ * somewhere between the bounds. If it doesn't, arbitrarily return
696+ * 0.5.
687697 */
688698if (value -> infinite )
689699return 0.5 ;
@@ -693,21 +703,22 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
693703return 0.5 ;
694704
695705/* Calculate relative position using subdiff function. */
696- bin_width = DatumGetFloat8 (FunctionCall2Coll (
697- & typcache -> rng_subdiff_finfo ,
706+ bin_width = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
698707typcache -> rng_collation ,
699708hist2 -> val ,
700709hist1 -> val ));
701- if (bin_width <=0.0 )
702- return 0.5 ;/*zero width bin */
710+ if (isnan ( bin_width ) || bin_width <=0.0 )
711+ return 0.5 ;/*punt for NaN or zero- width bin */
703712
704- position = DatumGetFloat8 (FunctionCall2Coll (
705- & typcache -> rng_subdiff_finfo ,
713+ position = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
706714typcache -> rng_collation ,
707715value -> val ,
708716hist1 -> val ))
709717/bin_width ;
710718
719+ if (isnan (position ))
720+ return 0.5 ;/* punt for NaN from subdiff, Inf/Inf, etc */
721+
711722/* Relative position must be in [0,1] range */
712723position = Max (position ,0.0 );
713724position = Min (position ,1.0 );
@@ -799,15 +810,23 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
799810if (!bound1 -> infinite && !bound2 -> infinite )
800811{
801812/*
802- *No bounds are infinite, use subdiff function or return default
813+ *Neither bound is infinite, use subdiff function or return default
803814 * value of 1.0 if no subdiff is available.
804815 */
805816if (has_subdiff )
806- return
807- DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
808- typcache -> rng_collation ,
809- bound2 -> val ,
810- bound1 -> val ));
817+ {
818+ float8 res ;
819+
820+ res = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
821+ typcache -> rng_collation ,
822+ bound2 -> val ,
823+ bound1 -> val ));
824+ /* Reject possible NaN result, also negative result */
825+ if (isnan (res )|| res < 0.0 )
826+ return 1.0 ;
827+ else
828+ return res ;
829+ }
811830else
812831return 1.0 ;
813832}
@@ -821,7 +840,7 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
821840}
822841else
823842{
824- /* One bound is infinite,another is not */
843+ /* One bound is infinite,the other is not */
825844return get_float8_infinity ();
826845}
827846}
@@ -1017,17 +1036,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
10171036upper_index = rbound_bsearch (typcache ,upper ,hist_lower ,hist_nvalues ,
10181037 false);
10191038
1039+ /*
1040+ * If the upper bound value is below the histogram's lower limit, there
1041+ * are no matches.
1042+ */
1043+ if (upper_index < 0 )
1044+ return 0.0 ;
1045+
1046+ /*
1047+ * If the upper bound value is at or beyond the histogram's upper limit,
1048+ * start our loop at the last actual bin, as though the upper bound were
1049+ * within that bin; get_position will clamp its result to 1.0 anyway.
1050+ * (This corresponds to assuming that the data population above the
1051+ * histogram's upper limit is empty, exactly like what we just assumed for
1052+ * the lower limit.)
1053+ */
1054+ upper_index = Min (upper_index ,hist_nvalues - 2 );
1055+
10201056/*
10211057 * Calculate upper_bin_width, ie. the fraction of the (upper_index,
10221058 * upper_index + 1) bin which is greater than upper bound of query range
10231059 * using linear interpolation of subdiff function.
10241060 */
1025- if (upper_index >=0 && upper_index < hist_nvalues - 1 )
1026- upper_bin_width = get_position (typcache ,upper ,
1027- & hist_lower [upper_index ],
1028- & hist_lower [upper_index + 1 ]);
1029- else
1030- upper_bin_width = 0.0 ;
1061+ upper_bin_width = get_position (typcache ,upper ,
1062+ & hist_lower [upper_index ],
1063+ & hist_lower [upper_index + 1 ]);
10311064
10321065/*
10331066 * In the loop, dist and prev_dist are the distance of the "current" bin's
@@ -1100,9 +1133,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
11001133 * of ranges that contain the constant lower and upper bounds. This uses
11011134 * the histograms of range lower bounds and range lengths, on the assumption
11021135 * that the range lengths are independent of the lower bounds.
1103- *
1104- * Note, this is "var @> const", ie. estimate the fraction of ranges that
1105- * contain the constant lower and upper bounds.
11061136 */
11071137static double
11081138calc_hist_selectivity_contains (TypeCacheEntry * typcache ,
@@ -1121,16 +1151,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
11211151lower_index = rbound_bsearch (typcache ,lower ,hist_lower ,hist_nvalues ,
11221152 true);
11231153
1154+ /*
1155+ * If the lower bound value is below the histogram's lower limit, there
1156+ * are no matches.
1157+ */
1158+ if (lower_index < 0 )
1159+ return 0.0 ;
1160+
1161+ /*
1162+ * If the lower bound value is at or beyond the histogram's upper limit,
1163+ * start our loop at the last actual bin, as though the upper bound were
1164+ * within that bin; get_position will clamp its result to 1.0 anyway.
1165+ * (This corresponds to assuming that the data population above the
1166+ * histogram's upper limit is empty, exactly like what we just assumed for
1167+ * the lower limit.)
1168+ */
1169+ lower_index = Min (lower_index ,hist_nvalues - 2 );
1170+
11241171/*
11251172 * Calculate lower_bin_width, ie. the fraction of the (lower_index,
11261173 * lower_index + 1) bin which is greater than lower bound of query range
11271174 * using linear interpolation of subdiff function.
11281175 */
1129- if (lower_index >=0 && lower_index < hist_nvalues - 1 )
1130- lower_bin_width = get_position (typcache ,lower ,& hist_lower [lower_index ],
1131- & hist_lower [lower_index + 1 ]);
1132- else
1133- lower_bin_width = 0.0 ;
1176+ lower_bin_width = get_position (typcache ,lower ,& hist_lower [lower_index ],
1177+ & hist_lower [lower_index + 1 ]);
11341178
11351179/*
11361180 * Loop through all the lower bound bins, smaller than the query lower