17
17
*/
18
18
#include "postgres.h"
19
19
20
+ #include <math.h>
21
+
20
22
#include "access/htup_details.h"
21
23
#include "catalog/pg_operator.h"
22
24
#include "catalog/pg_statistic.h"
@@ -397,6 +399,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
397
399
ATTSTATSSLOT_VALUES )))
398
400
return -1.0 ;
399
401
402
+ /* check that it's a histogram, not just a dummy entry */
403
+ if (hslot .nvalues < 2 )
404
+ {
405
+ free_attstatsslot (& hslot );
406
+ return -1.0 ;
407
+ }
408
+
400
409
/*
401
410
* Convert histogram of ranges into histograms of its lower and upper
402
411
* bounds.
@@ -683,7 +692,8 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
683
692
/*
684
693
* Both bounds are finite. Assuming the subtype's comparison function
685
694
* works sanely, the value must be finite, too, because it lies
686
- * somewhere between the bounds. If it doesn't, just return something.
695
+ * somewhere between the bounds. If it doesn't, arbitrarily return
696
+ * 0.5.
687
697
*/
688
698
if (value -> infinite )
689
699
return 0.5 ;
@@ -693,21 +703,22 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
693
703
return 0.5 ;
694
704
695
705
/* Calculate relative position using subdiff function. */
696
- bin_width = DatumGetFloat8 (FunctionCall2Coll (
697
- & typcache -> rng_subdiff_finfo ,
706
+ bin_width = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
698
707
typcache -> rng_collation ,
699
708
hist2 -> val ,
700
709
hist1 -> val ));
701
- if (bin_width <=0.0 )
702
- return 0.5 ;/*zero width bin */
710
+ if (isnan ( bin_width ) || bin_width <=0.0 )
711
+ return 0.5 ;/*punt for NaN or zero- width bin */
703
712
704
- position = DatumGetFloat8 (FunctionCall2Coll (
705
- & typcache -> rng_subdiff_finfo ,
713
+ position = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
706
714
typcache -> rng_collation ,
707
715
value -> val ,
708
716
hist1 -> val ))
709
717
/bin_width ;
710
718
719
+ if (isnan (position ))
720
+ return 0.5 ;/* punt for NaN from subdiff, Inf/Inf, etc */
721
+
711
722
/* Relative position must be in [0,1] range */
712
723
position = Max (position ,0.0 );
713
724
position = Min (position ,1.0 );
@@ -799,15 +810,23 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
799
810
if (!bound1 -> infinite && !bound2 -> infinite )
800
811
{
801
812
/*
802
- *No bounds are infinite, use subdiff function or return default
813
+ * Neither bound is infinite, use subdiff function or return default
803
814
* value of 1.0 if no subdiff is available.
804
815
*/
805
816
if (has_subdiff )
806
- return
807
- DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
808
- typcache -> rng_collation ,
809
- bound2 -> val ,
810
- bound1 -> val ));
817
+ {
818
+ float8 res ;
819
+
820
+ res = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
821
+ typcache -> rng_collation ,
822
+ bound2 -> val ,
823
+ bound1 -> val ));
824
+ /* Reject possible NaN result, also negative result */
825
+ if (isnan (res )|| res < 0.0 )
826
+ return 1.0 ;
827
+ else
828
+ return res ;
829
+ }
811
830
else
812
831
return 1.0 ;
813
832
}
@@ -821,7 +840,7 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
821
840
}
822
841
else
823
842
{
824
- /* One bound is infinite,another is not */
843
+ /* One bound is infinite, the other is not */
825
844
return get_float8_infinity ();
826
845
}
827
846
}
@@ -1017,17 +1036,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
1017
1036
upper_index = rbound_bsearch (typcache ,upper ,hist_lower ,hist_nvalues ,
1018
1037
false);
1019
1038
1039
+ /*
1040
+ * If the upper bound value is below the histogram's lower limit, there
1041
+ * are no matches.
1042
+ */
1043
+ if (upper_index < 0 )
1044
+ return 0.0 ;
1045
+
1046
+ /*
1047
+ * If the upper bound value is at or beyond the histogram's upper limit,
1048
+ * start our loop at the last actual bin, as though the upper bound were
1049
+ * within that bin; get_position will clamp its result to 1.0 anyway.
1050
+ * (This corresponds to assuming that the data population above the
1051
+ * histogram's upper limit is empty, exactly like what we just assumed for
1052
+ * the lower limit.)
1053
+ */
1054
+ upper_index = Min (upper_index ,hist_nvalues - 2 );
1055
+
1020
1056
/*
1021
1057
* Calculate upper_bin_width, ie. the fraction of the (upper_index,
1022
1058
* upper_index + 1) bin which is less than the upper bound of query range
1023
1059
* using linear interpolation of subdiff function.
1024
1060
*/
1025
- if (upper_index >=0 && upper_index < hist_nvalues - 1 )
1026
- upper_bin_width = get_position (typcache ,upper ,
1027
- & hist_lower [upper_index ],
1028
- & hist_lower [upper_index + 1 ]);
1029
- else
1030
- upper_bin_width = 0.0 ;
1061
+ upper_bin_width = get_position (typcache ,upper ,
1062
+ & hist_lower [upper_index ],
1063
+ & hist_lower [upper_index + 1 ]);
1031
1064
1032
1065
/*
1033
1066
* In the loop, dist and prev_dist are the distance of the "current" bin's
@@ -1100,9 +1133,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
1100
1133
* of ranges that contain the constant lower and upper bounds. This uses
1101
1134
* the histograms of range lower bounds and range lengths, on the assumption
1102
1135
* that the range lengths are independent of the lower bounds.
1103
- *
1104
- * Note, this is "var @> const", ie. estimate the fraction of ranges that
1105
- * contain the constant lower and upper bounds.
1106
1136
*/
1107
1137
static double
1108
1138
calc_hist_selectivity_contains (TypeCacheEntry * typcache ,
@@ -1121,16 +1151,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
1121
1151
lower_index = rbound_bsearch (typcache ,lower ,hist_lower ,hist_nvalues ,
1122
1152
true);
1123
1153
1154
+ /*
1155
+ * If the lower bound value is below the histogram's lower limit, there
1156
+ * are no matches.
1157
+ */
1158
+ if (lower_index < 0 )
1159
+ return 0.0 ;
1160
+
1161
+ /*
1162
+ * If the lower bound value is at or beyond the histogram's upper limit,
1163
+ * start our loop at the last actual bin, as though the lower bound were
1164
+ * within that bin; get_position will clamp its result to 1.0 anyway.
1165
+ * (This corresponds to assuming that the data population above the
1166
+ * histogram's upper limit is empty, exactly like what we just assumed for
1167
+ * the lower limit.)
1168
+ */
1169
+ lower_index = Min (lower_index ,hist_nvalues - 2 );
1170
+
1124
1171
/*
1125
1172
* Calculate lower_bin_width, ie. the fraction of the (lower_index,
1126
1173
* lower_index + 1) bin which is less than the lower bound of query range
1127
1174
* using linear interpolation of subdiff function.
1128
1175
*/
1129
- if (lower_index >=0 && lower_index < hist_nvalues - 1 )
1130
- lower_bin_width = get_position (typcache ,lower ,& hist_lower [lower_index ],
1131
- & hist_lower [lower_index + 1 ]);
1132
- else
1133
- lower_bin_width = 0.0 ;
1176
+ lower_bin_width = get_position (typcache ,lower ,& hist_lower [lower_index ],
1177
+ & hist_lower [lower_index + 1 ]);
1134
1178
1135
1179
/*
1136
1180
* Loop through all the lower bound bins, smaller than the query lower