NotificationsYou must be signed in to change notification settings
Fork28
Star153

Commite2eed78

committed

Remove useless "rough estimate" path from mcelem_array_contained_selec.

The code in this function that tried to cope with a missing count histogramwas quite ineffective for anything except a perfectly flat distribution.Furthermore, since we were already punting for missing MCELEM slot, it'srather useless to sweat over missing DECHIST: there are no cases whereANALYZE will create the first but not the second. So just simplify thecode by punting rather than pretending we can do something useful.

1 parent4fb694a commite2eed78Copy full SHA for e2eed78

File tree

1 file changed

+62

-76

lines changed

src/backend/utils/adt
- array_selfuncs.c

1 file changed

+62

-76

lines changed

`‎src/backend/utils/adt/array_selfuncs.c‎`

Lines changed: 62 additions & 76 deletions

Original file line number	Diff line number	Diff line change
`@@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root,`
`242`	`242`	`}`
`243`	`243`
`244`	`244`	`/*`
`245`		`- * arraycontsel -- restriction selectivity for "arraycolumn @> const",`
`246`		`- * "arraycolumn && const" or "arraycolumn <@ const"`
	`245`	`+ * arraycontsel -- restriction selectivity for array @>, &&, <@ operators`
`247`	`246`	`*/`
`248`	`247`	`Datum`
`249`	`248`	`arraycontsel(PG_FUNCTION_ARGS)`
`@@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS)`
`323`	`322`	`}`
`324`	`323`
`325`	`324`	`/*`
`326`		`- * arraycontjoinsel -- join selectivity for "arraycolumn @> const",`
`327`		`- * "arraycolumn && const" or "arraycolumn <@ const"`
	`325`	`+ * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators`
`328`	`326`	`*/`
`329`	`327`	`Datum`
`330`	`328`	`arraycontjoinsel(PG_FUNCTION_ARGS)`
`@@ -744,18 +742,18 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,`
`744`	`742`	`if (numbers==NULL\|\|nnumbers!=nmcelem+3)`
`745`	`743`	`returnDEFAULT_CONTAIN_SEL;`
`746`	`744`
	`745`	`+/* Can't do much without a count histogram, either */`
	`746`	`+if (hist==NULL\|\|nhist<3)`
	`747`	`+returnDEFAULT_CONTAIN_SEL;`
	`748`	`+`
`747`	`749`	`/*`
`748`	`750`	`* Grab some of the summary statistics that compute_array_stats() stores:`
`749`	`751`	`* lowest frequency, frequency of null elements, and average distinct`
`750`	`752`	`* element count.`
`751`	`753`	`*/`
`752`	`754`	`minfreq=numbers[nmcelem];`
`753`	`755`	`nullelem_freq=numbers[nmcelem+2];`
`754`		`-`
`755`		`-if (hist&&nhist>0)`
`756`		`-avg_count=hist[nhist-1];`
`757`		`-else`
`758`		`-avg_count=10.0f;/* default assumption */`
	`756`	`+avg_count=hist[nhist-1];`
`759`	`757`
`760`	`758`	`/*`
`761`	`759`	`* "rest" will be the sum of the frequencies of all elements not`
`@@ -853,83 +851,71 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,`
`853`	`851`	`*/`
`854`	`852`	`mult *=exp(-rest);`
`855`	`853`
`856`		`-/* Check we have nonempty distinct element count histogram */`
`857`		`-if (hist&&nhist >=3)`
`858`		`-{`
`859`		`-/*----------`
`860`		`- * Using the distinct element count histogram requires`
`861`		`- O(unique_nitems (nmcelem + unique_nitems))`
`862`		`- * operations. Beyond a certain computational cost threshold, it's`
`863`		`- * reasonable to sacrifice accuracy for decreased planning time.`
`864`		`- * We limit the number of operations to EFFORT * nmcelem; since`
`865`		`- * nmcelem is limited by the column's statistics target, the work`
`866`		`- * done is user-controllable.`
`867`		`- *`
`868`		`- * If the number of operations would be too large, we can reduce it`
`869`		`- * without losing all accuracy by reducing unique_nitems and`
`870`		`- * considering only the most-common elements of the constant array.`
`871`		`- * To make the results exactly match what we would have gotten with`
`872`		`- * only those elements to start with, we'd have to remove any`
`873`		`- * discarded elements' frequencies from "mult", but since this is only`
`874`		`- * an approximation anyway, we don't bother with that. Therefore it's`
`875`		`- * sufficient to qsort elem_selec[] and take the largest elements.`
`876`		`- * (They will no longer match up with the elements of array_data[],`
`877`		`- * but we don't care.)`
`878`		`- *----------`
`879`		`- */`
	`854`	`+/*----------`
	`855`	`+ * Using the distinct element count histogram requires`
	`856`	`+ O(unique_nitems (nmcelem + unique_nitems))`
	`857`	`+ * operations. Beyond a certain computational cost threshold, it's`
	`858`	`+ * reasonable to sacrifice accuracy for decreased planning time. We limit`
	`859`	`+ * the number of operations to EFFORT * nmcelem; since nmcelem is limited`
	`860`	`+ * by the column's statistics target, the work done is user-controllable.`
	`861`	`+ *`
	`862`	`+ * If the number of operations would be too large, we can reduce it`
	`863`	`+ * without losing all accuracy by reducing unique_nitems and considering`
	`864`	`+ * only the most-common elements of the constant array. To make the`
	`865`	`+ * results exactly match what we would have gotten with only those`
	`866`	`+ * elements to start with, we'd have to remove any discarded elements'`
	`867`	`+ * frequencies from "mult", but since this is only an approximation`
	`868`	`+ * anyway, we don't bother with that. Therefore it's sufficient to qsort`
	`869`	`+ * elem_selec[] and take the largest elements. (They will no longer match`
	`870`	`+ * up with the elements of array_data[], but we don't care.)`
	`871`	`+ *----------`
	`872`	`+ */`
`880`	`873`	`#defineEFFORT 100`
`881`	`874`
`882`		`-if ((nmcelem+unique_nitems)>0&&`
`883`		`-unique_nitems>EFFORT*nmcelem / (nmcelem+unique_nitems))`
`884`		`-{`
`885`		`-/*`
`886`		`- * Use the quadratic formula to solve for largest allowable N;`
`887`		`- * we have A = 1, B = nmcelem, C = - EFFORT * nmcelem.`
`888`		`- */`
`889`		`-doubleb= (double)nmcelem;`
`890`		`-intn;`
`891`		`-`
`892`		`-n= (int) ((sqrt(bb+4EFFORT*b)-b) /2);`
`893`		`-`
`894`		`-/* Sort, then take just the first n elements */`
`895`		`-qsort(elem_selec,unique_nitems,sizeof(float),`
`896`		`-float_compare_desc);`
`897`		`-unique_nitems=n;`
`898`		`-}`
`899`		`-`
	`875`	`+if ((nmcelem+unique_nitems)>0&&`
	`876`	`+unique_nitems>EFFORT*nmcelem / (nmcelem+unique_nitems))`
	`877`	`+{`
`900`	`878`	`/*`
`901`		`- * Calculate probabilities of each distinct element count for both`
`902`		`- * mcelems and constant elements. At this point, assume independent`
`903`		`- * element occurrence.`
	`879`	`+ * Use the quadratic formula to solve for largest allowable N. We`
	`880`	`+ * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.`
`904`	`881`	`*/`
`905`		`-dist=calc_distr(elem_selec,unique_nitems,unique_nitems,0.0f);`
`906`		`-mcelem_dist=calc_distr(numbers,nmcelem,unique_nitems,rest);`
	`882`	`+doubleb=(double)nmcelem;`
	`883`	`+intn;`
`907`	`884`
`908`		`-/* ignore hist[nhist-1], which is the avg not a histogram member */`
`909`		`-hist_part=calc_hist(hist,nhist-1,unique_nitems);`
	`885`	`+n= (int) ((sqrt(bb+4EFFORT*b)-b) /2);`
`910`	`886`
`911`		`-selec=0.0f;`
`912`		`-for (i=0;i <=unique_nitems;i++)`
`913`		`-{`
`914`		`-/*`
`915`		`- * mult * dist[i] / mcelem_dist[i] gives us probability of qual`
`916`		`- * matching from assumption of independent element occurrence with`
`917`		`- * the condition that distinct element count = i.`
`918`		`- */`
`919`		`-if (mcelem_dist[i]>0)`
`920`		`-selec+=hist_part[i]multdist[i] /mcelem_dist[i];`
`921`		`-}`
`922`		`-`
`923`		`-pfree(dist);`
`924`		`-pfree(mcelem_dist);`
`925`		`-pfree(hist_part);`
	`887`	`+/* Sort, then take just the first n elements */`
	`888`	`+qsort(elem_selec,unique_nitems,sizeof(float),`
	`889`	`+float_compare_desc);`
	`890`	`+unique_nitems=n;`
`926`	`891`	`}`
`927`		`-else`
	`892`	`+`
	`893`	`+/*`
	`894`	`+ * Calculate probabilities of each distinct element count for both`
	`895`	`+ * mcelems and constant elements. At this point, assume independent`
	`896`	`+ * element occurrence.`
	`897`	`+ */`
	`898`	`+dist=calc_distr(elem_selec,unique_nitems,unique_nitems,0.0f);`
	`899`	`+mcelem_dist=calc_distr(numbers,nmcelem,unique_nitems,rest);`
	`900`	`+`
	`901`	`+/* ignore hist[nhist-1], which is the average not a histogram member */`
	`902`	`+hist_part=calc_hist(hist,nhist-1,unique_nitems);`
	`903`	`+`
	`904`	`+selec=0.0f;`
	`905`	`+for (i=0;i <=unique_nitems;i++)`
`928`	`906`	`{`
`929`		`-/* We don't have histogram. Use a rough estimate. */`
`930`		`-selec=mult;`
	`907`	`+/*`
	`908`	`+ * mult * dist[i] / mcelem_dist[i] gives us probability of qual`
	`909`	`+ * matching from assumption of independent element occurrence with`
	`910`	`+ * the condition that distinct element count = i.`
	`911`	`+ */`
	`912`	`+if (mcelem_dist[i]>0)`
	`913`	`+selec+=hist_part[i]multdist[i] /mcelem_dist[i];`
`931`	`914`	`}`
`932`	`915`
	`916`	`+pfree(dist);`
	`917`	`+pfree(mcelem_dist);`
	`918`	`+pfree(hist_part);`
`933`	`919`	`pfree(elem_selec);`
`934`	`920`
`935`	`921`	`/* Take into account occurrence of NULL element. */`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commite2eed78

File tree

1 file changed

1 file changed

`‎src/backend/utils/adt/array_selfuncs.c‎`

0 commit comments