Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit512c735

Browse files
committed
Fix <> and pattern-NOT-match estimators to handle nulls correctly.
These estimators returned 1 minus the corresponding equality/matchestimate, which is incorrect: we need to subtract off the fractionof nulls in the column, since those are neither equal nor not equalto the comparison value. The error only becomes obvious if thenullfrac is large, but it could be very bad in a mostly-nullscolumn, as reported in bug #14676 from Marko Tiikkaja.To fix the <> case, refactor eqsel() and neqsel() to call a commonsupport routine, which can be made to account for nullfrac correctly.The pattern-match cases were already factored that way, and it wassimply an oversight that patternsel() wasn't subtracting off nullfrac.neqjoinsel() has a similar problem, but since we're elsewhere discussingchanging its behavior entirely, I left it alone for now.This is a very longstanding bug, but I'm hesitant to back-patch a fix forit. Given the lack of prior complaints, such cases must not come up often,so it's probably not worth the risk of destabilizing plans in stablebranches.Discussion:https://postgr.es/m/20170529153847.4275.95416@wrigleys.postgresql.org
1 parent2388658 commit512c735

File tree

1 file changed

+103
-60
lines changed

1 file changed

+103
-60
lines changed

‎src/backend/utils/adt/selfuncs.c

Lines changed: 103 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,13 @@
154154
get_relation_stats_hook_typeget_relation_stats_hook=NULL;
155155
get_index_stats_hook_typeget_index_stats_hook=NULL;
156156

157+
staticdoubleeqsel_internal(PG_FUNCTION_ARGS,boolnegate);
157158
staticdoublevar_eq_const(VariableStatData*vardata,Oidoperator,
158159
Datumconstval,boolconstisnull,
159-
boolvaronleft);
160+
boolvaronleft,boolnegate);
160161
staticdoublevar_eq_non_const(VariableStatData*vardata,Oidoperator,
161162
Node*other,
162-
boolvaronleft);
163+
boolvaronleft,boolnegate);
163164
staticdoubleineq_histogram_selectivity(PlannerInfo*root,
164165
VariableStatData*vardata,
165166
FmgrInfo*opproc,boolisgt,
@@ -226,6 +227,15 @@ static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
226227
*/
227228
Datum
228229
eqsel(PG_FUNCTION_ARGS)
230+
{
231+
PG_RETURN_FLOAT8((float8)eqsel_internal(fcinfo, false));
232+
}
233+
234+
/*
235+
* Common code for eqsel() and neqsel()
236+
*/
237+
staticdouble
238+
eqsel_internal(PG_FUNCTION_ARGS,boolnegate)
229239
{
230240
PlannerInfo*root= (PlannerInfo*)PG_GETARG_POINTER(0);
231241
Oidoperator=PG_GETARG_OID(1);
@@ -236,13 +246,27 @@ eqsel(PG_FUNCTION_ARGS)
236246
boolvaronleft;
237247
doubleselec;
238248

249+
/*
250+
* When asked about <>, we do the estimation using the corresponding =
251+
* operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
252+
*/
253+
if (negate)
254+
{
255+
operator=get_negator(operator);
256+
if (!OidIsValid(operator))
257+
{
258+
/* Use default selectivity (should we raise an error instead?) */
259+
return1.0-DEFAULT_EQ_SEL;
260+
}
261+
}
262+
239263
/*
240264
* If expression is not variable = something or something = variable, then
241265
* punt and return a default estimate.
242266
*/
243267
if (!get_restriction_variable(root,args,varRelid,
244268
&vardata,&other,&varonleft))
245-
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
269+
returnnegate ? (1.0-DEFAULT_EQ_SEL) :DEFAULT_EQ_SEL;
246270

247271
/*
248272
* We can do a lot better if the something is a constant. (Note: the
@@ -253,14 +277,14 @@ eqsel(PG_FUNCTION_ARGS)
253277
selec=var_eq_const(&vardata,operator,
254278
((Const*)other)->constvalue,
255279
((Const*)other)->constisnull,
256-
varonleft);
280+
varonleft,negate);
257281
else
258282
selec=var_eq_non_const(&vardata,operator,other,
259-
varonleft);
283+
varonleft,negate);
260284

261285
ReleaseVariableStats(vardata);
262286

263-
PG_RETURN_FLOAT8((float8)selec);
287+
returnselec;
264288
}
265289

266290
/*
@@ -271,19 +295,32 @@ eqsel(PG_FUNCTION_ARGS)
271295
staticdouble
272296
var_eq_const(VariableStatData*vardata,Oidoperator,
273297
Datumconstval,boolconstisnull,
274-
boolvaronleft)
298+
boolvaronleft,boolnegate)
275299
{
276300
doubleselec;
301+
doublenullfrac=0.0;
277302
boolisdefault;
278303
Oidopfuncoid;
279304

280305
/*
281306
* If the constant is NULL, assume operator is strict and return zero, ie,
282-
* operator will never return TRUE.
307+
* operator will never return TRUE. (It's zero even for a negator op.)
283308
*/
284309
if (constisnull)
285310
return0.0;
286311

312+
/*
313+
* Grab the nullfrac for use below. Note we allow use of nullfrac
314+
* regardless of security check.
315+
*/
316+
if (HeapTupleIsValid(vardata->statsTuple))
317+
{
318+
Form_pg_statisticstats;
319+
320+
stats= (Form_pg_statistic)GETSTRUCT(vardata->statsTuple);
321+
nullfrac=stats->stanullfrac;
322+
}
323+
287324
/*
288325
* If we matched the var to a unique index or DISTINCT clause, assume
289326
* there is exactly one match regardless of anything else. (This is
@@ -292,11 +329,12 @@ var_eq_const(VariableStatData *vardata, Oid operator,
292329
* ignoring the information.)
293330
*/
294331
if (vardata->isunique&&vardata->rel&&vardata->rel->tuples >=1.0)
295-
return1.0 /vardata->rel->tuples;
296-
297-
if (HeapTupleIsValid(vardata->statsTuple)&&
298-
statistic_proc_security_check(vardata,
299-
(opfuncoid=get_opcode(operator))))
332+
{
333+
selec=1.0 /vardata->rel->tuples;
334+
}
335+
elseif (HeapTupleIsValid(vardata->statsTuple)&&
336+
statistic_proc_security_check(vardata,
337+
(opfuncoid=get_opcode(operator))))
300338
{
301339
Form_pg_statisticstats;
302340
AttStatsSlotsslot;
@@ -363,7 +401,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
363401

364402
for (i=0;i<sslot.nnumbers;i++)
365403
sumcommon+=sslot.numbers[i];
366-
selec=1.0-sumcommon-stats->stanullfrac;
404+
selec=1.0-sumcommon-nullfrac;
367405
CLAMP_PROBABILITY(selec);
368406

369407
/*
@@ -396,6 +434,10 @@ var_eq_const(VariableStatData *vardata, Oid operator,
396434
selec=1.0 /get_variable_numdistinct(vardata,&isdefault);
397435
}
398436

437+
/* now adjust if we wanted <> rather than = */
438+
if (negate)
439+
selec=1.0-selec-nullfrac;
440+
399441
/* result should be in range, but make sure... */
400442
CLAMP_PROBABILITY(selec);
401443

@@ -408,11 +450,23 @@ var_eq_const(VariableStatData *vardata, Oid operator,
408450
staticdouble
409451
var_eq_non_const(VariableStatData*vardata,Oidoperator,
410452
Node*other,
411-
boolvaronleft)
453+
boolvaronleft,boolnegate)
412454
{
413455
doubleselec;
456+
doublenullfrac=0.0;
414457
boolisdefault;
415458

459+
/*
460+
* Grab the nullfrac for use below.
461+
*/
462+
if (HeapTupleIsValid(vardata->statsTuple))
463+
{
464+
Form_pg_statisticstats;
465+
466+
stats= (Form_pg_statistic)GETSTRUCT(vardata->statsTuple);
467+
nullfrac=stats->stanullfrac;
468+
}
469+
416470
/*
417471
* If we matched the var to a unique index or DISTINCT clause, assume
418472
* there is exactly one match regardless of anything else. (This is
@@ -421,9 +475,10 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
421475
* ignoring the information.)
422476
*/
423477
if (vardata->isunique&&vardata->rel&&vardata->rel->tuples >=1.0)
424-
return1.0 /vardata->rel->tuples;
425-
426-
if (HeapTupleIsValid(vardata->statsTuple))
478+
{
479+
selec=1.0 /vardata->rel->tuples;
480+
}
481+
elseif (HeapTupleIsValid(vardata->statsTuple))
427482
{
428483
Form_pg_statisticstats;
429484
doublendistinct;
@@ -441,7 +496,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
441496
* values, regardless of their frequency in the table. Is that a good
442497
* idea?)
443498
*/
444-
selec=1.0-stats->stanullfrac;
499+
selec=1.0-nullfrac;
445500
ndistinct=get_variable_numdistinct(vardata,&isdefault);
446501
if (ndistinct>1)
447502
selec /=ndistinct;
@@ -469,6 +524,10 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
469524
selec=1.0 /get_variable_numdistinct(vardata,&isdefault);
470525
}
471526

527+
/* now adjust if we wanted <> rather than = */
528+
if (negate)
529+
selec=1.0-selec-nullfrac;
530+
472531
/* result should be in range, but make sure... */
473532
CLAMP_PROBABILITY(selec);
474533

@@ -485,33 +544,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
485544
Datum
486545
neqsel(PG_FUNCTION_ARGS)
487546
{
488-
PlannerInfo*root= (PlannerInfo*)PG_GETARG_POINTER(0);
489-
Oidoperator=PG_GETARG_OID(1);
490-
List*args= (List*)PG_GETARG_POINTER(2);
491-
intvarRelid=PG_GETARG_INT32(3);
492-
Oideqop;
493-
float8result;
494-
495-
/*
496-
* We want 1 - eqsel() where the equality operator is the one associated
497-
* with this != operator, that is, its negator.
498-
*/
499-
eqop=get_negator(operator);
500-
if (eqop)
501-
{
502-
result=DatumGetFloat8(DirectFunctionCall4(eqsel,
503-
PointerGetDatum(root),
504-
ObjectIdGetDatum(eqop),
505-
PointerGetDatum(args),
506-
Int32GetDatum(varRelid)));
507-
}
508-
else
509-
{
510-
/* Use default selectivity (should we raise an error instead?) */
511-
result=DEFAULT_EQ_SEL;
512-
}
513-
result=1.0-result;
514-
PG_RETURN_FLOAT8(result);
547+
PG_RETURN_FLOAT8((float8)eqsel_internal(fcinfo, true));
515548
}
516549

517550
/*
@@ -1114,6 +1147,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11141147
Const*patt;
11151148
Const*prefix=NULL;
11161149
Selectivityrest_selec=0;
1150+
doublenullfrac=0.0;
11171151
doubleresult;
11181152

11191153
/*
@@ -1202,6 +1236,17 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
12021236
returnresult;
12031237
}
12041238

1239+
/*
1240+
* Grab the nullfrac for use below.
1241+
*/
1242+
if (HeapTupleIsValid(vardata.statsTuple))
1243+
{
1244+
Form_pg_statisticstats;
1245+
1246+
stats= (Form_pg_statistic)GETSTRUCT(vardata.statsTuple);
1247+
nullfrac=stats->stanullfrac;
1248+
}
1249+
12051250
/*
12061251
* Pull out any fixed prefix implied by the pattern, and estimate the
12071252
* fractional selectivity of the remainder of the pattern. Unlike many of
@@ -1252,7 +1297,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
12521297
if (eqopr==InvalidOid)
12531298
elog(ERROR,"no = operator for opfamily %u",opfamily);
12541299
result=var_eq_const(&vardata,eqopr,prefix->constvalue,
1255-
false, true);
1300+
false, true, false);
12561301
}
12571302
else
12581303
{
@@ -1275,8 +1320,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
12751320
Selectivityselec;
12761321
inthist_size;
12771322
FmgrInfoopproc;
1278-
doublenullfrac,
1279-
mcv_selec,
1323+
doublemcv_selec,
12801324
sumcommon;
12811325

12821326
/* Try to use the histogram entries to get selectivity */
@@ -1328,24 +1372,23 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
13281372
mcv_selec=mcv_selectivity(&vardata,&opproc,constval, true,
13291373
&sumcommon);
13301374

1331-
if (HeapTupleIsValid(vardata.statsTuple))
1332-
nullfrac= ((Form_pg_statistic)GETSTRUCT(vardata.statsTuple))->stanullfrac;
1333-
else
1334-
nullfrac=0.0;
1335-
13361375
/*
13371376
* Now merge the results from the MCV and histogram calculations,
13381377
* realizing that the histogram covers only the non-null values that
13391378
* are not listed in MCV.
13401379
*/
13411380
selec *=1.0-nullfrac-sumcommon;
13421381
selec+=mcv_selec;
1343-
1344-
/* result should be in range, but make sure... */
1345-
CLAMP_PROBABILITY(selec);
13461382
result=selec;
13471383
}
13481384

1385+
/* now adjust if we wanted not-match rather than match */
1386+
if (negate)
1387+
result=1.0-result-nullfrac;
1388+
1389+
/* result should be in range, but make sure... */
1390+
CLAMP_PROBABILITY(result);
1391+
13491392
if (prefix)
13501393
{
13511394
pfree(DatumGetPointer(prefix->constvalue));
@@ -1354,7 +1397,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
13541397

13551398
ReleaseVariableStats(vardata);
13561399

1357-
returnnegate ? (1.0-result) :result;
1400+
returnresult;
13581401
}
13591402

13601403
/*
@@ -1451,7 +1494,7 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
14511494
* compute the selectivity as if that is what we have.
14521495
*/
14531496
selec=var_eq_const(&vardata,BooleanEqualOperator,
1454-
BoolGetDatum(true), false, true);
1497+
BoolGetDatum(true), false, true, false);
14551498
}
14561499
elseif (is_funcclause(arg))
14571500
{
@@ -5788,7 +5831,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
57885831
if (cmpopr==InvalidOid)
57895832
elog(ERROR,"no = operator for opfamily %u",opfamily);
57905833
eq_sel=var_eq_const(vardata,cmpopr,prefixcon->constvalue,
5791-
false, true);
5834+
false, true, false);
57925835

57935836
prefixsel=Max(prefixsel,eq_sel);
57945837

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp