|
110 | 110 | #include"optimizer/predtest.h"
|
111 | 111 | #include"optimizer/restrictinfo.h"
|
112 | 112 | #include"optimizer/var.h"
|
| 113 | +#include"parser/parse_clause.h" |
113 | 114 | #include"parser/parse_coerce.h"
|
114 | 115 | #include"parser/parsetree.h"
|
115 | 116 | #include"utils/builtins.h"
|
@@ -255,10 +256,11 @@ var_eq_const(VariableStatData *vardata, Oid operator,
|
255 | 256 | return0.0;
|
256 | 257 |
|
257 | 258 | /*
|
258 |
| - * If we matched the var to a unique index, assume there is exactly one |
259 |
| - * match regardless of anything else. (This is slightly bogus, since the |
260 |
| - * index's equality operator might be different from ours, but it's more |
261 |
| - * likely to be right than ignoring the information.) |
| 259 | + * If we matched the var to a unique index or DISTINCT clause, assume |
| 260 | + * there is exactly one match regardless of anything else. (This is |
| 261 | + * slightly bogus, since the index or clause's equality operator might be |
| 262 | + * different from ours, but it's much more likely to be right than |
| 263 | + * ignoring the information.) |
262 | 264 | */
|
263 | 265 | if (vardata->isunique&&vardata->rel&&vardata->rel->tuples >=1.0)
|
264 | 266 | return1.0 /vardata->rel->tuples;
|
@@ -389,10 +391,11 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
|
389 | 391 | boolisdefault;
|
390 | 392 |
|
391 | 393 | /*
|
392 |
| - * If we matched the var to a unique index, assume there is exactly one |
393 |
| - * match regardless of anything else. (This is slightly bogus, since the |
394 |
| - * index's equality operator might be different from ours, but it's more |
395 |
| - * likely to be right than ignoring the information.) |
| 394 | + * If we matched the var to a unique index or DISTINCT clause, assume |
| 395 | + * there is exactly one match regardless of anything else. (This is |
| 396 | + * slightly bogus, since the index or clause's equality operator might be |
| 397 | + * different from ours, but it's much more likely to be right than |
| 398 | + * ignoring the information.) |
396 | 399 | */
|
397 | 400 | if (vardata->isunique&&vardata->rel&&vardata->rel->tuples >=1.0)
|
398 | 401 | return1.0 /vardata->rel->tuples;
|
@@ -4128,10 +4131,11 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
|
4128 | 4131 | * atttype, atttypmod: type data to pass to get_attstatsslot(). This is
|
4129 | 4132 | *   commonly the same as the exposed type of the variable argument,
|
4130 | 4133 | *   but can be different in binary-compatible-type cases.
|
4131 |
| - * isunique: TRUE if we were able to match the var to a unique index, |
4132 |
| - *   implying its values are unique for this query. (Caution: this |
4133 |
| - *   should be trusted for statistical purposes only, since we do not |
4134 |
| - *   check indimmediate.) |
| 4134 | + * isunique: TRUE if we were able to match the var to a unique index or a |
| 4135 | + *   single-column DISTINCT clause, implying its values are unique for |
| 4136 | + *   this query. (Caution: this should be trusted for statistical |
| 4137 | + *   purposes only, since we do not check indimmediate nor verify that |
| 4138 | + *   the exact same definition of equality applies.) |
4135 | 4139 | *
|
4136 | 4140 | * Caller is responsible for doing ReleaseVariableStats() before exiting.
|
4137 | 4141 | */
|
@@ -4357,32 +4361,21 @@ examine_simple_variable(PlannerInfo *root, Var *var,
|
4357 | 4361 | {
|
4358 | 4362 | /*
|
4359 | 4363 | * Plain subquery (not one that was converted to an appendrel).
|
4360 |
| - * |
4361 |
| - * Punt if subquery uses set operations, GROUP BY, or DISTINCT --- any |
4362 |
| - * of these will mash underlying columns' stats beyond recognition. |
4363 |
| - * (Set ops are particularly nasty; if we forged ahead, we would |
4364 |
| - * return stats relevant to only the leftmost subselect...) |
4365 | 4364 | */
|
4366 | 4365 | Query*subquery=rte->subquery;
|
4367 | 4366 | RelOptInfo*rel;
|
4368 | 4367 | TargetEntry*ste;
|
4369 | 4368 |
|
4370 |
| -if (subquery->setOperations|| |
4371 |
| -subquery->groupClause|| |
4372 |
| -subquery->distinctClause) |
4373 |
| -return; |
4374 |
| - |
4375 | 4369 | /*
|
4376 |
| - * If the sub-query originated from a view with the security_barrier |
4377 |
| - * attribute, we treat it as a black-box from outside of the view. |
4378 |
| - * This is probably a harsher restriction than necessary; it's |
4379 |
| - * certainly OK for the selectivity estimator (which is a C function, |
4380 |
| - * and therefore omnipotent anyway) to look at the statistics. But |
4381 |
| - * many selectivity estimators will happily *invoke the operator |
4382 |
| - * function* to try to work out a good estimate - and that's not OK. |
4383 |
| - * So for now, we do this. |
| 4370 | + * Punt if subquery uses set operations or GROUP BY, as these will |
| 4371 | + * mash underlying columns' stats beyond recognition. (Set ops are |
| 4372 | + * particularly nasty; if we forged ahead, we would return stats |
| 4373 | + * relevant to only the leftmost subselect...) DISTINCT is also |
| 4374 | + * problematic, but we check that later because there is a possibility |
| 4375 | + * of learning something even with it. |
4384 | 4376 | */
|
4385 |
| -if (rte->security_barrier) |
| 4377 | +if (subquery->setOperations|| |
| 4378 | +subquery->groupClause) |
4386 | 4379 | return;
|
4387 | 4380 |
|
4388 | 4381 | /*
|
@@ -4415,6 +4408,37 @@ examine_simple_variable(PlannerInfo *root, Var *var,
|
4415 | 4408 | rte->eref->aliasname,var->varattno);
|
4416 | 4409 | var= (Var*)ste->expr;
|
4417 | 4410 |
|
| 4411 | +/* |
| 4412 | + * If subquery uses DISTINCT, we can't make use of any stats for the |
| 4413 | + * variable ... but, if it's the only DISTINCT column, we are entitled |
| 4414 | + * to consider it unique. We do the test this way so that it works |
| 4415 | + * for cases involving DISTINCT ON. |
| 4416 | + */ |
| 4417 | +if (subquery->distinctClause) |
| 4418 | +{ |
| 4419 | +if (list_length(subquery->distinctClause)==1&& |
| 4420 | +targetIsInSortList(ste,InvalidOid,subquery->distinctClause)) |
| 4421 | +vardata->isunique= true; |
| 4422 | +/* cannot go further */ |
| 4423 | +return; |
| 4424 | +} |
| 4425 | + |
| 4426 | +/* |
| 4427 | + * If the sub-query originated from a view with the security_barrier |
| 4428 | + * attribute, we must not look at the variable's statistics, though |
| 4429 | + * it seems all right to notice the existence of a DISTINCT clause. |
| 4430 | + * So stop here. |
| 4431 | + * |
| 4432 | + * This is probably a harsher restriction than necessary; it's |
| 4433 | + * certainly OK for the selectivity estimator (which is a C function, |
| 4434 | + * and therefore omnipotent anyway) to look at the statistics. But |
| 4435 | + * many selectivity estimators will happily *invoke the operator |
| 4436 | + * function* to try to work out a good estimate - and that's not OK. |
| 4437 | + * So for now, don't dig down for stats. |
| 4438 | + */ |
| 4439 | +if (rte->security_barrier) |
| 4440 | +return; |
| 4441 | + |
4418 | 4442 | /* Can only handle a simple Var of subquery's query level */
|
4419 | 4443 | if (var&&IsA(var,Var)&&
|
4420 | 4444 | var->varlevelsup==0)
|
@@ -4513,10 +4537,10 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
|
4513 | 4537 | }
|
4514 | 4538 |
|
4515 | 4539 | /*
|
4516 |
| - * If there is a unique index for the variable, assume it is unique no |
4517 |
| - * matter what pg_statistic says; the statistics could be out of date, or |
4518 |
| - * we might have found a partial unique index that proves the var is |
4519 |
| - * unique for this query. |
| 4540 | + * If there is a unique index or DISTINCT clause for the variable, assume |
| 4541 | + * it is unique no matter what pg_statistic says; the statistics could be |
| 4542 | + * out of date, or we might have found a partial unique index that proves |
| 4543 | + * the var is unique for this query. |
4520 | 4544 | */
|
4521 | 4545 | if (vardata->isunique)
|
4522 | 4546 | stadistinct=-1.0;
|
|