|
7 | 7 | *
|
8 | 8 | *
|
9 | 9 | * IDENTIFICATION
|
10 |
| - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $ |
| 10 | + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $ |
11 | 11 | *
|
12 | 12 | *-------------------------------------------------------------------------
|
13 | 13 | */
|
|
23 | 23 | #include"utils/lsyscache.h"
|
24 | 24 |
|
25 | 25 |
|
| 26 | +/* |
| 27 | + * Data structure for accumulating info about possible range-query |
| 28 | + * clause pairs in clauselist_selectivity. |
| 29 | + */ |
| 30 | +typedefstructRangeQueryClause { |
| 31 | +structRangeQueryClause*next;/* next in linked list */ |
| 32 | +Node*var;/* The common variable of the clauses */ |
| 33 | +boolhave_lobound;/* found a low-bound clause yet? */ |
| 34 | +boolhave_hibound;/* found a high-bound clause yet? */ |
| 35 | +Selectivitylobound;/* Selectivity of a var > something clause */ |
| 36 | +Selectivityhibound;/* Selectivity of a var < something clause */ |
| 37 | +}RangeQueryClause; |
| 38 | + |
| 39 | +staticvoidaddRangeClause(RangeQueryClause**rqlist,Node*clause, |
| 40 | +intflag,boolisLTsel,Selectivitys2); |
| 41 | + |
| 42 | + |
26 | 43 | /****************************************************************************
|
27 | 44 | *ROUTINES TO COMPUTE SELECTIVITIES
|
28 | 45 | ****************************************************************************/
|
@@ -55,29 +72,237 @@ restrictlist_selectivity(Query *root,
|
55 | 72 | * must be returned.
|
56 | 73 | *
|
57 | 74 | * See clause_selectivity() for the meaning of the varRelid parameter.
|
| 75 | + * |
| 76 | + * Our basic approach is to take the product of the selectivities of the |
| 77 | + * subclauses. However, that's only right if the subclauses have independent |
| 78 | + * probabilities, and in reality they are often NOT independent. So, |
| 79 | + * we want to be smarter where we can. |
| 80 | +
|
| 81 | + * Currently, the only extra smarts we have is to recognize "range queries", |
| 82 | + * such as "x > 34 AND x < 42". Clauses are recognized as possible range |
| 83 | + * query components if they are restriction opclauses whose operators have |
| 84 | + * scalarltsel() or scalargtsel() as their restriction selectivity estimator. |
| 85 | + * We pair up clauses of this form that refer to the same variable. An |
| 86 | + * unpairable clause of this kind is simply multiplied into the selectivity |
| 87 | + * product in the normal way. But when we find a pair, we know that the |
| 88 | + * selectivities represent the relative positions of the low and high bounds |
| 89 | + * within the column's range, so instead of figuring the selectivity as |
| 90 | + * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, |
| 91 | + * see that hisel is the fraction of the range below the high bound, while |
| 92 | + * losel is the fraction above the low bound; so hisel can be interpreted |
| 93 | + * directly as a 0..1 value but we need to convert losel to 1-losel before |
| 94 | + * interpreting it as a value. Then the available range is 1-losel to hisel.) |
| 95 | + * If the calculation yields zero or negative, however, we chicken out and |
| 96 | + * use the default interpretation; that probably means that one or both |
| 97 | + * selectivities is a default estimate rather than an actual range value. |
| 98 | + * Of course this is all very dependent on the behavior of |
| 99 | + * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. |
58 | 100 | */
|
59 | 101 | Selectivity
|
60 | 102 | clauselist_selectivity(Query*root,
|
61 | 103 | List*clauses,
|
62 | 104 | intvarRelid)
|
63 | 105 | {
|
64 |
| -Selectivitys1=1.0; |
65 |
| -List*clause; |
| 106 | +Selectivitys1=1.0; |
| 107 | +RangeQueryClause*rqlist=NULL; |
| 108 | +List*clist; |
66 | 109 |
|
67 |
| -/* Use the product of the selectivities of the subclauses. |
68 |
| - * XXX this is too optimistic, since the subclauses |
69 |
| - * are very likely not independent... |
| 110 | +/* |
| 111 | + * Initial scan over clauses. Anything that doesn't look like a |
| 112 | + * potential rangequery clause gets multiplied into s1 and forgotten. |
| 113 | + * Anything that does gets inserted into an rqlist entry. |
70 | 114 | */
|
71 |
| -foreach(clause,clauses) |
| 115 | +foreach(clist,clauses) |
72 | 116 | {
|
73 |
| -Selectivitys2=clause_selectivity(root, |
74 |
| -(Node*)lfirst(clause), |
75 |
| -varRelid); |
| 117 | +Node*clause= (Node*)lfirst(clist); |
| 118 | +Selectivitys2; |
| 119 | + |
| 120 | +/* |
| 121 | + * See if it looks like a restriction clause with a constant. |
| 122 | + * (If it's not a constant we can't really trust the selectivity!) |
| 123 | + * NB: for consistency of results, this fragment of code had |
| 124 | + * better match what clause_selectivity() would do. |
| 125 | + */ |
| 126 | +if (varRelid!=0||NumRelids(clause)==1) |
| 127 | +{ |
| 128 | +intrelidx; |
| 129 | +AttrNumberattno; |
| 130 | +Datumconstval; |
| 131 | +intflag; |
| 132 | + |
| 133 | +get_relattval(clause,varRelid, |
| 134 | +&relidx,&attno,&constval,&flag); |
| 135 | +if (relidx!=0&& (flag&SEL_CONSTANT)) |
| 136 | +{ |
| 137 | +/* if get_relattval succeeded, it must be an opclause */ |
| 138 | +Oidopno= ((Oper*) ((Expr*)clause)->oper)->opno; |
| 139 | +RegProcedureoprrest=get_oprrest(opno); |
| 140 | + |
| 141 | +if (!oprrest) |
| 142 | +s2= (Selectivity)0.5; |
| 143 | +else |
| 144 | +s2=restriction_selectivity(oprrest,opno, |
| 145 | +getrelid(relidx, |
| 146 | +root->rtable), |
| 147 | +attno, |
| 148 | +constval,flag); |
| 149 | +/* |
| 150 | + * If we reach here, we have computed the same result |
| 151 | + * that clause_selectivity would, so we can just use s2 |
| 152 | + * if it's the wrong oprrest. But if it's the right |
| 153 | + * oprrest, add the clause to rqlist for later processing. |
| 154 | + */ |
| 155 | +switch (oprrest) |
| 156 | +{ |
| 157 | +caseF_SCALARLTSEL: |
| 158 | +addRangeClause(&rqlist,clause,flag, true,s2); |
| 159 | +break; |
| 160 | +caseF_SCALARGTSEL: |
| 161 | +addRangeClause(&rqlist,clause,flag, false,s2); |
| 162 | +break; |
| 163 | +default: |
| 164 | +/* Just merge the selectivity in generically */ |
| 165 | +s1=s1*s2; |
| 166 | +break; |
| 167 | +} |
| 168 | +continue;/* drop to loop bottom */ |
| 169 | +} |
| 170 | +} |
| 171 | +/* Not the right form, so treat it generically. */ |
| 172 | +s2=clause_selectivity(root,clause,varRelid); |
76 | 173 | s1=s1*s2;
|
77 | 174 | }
|
| 175 | + |
| 176 | +/* |
| 177 | + * Now scan the rangequery pair list. |
| 178 | + */ |
| 179 | +while (rqlist!=NULL) |
| 180 | +{ |
| 181 | +RangeQueryClause*rqnext; |
| 182 | + |
| 183 | +if (rqlist->have_lobound&&rqlist->have_hibound) |
| 184 | +{ |
| 185 | +/* Successfully matched a pair of range clauses */ |
| 186 | +Selectivitys2=rqlist->hibound+rqlist->lobound-1.0; |
| 187 | + |
| 188 | +if (s2>0.0) |
| 189 | +{ |
| 190 | +/* All our hard work has paid off! */ |
| 191 | +s1 *=s2; |
| 192 | +} |
| 193 | +else |
| 194 | +{ |
| 195 | +/* One or both is probably a default estimate, |
| 196 | + * so punt and just merge them in generically. |
| 197 | + */ |
| 198 | +s1 *=rqlist->hibound*rqlist->lobound; |
| 199 | +} |
| 200 | +} |
| 201 | +else |
| 202 | +{ |
| 203 | +/* Only found one of a pair, merge it in generically */ |
| 204 | +if (rqlist->have_lobound) |
| 205 | +s1 *=rqlist->lobound; |
| 206 | +else |
| 207 | +s1 *=rqlist->hibound; |
| 208 | +} |
| 209 | +/* release storage and advance */ |
| 210 | +rqnext=rqlist->next; |
| 211 | +pfree(rqlist); |
| 212 | +rqlist=rqnext; |
| 213 | +} |
| 214 | + |
78 | 215 | returns1;
|
79 | 216 | }
|
80 | 217 |
|
| 218 | +/* |
| 219 | + * addRangeClause --- add a new range clause for clauselist_selectivity |
| 220 | + * |
| 221 | + * Here is where we try to match up pairs of range-query clauses |
| 222 | + */ |
| 223 | +staticvoid |
| 224 | +addRangeClause(RangeQueryClause**rqlist,Node*clause, |
| 225 | +intflag,boolisLTsel,Selectivitys2) |
| 226 | +{ |
| 227 | +RangeQueryClause*rqelem; |
| 228 | +Node*var; |
| 229 | +boolis_lobound; |
| 230 | + |
| 231 | +/* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */ |
| 232 | +if (flag&SEL_RIGHT) |
| 233 | +{ |
| 234 | +var= (Node*)get_leftop((Expr*)clause); |
| 235 | +is_lobound= !isLTsel;/* x < something is high bound */ |
| 236 | +} |
| 237 | +else |
| 238 | +{ |
| 239 | +var= (Node*)get_rightop((Expr*)clause); |
| 240 | +is_lobound=isLTsel;/* something < x is low bound */ |
| 241 | +} |
| 242 | + |
| 243 | +for (rqelem=*rqlist;rqelem;rqelem=rqelem->next) |
| 244 | +{ |
| 245 | +/* We use full equal() here because the "var" might be a function |
| 246 | + * of one or more attributes of the same relation... |
| 247 | + */ |
| 248 | +if (!equal(var,rqelem->var)) |
| 249 | +continue; |
| 250 | +/* Found the right group to put this clause in */ |
| 251 | +if (is_lobound) |
| 252 | +{ |
| 253 | +if (!rqelem->have_lobound) |
| 254 | +{ |
| 255 | +rqelem->have_lobound= true; |
| 256 | +rqelem->lobound=s2; |
| 257 | +} |
| 258 | +else |
| 259 | +{ |
| 260 | +/* We have found two similar clauses, such as |
| 261 | + * x < y AND x < z. Keep only the more restrictive one. |
| 262 | + */ |
| 263 | +if (rqelem->lobound>s2) |
| 264 | +rqelem->lobound=s2; |
| 265 | +} |
| 266 | +} |
| 267 | +else |
| 268 | +{ |
| 269 | +if (!rqelem->have_hibound) |
| 270 | +{ |
| 271 | +rqelem->have_hibound= true; |
| 272 | +rqelem->hibound=s2; |
| 273 | +} |
| 274 | +else |
| 275 | +{ |
| 276 | +/* We have found two similar clauses, such as |
| 277 | + * x > y AND x > z. Keep only the more restrictive one. |
| 278 | + */ |
| 279 | +if (rqelem->hibound>s2) |
| 280 | +rqelem->hibound=s2; |
| 281 | +} |
| 282 | +} |
| 283 | +return; |
| 284 | +} |
| 285 | + |
| 286 | +/* No matching var found, so make a new clause-pair data structure */ |
| 287 | +rqelem= (RangeQueryClause*)palloc(sizeof(RangeQueryClause)); |
| 288 | +rqelem->var=var; |
| 289 | +if (is_lobound) |
| 290 | +{ |
| 291 | +rqelem->have_lobound= true; |
| 292 | +rqelem->have_hibound= false; |
| 293 | +rqelem->lobound=s2; |
| 294 | +} |
| 295 | +else |
| 296 | +{ |
| 297 | +rqelem->have_lobound= false; |
| 298 | +rqelem->have_hibound= true; |
| 299 | +rqelem->hibound=s2; |
| 300 | +} |
| 301 | +rqelem->next=*rqlist; |
| 302 | +*rqlist=rqelem; |
| 303 | +} |
| 304 | + |
| 305 | + |
81 | 306 | /*
|
82 | 307 | * clause_selectivity -
|
83 | 308 | * Compute the selectivity of a general boolean expression clause.
|
|