|
17 | 17 |
|
18 | 18 | #include<math.h>
|
19 | 19 |
|
| 20 | +#include"access/sysattr.h" |
20 | 21 | #include"catalog/pg_class.h"
|
21 | 22 | #include"catalog/pg_operator.h"
|
| 23 | +#include"catalog/pg_type.h" |
22 | 24 | #include"foreign/fdwapi.h"
|
| 25 | +#include"nodes/makefuncs.h" |
23 | 26 | #include"nodes/nodeFuncs.h"
|
24 | 27 | #ifdefOPTIMIZER_DEBUG
|
25 | 28 | #include"nodes/print.h"
|
@@ -98,6 +101,7 @@ static void subquery_push_qual(Query *subquery,
|
98 | 101 | RangeTblEntry*rte,Indexrti,Node*qual);
|
99 | 102 | staticvoidrecurse_push_qual(Node*setOp,Query*topquery,
|
100 | 103 | RangeTblEntry*rte,Indexrti,Node*qual);
|
| 104 | +staticvoidremove_unused_subquery_outputs(Query*subquery,RelOptInfo*rel); |
101 | 105 |
|
102 | 106 |
|
103 | 107 | /*
|
@@ -1124,7 +1128,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
1124 | 1128 | /*
|
1125 | 1129 | * Must copy the Query so that planning doesn't mess up the RTE contents
|
1126 | 1130 | * (really really need to fix the planner to not scribble on its input,
|
1127 |
| - * someday). |
| 1131 | + * someday ... but see remove_unused_subquery_outputs to start with). |
1128 | 1132 | */
|
1129 | 1133 | subquery=copyObject(subquery);
|
1130 | 1134 |
|
@@ -1198,6 +1202,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
|
1198 | 1202 |
|
1199 | 1203 | pfree(unsafeColumns);
|
1200 | 1204 |
|
| 1205 | +/* |
| 1206 | + * The upper query might not use all the subquery's output columns; if |
| 1207 | + * not, we can simplify. |
| 1208 | + */ |
| 1209 | +remove_unused_subquery_outputs(subquery,rel); |
| 1210 | + |
1201 | 1211 | /*
|
1202 | 1212 | * We can safely pass the outer tuple_fraction down to the subquery if the
|
1203 | 1213 | * outer level has no joining, aggregation, or sorting to do. Otherwise
|
@@ -2032,6 +2042,129 @@ recurse_push_qual(Node *setOp, Query *topquery,
|
2032 | 2042 | }
|
2033 | 2043 | }
|
2034 | 2044 |
|
| 2045 | +/***************************************************************************** |
| 2046 | + *SIMPLIFYING SUBQUERY TARGETLISTS |
| 2047 | + *****************************************************************************/ |
| 2048 | + |
| 2049 | +/* |
| 2050 | + * remove_unused_subquery_outputs |
| 2051 | + *Remove subquery targetlist items we don't need |
| 2052 | + * |
| 2053 | + * It's possible, even likely, that the upper query does not read all the |
| 2054 | + * output columns of the subquery. We can remove any such outputs that are |
| 2055 | + * not needed by the subquery itself (e.g., as sort/group columns) and do not |
| 2056 | + * affect semantics otherwise (e.g., volatile functions can't be removed). |
| 2057 | + * This is useful not only because we might be able to remove expensive-to- |
| 2058 | + * compute expressions, but because deletion of output columns might allow |
| 2059 | + * optimizations such as join removal to occur within the subquery. |
| 2060 | + * |
| 2061 | + * To avoid affecting column numbering in the targetlist, we don't physically |
| 2062 | + * remove unused tlist entries, but rather replace their expressions with NULL |
| 2063 | + * constants. This is implemented by modifying subquery->targetList. |
| 2064 | + */ |
| 2065 | +staticvoid |
| 2066 | +remove_unused_subquery_outputs(Query*subquery,RelOptInfo*rel) |
| 2067 | +{ |
| 2068 | +Bitmapset*attrs_used=NULL; |
| 2069 | +ListCell*lc; |
| 2070 | + |
| 2071 | +/* |
| 2072 | + * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we |
| 2073 | + * could update all the child SELECTs' tlists, but it seems not worth the |
| 2074 | + * trouble presently. |
| 2075 | + */ |
| 2076 | +if (subquery->setOperations) |
| 2077 | +return; |
| 2078 | + |
| 2079 | +/* |
| 2080 | + * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our |
| 2081 | + * time: all its output columns must be used in the distinctClause. |
| 2082 | + */ |
| 2083 | +if (subquery->distinctClause&& !subquery->hasDistinctOn) |
| 2084 | +return; |
| 2085 | + |
| 2086 | +/* |
| 2087 | + * Collect a bitmap of all the output column numbers used by the upper |
| 2088 | + * query. |
| 2089 | + * |
| 2090 | + * Add all the attributes needed for joins or final output. Note: we must |
| 2091 | + * look at reltargetlist, not the attr_needed data, because attr_needed |
| 2092 | + * isn't computed for inheritance child rels, cf set_append_rel_size(). |
| 2093 | + * (XXX might be worth changing that sometime.) |
| 2094 | + */ |
| 2095 | +pull_varattnos((Node*)rel->reltargetlist,rel->relid,&attrs_used); |
| 2096 | + |
| 2097 | +/* Add all the attributes used by un-pushed-down restriction clauses. */ |
| 2098 | +foreach(lc,rel->baserestrictinfo) |
| 2099 | +{ |
| 2100 | +RestrictInfo*rinfo= (RestrictInfo*)lfirst(lc); |
| 2101 | + |
| 2102 | +pull_varattnos((Node*)rinfo->clause,rel->relid,&attrs_used); |
| 2103 | +} |
| 2104 | + |
| 2105 | +/* |
| 2106 | + * If there's a whole-row reference to the subquery, we can't remove |
| 2107 | + * anything. |
| 2108 | + */ |
| 2109 | +if (bms_is_member(0-FirstLowInvalidHeapAttributeNumber,attrs_used)) |
| 2110 | +return; |
| 2111 | + |
| 2112 | +/* |
| 2113 | + * Run through the tlist and zap entries we don't need. It's okay to |
| 2114 | + * modify the tlist items in-place because set_subquery_pathlist made a |
| 2115 | + * copy of the subquery. |
| 2116 | + */ |
| 2117 | +foreach(lc,subquery->targetList) |
| 2118 | +{ |
| 2119 | +TargetEntry*tle= (TargetEntry*)lfirst(lc); |
| 2120 | + |
| 2121 | +/* |
| 2122 | + * If it has a sortgroupref number, it's used in some sort/group |
| 2123 | + * clause so we'd better not remove it. Also, don't remove any |
| 2124 | + * resjunk columns, since their reason for being has nothing to do |
| 2125 | + * with anybody reading the subquery's output. (It's likely that |
| 2126 | + * resjunk columns in a sub-SELECT would always have ressortgroupref |
| 2127 | + * set, but even if they don't, it seems imprudent to remove them.) |
| 2128 | + */ |
| 2129 | +if (tle->ressortgroupref||tle->resjunk) |
| 2130 | +continue; |
| 2131 | + |
| 2132 | +/* |
| 2133 | + * If it's used by the upper query, we can't remove it. |
| 2134 | + */ |
| 2135 | +if (bms_is_member(tle->resno-FirstLowInvalidHeapAttributeNumber, |
| 2136 | +attrs_used)) |
| 2137 | +continue; |
| 2138 | + |
| 2139 | +/* |
| 2140 | + * If it contains a set-returning function, we can't remove it since |
| 2141 | + * that could change the number of rows returned by the subquery. |
| 2142 | + */ |
| 2143 | +if (expression_returns_set((Node*)tle->expr)) |
| 2144 | +continue; |
| 2145 | + |
| 2146 | +/* |
| 2147 | + * If it contains volatile functions, we daren't remove it for fear |
| 2148 | + * that the user is expecting their side-effects to happen. |
| 2149 | + */ |
| 2150 | +if (contain_volatile_functions((Node*)tle->expr)) |
| 2151 | +continue; |
| 2152 | + |
| 2153 | +/* |
| 2154 | + * OK, we don't need it. Replace the expression with a NULL constant. |
| 2155 | + * We can just make the constant be of INT4 type, since nothing's |
| 2156 | + * going to look at it anyway. |
| 2157 | + */ |
| 2158 | +tle->expr= (Expr*)makeConst(INT4OID, |
| 2159 | +-1, |
| 2160 | +InvalidOid, |
| 2161 | +sizeof(int32), |
| 2162 | + (Datum)0, |
| 2163 | + true,/* isnull */ |
| 2164 | + true/* byval */ ); |
| 2165 | +} |
| 2166 | +} |
| 2167 | + |
2035 | 2168 | /*****************************************************************************
|
2036 | 2169 | *DEBUG SUPPORT
|
2037 | 2170 | *****************************************************************************/
|
|