@@ -268,21 +268,23 @@ typedef struct AggStatePerTransData
268268 */
269269int numInputs ;
270270
271- /*
272- * At each input row, we evaluate all argument expressions needed for all
273- * the aggregates in this Agg node in a single ExecProject call. inputoff
274- * is the starting index of this aggregate's argument expressions in the
275- * resulting tuple (in AggState->evalslot).
276- */
277- int inputoff ;
278-
279271/*
280272 * Number of aggregated input columns to pass to the transfn. This
281273 * includes the ORDER BY columns for ordered-set aggs, but not for plain
282274 * aggs. (This doesn't count the transition state value!)
283275 */
284276int numTransInputs ;
285277
278+ /*
279+ * At each input row, we perform a single ExecProject call to evaluate all
280+ * argument expressions that will certainly be needed at this row; that
281+ * includes this aggregate's filter expression if it has one, or its
282+ * regular argument expressions (including any ORDER BY columns) if it
283+ * doesn't. inputoff is the starting index of this aggregate's required
284+ * expressions in the resulting tuple.
285+ */
286+ int inputoff ;
287+
286288/* Oid of the state transition or combine function */
287289Oid transfn_oid ;
288290
@@ -295,9 +297,8 @@ typedef struct AggStatePerTransData
295297/* Oid of state value's datatype */
296298Oid aggtranstype ;
297299
298- /* ExprStates of the FILTER and argument expressions. */
299- ExprState * aggfilter ;/* state of FILTER expression, if any */
300- List * aggdirectargs ;/* states of direct-argument expressions */
300+ /* ExprStates for any direct-argument expressions */
301+ List * aggdirectargs ;
301302
302303/*
303304 * fmgr lookup data for transition function or combine function. Note in
@@ -353,20 +354,21 @@ typedef struct AggStatePerTransData
353354transtypeByVal ;
354355
355356/*
356- * Stuff for evaluation of aggregate inputs in cases where the aggregate
357- *requires sorted input. The arguments themselves will be evaluated via
358- *AggState->evalslot/evalproj for all aggregates at once, but we only
359- *want to sort the relevant columns for individual aggregates .
357+ * Stuff for evaluation of aggregate inputs, when they must be evaluated
358+ *separately because there's a FILTER expression. In such cases we will
359+ *create a sortslot and the result will be stored there, whether or not
360+ *we're actually sorting .
360361 */
361- TupleDesc sortdesc ; /*descriptor of input tuples */
362+ ProjectionInfo * evalproj ; /*projection machinery */
362363
363364/*
364365 * Slots for holding the evaluated input arguments. These are set up
365- * during ExecInitAgg() and then used for each input row requiring
366- *processing besides what's done in AggState->evalproj .
366+ * during ExecInitAgg() and then used for each input row requiring either
367+ *FILTER or ORDER BY/DISTINCT processing .
367368 */
368369TupleTableSlot * sortslot ;/* current input tuple */
369370TupleTableSlot * uniqslot ;/* used for multi-column DISTINCT */
371+ TupleDesc sortdesc ;/* descriptor of input tuples */
370372
371373/*
372374 * These values are working state that is initialized at the start of an
@@ -973,30 +975,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGro
973975int numGroupingSets = Max (aggstate -> phase -> numsets ,1 );
974976int numHashes = aggstate -> num_hashes ;
975977int numTrans = aggstate -> numtrans ;
976- TupleTableSlot * slot = aggstate -> evalslot ;
978+ TupleTableSlot * combinedslot ;
977979
978- /* compute input for all aggregates */
979- if (aggstate -> evalproj )
980- aggstate -> evalslot = ExecProject (aggstate -> evalproj );
980+ /* compute required inputs for all aggregates */
981+ combinedslot = ExecProject (aggstate -> combinedproj );
981982
982983for (transno = 0 ;transno < numTrans ;transno ++ )
983984{
984985AggStatePerTrans pertrans = & aggstate -> pertrans [transno ];
985- ExprState * filter = pertrans -> aggfilter ;
986986int numTransInputs = pertrans -> numTransInputs ;
987- int i ;
988987int inputoff = pertrans -> inputoff ;
988+ TupleTableSlot * slot ;
989+ int i ;
989990
990991/* Skip anything FILTERed out */
991- if (filter )
992+ if (pertrans -> aggref -> aggfilter )
992993{
993- Datum res ;
994- bool isnull ;
995-
996- res = ExecEvalExprSwitchContext (filter ,aggstate -> tmpcontext ,
997- & isnull );
998- if (isnull || !DatumGetBool (res ))
994+ /* Check the result of the filter expression */
995+ if (combinedslot -> tts_isnull [inputoff ]||
996+ !DatumGetBool (combinedslot -> tts_values [inputoff ]))
999997continue ;
998+
999+ /* Now it's safe to evaluate this agg's arguments */
1000+ slot = ExecProject (pertrans -> evalproj );
1001+ /* There's no offset needed in this slot, of course */
1002+ inputoff = 0 ;
1003+ }
1004+ else
1005+ {
1006+ /* arguments are already evaluated into combinedslot @ inputoff */
1007+ slot = combinedslot ;
10001008}
10011009
10021010if (pertrans -> numSortCols > 0 )
@@ -1030,11 +1038,21 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGro
10301038tuplesort_putdatum (pertrans -> sortstates [setno ],
10311039slot -> tts_values [inputoff ],
10321040slot -> tts_isnull [inputoff ]);
1041+ else if (pertrans -> aggref -> aggfilter )
1042+ {
1043+ /*
1044+ * When filtering and ordering, we already have a slot
1045+ * containing just the argument columns.
1046+ */
1047+ Assert (slot == pertrans -> sortslot );
1048+ tuplesort_puttupleslot (pertrans -> sortstates [setno ],slot );
1049+ }
10331050else
10341051{
10351052/*
1036- * Copy slot contents, starting from inputoff, into sort
1037- * slot.
1053+ * Copy argument columns from combined slot, starting at
1054+ * inputoff, into sortslot, so that we can store just the
1055+ * columns we want.
10381056 */
10391057ExecClearTuple (pertrans -> sortslot );
10401058memcpy (pertrans -> sortslot -> tts_values ,
@@ -1043,9 +1061,9 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGro
10431061memcpy (pertrans -> sortslot -> tts_isnull ,
10441062& slot -> tts_isnull [inputoff ],
10451063pertrans -> numInputs * sizeof (bool ));
1046- pertrans -> sortslot -> tts_nvalid = pertrans -> numInputs ;
10471064ExecStoreVirtualTuple (pertrans -> sortslot );
1048- tuplesort_puttupleslot (pertrans -> sortstates [setno ],pertrans -> sortslot );
1065+ tuplesort_puttupleslot (pertrans -> sortstates [setno ],
1066+ pertrans -> sortslot );
10491067}
10501068}
10511069}
@@ -1117,7 +1135,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
11171135Assert (aggstate -> phase -> numsets <=1 );
11181136
11191137/* compute input for all aggregates */
1120- slot = ExecProject (aggstate -> evalproj );
1138+ slot = ExecProject (aggstate -> combinedproj );
11211139
11221140for (transno = 0 ;transno < numTrans ;transno ++ )
11231141{
@@ -2681,6 +2699,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
26812699int phase ;
26822700int phaseidx ;
26832701List * combined_inputeval ;
2702+ TupleDesc combineddesc ;
2703+ TupleTableSlot * combinedslot ;
26842704ListCell * l ;
26852705Bitmapset * all_grouped_cols = NULL ;
26862706int numGroupingSets = 1 ;
@@ -3345,19 +3365,17 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
33453365aggstate -> numtrans = transno + 1 ;
33463366
33473367/*
3348- * Build a single projection computing theaggregate arguments for all
3368+ * Build a single projection computing therequired arguments for all
33493369 * aggregates at once; if there's more than one, that's considerably
33503370 * faster than doing it separately for each.
33513371 *
3352- * First create a targetlist combining the targetlists of all the
3353- * per-trans states.
3372+ * First create a targetlist representing the values to compute.
33543373 */
33553374combined_inputeval = NIL ;
33563375column_offset = 0 ;
33573376for (transno = 0 ;transno < aggstate -> numtrans ;transno ++ )
33583377{
33593378AggStatePerTrans pertrans = & pertransstates [transno ];
3360- ListCell * arg ;
33613379
33623380/*
33633381 * Mark this per-trans state with its starting column in the combined
@@ -3366,38 +3384,70 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
33663384pertrans -> inputoff = column_offset ;
33673385
33683386/*
3369- * Adjust resnos in the copied target entries to match the combined
3370- * slot.
3387+ * If the aggregate has a FILTER, we can only evaluate the filter
3388+ * expression, not the actual input expressions, during the combined
3389+ * eval step --- unless we're ignoring the filter because this node is
3390+ * running combinefns not transfns.
33713391 */
3372- foreach (arg ,pertrans -> aggref -> args )
3392+ if (pertrans -> aggref -> aggfilter &&
3393+ !DO_AGGSPLIT_COMBINE (aggstate -> aggsplit ))
33733394{
3374- TargetEntry * source_tle = lfirst_node (TargetEntry ,arg );
33753395TargetEntry * tle ;
33763396
3377- tle = flatCopyTargetEntry (source_tle );
3378- tle -> resno += column_offset ;
3379-
3397+ tle = makeTargetEntry (pertrans -> aggref -> aggfilter ,
3398+ column_offset + 1 ,NULL , false);
33803399combined_inputeval = lappend (combined_inputeval ,tle );
3400+ column_offset ++ ;
3401+
3402+ /*
3403+ * We'll need separate projection machinery for the real args.
3404+ * Arrange to evaluate them into the sortslot previously created.
3405+ */
3406+ Assert (pertrans -> sortslot );
3407+ pertrans -> evalproj = ExecBuildProjectionInfo (pertrans -> aggref -> args ,
3408+ aggstate -> tmpcontext ,
3409+ pertrans -> sortslot ,
3410+ & aggstate -> ss .ps ,
3411+ NULL );
33813412}
3413+ else
3414+ {
3415+ /*
3416+ * Add agg's input expressions to combined_inputeval, adjusting
3417+ * resnos in the copied target entries to match the combined slot.
3418+ */
3419+ ListCell * arg ;
3420+
3421+ foreach (arg ,pertrans -> aggref -> args )
3422+ {
3423+ TargetEntry * source_tle = lfirst_node (TargetEntry ,arg );
3424+ TargetEntry * tle ;
3425+
3426+ tle = flatCopyTargetEntry (source_tle );
3427+ tle -> resno += column_offset ;
33823428
3383- column_offset += list_length (pertrans -> aggref -> args );
3429+ combined_inputeval = lappend (combined_inputeval ,tle );
3430+ }
3431+
3432+ column_offset += list_length (pertrans -> aggref -> args );
3433+ }
33843434}
33853435
33863436/* Now create a projection for the combined targetlist */
3387- aggstate -> evaldesc = ExecTypeFromTL (combined_inputeval , false);
3388- aggstate -> evalslot = ExecInitExtraTupleSlot (estate );
3389- aggstate -> evalproj = ExecBuildProjectionInfo ( combined_inputeval ,
3390- aggstate -> tmpcontext ,
3391- aggstate -> evalslot ,
3392- & aggstate -> ss . ps ,
3393- NULL );
3394- ExecSetSlotDescriptor ( aggstate -> evalslot , aggstate -> evaldesc );
3437+ combineddesc = ExecTypeFromTL (combined_inputeval , false);
3438+ combinedslot = ExecInitExtraTupleSlot (estate );
3439+ ExecSetSlotDescriptor ( combinedslot , combineddesc );
3440+ aggstate -> combinedproj = ExecBuildProjectionInfo ( combined_inputeval ,
3441+ aggstate -> tmpcontext ,
3442+ combinedslot ,
3443+ & aggstate -> ss . ps ,
3444+ NULL );
33953445
33963446/*
33973447 * Last, check whether any more aggregates got added onto the node while
33983448 * we processed the expressions for the aggregate arguments (including not
3399- * only the regular argumentshandled immediately above, but any FILTER
3400- *expressions and direct arguments we might've handled earlier). If so,
3449+ * only the regular argumentsand FILTER expressions handled immediately
3450+ *above, but any direct arguments we might've handled earlier). If so,
34013451 * we have nested aggregate functions, which is semantically nonsensical,
34023452 * so complain. (This should have been caught by the parser, so we don't
34033453 * need to work hard on a helpful error message; but we defend against it
@@ -3462,6 +3512,8 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
34623512else
34633513pertrans -> numTransInputs = numArguments ;
34643514
3515+ /* inputoff and evalproj will be set up later, in ExecInitAgg */
3516+
34653517/*
34663518 * When combining states, we have no use at all for the aggregate
34673519 * function's transfn. Instead we use the combinefn. In this case, the
@@ -3577,9 +3629,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
35773629
35783630}
35793631
3580- /* Initialize the input and FILTER expressions */
3581- pertrans -> aggfilter = ExecInitExpr (aggref -> aggfilter ,
3582- (PlanState * )aggstate );
3632+ /* Initialize any direct-argument expressions */
35833633pertrans -> aggdirectargs = ExecInitExprList (aggref -> aggdirectargs ,
35843634 (PlanState * )aggstate );
35853635
@@ -3613,16 +3663,20 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
36133663pertrans -> numSortCols = numSortCols ;
36143664pertrans -> numDistinctCols = numDistinctCols ;
36153665
3616- if (numSortCols > 0 )
3666+ /*
3667+ * If we have either sorting or filtering to do, create a tupledesc and
3668+ * slot corresponding to the aggregated inputs (including sort
3669+ * expressions) of the agg.
3670+ */
3671+ if (numSortCols > 0 || aggref -> aggfilter )
36173672{
3618- /*
3619- * Get a tupledesc and slot corresponding to the aggregated inputs
3620- * (including sort expressions) of the agg.
3621- */
36223673pertrans -> sortdesc = ExecTypeFromTL (aggref -> args , false);
36233674pertrans -> sortslot = ExecInitExtraTupleSlot (estate );
36243675ExecSetSlotDescriptor (pertrans -> sortslot ,pertrans -> sortdesc );
3676+ }
36253677
3678+ if (numSortCols > 0 )
3679+ {
36263680/*
36273681 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
36283682 * (yet)