160160#include "executor/executor.h"
161161#include "executor/nodeAgg.h"
162162#include "miscadmin.h"
163+ #include "nodes/makefuncs.h"
163164#include "nodes/nodeFuncs.h"
164165#include "optimizer/clauses.h"
165166#include "optimizer/tlist.h"
@@ -213,6 +214,9 @@ typedef struct AggStatePerTransData
213214 */
214215int numInputs ;
215216
217+ /* offset of input columns in AggState->evalslot */
218+ int inputoff ;
219+
216220/*
217221 * Number of aggregated input columns to pass to the transfn. This
218222 * includes the ORDER BY columns for ordered-set aggs, but not for plain
@@ -234,7 +238,6 @@ typedef struct AggStatePerTransData
234238
235239/* ExprStates of the FILTER and argument expressions. */
236240ExprState * aggfilter ;/* state of FILTER expression, if any */
237- List * args ;/* states of aggregated-argument expressions */
238241List * aggdirectargs ;/* states of direct-argument expressions */
239242
240243/*
@@ -291,19 +294,19 @@ typedef struct AggStatePerTransData
291294transtypeByVal ;
292295
293296/*
294- * Stuff for evaluation of inputs. We used to just use ExecEvalExpr, but
295- *with the addition of ORDER BY we now need at least a slot for passing
296- *data to the sort object, which requires a tupledesc, so wemight as
297- *well go whole hog and use ExecProject too .
297+ * Stuff for evaluation of aggregate inputs in cases where the aggregate
298+ * requires sorted input. The arguments themselves will be evaluated via
299+ * AggState->evalslot/evalproj for all aggregates at once, but we only
300+ * want to sort the relevant columns for individual aggregates.
298301 */
299- TupleDesc evaldesc ;/* descriptor of input tuples */
300- ProjectionInfo * evalproj ;/* projection machinery */
302+ TupleDesc sortdesc ;/* descriptor of input tuples */
301303
302304/*
303305 * Slots for holding the evaluated input arguments. These are set up
304- * during ExecInitAgg() and then used for each input row.
306+ * during ExecInitAgg() and then used for each input row requiring
307+ * processing besides what's done in AggState->evalproj.
305308 */
306- TupleTableSlot * evalslot ;/* current input tuple */
309+ TupleTableSlot * sortslot ;/* current input tuple */
307310TupleTableSlot * uniqslot ;/* used for multi-column DISTINCT */
308311
309312/*
@@ -621,14 +624,14 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
621624 */
622625if (pertrans -> numInputs == 1 )
623626pertrans -> sortstates [aggstate -> current_set ]=
624- tuplesort_begin_datum (pertrans -> evaldesc -> attrs [0 ]-> atttypid ,
627+ tuplesort_begin_datum (pertrans -> sortdesc -> attrs [0 ]-> atttypid ,
625628pertrans -> sortOperators [0 ],
626629pertrans -> sortCollations [0 ],
627630pertrans -> sortNullsFirst [0 ],
628631work_mem , false);
629632else
630633pertrans -> sortstates [aggstate -> current_set ]=
631- tuplesort_begin_heap (pertrans -> evaldesc ,
634+ tuplesort_begin_heap (pertrans -> sortdesc ,
632635pertrans -> numSortCols ,
633636pertrans -> sortColIdx ,
634637pertrans -> sortOperators ,
@@ -847,14 +850,19 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
847850int setno = 0 ;
848851int numGroupingSets = Max (aggstate -> phase -> numsets ,1 );
849852int numTrans = aggstate -> numtrans ;
853+ TupleTableSlot * slot = aggstate -> evalslot ;
854+
855+ /* compute input for all aggregates */
856+ if (aggstate -> evalproj )
857+ aggstate -> evalslot = ExecProject (aggstate -> evalproj ,NULL );
850858
851859for (transno = 0 ;transno < numTrans ;transno ++ )
852860{
853861AggStatePerTrans pertrans = & aggstate -> pertrans [transno ];
854862ExprState * filter = pertrans -> aggfilter ;
855863int numTransInputs = pertrans -> numTransInputs ;
856864int i ;
857- TupleTableSlot * slot ;
865+ int inputoff = pertrans -> inputoff ;
858866
859867/* Skip anything FILTERed out */
860868if (filter )
@@ -868,13 +876,10 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
868876continue ;
869877}
870878
871- /* Evaluate the current input expressions for this aggregate */
872- slot = ExecProject (pertrans -> evalproj ,NULL );
873-
874879if (pertrans -> numSortCols > 0 )
875880{
876881/* DISTINCT and/or ORDER BY case */
877- Assert (slot -> tts_nvalid == pertrans -> numInputs );
882+ Assert (slot -> tts_nvalid >= ( pertrans -> numInputs + inputoff ) );
878883
879884/*
880885 * If the transfn is strict, we want to check for nullity before
@@ -887,7 +892,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
887892{
888893for (i = 0 ;i < numTransInputs ;i ++ )
889894{
890- if (slot -> tts_isnull [i ])
895+ if (slot -> tts_isnull [i + inputoff ])
891896break ;
892897}
893898if (i < numTransInputs )
@@ -899,10 +904,25 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
899904/* OK, put the tuple into the tuplesort object */
900905if (pertrans -> numInputs == 1 )
901906tuplesort_putdatum (pertrans -> sortstates [setno ],
902- slot -> tts_values [0 ],
903- slot -> tts_isnull [0 ]);
907+ slot -> tts_values [inputoff ],
908+ slot -> tts_isnull [inputoff ]);
904909else
905- tuplesort_puttupleslot (pertrans -> sortstates [setno ],slot );
910+ {
911+ /*
912+ * Copy slot contents, starting from inputoff, into sort
913+ * slot.
914+ */
915+ ExecClearTuple (pertrans -> sortslot );
916+ memcpy (pertrans -> sortslot -> tts_values ,
917+ & slot -> tts_values [inputoff ],
918+ pertrans -> numInputs * sizeof (Datum ));
919+ memcpy (pertrans -> sortslot -> tts_isnull ,
920+ & slot -> tts_isnull [inputoff ],
921+ pertrans -> numInputs * sizeof (bool ));
922+ pertrans -> sortslot -> tts_nvalid = pertrans -> numInputs ;
923+ ExecStoreVirtualTuple (pertrans -> sortslot );
924+ tuplesort_puttupleslot (pertrans -> sortstates [setno ],pertrans -> sortslot );
925+ }
906926}
907927}
908928else
@@ -915,8 +935,8 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
915935Assert (slot -> tts_nvalid >=numTransInputs );
916936for (i = 0 ;i < numTransInputs ;i ++ )
917937{
918- fcinfo -> arg [i + 1 ]= slot -> tts_values [i ];
919- fcinfo -> argnull [i + 1 ]= slot -> tts_isnull [i ];
938+ fcinfo -> arg [i + 1 ]= slot -> tts_values [i + inputoff ];
939+ fcinfo -> argnull [i + 1 ]= slot -> tts_isnull [i + inputoff ];
920940}
921941
922942for (setno = 0 ;setno < numGroupingSets ;setno ++ )
@@ -943,20 +963,24 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
943963{
944964int transno ;
945965int numTrans = aggstate -> numtrans ;
966+ TupleTableSlot * slot = NULL ;
946967
947968/* combine not supported with grouping sets */
948969Assert (aggstate -> phase -> numsets == 0 );
949970
971+ /* compute input for all aggregates */
972+ if (aggstate -> evalproj )
973+ slot = ExecProject (aggstate -> evalproj ,NULL );
974+
950975for (transno = 0 ;transno < numTrans ;transno ++ )
951976{
952977AggStatePerTrans pertrans = & aggstate -> pertrans [transno ];
953978AggStatePerGroup pergroupstate = & pergroup [transno ];
954- TupleTableSlot * slot ;
955979FunctionCallInfo fcinfo = & pertrans -> transfn_fcinfo ;
980+ int inputoff = pertrans -> inputoff ;
956981
957- /* Evaluate the current input expressions for this aggregate */
958- slot = ExecProject (pertrans -> evalproj ,NULL );
959982Assert (slot -> tts_nvalid >=1 );
983+ Assert (slot -> tts_nvalid + inputoff >=1 );
960984
961985/*
962986 * deserialfn_oid will be set if we must deserialize the input state
@@ -965,18 +989,18 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
965989if (OidIsValid (pertrans -> deserialfn_oid ))
966990{
967991/* Don't call a strict deserialization function with NULL input */
968- if (pertrans -> deserialfn .fn_strict && slot -> tts_isnull [0 ])
992+ if (pertrans -> deserialfn .fn_strict && slot -> tts_isnull [inputoff ])
969993{
970- fcinfo -> arg [1 ]= slot -> tts_values [0 ];
971- fcinfo -> argnull [1 ]= slot -> tts_isnull [0 ];
994+ fcinfo -> arg [1 ]= slot -> tts_values [inputoff ];
995+ fcinfo -> argnull [1 ]= slot -> tts_isnull [inputoff ];
972996}
973997else
974998{
975999FunctionCallInfo dsinfo = & pertrans -> deserialfn_fcinfo ;
9761000MemoryContext oldContext ;
9771001
978- dsinfo -> arg [0 ]= slot -> tts_values [0 ];
979- dsinfo -> argnull [0 ]= slot -> tts_isnull [0 ];
1002+ dsinfo -> arg [0 ]= slot -> tts_values [inputoff ];
1003+ dsinfo -> argnull [0 ]= slot -> tts_isnull [inputoff ];
9801004/* Dummy second argument for type-safety reasons */
9811005dsinfo -> arg [1 ]= PointerGetDatum (NULL );
9821006dsinfo -> argnull [1 ]= false;
@@ -995,8 +1019,8 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
9951019}
9961020else
9971021{
998- fcinfo -> arg [1 ]= slot -> tts_values [0 ];
999- fcinfo -> argnull [1 ]= slot -> tts_isnull [0 ];
1022+ fcinfo -> arg [1 ]= slot -> tts_values [inputoff ];
1023+ fcinfo -> argnull [1 ]= slot -> tts_isnull [inputoff ];
10001024}
10011025
10021026advance_combine_function (aggstate ,pertrans ,pergroupstate );
@@ -1233,7 +1257,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
12331257{
12341258MemoryContext workcontext = aggstate -> tmpcontext -> ecxt_per_tuple_memory ;
12351259FunctionCallInfo fcinfo = & pertrans -> transfn_fcinfo ;
1236- TupleTableSlot * slot1 = pertrans -> evalslot ;
1260+ TupleTableSlot * slot1 = pertrans -> sortslot ;
12371261TupleTableSlot * slot2 = pertrans -> uniqslot ;
12381262int numTransInputs = pertrans -> numTransInputs ;
12391263int numDistinctCols = pertrans -> numDistinctCols ;
@@ -2343,10 +2367,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
23432367transno ,
23442368aggno ;
23452369int phase ;
2370+ List * combined_inputeval ;
23462371ListCell * l ;
23472372Bitmapset * all_grouped_cols = NULL ;
23482373int numGroupingSets = 1 ;
23492374int numPhases ;
2375+ int column_offset ;
23502376int i = 0 ;
23512377int j = 0 ;
23522378
@@ -2928,6 +2954,53 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
29282954aggstate -> numaggs = aggno + 1 ;
29292955aggstate -> numtrans = transno + 1 ;
29302956
2957+ /*
2958+ * Build a single projection computing the aggregate arguments for all
2959+ * aggregates at once; that's considerably faster than doing it separately
2960+ * for each.
2961+ *
2962+ * First create a targetlist combining the targetlist of all the
2963+ * transitions.
2964+ */
2965+ combined_inputeval = NIL ;
2966+ column_offset = 0 ;
2967+ for (transno = 0 ;transno < aggstate -> numtrans ;transno ++ )
2968+ {
2969+ AggStatePerTrans pertrans = & pertransstates [transno ];
2970+ ListCell * arg ;
2971+
2972+ pertrans -> inputoff = column_offset ;
2973+
2974+ /*
2975+ * Adjust resno in the copied target entries, to point into the combined
2976+ * slot.
2977+ */
2978+ foreach (arg ,pertrans -> aggref -> args )
2979+ {
2980+ TargetEntry * source_tle = (TargetEntry * )lfirst (arg );
2981+ TargetEntry * tle ;
2982+
2983+ Assert (IsA (source_tle ,TargetEntry ));
2984+ tle = flatCopyTargetEntry (source_tle );
2985+ tle -> resno += column_offset ;
2986+
2987+ combined_inputeval = lappend (combined_inputeval ,tle );
2988+ }
2989+
2990+ column_offset += list_length (pertrans -> aggref -> args );
2991+ }
2992+
2993+ /* and then create a projection for that targetlist */
2994+ aggstate -> evaldesc = ExecTypeFromTL (combined_inputeval , false);
2995+ aggstate -> evalslot = ExecInitExtraTupleSlot (estate );
2996+ combined_inputeval = (List * )ExecInitExpr ((Expr * )combined_inputeval ,
2997+ (PlanState * )aggstate );
2998+ aggstate -> evalproj = ExecBuildProjectionInfo (combined_inputeval ,
2999+ aggstate -> tmpcontext ,
3000+ aggstate -> evalslot ,
3001+ NULL );
3002+ ExecSetSlotDescriptor (aggstate -> evalslot ,aggstate -> evaldesc );
3003+
29313004return aggstate ;
29323005}
29333006
@@ -3098,24 +3171,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
30983171
30993172}
31003173
3101- /*
3102- * Get a tupledesc corresponding to the aggregated inputs (including sort
3103- * expressions) of the agg.
3104- */
3105- pertrans -> evaldesc = ExecTypeFromTL (aggref -> args , false);
3106-
3107- /* Create slot we're going to do argument evaluation in */
3108- pertrans -> evalslot = ExecInitExtraTupleSlot (estate );
3109- ExecSetSlotDescriptor (pertrans -> evalslot ,pertrans -> evaldesc );
3110-
31113174/* Initialize the input and FILTER expressions */
31123175naggs = aggstate -> numaggs ;
31133176pertrans -> aggfilter = ExecInitExpr (aggref -> aggfilter ,
31143177 (PlanState * )aggstate );
31153178pertrans -> aggdirectargs = (List * )ExecInitExpr ((Expr * )aggref -> aggdirectargs ,
31163179(PlanState * )aggstate );
3117- pertrans -> args = (List * )ExecInitExpr ((Expr * )aggref -> args ,
3118- (PlanState * )aggstate );
31193180
31203181/*
31213182 * Complain if the aggregate's arguments contain any aggregates; nested
@@ -3127,12 +3188,6 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31273188(errcode (ERRCODE_GROUPING_ERROR ),
31283189errmsg ("aggregate function calls cannot be nested" )));
31293190
3130- /* Set up projection info for evaluation */
3131- pertrans -> evalproj = ExecBuildProjectionInfo (pertrans -> args ,
3132- aggstate -> tmpcontext ,
3133- pertrans -> evalslot ,
3134- NULL );
3135-
31363191/*
31373192 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
31383193 * have a list of SortGroupClause nodes; fish out the data in them and
@@ -3165,6 +3220,14 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31653220
31663221if (numSortCols > 0 )
31673222{
3223+ /*
3224+ * Get a tupledesc and slot corresponding to the aggregated inputs
3225+ * (including sort expressions) of the agg.
3226+ */
3227+ pertrans -> sortdesc = ExecTypeFromTL (aggref -> args , false);
3228+ pertrans -> sortslot = ExecInitExtraTupleSlot (estate );
3229+ ExecSetSlotDescriptor (pertrans -> sortslot ,pertrans -> sortdesc );
3230+
31683231/*
31693232 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
31703233 * (yet)
@@ -3183,7 +3246,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31833246/* we will need an extra slot to store prior values */
31843247pertrans -> uniqslot = ExecInitExtraTupleSlot (estate );
31853248ExecSetSlotDescriptor (pertrans -> uniqslot ,
3186- pertrans -> evaldesc );
3249+ pertrans -> sortdesc );
31873250}
31883251
31893252/* Extract the sort information for use later */