PostgreSQL Source Code  git master
nodeAgg.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeAgg.h
4  * prototypes for nodeAgg.c
5  *
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/executor/nodeAgg.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef NODEAGG_H
15 #define NODEAGG_H
16 
17 #include "access/parallel.h"
18 #include "nodes/execnodes.h"
19 
20 
21 /*
22  * AggStatePerTransData - per aggregate state value information
23  *
24  * Working state for updating the aggregate's state value, by calling the
25  * transition function with an input row. This struct does not store the
26  * information needed to produce the final aggregate result from the transition
27  * state, that's stored in AggStatePerAggData instead. This separation allows
28  * multiple aggregate results to be produced from a single state value.
29  */
30 typedef struct AggStatePerTransData
31 {
32  /*
33  * These values are set up during ExecInitAgg() and do not change
34  * thereafter:
35  */
36 
37  /*
38  * Link to an Aggref expr this state value is for.
39  *
40  * There can be multiple Aggref's sharing the same state value, so long as
41  * the inputs and transition functions are identical and the final
42  * functions are not read-write. This points to the first one of them.
43  */
44  Aggref *aggref;
45 
46  /*
47  * Is this state value actually being shared by more than one Aggref?
48  */
49  bool aggshared;
50 
51  /*
52  * Number of aggregated input columns. This includes ORDER BY expressions
53  * in both the plain-agg and ordered-set cases. Ordered-set direct args
54  * are not counted, though.
55  */
56  int numInputs;
57 
58  /*
59  * Number of aggregated input columns to pass to the transfn. This
60  * includes the ORDER BY columns for ordered-set aggs, but not for plain
61  * aggs. (This doesn't count the transition state value!)
62  */
63  int numTransInputs;
64 
65  /* Oid of the state transition or combine function */
66  Oid transfn_oid;
67 
68  /* Oid of the serialization function or InvalidOid */
69  Oid serialfn_oid;
70 
71  /* Oid of the deserialization function or InvalidOid */
72  Oid deserialfn_oid;
73 
74  /* Oid of state value's datatype */
75  Oid aggtranstype;
76 
77  /*
78  * fmgr lookup data for transition function or combine function. Note in
79  * particular that the fn_strict flag is kept here.
80  */
81  FmgrInfo transfn;
82 
83  /* fmgr lookup data for serialization function */
84  FmgrInfo serialfn;
85 
86  /* fmgr lookup data for deserialization function */
87  FmgrInfo deserialfn;
88 
89  /* Input collation derived for aggregate */
90  Oid aggCollation;
91 
92  /* number of sorting columns */
93  int numSortCols;
94 
95  /* number of sorting columns to consider in DISTINCT comparisons */
96  /* (this is either zero or the same as numSortCols) */
97  int numDistinctCols;
98 
99  /* deconstructed sorting information (arrays of length numSortCols) */
100  AttrNumber *sortColIdx;
101  Oid *sortOperators;
102  Oid *sortCollations;
103  bool *sortNullsFirst;
104 
105  /*
106  * Comparators for input columns --- only set/used when aggregate has
107  * DISTINCT flag. equalfnOne version is used for single-column
108  * comparisons, equalfnMulti for the case of multiple columns.
109  */
110  FmgrInfo equalfnOne;
111  ExprState *equalfnMulti;
112 
113  /*
114  * initial value from pg_aggregate entry
115  */
116  Datum initValue;
117  bool initValueIsNull;
118 
119  /*
120  * We need the len and byval info for the agg's input and transition data
121  * types in order to know how to copy/delete values.
122  *
123  * Note that the info for the input type is used only when handling
124  * DISTINCT aggs with just one argument, so there is only one input type.
125  */
126  int16 inputtypeLen,
127  transtypeLen;
128  bool inputtypeByVal,
129  transtypeByVal;
130 
131  /*
132  * Slots for holding the evaluated input arguments. These are set up
133  * during ExecInitAgg() and then used for each input row requiring either
134  * FILTER or ORDER BY/DISTINCT processing.
135  */
136  TupleTableSlot *sortslot; /* current input tuple */
137  TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */
138  TupleDesc sortdesc; /* descriptor of input tuples */
139 
140  /*
141  * These values are working state that is initialized at the start of an
142  * input tuple group and updated for each input tuple.
143  *
144  * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input
145  * values straight to the transition function. If it's DISTINCT or
146  * requires ORDER BY, we pass the input values into a Tuplesort object;
147  * then at completion of the input tuple group, we scan the sorted values,
148  * eliminate duplicates if needed, and run the transition function on the
149  * rest.
150  *
151  * We need a separate tuplesort for each grouping set.
152  */
153 
154  Tuplesortstate **sortstates; /* sort objects, if DISTINCT or ORDER BY */
155 
156  /*
157  * This field is a pre-initialized FunctionCallInfo struct used for
158  * calling this aggregate's transfn. We save a few cycles per row by not
159  * re-initializing the unchanging fields; which isn't much, but it seems
160  * worth the extra space consumption.
161  */
162  FunctionCallInfo transfn_fcinfo;
163 
164  /* Likewise for serialization and deserialization functions */
165  FunctionCallInfo serialfn_fcinfo;
166 
167  FunctionCallInfo deserialfn_fcinfo;
168 } AggStatePerTransData;
169 
170 /*
171  * AggStatePerAggData - per-aggregate information
172  *
173  * This contains the information needed to call the final function, to produce
174  * a final aggregate result from the state value. If there are multiple
175  * identical Aggrefs in the query, they can all share the same per-agg data.
176  *
177  * These values are set up during ExecInitAgg() and do not change thereafter.
178  */
179 typedef struct AggStatePerAggData
180 {
181  /*
182  * Link to an Aggref expr this state value is for.
183  *
184  * There can be multiple identical Aggref's sharing the same per-agg. This
185  * points to the first one of them.
186  */
187  Aggref *aggref;
188 
189  /* index to the state value which this agg should use */
190  int transno;
191 
192  /* Optional Oid of final function (may be InvalidOid) */
193  Oid finalfn_oid;
194 
195  /*
196  * fmgr lookup data for final function --- only valid when finalfn_oid is
197  * not InvalidOid.
198  */
199  FmgrInfo finalfn;
200 
201  /*
202  * Number of arguments to pass to the finalfn. This is always at least 1
203  * (the transition state value) plus any ordered-set direct args. If the
204  * finalfn wants extra args then we pass nulls corresponding to the
205  * aggregated input columns.
206  */
207  int numFinalArgs;
208 
209  /* ExprStates for any direct-argument expressions */
210  List *aggdirectargs;
211 
212  /*
213  * We need the len and byval info for the agg's result data type in order
214  * to know how to copy/delete values.
215  */
216  int16 resulttypeLen;
217  bool resulttypeByVal;
218 
219  /*
220  * "shareable" is false if this agg cannot share state values with other
221  * aggregates because the final function is read-write.
222  */
223  bool shareable;
224 } AggStatePerAggData;
225 
226 /*
227  * AggStatePerGroupData - per-aggregate-per-group working state
228  *
229  * These values are working state that is initialized at the start of
230  * an input tuple group and updated for each input tuple.
231  *
232  * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
233  * structs (pointed to by aggstate->pergroup); we re-use the array for
234  * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
235  * hash table contains an array of these structs for each tuple group.
236  *
237  * Logically, the sortstate field belongs in this struct, but we do not
238  * keep it here for space reasons: we don't support DISTINCT aggregates
239  * in AGG_HASHED mode, so there's no reason to use up a pointer field
240  * in every entry of the hashtable.
241  */
242 typedef struct AggStatePerGroupData
243 {
244 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0
245  Datum transValue; /* current transition value */
246 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1
247  bool transValueIsNull;
248 
249 #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2
250  bool noTransValue; /* true if transValue not set yet */
251 
252  /*
253  * Note: noTransValue initially has the same value as transValueIsNull,
254  * and if true both are cleared to false at the same time. They are not
255  * the same though: if transfn later returns a NULL, we want to keep that
256  * NULL and not auto-replace it with a later input value. Only the first
257  * non-NULL input will be auto-substituted.
258  */
259 } AggStatePerGroupData;
260 
261 /*
262  * AggStatePerPhaseData - per-grouping-set-phase state
263  *
264  * Grouping sets are divided into "phases", where a single phase can be
265  * processed in one pass over the input. If there is more than one phase, then
266  * at the end of input from the current phase, state is reset and another pass
267  * taken over the data which has been re-sorted in the meantime.
268  *
269  * Accordingly, each phase specifies a list of grouping sets and group clause
270  * information, plus each phase after the first also has a sort order.
271  */
272 typedef struct AggStatePerPhaseData
273 {
274  AggStrategy aggstrategy; /* strategy for this phase */
275  int numsets; /* number of grouping sets (or 0) */
276  int *gset_lengths; /* lengths of grouping sets */
277  Bitmapset **grouped_cols; /* column groupings for rollup */
278  ExprState **eqfunctions; /* expression returning equality, indexed by
279  * nr of cols to compare */
280  Agg *aggnode; /* Agg node for phase data */
281  Sort *sortnode; /* Sort node for input ordering for phase */
282 
283  ExprState *evaltrans; /* evaluation of transition functions */
284 
285  /*----------
286  * Cached variants of the compiled expression.
287  * first subscript: 0: outerops; 1: TTSOpsMinimalTuple
288  * second subscript: 0: no NULL check; 1: with NULL check
289  *----------
290  */
291  ExprState *evaltrans_cache[2][2];
292 } AggStatePerPhaseData;
293 
294 /*
295  * AggStatePerHashData - per-hashtable state
296  *
297  * When doing grouping sets with hashing, we have one of these for each
298  * grouping set. (When doing hashing without grouping sets, we have just one of
299  * them.)
300  */
301 typedef struct AggStatePerHashData
302 {
303  TupleHashTable hashtable; /* hash table with one entry per group */
304  TupleHashIterator hashiter; /* for iterating through hash table */
305  TupleTableSlot *hashslot; /* slot for loading hash table */
306  FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
307  Oid *eqfuncoids; /* per-grouping-field equality fns */
308  int numCols; /* number of hash key columns */
309  int numhashGrpCols; /* number of columns in hash table */
310  int largestGrpColIdx; /* largest col required for hashing */
311  AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
312  AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */
313  Agg *aggnode; /* original Agg node, for numGroups etc. */
314 } AggStatePerHashData;
315 
316 
317 extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
318 extern void ExecEndAgg(AggState *node);
319 extern void ExecReScanAgg(AggState *node);
320 
321 extern Size hash_agg_entry_size(int numTrans, Size tupleWidth,
322  Size transitionSpace);
323 extern void hash_agg_set_limits(double hashentrysize, double input_groups,
324  int used_bits, Size *mem_limit,
325  uint64 *ngroups_limit, int *num_partitions);
326 
327 /* parallel instrumentation support */
328 extern void ExecAggEstimate(AggState *node, ParallelContext *pcxt);
329 extern void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt);
330 extern void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt);
331 extern void ExecAggRetrieveInstrumentation(AggState *node);
332 
333 #endif /* NODEAGG_H */
signed short int16
Definition: c.h:361
ExprState ** eqfunctions
Definition: nodeAgg.h:278
FmgrInfo
Definition: fmgr.h:56
AttrNumber * hashGrpColIdxInput
Definition: nodeAgg.h:311
void ExecAggEstimate(AggState *node, ParallelContext *pcxt)
Definition: nodeAgg.c:4964
void ExecReScanAgg(AggState *node)
Definition: nodeAgg.c:4627
FmgrInfo equalfnOne
Definition: nodeAgg.h:110
ExprState * evaltrans
Definition: nodeAgg.h:283
tuplehash_iterator TupleHashIterator
Definition: execnodes.h:716
void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt)
Definition: nodeAgg.c:5010
unsigned int Oid
Definition: postgres_ext.h:31
Aggref * aggref
Definition: nodeAgg.h:187
FunctionCallInfo transfn_fcinfo
Definition: nodeAgg.h:162
TupleDesc sortdesc
Definition: nodeAgg.h:138
FmgrInfo transfn
Definition: nodeAgg.h:81
void ExecAggRetrieveInstrumentation(AggState *node)
Definition: nodeAgg.c:5023
void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt)
Definition: nodeAgg.c:4985
Aggref * aggref
Definition: nodeAgg.h:44
Bitmapset ** grouped_cols
Definition: nodeAgg.h:277
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1695
AggState * ExecInitAgg(Agg *node, EState *estate, int eflags)
Definition: nodeAgg.c:3234
ExprState * equalfnMulti
Definition: nodeAgg.h:111
Tuplesortstate ** sortstates
Definition: nodeAgg.h:154
AttrNumber * sortColIdx
Definition: nodeAgg.h:100
struct AggStatePerGroupData AggStatePerGroupData
AggStrategy aggstrategy
Definition: nodeAgg.h:274
struct AggStatePerTransData AggStatePerTransData
TupleHashIterator hashiter
Definition: nodeAgg.h:304
AttrNumber * hashGrpColIdxHash
Definition: nodeAgg.h:312
uintptr_t Datum
Definition: postgres.h:367
FmgrInfo deserialfn
Definition: nodeAgg.h:87
int16 resulttypeLen
Definition: nodeAgg.h:216
struct AggStatePerHashData AggStatePerHashData
FmgrInfo * hashfunctions
Definition: nodeAgg.h:306
void ExecEndAgg(AggState *node)
Definition: nodeAgg.c:4557
FmgrInfo serialfn
Definition: nodeAgg.h:84
FunctionCallInfo deserialfn_fcinfo
Definition: nodeAgg.h:167
size_t Size
Definition: c.h:473
FunctionCallInfo serialfn_fcinfo
Definition: nodeAgg.h:165
void hash_agg_set_limits(double hashentrysize, double input_groups, int used_bits, Size *mem_limit, uint64 *ngroups_limit, int *num_partitions)
Definition: nodeAgg.c:1793
TupleTableSlot * uniqslot
Definition: nodeAgg.h:137
AggStrategy
Definition: nodes.h:758
List * aggdirectargs
Definition: nodeAgg.h:210
bool resulttypeByVal
Definition: nodeAgg.h:217
Agg
Definition: plannodes.h:816
TupleTableSlot * sortslot
Definition: nodeAgg.h:136
struct AggStatePerPhaseData AggStatePerPhaseData
TupleTableSlot * hashslot
Definition: nodeAgg.h:305
List
Definition: pg_list.h:50
TupleHashTable hashtable
Definition: nodeAgg.h:303
int16 AttrNumber
Definition: attnum.h:21
FmgrInfo finalfn
Definition: nodeAgg.h:199
struct AggStatePerAggData AggStatePerAggData
bool * sortNullsFirst
Definition: nodeAgg.h:103