PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
execGrouping.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * execGrouping.c
4 * executor utility routines for grouping, hashing, and aggregation
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/execGrouping.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/parallel.h"
18#include "common/hashfn.h"
19#include "executor/executor.h"
20#include "miscadmin.h"
21#include "utils/lsyscache.h"
22
23static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 const MinimalTuple tuple);
27 TupleTableSlot *slot,
28 bool *isnew, uint32 hash);
29
30/*
31 * Define parameters for tuple hash table code generation. The interface is
32 * *also* declared in execnodes.h (to generate the types, which are externally
33 * visible).
34 */
35#define SH_PREFIX tuplehash
36#define SH_ELEMENT_TYPE TupleHashEntryData
37#define SH_KEY_TYPE MinimalTuple
38#define SH_KEY firstTuple
39#define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41#define SH_SCOPE extern
42#define SH_STORE_HASH
43#define SH_GET_HASH(tb, a) a->hash
44#define SH_DEFINE
45#include "lib/simplehash.h"
46
47
48/*****************************************************************************
49 * Utility routines for grouping tuples together
50 *****************************************************************************/
51
52/*
53 * execTuplesMatchPrepare
54 * Build expression that can be evaluated using ExecQual(), returning
55 * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 */
59 int numCols,
60 const AttrNumber *keyColIdx,
61 const Oid *eqOperators,
62 const Oid *collations,
63 PlanState *parent)
64{
65 Oid *eqFunctions;
66 int i;
67 ExprState *expr;
68
69 if (numCols == 0)
70 return NULL;
71
72 eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
73
74 /* lookup equality functions */
75 for (i = 0; i < numCols; i++)
76 eqFunctions[i] = get_opcode(eqOperators[i]);
77
78 /* build actual expression */
79 expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
80 numCols, keyColIdx, eqFunctions, collations,
81 parent);
82
83 return expr;
84}
85
86/*
87 * execTuplesHashPrepare
88 * Look up the equality and hashing functions needed for a TupleHashTable.
89 *
90 * This is similar to execTuplesMatchPrepare, but we also need to find the
91 * hash functions associated with the equality operators. *eqFunctions and
92 * *hashFunctions receive the palloc'd result arrays.
93 *
94 * Note: we expect that the given operators are not cross-type comparisons.
95 */
96void
98 const Oid *eqOperators,
99 Oid **eqFuncOids,
100 FmgrInfo **hashFunctions)
101{
102 int i;
103
104 *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
105 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106
107 for (i = 0; i < numCols; i++)
108 {
109 Oid eq_opr = eqOperators[i];
110 Oid eq_function;
111 Oid left_hash_function;
112 Oid right_hash_function;
113
114 eq_function = get_opcode(eq_opr);
115 if (!get_op_hash_functions(eq_opr,
116 &left_hash_function, &right_hash_function))
117 elog(ERROR, "could not find hash function for hash operator %u",
118 eq_opr);
119 /* We're not supporting cross-type cases here */
120 Assert(left_hash_function == right_hash_function);
121 (*eqFuncOids)[i] = eq_function;
122 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
123 }
124}
125
126
127/*****************************************************************************
128 * Utility routines for all-in-memory hash tables
129 *
130 * These routines build hash tables for grouping tuples together (eg, for
131 * hash aggregation). There is one entry for each not-distinct set of tuples
132 * presented.
133 *****************************************************************************/
134
135/*
136 * Construct an empty TupleHashTable
137 *
138 * parent: PlanState node that will own this hash table
139 * inputDesc: tuple descriptor for input tuples
140 * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
141 * numCols: number of columns to be compared (length of next 4 arrays)
142 * keyColIdx: indexes of tuple columns to compare
143 * eqfuncoids: OIDs of equality comparison functions to use
144 * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
145 * collations: collations to use in comparisons
146 * nbuckets: initial estimate of hashtable size
147 * additionalsize: size of data stored in ->additional
148 * metacxt: memory context for long-lived allocation, but not per-entry data
149 * tablecxt: memory context in which to store table entries
150 * tempcxt: short-lived context for evaluation hash and comparison functions
151 * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
152 *
153 * The hashfunctions array may be made with execTuplesHashPrepare(). Note they
154 * are not cross-type functions, but expect to see the table datatype(s)
155 * on both sides.
156 *
157 * Note that the keyColIdx, hashfunctions, and collations arrays must be
158 * allocated in storage that will live as long as the hashtable does.
159 */
162 TupleDesc inputDesc,
163 const TupleTableSlotOps *inputOps,
164 int numCols,
165 AttrNumber *keyColIdx,
166 const Oid *eqfuncoids,
167 FmgrInfo *hashfunctions,
168 Oid *collations,
169 long nbuckets,
170 Size additionalsize,
171 MemoryContext metacxt,
172 MemoryContext tablecxt,
173 MemoryContext tempcxt,
174 bool use_variable_hash_iv)
175{
176 TupleHashTable hashtable;
177 Size entrysize;
178 Size hash_mem_limit;
179 MemoryContext oldcontext;
180 bool allow_jit;
181 uint32 hash_iv = 0;
182
183 Assert(nbuckets > 0);
184 additionalsize = MAXALIGN(additionalsize);
185 entrysize = sizeof(TupleHashEntryData) + additionalsize;
186
187 /* Limit initial table size request to not more than hash_mem */
188 hash_mem_limit = get_hash_memory_limit() / entrysize;
189 if (nbuckets > hash_mem_limit)
190 nbuckets = hash_mem_limit;
191
192 oldcontext = MemoryContextSwitchTo(metacxt);
193
194 hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
195
196 hashtable->numCols = numCols;
197 hashtable->keyColIdx = keyColIdx;
198 hashtable->tab_collations = collations;
199 hashtable->tablecxt = tablecxt;
200 hashtable->tempcxt = tempcxt;
201 hashtable->additionalsize = additionalsize;
202 hashtable->tableslot = NULL; /* will be made on first lookup */
203 hashtable->inputslot = NULL;
204 hashtable->in_hash_expr = NULL;
205 hashtable->cur_eq_func = NULL;
206
207 /*
208 * If parallelism is in use, even if the leader backend is performing the
209 * scan itself, we don't want to create the hashtable exactly the same way
210 * in all workers. As hashtables are iterated over in keyspace-order,
211 * doing so in all processes in the same way is likely to lead to
212 * "unbalanced" hashtables when the table size initially is
213 * underestimated.
214 */
215 if (use_variable_hash_iv)
217
218 hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
219
220 /*
221 * We copy the input tuple descriptor just for safety --- we assume all
222 * input tuples will have equivalent descriptors.
223 */
226
227 /*
228 * If the caller fails to make the metacxt different from the tablecxt,
229 * allowing JIT would lead to the generated functions to a) live longer
230 * than the query or b) be re-generated each time the table is being
231 * reset. Therefore prevent JIT from being used in that case, by not
232 * providing a parent node (which prevents accessing the JitContext in the
233 * EState).
234 */
235 allow_jit = (metacxt != tablecxt);
236
237 /* build hash ExprState for all columns */
238 hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
239 inputOps,
240 hashfunctions,
241 collations,
242 numCols,
243 keyColIdx,
244 allow_jit ? parent : NULL,
245 hash_iv);
246
247 /* build comparator for all columns */
248 hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
249 inputOps,
251 numCols,
252 keyColIdx, eqfuncoids, collations,
253 allow_jit ? parent : NULL);
254
255 /*
256 * While not pretty, it's ok to not shut down this context, but instead
257 * rely on the containing memory context being reset, as
258 * ExecBuildGroupingEqual() only builds a very simple expression calling
259 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
260 */
262
263 MemoryContextSwitchTo(oldcontext);
264
265 return hashtable;
266}
267
268/*
269 * Reset contents of the hashtable to be empty, preserving all the non-content
270 * state. Note that the tablecxt passed to BuildTupleHashTable() should
271 * also be reset, otherwise there will be leaks.
272 */
273void
275{
276 tuplehash_reset(hashtable->hashtab);
277}
278
279/*
280 * Find or create a hashtable entry for the tuple group containing the
281 * given tuple. The tuple must be the same type as the hashtable entries.
282 *
283 * If isnew is NULL, we do not create new entries; we return NULL if no
284 * match is found.
285 *
286 * If hash is not NULL, we set it to the calculated hash value. This allows
287 * callers access to the hash value even if no entry is returned.
288 *
289 * If isnew isn't NULL, then a new entry is created if no existing entry
290 * matches. On return, *isnew is true if the entry is newly created,
291 * false if it existed already. ->additional_data in the new entry has
292 * been zeroed.
293 */
296 bool *isnew, uint32 *hash)
297{
298 TupleHashEntry entry;
299 MemoryContext oldContext;
300 uint32 local_hash;
301
302 /* Need to run the hash functions in short-lived context */
303 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
304
305 /* set up data needed by hash and match functions */
306 hashtable->inputslot = slot;
307 hashtable->in_hash_expr = hashtable->tab_hash_expr;
308 hashtable->cur_eq_func = hashtable->tab_eq_func;
309
310 local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
311 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
312
313 if (hash != NULL)
314 *hash = local_hash;
315
316 Assert(entry == NULL || entry->hash == local_hash);
317
318 MemoryContextSwitchTo(oldContext);
319
320 return entry;
321}
322
323/*
324 * Compute the hash value for a tuple
325 */
326uint32
328{
329 MemoryContext oldContext;
330 uint32 hash;
331
332 hashtable->inputslot = slot;
333 hashtable->in_hash_expr = hashtable->tab_hash_expr;
334
335 /* Need to run the hash functions in short-lived context */
336 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
337
338 hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
339
340 MemoryContextSwitchTo(oldContext);
341
342 return hash;
343}
344
345/*
346 * A variant of LookupTupleHashEntry for callers that have already computed
347 * the hash value.
348 */
351 bool *isnew, uint32 hash)
352{
353 TupleHashEntry entry;
354 MemoryContext oldContext;
355
356 /* Need to run the hash functions in short-lived context */
357 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
358
359 /* set up data needed by hash and match functions */
360 hashtable->inputslot = slot;
361 hashtable->in_hash_expr = hashtable->tab_hash_expr;
362 hashtable->cur_eq_func = hashtable->tab_eq_func;
363
364 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
365 Assert(entry == NULL || entry->hash == hash);
366
367 MemoryContextSwitchTo(oldContext);
368
369 return entry;
370}
371
372/*
373 * Search for a hashtable entry matching the given tuple. No entry is
374 * created if there's not a match. This is similar to the non-creating
375 * case of LookupTupleHashEntry, except that it supports cross-type
376 * comparisons, in which the given tuple is not of the same type as the
377 * table entries. The caller must provide the hash ExprState to use for
378 * the input tuple, as well as the equality ExprState, since these may be
379 * different from the table's internal functions.
380 */
383 ExprState *eqcomp,
384 ExprState *hashexpr)
385{
386 TupleHashEntry entry;
387 MemoryContext oldContext;
389
390 /* Need to run the hash functions in short-lived context */
391 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
392
393 /* Set up data needed by hash and match functions */
394 hashtable->inputslot = slot;
395 hashtable->in_hash_expr = hashexpr;
396 hashtable->cur_eq_func = eqcomp;
397
398 /* Search the hash table */
399 key = NULL; /* flag to reference inputslot */
400 entry = tuplehash_lookup(hashtable->hashtab, key);
401 MemoryContextSwitchTo(oldContext);
402
403 return entry;
404}
405
406/*
407 * If tuple is NULL, use the input slot instead. This convention avoids the
408 * need to materialize virtual input tuples unless they actually need to get
409 * copied into the table.
410 *
411 * Also, the caller must select an appropriate memory context for running
412 * the hash functions.
413 */
414static uint32
415TupleHashTableHash_internal(struct tuplehash_hash *tb,
416 const MinimalTuple tuple)
417{
418 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
419 uint32 hashkey;
420 TupleTableSlot *slot;
421 bool isnull;
422
423 if (tuple == NULL)
424 {
425 /* Process the current input tuple for the table */
426 hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
427 hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
428 hashtable->exprcontext,
429 &isnull));
430 }
431 else
432 {
433 /*
434 * Process a tuple already stored in the table.
435 *
436 * (this case never actually occurs due to the way simplehash.h is
437 * used, as the hash-value is stored in the entries)
438 */
439 slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
440 ExecStoreMinimalTuple(tuple, slot, false);
441 hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
442 hashtable->exprcontext,
443 &isnull));
444 }
445
446 /*
447 * The hashing done above, even with an initial value, doesn't tend to
448 * result in good hash perturbation. Running the value produced above
449 * through murmurhash32 leads to near perfect hash perturbation.
450 */
451 return murmurhash32(hashkey);
452}
453
454/*
455 * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
456 * so that we can avoid switching the memory context multiple times for
457 * LookupTupleHashEntry.
458 *
459 * NB: This function may or may not change the memory context. Caller is
460 * expected to change it back.
461 */
462static inline TupleHashEntry
464 bool *isnew, uint32 hash)
465{
466 TupleHashEntryData *entry;
467 bool found;
469
470 key = NULL; /* flag to reference inputslot */
471
472 if (isnew)
473 {
474 entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
475
476 if (found)
477 {
478 /* found pre-existing entry */
479 *isnew = false;
480 }
481 else
482 {
483 /* created new entry */
484 *isnew = true;
485
487
488 /*
489 * Copy the first tuple into the table context, and request
490 * additionalsize extra bytes before the allocation.
491 *
492 * The caller can get a pointer to the additional data with
493 * TupleHashEntryGetAdditional(), and store arbitrary data there.
494 * Placing both the tuple and additional data in the same
495 * allocation avoids the need to store an extra pointer in
496 * TupleHashEntryData or allocate an additional chunk.
497 */
499 hashtable->additionalsize);
500 }
501 }
502 else
503 {
504 entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
505 }
506
507 return entry;
508}
509
510/*
511 * See whether two tuples (presumably of the same hash value) match
512 */
513static int
514TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
515{
516 TupleTableSlot *slot1;
517 TupleTableSlot *slot2;
518 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
519 ExprContext *econtext = hashtable->exprcontext;
520
521 /*
522 * We assume that simplehash.h will only ever call us with the first
523 * argument being an actual table entry, and the second argument being
524 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
525 * could be supported too, but is not currently required.
526 */
527 Assert(tuple1 != NULL);
528 slot1 = hashtable->tableslot;
529 ExecStoreMinimalTuple(tuple1, slot1, false);
530 Assert(tuple2 == NULL);
531 slot2 = hashtable->inputslot;
532
533 /* For crosstype comparisons, the inputslot must be first */
534 econtext->ecxt_innertuple = slot2;
535 econtext->ecxt_outertuple = slot1;
536 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
537}
int16 AttrNumber
Definition: attnum.h:21
int ParallelWorkerNumber
Definition: parallel.c:115
#define MAXALIGN(LEN)
Definition: c.h:782
uint32_t uint32
Definition: c.h:502
size_t Size
Definition: c.h:576
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
ExprState * ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, FmgrInfo *hashfunctions, Oid *collations, int numCols, AttrNumber *keyColIdx, PlanState *parent, uint32 init_value)
Definition: execExpr.c:4141
ExprState * ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const TupleTableSlotOps *lops, const TupleTableSlotOps *rops, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, const Oid *collations, PlanState *parent)
Definition: execExpr.c:4465
ExprState * execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, const Oid *collations, PlanState *parent)
Definition: execGrouping.c:58
void execTuplesHashPrepare(int numCols, const Oid *eqOperators, Oid **eqFuncOids, FmgrInfo **hashFunctions)
Definition: execGrouping.c:97
TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)
Definition: execGrouping.c:350
TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 *hash)
Definition: execGrouping.c:295
static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple)
Definition: execGrouping.c:415
uint32 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
Definition: execGrouping.c:327
TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, ExprState *hashexpr)
Definition: execGrouping.c:382
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
Definition: execGrouping.c:514
TupleHashTable BuildTupleHashTable(PlanState *parent, TupleDesc inputDesc, const TupleTableSlotOps *inputOps, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv)
Definition: execGrouping.c:161
void ResetTupleHashTable(TupleHashTable hashtable)
Definition: execGrouping.c:274
static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)
Definition: execGrouping.c:463
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1427
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1635
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
ExprContext * CreateStandaloneExprContext(void)
Definition: execUtils.c:358
struct TupleHashTableData * TupleHashTable
Definition: execnodes.h:844
struct TupleHashEntryData TupleHashEntryData
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:568
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:415
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
static uint32 murmurhash32(uint32 data)
Definition: hashfn.h:92
Assert(PointerIsAligned(start, uint64))
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1425
bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno)
Definition: lsyscache.c:581
void * palloc(Size size)
Definition: mcxt.c:1940
size_t get_hash_memory_limit(void)
Definition: nodeHash.c:3616
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:227
unsigned int Oid
Definition: postgres_ext.h:30
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:270
Definition: fmgr.h:57
MinimalTuple firstTuple
Definition: execnodes.h:848
AttrNumber * keyColIdx
Definition: execnodes.h:865
tuplehash_hash * hashtab
Definition: execnodes.h:863
ExprState * in_hash_expr
Definition: execnodes.h:875
ExprState * tab_hash_expr
Definition: execnodes.h:866
MemoryContext tempcxt
Definition: execnodes.h:870
ExprState * tab_eq_func
Definition: execnodes.h:867
TupleTableSlot * tableslot
Definition: execnodes.h:872
ExprContext * exprcontext
Definition: execnodes.h:877
MemoryContext tablecxt
Definition: execnodes.h:869
TupleTableSlot * inputslot
Definition: execnodes.h:874
ExprState * cur_eq_func
Definition: execnodes.h:876
TupleDesc CreateTupleDescCopy(TupleDesc tupdesc)
Definition: tupdesc.c:245
static MinimalTuple ExecCopySlotMinimalTupleExtra(TupleTableSlot *slot, Size extra)
Definition: tuptable.h:508