PostgreSQL Source Code  git master
execGrouping.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * execGrouping.c
4  * executor utility routines for grouping, hashing, and aggregation
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/execGrouping.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/parallel.h"
18 #include "common/hashfn.h"
19 #include "executor/executor.h"
20 #include "miscadmin.h"
21 #include "utils/lsyscache.h"
22 #include "utils/memutils.h"
23 
/*
 * Forward declarations for the file-local helpers defined later in this
 * file. The first two are referenced by the simplehash parameters below
 * (SH_EQUAL and SH_HASH_KEY respectively).
 */
24 static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
25 static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
26  const MinimalTuple tuple);
/*
 * NOTE(review): listing line 27, the opening line of the next prototype, is
 * absent from this page; the two lines below are its remaining parameters.
 * Judging by the (slot, isnew, hash) parameter list this is presumably
 * LookupTupleHashEntry_internal -- confirm against the original file.
 */
28  TupleTableSlot *slot,
29  bool *isnew, uint32 hash);
30 
31 /*
32  * Define parameters for tuple hash table code generation. The interface is
33  * *also* declared in execnodes.h (to generate the types, which are externally
34  * visible).
    *
    * - SH_PREFIX: the generated functions are named tuplehash_*; this file
    *   calls tuplehash_create, tuplehash_reset, tuplehash_lookup,
    *   tuplehash_insert_hash and tuplehash_lookup_hash.
    * - SH_ELEMENT_TYPE / SH_KEY_TYPE / SH_KEY: entries are TupleHashEntryData
    *   and the key is the entry's firstTuple field, a MinimalTuple.
    * - SH_HASH_KEY / SH_EQUAL: hashing and comparison are delegated to the
    *   static helpers of this file. TupleHashTableMatch() returns 0 for
    *   matching tuples, hence the "== 0" in SH_EQUAL.
    * - SH_STORE_HASH / SH_GET_HASH: the hash value is read back from each
    *   entry's hash field.
    * - SH_SCOPE extern / SH_DEFINE: presumably request externally visible
    *   function definitions to be emitted here -- see lib/simplehash.h to
    *   confirm.
35  */
36 #define SH_PREFIX tuplehash
37 #define SH_ELEMENT_TYPE TupleHashEntryData
38 #define SH_KEY_TYPE MinimalTuple
39 #define SH_KEY firstTuple
40 #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 #define SH_SCOPE extern
43 #define SH_STORE_HASH
44 #define SH_GET_HASH(tb, a) a->hash
45 #define SH_DEFINE
46 #include "lib/simplehash.h"
47 
48 
49 /*****************************************************************************
50  * Utility routines for grouping tuples together
51  *****************************************************************************/
52 
53 /*
54  * execTuplesMatchPrepare
55  * Build expression that can be evaluated using ExecQual(), returning
56  * whether an ExprContext's inner/outer tuples are NOT DISTINCT
    *
    * numCols, keyColIdx: number of columns to compare and the array that
    *		identifies them; both are handed to ExecBuildGroupingEqual()
    * eqOperators: one equality operator OID per column; each is translated
    *		to its underlying function OID with get_opcode()
    * collations: passed through unchanged to ExecBuildGroupingEqual()
    * parent: passed through unchanged to ExecBuildGroupingEqual()
    *
    * The same descriptor (desc) is supplied for both sides of the comparison.
    * Returns NULL, without building anything, when numCols is 0.
    *
    * NOTE(review): listing line 59 (the function-name line, which also
    * carries the first parameter) is absent from this page.
57  */
58 ExprState *
60  int numCols,
61  const AttrNumber *keyColIdx,
62  const Oid *eqOperators,
63  const Oid *collations,
64  PlanState *parent)
65 {
    /*
     * NOTE(review): this array is palloc'd before the numCols == 0 check
     * below, so the early return leaves a zero-length allocation behind in
     * the current memory context.
     */
66  Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67  int i;
68  ExprState *expr;
69 
    /* nothing to compare: callers get NULL instead of an expression */
70  if (numCols == 0)
71  return NULL;
72 
73  /* look up the function implementing each equality operator */
74  for (i = 0; i < numCols; i++)
75  eqFunctions[i] = get_opcode(eqOperators[i]);
76 
77  /* build actual expression; desc describes both compared tuples */
78  expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79  numCols, keyColIdx, eqFunctions, collations,
80  parent);
81 
82  return expr;
83 }
84 
85 /*
86  * execTuplesHashPrepare
87  * Look up the equality and hashing functions needed for a TupleHashTable.
88  *
89  * This is similar to execTuplesMatchPrepare, but we also need to find the
90  * hash functions associated with the equality operators. *eqFuncOids and
91  * *hashFunctions receive the palloc'd result arrays, each with numCols
    * elements: *eqFuncOids holds the function OID returned by get_opcode()
    * for each operator, and *hashFunctions holds an FmgrInfo filled in by
    * fmgr_info() for each operator's hash function.
    *
    * Raises ERROR (via elog) if get_op_hash_functions() reports no hash
    * functions for one of the operators.
92  *
93  * Note: we expect that the given operators are not cross-type comparisons.
    *
    * NOTE(review): listing line 96 (the function-name line, which also
    * carries the numCols parameter) is absent from this page.
94  */
95 void
97  const Oid *eqOperators,
98  Oid **eqFuncOids,
99  FmgrInfo **hashFunctions)
100 {
101  int i;
102 
     /* result arrays are allocated in the caller's current memory context */
103  *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
104  *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
105 
106  for (i = 0; i < numCols; i++)
107  {
108  Oid eq_opr = eqOperators[i];
109  Oid eq_function;
110  Oid left_hash_function;
111  Oid right_hash_function;
112 
113  eq_function = get_opcode(eq_opr);
114  if (!get_op_hash_functions(eq_opr,
115  &left_hash_function, &right_hash_function))
116  elog(ERROR, "could not find hash function for hash operator %u",
117  eq_opr);
118  /* We're not supporting cross-type cases here */
119  Assert(left_hash_function == right_hash_function);
120  (*eqFuncOids)[i] = eq_function;
     /* both sides are asserted equal above, so either OID would do here */
121  fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122  }
123 }
124 
125 
126 /*****************************************************************************
127  * Utility routines for all-in-memory hash tables
128  *
129  * These routines build hash tables for grouping tuples together (eg, for
130  * hash aggregation). There is one entry for each not-distinct set of tuples
131  * presented.
132  *****************************************************************************/
133 
134 /*
135  * Construct an empty TupleHashTable
136  *
     * parent: plan node handed to ExecBuildGroupingEqual(); NULL is passed
     *		instead when metacxt == tablecxt (see allow_jit below)
     * inputDesc: tuple descriptor of the input tuples
137  * numCols, keyColIdx: identify the tuple fields to use as lookup key
138  * eqfuncoids: equality comparison functions to use
139  * hashfunctions: datatype-specific hashing functions to use
     * collations: collations to use with the hash and equality functions
140  * nbuckets: initial estimate of hashtable size
141  * additionalsize: size of data stored in ->additional
142  * metacxt: memory context for long-lived allocation, but not per-entry data
143  * tablecxt: memory context in which to store table entries
144  * tempcxt: short-lived context for evaluating hash and comparison functions
     * use_variable_hash_iv: when false the hash IV is 0; the statement run
     *		when it is true is not visible in this listing
145  *
146  * The function arrays may be made with execTuplesHashPrepare(). Note they
147  * are not cross-type functions, but expect to see the table datatype(s)
148  * on both sides.
149  *
150  * Note that keyColIdx, eqfuncoids, and hashfunctions must be allocated in
151  * storage that will live as long as the hashtable does. The collations
     * pointer is likewise stored in the table without being copied.
     *
     * Returns the new table, which is palloc'd while metacxt is the current
     * memory context.
     *
     * NOTE(review): listing lines 153-154, 201, 211-212, 227 and 238 are
     * absent from this page. That hides the function header, the body of the
     * use_variable_hash_iv branch, the statements following the "copy the
     * input tuple descriptor" comment, part of the ExecBuildGroupingEqual()
     * argument list, and the statement following the final comment.
152  */
155  TupleDesc inputDesc,
156  int numCols, AttrNumber *keyColIdx,
157  const Oid *eqfuncoids,
158  FmgrInfo *hashfunctions,
159  Oid *collations,
160  long nbuckets, Size additionalsize,
161  MemoryContext metacxt,
162  MemoryContext tablecxt,
163  MemoryContext tempcxt,
164  bool use_variable_hash_iv)
165 {
166  TupleHashTable hashtable;
     /* size used for the work_mem cap below and recorded in the table */
167  Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168  MemoryContext oldcontext;
169  bool allow_jit;
170 
171  Assert(nbuckets > 0);
172 
173  /*
      * Limit initial table size request to not more than work_mem
      * (work_mem is scaled by 1024 here, so presumably it is expressed in
      * kilobytes -- confirm against its definition)
      */
174  nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
175 
     /* everything allocated from here until the switch back lives in metacxt */
176  oldcontext = MemoryContextSwitchTo(metacxt);
177 
178  hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
179 
     /* caller-supplied arrays are referenced, not copied */
180  hashtable->numCols = numCols;
181  hashtable->keyColIdx = keyColIdx;
182  hashtable->tab_hash_funcs = hashfunctions;
183  hashtable->tab_collations = collations;
184  hashtable->tablecxt = tablecxt;
185  hashtable->tempcxt = tempcxt;
186  hashtable->entrysize = entrysize;
187  hashtable->tableslot = NULL; /* will be made on first lookup */
     /* per-lookup state; the lookup entry points fill these in on each call */
188  hashtable->inputslot = NULL;
189  hashtable->in_hash_funcs = NULL;
190  hashtable->cur_eq_func = NULL;
191 
192  /*
193  * If parallelism is in use, even if the master backend is performing the
194  * scan itself, we don't want to create the hashtable exactly the same way
195  * in all workers. As hashtables are iterated over in keyspace-order,
196  * doing so in all processes in the same way is likely to lead to
197  * "unbalanced" hashtables when the table size initially is
198  * underestimated.
199  */
200  if (use_variable_hash_iv)
202  else
203  hashtable->hash_iv = 0;
204 
     /* the table struct is passed along and read back via tb->private_data */
205  hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
206 
207  /*
208  * We copy the input tuple descriptor just for safety --- we assume all
209  * input tuples will have equivalent descriptors.
210  */
213 
214  /*
215  * If the old reset interface is used (i.e. BuildTupleHashTable, rather
216  * than BuildTupleHashTableExt), allowing JIT would lead to the generated
217  * functions to a) live longer than the query b) be re-generated each time
218  * the table is being reset. Therefore prevent JIT from being used in that
219  * case, by not providing a parent node (which prevents accessing the
220  * JitContext in the EState).
221  */
222  allow_jit = metacxt != tablecxt;
223 
224  /* build comparator for all columns */
225  /* XXX: should we support non-minimal tuples for the inputslot? */
226  hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
228  numCols,
229  keyColIdx, eqfuncoids, collations,
230  allow_jit ? parent : NULL);
231 
232  /*
233  * While not pretty, it's ok to not shut down this context, but instead
234  * rely on the containing memory context being reset, as
235  * ExecBuildGroupingEqual() only builds a very simple expression calling
236  * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
237  */
239 
240  MemoryContextSwitchTo(oldcontext);
241 
242  return hashtable;
243 }
244 
245 /*
246  * BuildTupleHashTable is a backwards-compatibility wrapper for
247  * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
248  * tablecxt. Note that hashtables created this way cannot be reset leak-free
249  * with ResetTupleHashTable().
     *
     * All arguments are forwarded unchanged; tablecxt is supplied for both the
     * metacxt and the tablecxt parameter of BuildTupleHashTableExt(). Because
     * the two contexts are then equal, BuildTupleHashTableExt() passes NULL
     * rather than parent when building the comparator (its allow_jit test).
     *
     * NOTE(review): listing lines 251-252 (return type, function name and
     * first parameter) are absent from this page.
250  */
253  TupleDesc inputDesc,
254  int numCols, AttrNumber *keyColIdx,
255  const Oid *eqfuncoids,
256  FmgrInfo *hashfunctions,
257  Oid *collations,
258  long nbuckets, Size additionalsize,
259  MemoryContext tablecxt,
260  MemoryContext tempcxt,
261  bool use_variable_hash_iv)
262 {
263  return BuildTupleHashTableExt(parent,
264  inputDesc,
265  numCols, keyColIdx,
266  eqfuncoids,
267  hashfunctions,
268  collations,
269  nbuckets, additionalsize,
270  tablecxt, /* used as metacxt */
271  tablecxt,
272  tempcxt,
273  use_variable_hash_iv);
274 }
275 
276 /*
277  * Reset contents of the hashtable to be empty, preserving all the non-content
278  * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
279  * also be reset, otherwise there will be leaks.
     *
     * Only the underlying simplehash table is reset here (tuplehash_reset);
     * this function does not touch tablecxt itself, so resetting it is the
     * caller's job.
     *
     * NOTE(review): listing line 282 (the function-name line) is absent from
     * this page.
280  */
281 void
283 {
284  tuplehash_reset(hashtable->hashtab);
285 }
286 
287 /*
288  * Find or create a hashtable entry for the tuple group containing the
289  * given tuple. The tuple must be the same type as the hashtable entries.
290  *
291  * If isnew is NULL, we do not create new entries; we return NULL if no
292  * match is found.
293  *
294  * If isnew isn't NULL, then a new entry is created if no existing entry
295  * matches. On return, *isnew is true if the entry is newly created,
296  * false if it existed already. ->additional in the new entry has
297  * been set to NULL.
     *
     * The hash is computed here with the table's own hash functions and then
     * handed to LookupTupleHashEntry_internal(). Both steps run with tempcxt
     * as the current memory context; the caller's context is restored before
     * returning.
     *
     * NOTE(review): listing lines 299-300 (return type, function name and the
     * leading parameters) are absent from this page.
298  */
301  bool *isnew)
302 {
303  TupleHashEntry entry;
304  MemoryContext oldContext;
305  uint32 hash;
306 
307  /* Need to run the hash functions in short-lived context */
308  oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
309 
310  /* set up data needed by hash and match functions */
311  hashtable->inputslot = slot;
312  hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
313  hashtable->cur_eq_func = hashtable->tab_eq_func;
314 
     /* a NULL tuple tells the hash function to read hashtable->inputslot */
315  hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
316  entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
317 
     /* the internal lookup may have switched contexts; restore the caller's */
318  MemoryContextSwitchTo(oldContext);
319 
320  return entry;
321 }
322 
323 /*
324  * Compute the hash value for a tuple
     *
     * The tuple in slot is hashed with the table's own hash functions
     * (tab_hash_funcs), with tempcxt as the current memory context while the
     * hash functions run. As a side effect the table's inputslot and
     * in_hash_funcs fields are overwritten.
     *
     * NOTE(review): listing line 327 (the function-name line with the
     * parameter list) is absent from this page.
325  */
326 uint32
328 {
329  MemoryContext oldContext;
330  uint32 hash;
331 
     /* TupleHashTableHash_internal reads its input from these fields */
332  hashtable->inputslot = slot;
333  hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
334 
335  /* Need to run the hash functions in short-lived context */
336  oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
337 
     /* NULL tuple: hash the contents of hashtable->inputslot */
338  hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
339 
340  MemoryContextSwitchTo(oldContext);
341 
342  return hash;
343 }
344 
345 /*
346  * A variant of LookupTupleHashEntry for callers that have already computed
347  * the hash value.
     *
     * isnew has the same meaning as for LookupTupleHashEntry: NULL requests a
     * pure lookup, non-NULL requests find-or-create and receives whether the
     * entry was created. The supplied hash is passed on as-is and is not
     * recomputed or checked here.
     *
     * NOTE(review): listing lines 349-350 (return type, function name and the
     * leading parameters) are absent from this page.
348  */
351  bool *isnew, uint32 hash)
352 {
353  TupleHashEntry entry;
354  MemoryContext oldContext;
355 
356  /* Need to run the hash functions in short-lived context */
357  oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
358 
359  /* set up data needed by hash and match functions */
360  hashtable->inputslot = slot;
361  hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
362  hashtable->cur_eq_func = hashtable->tab_eq_func;
363 
364  entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
365 
     /* the internal lookup may have switched contexts; restore the caller's */
366  MemoryContextSwitchTo(oldContext);
367 
368  return entry;
369 }
370 
371 /*
372  * Search for a hashtable entry matching the given tuple. No entry is
373  * created if there's not a match. This is similar to the non-creating
374  * case of LookupTupleHashEntry, except that it supports cross-type
375  * comparisons, in which the given tuple is not of the same type as the
376  * table entries. The caller must provide the hash functions to use for
377  * the input tuple, as well as the equality functions, since these may be
378  * different from the table's internal functions.
     *
     * eqcomp: comparison expression installed as cur_eq_func for this lookup
     * hashfunctions: hash functions installed as in_hash_funcs for this lookup
     *
     * The lookup runs with tempcxt as the current memory context; the
     * caller's context is restored before returning.
     *
     * NOTE(review): listing lines 380-381 (return type, function name and the
     * leading parameters) and 387 (presumably the declaration of the local
     * "key" assigned below) are absent from this page.
379  */
382  ExprState *eqcomp,
383  FmgrInfo *hashfunctions)
384 {
385  TupleHashEntry entry;
386  MemoryContext oldContext;
388 
389  /* Need to run the hash functions in short-lived context */
390  oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
391 
392  /* Set up data needed by hash and match functions */
393  hashtable->inputslot = slot;
394  hashtable->in_hash_funcs = hashfunctions;
395  hashtable->cur_eq_func = eqcomp;
396 
397  /* Search the hash table */
398  key = NULL; /* flag to reference inputslot */
399  entry = tuplehash_lookup(hashtable->hashtab, key);
400  MemoryContextSwitchTo(oldContext);
401 
402  return entry;
403 }
404 
405 /*
     * Compute the hash value for either the current input tuple or a tuple
     * stored in the table.
     *
406  * If tuple is NULL, use the input slot instead. This convention avoids the
407  * need to materialize virtual input tuples unless they actually need to get
408  * copied into the table.
409  *
410  * Also, the caller must select an appropriate memory context for running
411  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
     *
     * NOTE(review): the parenthetical above names dynahash.c, but the table in
     * this file is generated from lib/simplehash.h; the reference looks stale.
     *
     * tb: the simplehash table; its private_data points at the TupleHashTable
     * tuple: a stored tuple to hash, or NULL to hash hashtable->inputslot with
     *		hashtable->in_hash_funcs
     *
     * Returns murmurhash32() of the combined per-column hash, which starts
     * from the table's hash_iv.
412  */
413 static uint32
414 TupleHashTableHash_internal(struct tuplehash_hash *tb,
415  const MinimalTuple tuple)
416 {
417  TupleHashTable hashtable = (TupleHashTable) tb->private_data;
418  int numCols = hashtable->numCols;
419  AttrNumber *keyColIdx = hashtable->keyColIdx;
420  uint32 hashkey = hashtable->hash_iv;
421  TupleTableSlot *slot;
422  FmgrInfo *hashfunctions;
423  int i;
424 
425  if (tuple == NULL)
426  {
427  /* Process the current input tuple for the table */
428  slot = hashtable->inputslot;
429  hashfunctions = hashtable->in_hash_funcs;
430  }
431  else
432  {
433  /*
434  * Process a tuple already stored in the table.
435  *
436  * (this case never actually occurs due to the way simplehash.h is
437  * used, as the hash-value is stored in the entries)
438  */
439  slot = hashtable->tableslot;
440  ExecStoreMinimalTuple(tuple, slot, false);
441  hashfunctions = hashtable->tab_hash_funcs;
442  }
443 
     /* fold each key column's hash into hashkey, in keyColIdx order */
444  for (i = 0; i < numCols; i++)
445  {
446  AttrNumber att = keyColIdx[i];
447  Datum attr;
448  bool isNull;
449 
450  /* rotate hashkey left 1 bit at each step */
451  hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
452 
453  attr = slot_getattr(slot, att, &isNull);
454 
455  if (!isNull) /* treat nulls as having hash key 0 */
456  {
457  uint32 hkey;
458 
     /* the table's collations are used for both hash function sets */
459  hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
460  hashtable->tab_collations[i],
461  attr));
462  hashkey ^= hkey;
463  }
464  }
465 
466  /*
467  * The way hashes are combined above, among each other and with the IV,
468  * doesn't lead to good bit perturbation. As the IV's goal is to
469  * achieve that, perform a round of hashing of the combined hash -
470  * resulting in near perfect perturbation.
471  */
472  return murmurhash32(hashkey);
473 }
474 
475 /*
476  * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
477  * so that we can avoid switching the memory context multiple times for
478  * LookupTupleHashEntry.
479  *
480  * NB: This function may or may not change the memory context. Caller is
481  * expected to change it back.
     *
     * The context is changed only when a new entry is created: it is switched
     * to tablecxt so that the copy of the input tuple is allocated there, and
     * it is not switched back here.
     *
     * isnew: NULL for a pure lookup (tuplehash_lookup_hash); otherwise
     *		find-or-create (tuplehash_insert_hash), with *isnew set to
     *		whether the entry was created
     * hash: precomputed hash value for the tuple in slot
     *
     * NOTE(review): listing lines 484 (function name and leading parameters)
     * and 489 (presumably the declaration of the local "key" assigned below)
     * are absent from this page.
482  */
483 static TupleHashEntry
485  bool *isnew, uint32 hash)
486 {
487  TupleHashEntryData *entry;
488  bool found;
490 
491  key = NULL; /* flag to reference inputslot */
492 
493  if (isnew)
494  {
495  entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
496 
497  if (found)
498  {
499  /* found pre-existing entry */
500  *isnew = false;
501  }
502  else
503  {
504  /* created new entry */
505  *isnew = true;
506  /* zero caller data */
507  entry->additional = NULL;
     /* not switched back here; see the NB in the header comment */
508  MemoryContextSwitchTo(hashtable->tablecxt);
509  /* Copy the first tuple into the table context */
510  entry->firstTuple = ExecCopySlotMinimalTuple(slot);
511  }
512  }
513  else
514  {
     /* lookup only: nothing is inserted on this path */
515  entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
516  }
517 
518  return entry;
519 }
520 
521 /*
522  * See whether two tuples (presumably of the same hash value) match
     *
     * tb: the simplehash table; its private_data points at the TupleHashTable
     * tuple1: a tuple stored in the table (must not be NULL)
     * tuple2: must be NULL, meaning "compare against hashtable->inputslot"
     *
     * Returns 0 if hashtable->cur_eq_func accepts the pair and 1 otherwise,
     * i.e. the negation of the qual result. SH_EQUAL tests the result
     * against 0.
     *
     * Side effect: tuple1 is stored into hashtable->tableslot.
523  */
524 static int
525 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
526 {
527  TupleTableSlot *slot1;
528  TupleTableSlot *slot2;
529  TupleHashTable hashtable = (TupleHashTable) tb->private_data;
530  ExprContext *econtext = hashtable->exprcontext;
531 
532  /*
533  * We assume that simplehash.h will only ever call us with the first
534  * argument being an actual table entry, and the second argument being
535  * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
536  * could be supported too, but is not currently required.
537  */
538  Assert(tuple1 != NULL);
539  slot1 = hashtable->tableslot;
540  ExecStoreMinimalTuple(tuple1, slot1, false);
541  Assert(tuple2 == NULL);
542  slot2 = hashtable->inputslot;
543 
544  /* For crosstype comparisons, the inputslot must be first */
     /* here "first" means the inner tuple; the table entry is the outer one */
545  econtext->ecxt_innertuple = slot2;
546  econtext->ecxt_outertuple = slot1;
     /* invert: the qual yields true on a match, this function returns 0 */
547  return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
548 }
ExprContext * exprcontext
Definition: execnodes.h:713
#define DatumGetUInt32(X)
Definition: postgres.h:486
ExprContext * CreateStandaloneExprContext(void)
Definition: execUtils.c:357
Definition: fmgr.h:56
TupleHashTable BuildTupleHashTableExt(PlanState *parent, TupleDesc inputDesc, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv)
Definition: execGrouping.c:154
TupleDesc CreateTupleDescCopy(TupleDesc tupdesc)
Definition: tupdesc.c:110
bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno)
Definition: lsyscache.c:508
TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)
Definition: execGrouping.c:350
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1416
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1208
ExprState * ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const TupleTableSlotOps *lops, const TupleTableSlotOps *rops, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, const Oid *collations, PlanState *parent)
Definition: execExpr.c:3350
TupleTableSlot * inputslot
Definition: execnodes.h:709
MinimalTuple firstTuple
Definition: execnodes.h:682
static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple)
Definition: execGrouping.c:414
#define Min(x, y)
Definition: c.h:920
TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, FmgrInfo *hashfunctions)
Definition: execGrouping.c:381
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
AttrNumber * keyColIdx
Definition: execnodes.h:700
ExprState * tab_eq_func
Definition: execnodes.h:702
ExprState * cur_eq_func
Definition: execnodes.h:711
static uint32 murmurhash32(uint32 data)
Definition: hashfn.h:92
unsigned int Oid
Definition: postgres_ext.h:31
void execTuplesHashPrepare(int numCols, const Oid *eqOperators, Oid **eqFuncOids, FmgrInfo **hashFunctions)
Definition: execGrouping.c:96
void ResetTupleHashTable(TupleHashTable hashtable)
Definition: execGrouping.c:282
FmgrInfo * tab_hash_funcs
Definition: execnodes.h:701
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
Definition: execGrouping.c:525
FmgrInfo * in_hash_funcs
Definition: execnodes.h:710
#define ERROR
Definition: elog.h:43
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
struct TupleHashEntryData TupleHashEntryData
static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)
Definition: execGrouping.c:484
static MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot)
Definition: tuptable.h:463
MemoryContext tablecxt
Definition: execnodes.h:704
struct TupleHashTableData * TupleHashTable
Definition: execnodes.h:678
int ParallelWorkerNumber
Definition: parallel.c:112
unsigned int uint32
Definition: c.h:367
TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew)
Definition: execGrouping.c:300
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:397
uintptr_t Datum
Definition: postgres.h:367
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1132
int work_mem
Definition: globals.c:121
TupleHashTable BuildTupleHashTable(PlanState *parent, TupleDesc inputDesc, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv)
Definition: execGrouping.c:252
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:381
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1202
tuplehash_hash * hashtab
Definition: execnodes.h:698
#define Assert(condition)
Definition: c.h:738
TupleTableSlot * tableslot
Definition: execnodes.h:707
ExprState * execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, const Oid *collations, PlanState *parent)
Definition: execGrouping.c:59
size_t Size
Definition: c.h:466
uint32 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
Definition: execGrouping.c:327
void * palloc(Size size)
Definition: mcxt.c:949
#define elog(elevel,...)
Definition: elog.h:214
int i
MemoryContext tempcxt
Definition: execnodes.h:705
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
int16 AttrNumber
Definition: attnum.h:21
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:85