PostgreSQL Source Code  git master
nodeHashjoin.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/parallel.h"
#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "utils/lsyscache.h"
#include "utils/sharedtuplestore.h"
#include "utils/wait_event.h"
Include dependency graph for nodeHashjoin.c:

Go to the source code of this file.

Macros

#define HJ_BUILD_HASHTABLE   1
 
#define HJ_NEED_NEW_OUTER   2
 
#define HJ_SCAN_BUCKET   3
 
#define HJ_FILL_OUTER_TUPLE   4
 
#define HJ_FILL_INNER_TUPLES   5
 
#define HJ_NEED_NEW_BATCH   6
 
#define HJ_FILL_OUTER(hjstate)   ((hjstate)->hj_NullInnerTupleSlot != NULL)
 
#define HJ_FILL_INNER(hjstate)   ((hjstate)->hj_NullOuterTupleSlot != NULL)
 

Functions

static TupleTableSlot * ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
 
static bool ExecHashJoinNewBatch (HashJoinState *hjstate)
 
static bool ExecParallelHashJoinNewBatch (HashJoinState *hjstate)
 
static void ExecParallelHashJoinPartitionOuter (HashJoinState *hjstate)
 
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl (PlanState *pstate, bool parallel)
 
static TupleTableSlot * ExecHashJoin (PlanState *pstate)
 
static TupleTableSlot * ExecParallelHashJoin (PlanState *pstate)
 
HashJoinState * ExecInitHashJoin (HashJoin *node, EState *estate, int eflags)
 
void ExecEndHashJoin (HashJoinState *node)
 
void ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
 
void ExecReScanHashJoin (HashJoinState *node)
 
void ExecShutdownHashJoin (HashJoinState *node)
 
void ExecHashJoinEstimate (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinReInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeWorker (HashJoinState *state, ParallelWorkerContext *pwcxt)
 

Macro Definition Documentation

◆ HJ_BUILD_HASHTABLE

#define HJ_BUILD_HASHTABLE   1

Definition at line 180 of file nodeHashjoin.c.

◆ HJ_FILL_INNER

#define HJ_FILL_INNER (   hjstate)    ((hjstate)->hj_NullOuterTupleSlot != NULL)

Definition at line 190 of file nodeHashjoin.c.

◆ HJ_FILL_INNER_TUPLES

#define HJ_FILL_INNER_TUPLES   5

Definition at line 184 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER

#define HJ_FILL_OUTER (   hjstate)    ((hjstate)->hj_NullInnerTupleSlot != NULL)

Definition at line 188 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_TUPLE

#define HJ_FILL_OUTER_TUPLE   4

Definition at line 183 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_BATCH

#define HJ_NEED_NEW_BATCH   6

Definition at line 185 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_OUTER

#define HJ_NEED_NEW_OUTER   2

Definition at line 181 of file nodeHashjoin.c.

◆ HJ_SCAN_BUCKET

#define HJ_SCAN_BUCKET   3

Definition at line 182 of file nodeHashjoin.c.

Function Documentation

◆ ExecEndHashJoin()

void ExecEndHashJoin ( HashJoinState * node)

Definition at line 948 of file nodeHashjoin.c.

949 {
950  /*
951  * Free hash table
952  */
953  if (node->hj_HashTable)
954  {
956  node->hj_HashTable = NULL;
957  }
958 
959  /*
960  * clean up subtrees
961  */
964 }
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:562
#define outerPlanState(node)
Definition: execnodes.h:1216
#define innerPlanState(node)
Definition: execnodes.h:1215
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:867
HashJoinTable hj_HashTable
Definition: execnodes.h:2221

References ExecEndNode(), ExecHashTableDestroy(), HashJoinState::hj_HashTable, innerPlanState, and outerPlanState.

Referenced by ExecEndNode().

◆ ExecHashJoin()

static TupleTableSlot* ExecHashJoin ( PlanState * pstate)
static

Definition at line 684 of file nodeHashjoin.c.

685 {
686  /*
687  * On sufficiently smart compilers this should be inlined with the
688  * parallel-aware branches removed.
689  */
690  return ExecHashJoinImpl(pstate, false);
691 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:221

References ExecHashJoinImpl().

Referenced by ExecInitHashJoin().

◆ ExecHashJoinEstimate()

void ExecHashJoinEstimate ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1648 of file nodeHashjoin.c.

1649 {
1651  shm_toc_estimate_keys(&pcxt->estimator, 1);
1652 }
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
shm_toc_estimator estimator
Definition: parallel.h:41

References ParallelContext::estimator, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

◆ ExecHashJoinGetSavedTuple()

static TupleTableSlot * ExecHashJoinGetSavedTuple ( HashJoinState * hjstate,
BufFile * file,
uint32 * hashvalue,
TupleTableSlot * tupleSlot 
)
static

Definition at line 1455 of file nodeHashjoin.c.

1459 {
1460  uint32 header[2];
1461  size_t nread;
1462  MinimalTuple tuple;
1463 
1464  /*
1465  * We check for interrupts here because this is typically taken as an
1466  * alternative code path to an ExecProcNode() call, which would include
1467  * such a check.
1468  */
1470 
1471  /*
1472  * Since both the hash value and the MinimalTuple length word are uint32,
1473  * we can read them both in one BufFileRead() call without any type
1474  * cheating.
1475  */
1476  nread = BufFileReadMaybeEOF(file, header, sizeof(header), true);
1477  if (nread == 0) /* end of file */
1478  {
1479  ExecClearTuple(tupleSlot);
1480  return NULL;
1481  }
1482  *hashvalue = header[0];
1483  tuple = (MinimalTuple) palloc(header[1]);
1484  tuple->t_len = header[1];
1485  BufFileReadExact(file,
1486  (char *) tuple + sizeof(uint32),
1487  header[1] - sizeof(uint32));
1488  ExecForceStoreMinimalTuple(tuple, tupleSlot, true);
1489  return tupleSlot;
1490 }
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition: buffile.c:664
unsigned int uint32
Definition: c.h:506
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1599
MinimalTupleData * MinimalTuple
Definition: htup.h:27
void * palloc(Size size)
Definition: mcxt.c:1317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References BufFileReadExact(), BufFileReadMaybeEOF(), CHECK_FOR_INTERRUPTS, ExecClearTuple(), ExecForceStoreMinimalTuple(), palloc(), and MinimalTupleData::t_len.

Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().

◆ ExecHashJoinImpl()

static pg_attribute_always_inline TupleTableSlot* ExecHashJoinImpl ( PlanState * pstate,
bool  parallel 
)
static

Definition at line 221 of file nodeHashjoin.c.

222 {
223  HashJoinState *node = castNode(HashJoinState, pstate);
224  PlanState *outerNode;
225  HashState *hashNode;
226  ExprState *joinqual;
227  ExprState *otherqual;
228  ExprContext *econtext;
229  HashJoinTable hashtable;
230  TupleTableSlot *outerTupleSlot;
231  uint32 hashvalue;
232  int batchno;
233  ParallelHashJoinState *parallel_state;
234 
235  /*
236  * get information from HashJoin node
237  */
238  joinqual = node->js.joinqual;
239  otherqual = node->js.ps.qual;
240  hashNode = (HashState *) innerPlanState(node);
241  outerNode = outerPlanState(node);
242  hashtable = node->hj_HashTable;
243  econtext = node->js.ps.ps_ExprContext;
244  parallel_state = hashNode->parallel_state;
245 
246  /*
247  * Reset per-tuple memory context to free any expression evaluation
248  * storage allocated in the previous tuple cycle.
249  */
250  ResetExprContext(econtext);
251 
252  /*
253  * run the hash join state machine
254  */
255  for (;;)
256  {
257  /*
258  * It's possible to iterate this loop many times before returning a
259  * tuple, in some pathological cases such as needing to move much of
260  * the current batch to a later batch. So let's check for interrupts
261  * each time through.
262  */
264 
265  switch (node->hj_JoinState)
266  {
267  case HJ_BUILD_HASHTABLE:
268 
269  /*
270  * First time through: build hash table for inner relation.
271  */
272  Assert(hashtable == NULL);
273 
274  /*
275  * If the outer relation is completely empty, and it's not
276  * right/right-anti/full join, we can quit without building
277  * the hash table. However, for an inner join it is only a
278  * win to check this when the outer relation's startup cost is
279  * less than the projected cost of building the hash table.
280  * Otherwise it's best to build the hash table first and see
281  * if the inner relation is empty. (When it's a left join, we
282  * should always make this check, since we aren't going to be
283  * able to skip the join on the strength of an empty inner
284  * relation anyway.)
285  *
286  * If we are rescanning the join, we make use of information
287  * gained on the previous scan: don't bother to try the
288  * prefetch if the previous scan found the outer relation
289  * nonempty. This is not 100% reliable since with new
290  * parameters the outer relation might yield different
291  * results, but it's a good heuristic.
292  *
293  * The only way to make the check is to try to fetch a tuple
294  * from the outer plan node. If we succeed, we have to stash
295  * it away for later consumption by ExecHashJoinOuterGetTuple.
296  */
297  if (HJ_FILL_INNER(node))
298  {
299  /* no chance to not build the hash table */
300  node->hj_FirstOuterTupleSlot = NULL;
301  }
302  else if (parallel)
303  {
304  /*
305  * The empty-outer optimization is not implemented for
306  * shared hash tables, because no one participant can
307  * determine that there are no outer tuples, and it's not
308  * yet clear that it's worth the synchronization overhead
309  * of reaching consensus to figure that out. So we have
310  * to build the hash table.
311  */
312  node->hj_FirstOuterTupleSlot = NULL;
313  }
314  else if (HJ_FILL_OUTER(node) ||
315  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
316  !node->hj_OuterNotEmpty))
317  {
318  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
320  {
321  node->hj_OuterNotEmpty = false;
322  return NULL;
323  }
324  else
325  node->hj_OuterNotEmpty = true;
326  }
327  else
328  node->hj_FirstOuterTupleSlot = NULL;
329 
330  /*
331  * Create the hash table. If using Parallel Hash, then
332  * whoever gets here first will create the hash table and any
333  * later arrivals will merely attach to it.
334  */
335  hashtable = ExecHashTableCreate(hashNode);
336  node->hj_HashTable = hashtable;
337 
338  /*
339  * Execute the Hash node, to build the hash table. If using
340  * Parallel Hash, then we'll try to help hashing unless we
341  * arrived too late.
342  */
343  hashNode->hashtable = hashtable;
344  (void) MultiExecProcNode((PlanState *) hashNode);
345 
346  /*
347  * If the inner relation is completely empty, and we're not
348  * doing a left outer join, we can quit without scanning the
349  * outer relation.
350  */
351  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
352  {
353  if (parallel)
354  {
355  /*
356  * Advance the build barrier to PHJ_BUILD_RUN before
357  * proceeding so we can negotiate resource cleanup.
358  */
359  Barrier *build_barrier = &parallel_state->build_barrier;
360 
361  while (BarrierPhase(build_barrier) < PHJ_BUILD_RUN)
362  BarrierArriveAndWait(build_barrier, 0);
363  }
364  return NULL;
365  }
366 
367  /*
368  * need to remember whether nbatch has increased since we
369  * began scanning the outer relation
370  */
371  hashtable->nbatch_outstart = hashtable->nbatch;
372 
373  /*
374  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
375  * tuple above, because ExecHashJoinOuterGetTuple will
376  * immediately set it again.)
377  */
378  node->hj_OuterNotEmpty = false;
379 
380  if (parallel)
381  {
382  Barrier *build_barrier;
383 
384  build_barrier = &parallel_state->build_barrier;
385  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
386  BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
387  BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
388  if (BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER)
389  {
390  /*
391  * If multi-batch, we need to hash the outer relation
392  * up front.
393  */
394  if (hashtable->nbatch > 1)
396  BarrierArriveAndWait(build_barrier,
397  WAIT_EVENT_HASH_BUILD_HASH_OUTER);
398  }
399  else if (BarrierPhase(build_barrier) == PHJ_BUILD_FREE)
400  {
401  /*
402  * If we attached so late that the job is finished and
403  * the batch state has been freed, we can return
404  * immediately.
405  */
406  return NULL;
407  }
408 
409  /* Each backend should now select a batch to work on. */
410  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUN);
411  hashtable->curbatch = -1;
413 
414  continue;
415  }
416  else
418 
419  /* FALL THRU */
420 
421  case HJ_NEED_NEW_OUTER:
422 
423  /*
424  * We don't have an outer tuple, try to get the next one
425  */
426  if (parallel)
427  outerTupleSlot =
428  ExecParallelHashJoinOuterGetTuple(outerNode, node,
429  &hashvalue);
430  else
431  outerTupleSlot =
432  ExecHashJoinOuterGetTuple(outerNode, node, &hashvalue);
433 
434  if (TupIsNull(outerTupleSlot))
435  {
436  /* end of batch, or maybe whole join */
437  if (HJ_FILL_INNER(node))
438  {
439  /* set up to scan for unmatched inner tuples */
440  if (parallel)
441  {
442  /*
443  * Only one process is currently allowed to handle
444  * each batch's unmatched tuples, in a parallel
445  * join.
446  */
449  else
451  }
452  else
453  {
456  }
457  }
458  else
460  continue;
461  }
462 
463  econtext->ecxt_outertuple = outerTupleSlot;
464  node->hj_MatchedOuter = false;
465 
466  /*
467  * Find the corresponding bucket for this tuple in the main
468  * hash table or skew hash table.
469  */
470  node->hj_CurHashValue = hashvalue;
471  ExecHashGetBucketAndBatch(hashtable, hashvalue,
472  &node->hj_CurBucketNo, &batchno);
473  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
474  hashvalue);
475  node->hj_CurTuple = NULL;
476 
477  /*
478  * The tuple might not belong to the current batch (where
479  * "current batch" includes the skew buckets if any).
480  */
481  if (batchno != hashtable->curbatch &&
483  {
484  bool shouldFree;
485  MinimalTuple mintuple = ExecFetchSlotMinimalTuple(outerTupleSlot,
486  &shouldFree);
487 
488  /*
489  * Need to postpone this outer tuple to a later batch.
490  * Save it in the corresponding outer-batch file.
491  */
492  Assert(parallel_state == NULL);
493  Assert(batchno > hashtable->curbatch);
494  ExecHashJoinSaveTuple(mintuple, hashvalue,
495  &hashtable->outerBatchFile[batchno],
496  hashtable);
497 
498  if (shouldFree)
499  heap_free_minimal_tuple(mintuple);
500 
501  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
502  continue;
503  }
504 
505  /* OK, let's scan the bucket for matches */
507 
508  /* FALL THRU */
509 
510  case HJ_SCAN_BUCKET:
511 
512  /*
513  * Scan the selected hash bucket for matches to current outer
514  */
515  if (parallel)
516  {
517  if (!ExecParallelScanHashBucket(node, econtext))
518  {
519  /* out of matches; check for possible outer-join fill */
521  continue;
522  }
523  }
524  else
525  {
526  if (!ExecScanHashBucket(node, econtext))
527  {
528  /* out of matches; check for possible outer-join fill */
530  continue;
531  }
532  }
533 
534  /*
535  * In a right-semijoin, we only need the first match for each
536  * inner tuple.
537  */
538  if (node->js.jointype == JOIN_RIGHT_SEMI &&
540  continue;
541 
542  /*
543  * We've got a match, but still need to test non-hashed quals.
544  * ExecScanHashBucket already set up all the state needed to
545  * call ExecQual.
546  *
547  * If we pass the qual, then save state for next call and have
548  * ExecProject form the projection, store it in the tuple
549  * table, and return the slot.
550  *
551  * Only the joinquals determine tuple match status, but all
552  * quals must pass to actually return the tuple.
553  */
554  if (joinqual == NULL || ExecQual(joinqual, econtext))
555  {
556  node->hj_MatchedOuter = true;
557 
558  /*
559  * This is really only needed if HJ_FILL_INNER(node) or if
560  * we are in a right-semijoin, but we'll avoid the branch
561  * and just set it always.
562  */
565 
566  /* In an antijoin, we never return a matched tuple */
567  if (node->js.jointype == JOIN_ANTI)
568  {
570  continue;
571  }
572 
573  /*
574  * If we only need to consider the first matching inner
575  * tuple, then advance to next outer tuple after we've
576  * processed this one.
577  */
578  if (node->js.single_match)
580 
581  /*
582  * In a right-antijoin, we never return a matched tuple.
583  * If it's not an inner_unique join, we need to stay on
584  * the current outer tuple to continue scanning the inner
585  * side for matches.
586  */
587  if (node->js.jointype == JOIN_RIGHT_ANTI)
588  continue;
589 
590  if (otherqual == NULL || ExecQual(otherqual, econtext))
591  return ExecProject(node->js.ps.ps_ProjInfo);
592  else
593  InstrCountFiltered2(node, 1);
594  }
595  else
596  InstrCountFiltered1(node, 1);
597  break;
598 
599  case HJ_FILL_OUTER_TUPLE:
600 
601  /*
602  * The current outer tuple has run out of matches, so check
603  * whether to emit a dummy outer-join tuple. Whether we emit
604  * one or not, the next state is NEED_NEW_OUTER.
605  */
607 
608  if (!node->hj_MatchedOuter &&
609  HJ_FILL_OUTER(node))
610  {
611  /*
612  * Generate a fake join tuple with nulls for the inner
613  * tuple, and return it if it passes the non-join quals.
614  */
615  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
616 
617  if (otherqual == NULL || ExecQual(otherqual, econtext))
618  return ExecProject(node->js.ps.ps_ProjInfo);
619  else
620  InstrCountFiltered2(node, 1);
621  }
622  break;
623 
625 
626  /*
627  * We have finished a batch, but we are doing
628  * right/right-anti/full join, so any unmatched inner tuples
629  * in the hashtable have to be emitted before we continue to
630  * the next batch.
631  */
632  if (!(parallel ? ExecParallelScanHashTableForUnmatched(node, econtext)
633  : ExecScanHashTableForUnmatched(node, econtext)))
634  {
635  /* no more unmatched tuples */
637  continue;
638  }
639 
640  /*
641  * Generate a fake join tuple with nulls for the outer tuple,
642  * and return it if it passes the non-join quals.
643  */
644  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
645 
646  if (otherqual == NULL || ExecQual(otherqual, econtext))
647  return ExecProject(node->js.ps.ps_ProjInfo);
648  else
649  InstrCountFiltered2(node, 1);
650  break;
651 
652  case HJ_NEED_NEW_BATCH:
653 
654  /*
655  * Try to advance to next batch. Done if there are no more.
656  */
657  if (parallel)
658  {
659  if (!ExecParallelHashJoinNewBatch(node))
660  return NULL; /* end of parallel-aware join */
661  }
662  else
663  {
664  if (!ExecHashJoinNewBatch(node))
665  return NULL; /* end of parallel-oblivious join */
666  }
668  break;
669 
670  default:
671  elog(ERROR, "unrecognized hashjoin state: %d",
672  (int) node->hj_JoinState);
673  }
674  }
675 }
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:265
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
#define Assert(condition)
Definition: c.h:858
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:507
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
Definition: execTuples.c:1779
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:1224
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1229
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: executor.h:387
#define ResetExprContext(econtext)
Definition: executor.h:555
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:424
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:273
#define PHJ_BUILD_FREE
Definition: hashjoin.h:274
#define PHJ_BUILD_HASH_OUTER
Definition: hashjoin.h:272
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:91
#define PHJ_BUILD_RUN
Definition: hashjoin.h:273
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:120
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1523
#define HeapTupleHeaderHasMatch(tup)
Definition: htup_details.h:514
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:519
bool ExecParallelScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1912
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:1963
bool ExecParallelScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2123
HashJoinTable ExecHashTableCreate(HashState *state)
Definition: nodeHash.c:447
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:2414
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2049
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1819
bool ExecParallelPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:1984
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1851
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:185
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:182
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:183
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:190
static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:184
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:979
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:181
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:188
static void ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:180
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
@ JOIN_RIGHT_SEMI
Definition: nodes.h:309
@ JOIN_RIGHT_ANTI
Definition: nodes.h:310
@ JOIN_ANTI
Definition: nodes.h:308
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:260
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:262
HashJoinTuple hj_CurTuple
Definition: execnodes.h:2225
int hj_CurSkewBucketNo
Definition: execnodes.h:2224
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:2228
bool hj_OuterNotEmpty
Definition: execnodes.h:2233
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:2229
JoinState js
Definition: execnodes.h:2218
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:2230
bool hj_MatchedOuter
Definition: execnodes.h:2232
uint32 hj_CurHashValue
Definition: execnodes.h:2222
int hj_CurBucketNo
Definition: execnodes.h:2223
double totalTuples
Definition: hashjoin.h:330
BufFile ** outerBatchFile
Definition: hashjoin.h:342
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2796
HashJoinTable hashtable
Definition: execnodes.h:2774
PlanState ps
Definition: execnodes.h:2773
JoinType jointype
Definition: execnodes.h:2117
PlanState ps
Definition: execnodes.h:2116
ExprState * joinqual
Definition: execnodes.h:2120
bool single_match
Definition: execnodes.h:2118
ExprState * qual
Definition: execnodes.h:1141
Plan * plan
Definition: execnodes.h:1120
ExprContext * ps_ExprContext
Definition: execnodes.h:1159
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1160
Cost total_cost
Definition: plannodes.h:130
Cost startup_cost
Definition: plannodes.h:129
#define TupIsNull(slot)
Definition: tuptable.h:306

References Assert, BarrierArriveAndWait(), BarrierPhase(), ParallelHashJoinState::build_barrier, castNode, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog, ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecParallelHashJoinNewBatch(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), ExecParallelPrepHashTableForUnmatched(), ExecParallelScanHashBucket(), ExecParallelScanHashTableForUnmatched(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), HashState::hashtable, heap_free_minimal_tuple(), HeapTupleHeaderHasMatch, HeapTupleHeaderSetMatch, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterNotEmpty, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JOIN_RIGHT_ANTI, JOIN_RIGHT_SEMI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, HashJoinTableData::outerBatchFile, outerPlanState, HashState::parallel_state, PHJ_BUILD_FREE, PHJ_BUILD_HASH_OUTER, PHJ_BUILD_RUN, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, PlanState::qual, ResetExprContext, JoinState::single_match, Plan::startup_cost, Plan::total_cost, 
HashJoinTableData::totalTuples, and TupIsNull.

Referenced by ExecHashJoin(), and ExecParallelHashJoin().

◆ ExecHashJoinInitializeDSM()

void ExecHashJoinInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1655 of file nodeHashjoin.c.

1656 {
1657  int plan_node_id = state->js.ps.plan->plan_node_id;
1658  HashState *hashNode;
1659  ParallelHashJoinState *pstate;
1660 
1661  /*
1662  * Disable shared hash table mode if we failed to create a real DSM
1663  * segment, because that means that we don't have a DSA area to work with.
1664  */
1665  if (pcxt->seg == NULL)
1666  return;
1667 
1669 
1670  /*
1671  * Set up the state needed to coordinate access to the shared hash
1672  * table(s), using the plan node ID as the toc key.
1673  */
1674  pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelHashJoinState));
1675  shm_toc_insert(pcxt->toc, plan_node_id, pstate);
1676 
1677  /*
1678  * Set up the shared hash join state with no batches initially.
1679  * ExecHashTableCreate() will prepare at least one later and set nbatch
1680  * and space_allowed.
1681  */
1682  pstate->nbatch = 0;
1683  pstate->space_allowed = 0;
1684  pstate->batches = InvalidDsaPointer;
1685  pstate->old_batches = InvalidDsaPointer;
1686  pstate->nbuckets = 0;
1687  pstate->growth = PHJ_GROWTH_OK;
1689  pg_atomic_init_u32(&pstate->distributor, 0);
1690  pstate->nparticipants = pcxt->nworkers + 1;
1691  pstate->total_tuples = 0;
1692  LWLockInitialize(&pstate->lock,
1694  BarrierInit(&pstate->build_barrier, 0);
1695  BarrierInit(&pstate->grow_batches_barrier, 0);
1696  BarrierInit(&pstate->grow_buckets_barrier, 0);
1697 
1698  /* Set up the space we'll use for shared temporary files. */
1699  SharedFileSetInit(&pstate->fileset, pcxt->seg);
1700 
1701  /* Initialize the shared state in the hash node. */
1702  hashNode = (HashState *) innerPlanState(state);
1703  hashNode->parallel_state = pstate;
1704 }
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:221
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
#define InvalidDsaPointer
Definition: dsa.h:78
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:430
@ PHJ_GROWTH_OK
Definition: hashjoin.h:233
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_PARALLEL_HASH_JOIN
Definition: lwlock.h:194
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:700
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:38
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
dsm_segment * seg
Definition: parallel.h:42
shm_toc * toc
Definition: parallel.h:44
Barrier grow_batches_barrier
Definition: hashjoin.h:261
dsa_pointer old_batches
Definition: hashjoin.h:249
dsa_pointer chunk_work_queue
Definition: hashjoin.h:254
Barrier grow_buckets_barrier
Definition: hashjoin.h:262
ParallelHashGrowth growth
Definition: hashjoin.h:253
pg_atomic_uint32 distributor
Definition: hashjoin.h:263
SharedFileSet fileset
Definition: hashjoin.h:265
dsa_pointer batches
Definition: hashjoin.h:248
Definition: regguts.h:323

References BarrierInit(), ParallelHashJoinState::batches, ParallelHashJoinState::build_barrier, ParallelHashJoinState::chunk_work_queue, ParallelHashJoinState::distributor, ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, ParallelHashJoinState::grow_batches_barrier, ParallelHashJoinState::grow_buckets_barrier, ParallelHashJoinState::growth, innerPlanState, InvalidDsaPointer, ParallelHashJoinState::lock, LWLockInitialize(), LWTRANCHE_PARALLEL_HASH_JOIN, ParallelHashJoinState::nbatch, ParallelHashJoinState::nbuckets, ParallelHashJoinState::nparticipants, ParallelContext::nworkers, ParallelHashJoinState::old_batches, HashState::parallel_state, pg_atomic_init_u32(), PHJ_GROWTH_OK, ParallelContext::seg, SharedFileSetInit(), shm_toc_allocate(), shm_toc_insert(), ParallelHashJoinState::space_allowed, ParallelContext::toc, and ParallelHashJoinState::total_tuples.

Referenced by ExecParallelInitializeDSM().

◆ ExecHashJoinInitializeWorker()

void ExecHashJoinInitializeWorker ( HashJoinState * state,
ParallelWorkerContext * pwcxt 
)

Definition at line 1746 of file nodeHashjoin.c.

1748 {
1749  HashState *hashNode;
1750  int plan_node_id = state->js.ps.plan->plan_node_id;
1751  ParallelHashJoinState *pstate =
1752  shm_toc_lookup(pwcxt->toc, plan_node_id, false);
1753 
1754  /* Attach to the space for shared temporary files. */
1755  SharedFileSetAttach(&pstate->fileset, pwcxt->seg);
1756 
1757  /* Attach to the shared state in the hash node. */
1758  hashNode = (HashState *) innerPlanState(state);
1759  hashNode->parallel_state = pstate;
1760 
1762 }
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:56
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
dsm_segment * seg
Definition: parallel.h:52

References ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, innerPlanState, HashState::parallel_state, ParallelWorkerContext::seg, SharedFileSetAttach(), shm_toc_lookup(), and ParallelWorkerContext::toc.

Referenced by ExecParallelInitializeWorker().

◆ ExecHashJoinNewBatch()

static bool ExecHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1130 of file nodeHashjoin.c.

1131 {
1132  HashJoinTable hashtable = hjstate->hj_HashTable;
1133  int nbatch;
1134  int curbatch;
1135  BufFile *innerFile;
1136  TupleTableSlot *slot;
1137  uint32 hashvalue;
1138 
1139  nbatch = hashtable->nbatch;
1140  curbatch = hashtable->curbatch;
1141 
1142  if (curbatch > 0)
1143  {
1144  /*
1145  * We no longer need the previous outer batch file; close it right
1146  * away to free disk space.
1147  */
1148  if (hashtable->outerBatchFile[curbatch])
1149  BufFileClose(hashtable->outerBatchFile[curbatch]);
1150  hashtable->outerBatchFile[curbatch] = NULL;
1151  }
1152  else /* we just finished the first batch */
1153  {
1154  /*
1155  * Reset some of the skew optimization state variables, since we no
1156  * longer need to consider skew tuples after the first batch. The
1157  * memory context reset we are about to do will release the skew
1158  * hashtable itself.
1159  */
1160  hashtable->skewEnabled = false;
1161  hashtable->skewBucket = NULL;
1162  hashtable->skewBucketNums = NULL;
1163  hashtable->nSkewBuckets = 0;
1164  hashtable->spaceUsedSkew = 0;
1165  }
1166 
1167  /*
1168  * We can always skip over any batches that are completely empty on both
1169  * sides. We can sometimes skip over batches that are empty on only one
1170  * side, but there are exceptions:
1171  *
1172  * 1. In a left/full outer join, we have to process outer batches even if
1173  * the inner batch is empty. Similarly, in a right/right-anti/full outer
1174  * join, we have to process inner batches even if the outer batch is
1175  * empty.
1176  *
1177  * 2. If we have increased nbatch since the initial estimate, we have to
1178  * scan inner batches since they might contain tuples that need to be
1179  * reassigned to later inner batches.
1180  *
1181  * 3. Similarly, if we have increased nbatch since starting the outer
1182  * scan, we have to rescan outer batches in case they contain tuples that
1183  * need to be reassigned.
1184  */
1185  curbatch++;
1186  while (curbatch < nbatch &&
1187  (hashtable->outerBatchFile[curbatch] == NULL ||
1188  hashtable->innerBatchFile[curbatch] == NULL))
1189  {
1190  if (hashtable->outerBatchFile[curbatch] &&
1191  HJ_FILL_OUTER(hjstate))
1192  break; /* must process due to rule 1 */
1193  if (hashtable->innerBatchFile[curbatch] &&
1194  HJ_FILL_INNER(hjstate))
1195  break; /* must process due to rule 1 */
1196  if (hashtable->innerBatchFile[curbatch] &&
1197  nbatch != hashtable->nbatch_original)
1198  break; /* must process due to rule 2 */
1199  if (hashtable->outerBatchFile[curbatch] &&
1200  nbatch != hashtable->nbatch_outstart)
1201  break; /* must process due to rule 3 */
1202  /* We can ignore this batch. */
1203  /* Release associated temp files right away. */
1204  if (hashtable->innerBatchFile[curbatch])
1205  BufFileClose(hashtable->innerBatchFile[curbatch]);
1206  hashtable->innerBatchFile[curbatch] = NULL;
1207  if (hashtable->outerBatchFile[curbatch])
1208  BufFileClose(hashtable->outerBatchFile[curbatch]);
1209  hashtable->outerBatchFile[curbatch] = NULL;
1210  curbatch++;
1211  }
1212 
1213  if (curbatch >= nbatch)
1214  return false; /* no more batches */
1215 
1216  hashtable->curbatch = curbatch;
1217 
1218  /*
1219  * Reload the hash table with the new inner batch (which could be empty)
1220  */
1221  ExecHashTableReset(hashtable);
1222 
1223  innerFile = hashtable->innerBatchFile[curbatch];
1224 
1225  if (innerFile != NULL)
1226  {
1227  if (BufFileSeek(innerFile, 0, 0, SEEK_SET))
1228  ereport(ERROR,
1229  (errcode_for_file_access(),
1230  errmsg("could not rewind hash-join temporary file")));
1231 
1232  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
1233  innerFile,
1234  &hashvalue,
1235  hjstate->hj_HashTupleSlot)))
1236  {
1237  /*
1238  * NOTE: some tuples may be sent to future batches. Also, it is
1239  * possible for hashtable->nbatch to be increased here!
1240  */
1241  ExecHashTableInsert(hashtable, slot, hashvalue);
1242  }
1243 
1244  /*
1245  * after we build the hash table, the inner batch file is no longer
1246  * needed
1247  */
1248  BufFileClose(innerFile);
1249  hashtable->innerBatchFile[curbatch] = NULL;
1250  }
1251 
1252  /*
1253  * Rewind outer batch file (if present), so that we can start reading it.
1254  */
1255  if (hashtable->outerBatchFile[curbatch] != NULL)
1256  {
1257  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0, SEEK_SET))
1258  ereport(ERROR,
1259  (errcode_for_file_access(),
1260  errmsg("could not rewind hash-join temporary file")));
1261  }
1262 
1263  return true;
1264 }
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
void BufFileClose(BufFile *file)
Definition: buffile.c:412
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:2186
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1608
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:2227
int * skewBucketNums
Definition: hashjoin.h:320
BufFile ** innerBatchFile
Definition: hashjoin.h:341
HashSkewBucket ** skewBucket
Definition: hashjoin.h:317

References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinOuterGetTuple()

static TupleTableSlot * ExecHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 979 of file nodeHashjoin.c.

982 {
983  HashJoinTable hashtable = hjstate->hj_HashTable;
984  int curbatch = hashtable->curbatch;
985  TupleTableSlot *slot;
986 
987  if (curbatch == 0) /* if it is the first pass */
988  {
989  /*
990  * Check to see if first outer tuple was already fetched by
991  * ExecHashJoin() and not used yet.
992  */
993  slot = hjstate->hj_FirstOuterTupleSlot;
994  if (!TupIsNull(slot))
995  hjstate->hj_FirstOuterTupleSlot = NULL;
996  else
997  slot = ExecProcNode(outerNode);
998 
999  while (!TupIsNull(slot))
1000  {
1001  bool isnull;
1002 
1003  /*
1004  * We have to compute the tuple's hash value.
1005  */
1006  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1007 
1008  econtext->ecxt_outertuple = slot;
1009 
1010  ResetExprContext(econtext);
1011 
1012  *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1013  econtext,
1014  &isnull));
1015 
1016  if (!isnull)
1017  {
1018  /* remember outer relation is not empty for possible rescan */
1019  hjstate->hj_OuterNotEmpty = true;
1020 
1021  return slot;
1022  }
1023 
1024  /*
1025  * That tuple couldn't match because of a NULL, so discard it and
1026  * continue with the next one.
1027  */
1028  slot = ExecProcNode(outerNode);
1029  }
1030  }
1031  else if (curbatch < hashtable->nbatch)
1032  {
1033  BufFile *file = hashtable->outerBatchFile[curbatch];
1034 
1035  /*
1036  * In outer-join cases, we could get here even though the batch file
1037  * is empty.
1038  */
1039  if (file == NULL)
1040  return NULL;
1041 
1042  slot = ExecHashJoinGetSavedTuple(hjstate,
1043  file,
1044  hashvalue,
1045  hjstate->hj_OuterTupleSlot);
1046  if (!TupIsNull(slot))
1047  return slot;
1048  }
1049 
1050  /* End of this batch */
1051  return NULL;
1052 }
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:359
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
ExprState * hj_OuterHash
Definition: execnodes.h:2220
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:2226

References HashJoinTableData::curbatch, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecEvalExprSwitchContext(), ExecHashJoinGetSavedTuple(), ExecProcNode(), HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHash, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::outerBatchFile, JoinState::ps, PlanState::ps_ExprContext, ResetExprContext, and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinReInitializeDSM()

void ExecHashJoinReInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1713 of file nodeHashjoin.c.

1714 {
1715  int plan_node_id = state->js.ps.plan->plan_node_id;
1716  ParallelHashJoinState *pstate =
1717  shm_toc_lookup(pcxt->toc, plan_node_id, false);
1718 
1719  /*
1720  * It would be possible to reuse the shared hash table in single-batch
1721  * cases by resetting and then fast-forwarding build_barrier to
1722  * PHJ_BUILD_FREE and batch 0's batch_barrier to PHJ_BATCH_PROBE, but
1723  * currently shared hash tables are already freed by now (by the last
1724  * participant to detach from the batch). We could consider keeping it
1725  * around for single-batch joins. We'd also need to adjust
1726  * finalize_plan() so that it doesn't record a dummy dependency for
1727  * Parallel Hash nodes, preventing the rescan optimization. For now we
1728  * don't try.
1729  */
1730 
1731  /* Detach, freeing any remaining shared memory. */
1732  if (state->hj_HashTable != NULL)
1733  {
1734  ExecHashTableDetachBatch(state->hj_HashTable);
1735  ExecHashTableDetach(state->hj_HashTable);
1736  }
1737 
1738  /* Clear any shared batch files. */
1739  SharedFileSetDeleteAll(&pstate->fileset);
1740 
1741  /* Reset build_barrier to PHJ_BUILD_ELECT so we can go around again. */
1742  BarrierInit(&pstate->build_barrier, 0);
1743 }
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3168
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3260
void SharedFileSetDeleteAll(SharedFileSet *fileset)
Definition: sharedfileset.c:83

References BarrierInit(), ParallelHashJoinState::build_barrier, ExecHashTableDetach(), ExecHashTableDetachBatch(), ParallelHashJoinState::fileset, SharedFileSetDeleteAll(), shm_toc_lookup(), and ParallelContext::toc.

Referenced by ExecParallelReInitializeDSM().

◆ ExecHashJoinSaveTuple()

void ExecHashJoinSaveTuple ( MinimalTuple  tuple,
uint32  hashvalue,
BufFile **  fileptr,
HashJoinTable  hashtable 
)

Definition at line 1414 of file nodeHashjoin.c.

1416 {
1417  BufFile *file = *fileptr;
1418 
1419  /*
1420  * The batch file is lazily created. If this is the first tuple written to
1421  * this batch, the batch file is created and its buffer is allocated in
1422  * the spillCxt context, NOT in the batchCxt.
1423  *
1424  * During the build phase, buffered files are created for inner batches.
1425  * Each batch's buffered file is closed (and its buffer freed) after the
1426  * batch is loaded into memory during the outer side scan. Therefore, it
1427  * is necessary to allocate the batch file buffer in a memory context
1428  * which outlives the batch itself.
1429  *
1430  * Also, we use spillCxt instead of hashCxt for a better accounting of the
1431  * spilling memory consumption.
1432  */
1433  if (file == NULL)
1434  {
1435  MemoryContext oldctx = MemoryContextSwitchTo(hashtable->spillCxt);
1436 
1437  file = BufFileCreateTemp(false);
1438  *fileptr = file;
1439 
1440  MemoryContextSwitchTo(oldctx);
1441  }
1442 
1443  BufFileWrite(file, &hashvalue, sizeof(uint32));
1444  BufFileWrite(file, tuple, tuple->t_len);
1445 }
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
MemoryContextSwitchTo(old_ctx)
MemoryContext spillCxt
Definition: hashjoin.h:352

References BufFileCreateTemp(), BufFileWrite(), MemoryContextSwitchTo(), HashJoinTableData::spillCxt, and MinimalTupleData::t_len.

Referenced by ExecHashIncreaseNumBatches(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().

◆ ExecInitHashJoin()

HashJoinState* ExecInitHashJoin ( HashJoin * node,
EState * estate,
int  eflags 
)

Definition at line 716 of file nodeHashjoin.c.

717 {
718  HashJoinState *hjstate;
719  Plan *outerNode;
720  Hash *hashNode;
721  TupleDesc outerDesc,
722  innerDesc;
723  const TupleTableSlotOps *ops;
724 
725  /* check for unsupported flags */
726  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
727 
728  /*
729  * create state structure
730  */
731  hjstate = makeNode(HashJoinState);
732  hjstate->js.ps.plan = (Plan *) node;
733  hjstate->js.ps.state = estate;
734 
735  /*
736  * See ExecHashJoinInitializeDSM() and ExecHashJoinInitializeWorker()
737  * where this function may be replaced with a parallel version, if we
738  * managed to launch a parallel query.
739  */
740  hjstate->js.ps.ExecProcNode = ExecHashJoin;
741  hjstate->js.jointype = node->join.jointype;
742 
743  /*
744  * Miscellaneous initialization
745  *
746  * create expression context for node
747  */
748  ExecAssignExprContext(estate, &hjstate->js.ps);
749 
750  /*
751  * initialize child nodes
752  *
753  * Note: we could suppress the REWIND flag for the inner input, which
754  * would amount to betting that the hash will be a single batch. Not
755  * clear if this would be a win or not.
756  */
757  outerNode = outerPlan(node);
758  hashNode = (Hash *) innerPlan(node);
759 
760  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
761  outerDesc = ExecGetResultType(outerPlanState(hjstate));
762  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
763  innerDesc = ExecGetResultType(innerPlanState(hjstate));
764 
765  /*
766  * Initialize result slot, type and projection.
767  */
768  ExecInitResultTupleSlotTL(&hjstate->js.ps, &TTSOpsVirtual);
769  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
770 
771  /*
772  * tuple table initialization
773  */
774  ops = ExecGetResultSlotOps(outerPlanState(hjstate), NULL);
775  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate, outerDesc,
776  ops);
777 
778  /*
779  * detect whether we need only consider the first matching inner tuple
780  */
781  hjstate->js.single_match = (node->join.inner_unique ||
782  node->join.jointype == JOIN_SEMI);
783 
784  /* set up null tuples for outer joins, if needed */
785  switch (node->join.jointype)
786  {
787  case JOIN_INNER:
788  case JOIN_SEMI:
789  case JOIN_RIGHT_SEMI:
790  break;
791  case JOIN_LEFT:
792  case JOIN_ANTI:
793  hjstate->hj_NullInnerTupleSlot =
794  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
795  break;
796  case JOIN_RIGHT:
797  case JOIN_RIGHT_ANTI:
798  hjstate->hj_NullOuterTupleSlot =
799  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
800  break;
801  case JOIN_FULL:
802  hjstate->hj_NullOuterTupleSlot =
803  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
804  hjstate->hj_NullInnerTupleSlot =
805  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
806  break;
807  default:
808  elog(ERROR, "unrecognized join type: %d",
809  (int) node->join.jointype);
810  }
811 
812  /*
813  * now for some voodoo. our temporary tuple slot is actually the result
814  * tuple slot of the Hash node (which is our inner plan). we can do this
815  * because Hash nodes don't return tuples via ExecProcNode() -- instead
816  * the hash join node uses ExecScanHashBucket() to get at the contents of
817  * the hash table. -cim 6/9/91
818  */
819  {
820  HashState *hashstate = (HashState *) innerPlanState(hjstate);
821  Hash *hash = (Hash *) hashstate->ps.plan;
822  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
823  Oid *outer_hashfuncid;
824  Oid *inner_hashfuncid;
825  bool *hash_strict;
826  ListCell *lc;
827  int nkeys;
828 
829 
830  hjstate->hj_HashTupleSlot = slot;
831 
832  /*
833  * Build ExprStates to obtain hash values for either side of the join.
834  * This must be done here as ExecBuildHash32Expr needs to know how to
835  * handle NULL inputs and the required handling of that depends on the
836  * jointype. We don't know the join type in ExecInitHash() and we
837  * must build the ExprStates before ExecHashTableCreate() so we
838  * properly attribute any SubPlans that exist in the hash expressions
839  * to the correct PlanState.
840  */
841  nkeys = list_length(node->hashoperators);
842 
843  outer_hashfuncid = palloc_array(Oid, nkeys);
844  inner_hashfuncid = palloc_array(Oid, nkeys);
845  hash_strict = palloc_array(bool, nkeys);
846 
847  /*
848  * Determine the hash function for each side of the join for the given
849  * hash operator.
850  */
851  foreach(lc, node->hashoperators)
852  {
853  Oid hashop = lfirst_oid(lc);
854  int i = foreach_current_index(lc);
855 
856  if (!get_op_hash_functions(hashop,
857  &outer_hashfuncid[i],
858  &inner_hashfuncid[i]))
859  elog(ERROR,
860  "could not find hash function for hash operator %u",
861  hashop);
862  hash_strict[i] = op_strict(hashop);
863  }
864 
865  /*
866  * Build an ExprState to generate the hash value for the expressions
867  * on the outer of the join. This ExprState must finish generating
868  * the hash value when HJ_FILL_OUTER() is true. Otherwise,
869  * ExecBuildHash32Expr will set up the ExprState to abort early if it
870  * finds a NULL. In these cases, we don't need to store these tuples
871  * in the hash table as the jointype does not require it.
872  */
873  hjstate->hj_OuterHash =
874  ExecBuildHash32Expr(hjstate->js.ps.ps_ResultTupleDesc,
875  hjstate->js.ps.resultops,
876  outer_hashfuncid,
877  node->hashcollations,
878  node->hashkeys,
879  hash_strict,
880  &hjstate->js.ps,
881  0,
882  HJ_FILL_OUTER(hjstate));
883 
884  /* As above, but for the inner side of the join */
885  hashstate->hash_expr =
886  ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
887  hashstate->ps.resultops,
888  inner_hashfuncid,
889  node->hashcollations,
890  hash->hashkeys,
891  hash_strict,
892  &hashstate->ps,
893  0,
894  HJ_FILL_INNER(hjstate));
895 
896  /*
897  * Set up the skew table hash function while we have a record of the
898  * first key's hash function Oid.
899  */
900  if (OidIsValid(hash->skewTable))
901  {
902  hashstate->skew_hashfunction = palloc0(sizeof(FmgrInfo));
903  hashstate->skew_collation = linitial_oid(node->hashcollations);
904  fmgr_info(outer_hashfuncid[0], hashstate->skew_hashfunction);
905  }
906 
907  /* no need to keep these */
908  pfree(outer_hashfuncid);
909  pfree(inner_hashfuncid);
910  pfree(hash_strict);
911  }
912 
913  /*
914  * initialize child expressions
915  */
916  hjstate->js.ps.qual =
917  ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
918  hjstate->js.joinqual =
919  ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
920  hjstate->hashclauses =
921  ExecInitQual(node->hashclauses, (PlanState *) hjstate);
922 
923  /*
924  * initialize hash-specific info
925  */
926  hjstate->hj_HashTable = NULL;
927  hjstate->hj_FirstOuterTupleSlot = NULL;
928 
929  hjstate->hj_CurHashValue = 0;
930  hjstate->hj_CurBucketNo = 0;
931  hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
932  hjstate->hj_CurTuple = NULL;
933 
934  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
935  hjstate->hj_MatchedOuter = false;
936  hjstate->hj_OuterNotEmpty = false;
937 
938  return hjstate;
939 }
#define OidIsValid(objectId)
Definition: c.h:775
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:224
ExprState * ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, const Oid *hashfunc_oids, const List *collations, const List *hash_exprs, const bool *opstrict, PlanState *parent, uint32 init_value, bool keep_nulls)
Definition: execExpr.c:3992
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1934
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1918
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1886
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:493
const TupleTableSlotOps * ExecGetResultSlotOps(PlanState *planstate, bool *isfixed)
Definition: execUtils.c:502
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:483
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:538
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
#define EXEC_FLAG_MARK
Definition: executor.h:69
#define palloc_array(type, count)
Definition: fe_memutils.h:64
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
int i
Definition: isn.c:73
bool op_strict(Oid opno)
Definition: lsyscache.c:1477
bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno)
Definition: lsyscache.c:510
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
static TupleTableSlot * ExecHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:684
#define makeNode(_type_)
Definition: nodes.h:155
@ JOIN_SEMI
Definition: nodes.h:307
@ JOIN_FULL
Definition: nodes.h:295
@ JOIN_INNER
Definition: nodes.h:293
@ JOIN_RIGHT
Definition: nodes.h:296
@ JOIN_LEFT
Definition: nodes.h:294
static int list_length(const List *l)
Definition: pg_list.h:152
#define foreach_current_index(var_or_cell)
Definition: pg_list.h:403
#define linitial_oid(l)
Definition: pg_list.h:180
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define innerPlan(node)
Definition: plannodes.h:182
#define outerPlan(node)
Definition: plannodes.h:183
unsigned int Oid
Definition: postgres_ext.h:31
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
Definition: fmgr.h:57
ExprState * hashclauses
Definition: execnodes.h:2219
List * hashcollations
Definition: plannodes.h:868
List * hashclauses
Definition: plannodes.h:866
List * hashoperators
Definition: plannodes.h:867
Join join
Definition: plannodes.h:865
List * hashkeys
Definition: plannodes.h:874
ExprState * hash_expr
Definition: execnodes.h:2775
Oid skew_collation
Definition: execnodes.h:2778
FmgrInfo * skew_hashfunction
Definition: execnodes.h:2777
List * joinqual
Definition: plannodes.h:794
JoinType jointype
Definition: plannodes.h:792
bool inner_unique
Definition: plannodes.h:793
const TupleTableSlotOps * resultops
Definition: execnodes.h:1197
EState * state
Definition: execnodes.h:1122
TupleDesc ps_ResultTupleDesc
Definition: execnodes.h:1157
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1158
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1126

References Assert, elog, ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecBuildHash32Expr(), ExecGetResultSlotOps(), ExecGetResultType(), ExecHashJoin(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitQual(), ExecInitResultTupleSlotTL(), PlanState::ExecProcNode, fmgr_info(), foreach_current_index, get_op_hash_functions(), hash(), HashState::hash_expr, HashJoinState::hashclauses, HashJoin::hashclauses, HashJoin::hashcollations, HashJoin::hashkeys, HashJoin::hashoperators, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterHash, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, i, Join::inner_unique, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_RIGHT_ANTI, JOIN_RIGHT_SEMI, JOIN_SEMI, JoinState::joinqual, Join::joinqual, JoinState::jointype, Join::jointype, HashJoinState::js, lfirst_oid, linitial_oid, list_length(), makeNode, OidIsValid, op_strict(), outerPlan, outerPlanState, palloc0(), palloc_array, pfree(), PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ResultTupleDesc, PlanState::ps_ResultTupleSlot, PlanState::qual, PlanState::resultops, JoinState::single_match, HashState::skew_collation, HashState::skew_hashfunction, PlanState::state, and TTSOpsVirtual.

Referenced by ExecInitNode().

◆ ExecParallelHashJoin()

static TupleTableSlot* ExecParallelHashJoin ( PlanState * pstate)
static

Definition at line 700 of file nodeHashjoin.c.

701 {
702  /*
703  * On sufficiently smart compilers this should be inlined with the
704  * parallel-oblivious branches removed.
705  */
706  return ExecHashJoinImpl(pstate, true);
707 }

References ExecHashJoinImpl().

Referenced by ExecHashJoinInitializeDSM(), and ExecHashJoinInitializeWorker().

◆ ExecParallelHashJoinNewBatch()

static bool ExecParallelHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1271 of file nodeHashjoin.c.

1272 {
1273  HashJoinTable hashtable = hjstate->hj_HashTable;
1274  int start_batchno;
1275  int batchno;
1276 
1277  /*
1278  * If we were already attached to a batch, remember not to bother checking
1279  * it again, and detach from it (possibly freeing the hash table if we are
1280  * last to detach).
1281  */
1282  if (hashtable->curbatch >= 0)
1283  {
1284  hashtable->batches[hashtable->curbatch].done = true;
1285  ExecHashTableDetachBatch(hashtable);
1286  }
1287 
1288  /*
1289  * Search for a batch that isn't done. We use an atomic counter to start
1290  * our search at a different batch in every participant when there are
1291  * more batches than participants.
1292  */
1293  batchno = start_batchno =
1294  pg_atomic_fetch_add_u32(&hashtable->parallel_state->distributor, 1) %
1295  hashtable->nbatch;
1296  do
1297  {
1298  uint32 hashvalue;
1299  MinimalTuple tuple;
1300  TupleTableSlot *slot;
1301 
1302  if (!hashtable->batches[batchno].done)
1303  {
1304  SharedTuplestoreAccessor *inner_tuples;
1305  Barrier *batch_barrier =
1306  &hashtable->batches[batchno].shared->batch_barrier;
1307 
1308  switch (BarrierAttach(batch_barrier))
1309  {
1310  case PHJ_BATCH_ELECT:
1311 
1312  /* One backend allocates the hash table. */
1313  if (BarrierArriveAndWait(batch_barrier,
1314  WAIT_EVENT_HASH_BATCH_ELECT))
1315  ExecParallelHashTableAlloc(hashtable, batchno);
1316  /* Fall through. */
1317 
1318  case PHJ_BATCH_ALLOCATE:
1319  /* Wait for allocation to complete. */
1320  BarrierArriveAndWait(batch_barrier,
1321  WAIT_EVENT_HASH_BATCH_ALLOCATE);
1322  /* Fall through. */
1323 
1324  case PHJ_BATCH_LOAD:
1325  /* Start (or join in) loading tuples. */
1326  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1327  inner_tuples = hashtable->batches[batchno].inner_tuples;
1328  sts_begin_parallel_scan(inner_tuples);
1329  while ((tuple = sts_parallel_scan_next(inner_tuples,
1330  &hashvalue)))
1331  {
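1332  ExecForceStoreMinimalTuple(tuple,
1333  hjstate->hj_HashTupleSlot,
1334  false);
1335  slot = hjstate->hj_HashTupleSlot;
1336  ExecParallelHashTableInsertCurrentBatch(hashtable, slot,
1337  hashvalue);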
1338  }
1339  sts_end_parallel_scan(inner_tuples);
1340  BarrierArriveAndWait(batch_barrier,
1341  WAIT_EVENT_HASH_BATCH_LOAD);
1342  /* Fall through. */
1343 
1344  case PHJ_BATCH_PROBE:
1345 
1346  /*
1347  * This batch is ready to probe. Return control to
1348  * caller. We stay attached to batch_barrier so that the
1349  * hash table stays alive until everyone's finished
1350  * probing it, but no participant is allowed to wait at
1351  * this barrier again (or else a deadlock could occur).
1352  * All attached participants must eventually detach from
1353  * the barrier and one worker must advance the phase so
1354  * that the final phase is reached.
1355  */
1356  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1357  sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
1358 
1359  return true;
1360  case PHJ_BATCH_SCAN:
1361 
1362  /*
1363  * In principle, we could help scan for unmatched tuples,
1364  * since that phase is already underway (the thing we
1365  * can't do under current deadlock-avoidance rules is wait
1366  * for others to arrive at PHJ_BATCH_SCAN, because
1367  * PHJ_BATCH_PROBE emits tuples, but in this case we just
1368  * got here without waiting). That is not yet done. For
1369  * now, we just detach and go around again. We have to
1370  * use ExecHashTableDetachBatch() because there's a small
1371  * chance we'll be the last to detach, and then we're
1372  * responsible for freeing memory.
1373  */
1374  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1375  hashtable->batches[batchno].done = true;
1376  ExecHashTableDetachBatch(hashtable);
1377  break;
1378 
1379  case PHJ_BATCH_FREE:
1380 
1381  /*
1382  * Already done. Detach and go around again (if any
1383  * remain).
1384  */
1385  BarrierDetach(batch_barrier);
1386  hashtable->batches[batchno].done = true;
1387  hashtable->curbatch = -1;
1388  break;
1389 
1390  default:
1391  elog(ERROR, "unexpected batch phase %d",
1392  BarrierPhase(batch_barrier));
1393  }
1394  }
1395  batchno = (batchno + 1) % hashtable->nbatch;
1396  } while (batchno != start_batchno);
1397 
1398  return false;
1399 }
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:366
int BarrierAttach(Barrier *barrier)
Definition: barrier.c:236
bool BarrierDetach(Barrier *barrier)
Definition: barrier.c:256
#define PHJ_BATCH_SCAN
Definition: hashjoin.h:281
#define PHJ_BATCH_PROBE
Definition: hashjoin.h:280
#define PHJ_BATCH_LOAD
Definition: hashjoin.h:279
#define PHJ_BATCH_ELECT
Definition: hashjoin.h:277
#define PHJ_BATCH_ALLOCATE
Definition: hashjoin.h:278
#define PHJ_BATCH_FREE
Definition: hashjoin.h:282
void ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3358
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3148
void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1764
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
void sts_begin_parallel_scan(SharedTuplestoreAccessor *accessor)
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:361
ParallelHashJoinState * parallel_state
Definition: hashjoin.h:360
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:221
ParallelHashJoinBatch * shared
Definition: hashjoin.h:209
SharedTuplestoreAccessor * inner_tuples
Definition: hashjoin.h:220

References BarrierArriveAndWait(), BarrierAttach(), BarrierDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, HashJoinTableData::curbatch, ParallelHashJoinState::distributor, ParallelHashJoinBatchAccessor::done, elog, ERROR, ExecForceStoreMinimalTuple(), ExecHashTableDetachBatch(), ExecParallelHashTableAlloc(), ExecParallelHashTableInsertCurrentBatch(), ExecParallelHashTableSetCurrentBatch(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, ParallelHashJoinBatchAccessor::inner_tuples, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, pg_atomic_fetch_add_u32(), PHJ_BATCH_ALLOCATE, PHJ_BATCH_ELECT, PHJ_BATCH_FREE, PHJ_BATCH_LOAD, PHJ_BATCH_PROBE, PHJ_BATCH_SCAN, ParallelHashJoinBatchAccessor::shared, sts_begin_parallel_scan(), sts_end_parallel_scan(), and sts_parallel_scan_next().

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinOuterGetTuple()

static TupleTableSlot * ExecParallelHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 1058 of file nodeHashjoin.c.

1061 {
1062  HashJoinTable hashtable = hjstate->hj_HashTable;
1063  int curbatch = hashtable->curbatch;
1064  TupleTableSlot *slot;
1065 
1066  /*
1067  * In the Parallel Hash case we only run the outer plan directly for
1068  * single-batch hash joins. Otherwise we have to go to batch files, even
1069  * for batch 0.
1070  */
1071  if (curbatch == 0 && hashtable->nbatch == 1)
1072  {
1073  slot = ExecProcNode(outerNode);
1074 
1075  while (!TupIsNull(slot))
1076  {
1077  bool isnull;
1078 
1079  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1080 
1081  econtext->ecxt_outertuple = slot;
1082 
1083  ResetExprContext(econtext);
1084 
1085  *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1086  econtext,
1087  &isnull));
1088 
1089  if (!isnull)
1090  return slot;
1091 
1092  /*
1093  * That tuple couldn't match because of a NULL, so discard it and
1094  * continue with the next one.
1095  */
1096  slot = ExecProcNode(outerNode);
1097  }
1098  }
1099  else if (curbatch < hashtable->nbatch)
1100  {
1101  MinimalTuple tuple;
1102 
1103  tuple = sts_parallel_scan_next(hashtable->batches[curbatch].outer_tuples,
1104  hashvalue);
1105  if (tuple != NULL)
1106  {
1107  ExecForceStoreMinimalTuple(tuple,
1108  hjstate->hj_OuterTupleSlot,
1109  false);
1110  slot = hjstate->hj_OuterTupleSlot;
1111  return slot;
1112  }
1113  else
1114  ExecClearTuple(hjstate->hj_OuterTupleSlot);
1115  }
1116 
1117  /* End of this batch */
1118  hashtable->batches[curbatch].outer_eof = true;
1119 
1120  return NULL;
1121 }

References HashJoinTableData::batches, HashJoinTableData::curbatch, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecClearTuple(), ExecEvalExprSwitchContext(), ExecForceStoreMinimalTuple(), ExecProcNode(), HashJoinState::hj_HashTable, HashJoinState::hj_OuterHash, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_eof, ParallelHashJoinBatchAccessor::outer_tuples, JoinState::ps, PlanState::ps_ExprContext, ResetExprContext, sts_parallel_scan_next(), and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinPartitionOuter()

static void ExecParallelHashJoinPartitionOuter ( HashJoinState * hjstate)
static

Definition at line 1597 of file nodeHashjoin.c.

1598 {
1599  PlanState *outerState = outerPlanState(hjstate);
1600  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1601  HashJoinTable hashtable = hjstate->hj_HashTable;
1602  TupleTableSlot *slot;
1603  uint32 hashvalue;
1604  int i;
1605 
1606  Assert(hjstate->hj_FirstOuterTupleSlot == NULL);
1607 
1608  /* Execute outer plan, writing all tuples to shared tuplestores. */
1609  for (;;)
1610  {
1611  bool isnull;
1612 
1613  slot = ExecProcNode(outerState);
1614  if (TupIsNull(slot))
1615  break;
1616  econtext->ecxt_outertuple = slot;
1617 
1618  ResetExprContext(econtext);
1619 
1620  hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1621  econtext,
1622  &isnull));
1623 
1624  if (!isnull)
1625  {
1626  int batchno;
1627  int bucketno;
1628  bool shouldFree;
1629  MinimalTuple mintup = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1630 
1631  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno,
1632  &batchno);
1633  sts_puttuple(hashtable->batches[batchno].outer_tuples,
1634  &hashvalue, mintup);
1635 
1636  if (shouldFree)
1637  heap_free_minimal_tuple(mintup);
1638  }
1639  CHECK_FOR_INTERRUPTS();
1640  }
1641 
1642  /* Make sure all outer partitions are readable by any backend. */
1643  for (i = 0; i < hashtable->nbatch; ++i)
1644  sts_end_write(hashtable->batches[i].outer_tuples);
1645 }
void sts_end_write(SharedTuplestoreAccessor *accessor)
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)

References Assert, HashJoinTableData::batches, CHECK_FOR_INTERRUPTS, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecEvalExprSwitchContext(), ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecProcNode(), heap_free_minimal_tuple(), HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHash, i, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, outerPlanState, JoinState::ps, PlanState::ps_ExprContext, ResetExprContext, sts_end_write(), sts_puttuple(), and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecReScanHashJoin()

void ExecReScanHashJoin ( HashJoinState * node)

Definition at line 1494 of file nodeHashjoin.c.

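1495 {
1496  PlanState *outerPlan = outerPlanState(node);
1497  PlanState *innerPlan = innerPlanState(node);
1498 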
1499  /*
1500  * In a multi-batch join, we currently have to do rescans the hard way,
1501  * primarily because batch temp files may have already been released. But
1502  * if it's a single-batch join, and there is no parameter change for the
1503  * inner subnode, then we can just re-use the existing hash table without
1504  * rebuilding it.
1505  */
1506  if (node->hj_HashTable != NULL)
1507  {
1508  if (node->hj_HashTable->nbatch == 1 &&
1509  innerPlan->chgParam == NULL)
1510  {
1511  /*
1512  * Okay to reuse the hash table; needn't rescan inner, either.
1513  *
1514  * However, if it's a right/right-anti/full join, we'd better
1515  * reset the inner-tuple match flags contained in the table.
1516  */
1517  if (HJ_FILL_INNER(node))
1518  ExecHashTableResetMatchFlags(node->hj_HashTable);
1519 
1520  /*
1521  * Also, we need to reset our state about the emptiness of the
1522  * outer relation, so that the new scan of the outer will update
1523  * it correctly if it turns out to be empty this time. (There's no
1524  * harm in clearing it now because ExecHashJoin won't need the
1525  * info. In the other cases, where the hash table doesn't exist
1526  * or we are destroying it, we leave this state alone because
1527  * ExecHashJoin will need it the first time through.)
1528  */
1529  node->hj_OuterNotEmpty = false;
1530 
1531  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
1532  node->hj_JoinState = HJ_NEED_NEW_OUTER;
1533  }
1534  else
1535  {
1536  /* must destroy and rebuild hash table */
1537  HashState *hashNode = castNode(HashState, innerPlan);
1538 
1539  Assert(hashNode->hashtable == node->hj_HashTable);
1540  /* accumulate stats from old hash table, if wanted */
1541  /* (this should match ExecShutdownHash) */
1542  if (hashNode->ps.instrument && !hashNode->hinstrument)
1543  hashNode->hinstrument = (HashInstrumentation *)
1544  palloc0(sizeof(HashInstrumentation));
1545  if (hashNode->hinstrument)
1546  ExecHashAccumInstrumentation(hashNode->hinstrument,
1547  hashNode->hashtable);
1548  /* for safety, be sure to clear child plan node's pointer too */
1549  hashNode->hashtable = NULL;
1550 
1551  ExecHashTableDestroy(node->hj_HashTable);
1552  node->hj_HashTable = NULL;
1553  node->hj_JoinState = HJ_BUILD_HASHTABLE;
1554 
1555  /*
1556  * if chgParam of subnode is not null then plan will be re-scanned
1557  * by first ExecProcNode.
1558  */
1559  if (innerPlan->chgParam == NULL)
1560  ExecReScan(innerPlan);
1561  }
1562  }
1563 
1564  /* Always reset intra-tuple state */
1565  node->hj_CurHashValue = 0;
1566  node->hj_CurBucketNo = 0;
1567  node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
1568  node->hj_CurTuple = NULL;
1569 
1570  node->hj_MatchedOuter = false;
1571  node->hj_FirstOuterTupleSlot = NULL;
1572 
1573  /*
1574  * if chgParam of subnode is not null then plan will be re-scanned by
1575  * first ExecProcNode.
1576  */
1577  if (outerPlan->chgParam == NULL)
1578  ExecReScan(outerPlan);
1579 }
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
void ExecHashAccumInstrumentation(HashInstrumentation *instrument, HashJoinTable hashtable)
Definition: nodeHash.c:2736
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:2214
HashInstrumentation * hinstrument
Definition: execnodes.h:2793
Instrumentation * instrument
Definition: execnodes.h:1130

References Assert, castNode, ExecHashAccumInstrumentation(), ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), HashState::hashtable, HashState::hinstrument, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_OUTER, HashJoinState::hj_OuterNotEmpty, innerPlan, innerPlanState, PlanState::instrument, INVALID_SKEW_BUCKET_NO, HashJoinTableData::nbatch, outerPlan, outerPlanState, palloc0(), and HashState::ps.

Referenced by ExecReScan().

◆ ExecShutdownHashJoin()

void ExecShutdownHashJoin ( HashJoinState * node)

Definition at line 1582 of file nodeHashjoin.c.

1583 {
1584  if (node->hj_HashTable)
1585  {
1586  /*
1587  * Detach from shared state before DSM memory goes away. This makes
1588  * sure that we don't have any pointers into DSM memory by the time
1589  * ExecEndHashJoin runs.
1590  */
1591  ExecHashTableDetachBatch(node->hj_HashTable);
1592  ExecHashTableDetach(node->hj_HashTable);
1593  }
1594 }

References ExecHashTableDetach(), ExecHashTableDetachBatch(), and HashJoinState::hj_HashTable.

Referenced by ExecShutdownNode_walker().