PostgreSQL Source Code  git master
nodeHashjoin.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/parallel.h"
#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/memutils.h"
#include "utils/sharedtuplestore.h"
Include dependency graph for nodeHashjoin.c:

Go to the source code of this file.

Macros

#define HJ_BUILD_HASHTABLE   1
 
#define HJ_NEED_NEW_OUTER   2
 
#define HJ_SCAN_BUCKET   3
 
#define HJ_FILL_OUTER_TUPLE   4
 
#define HJ_FILL_INNER_TUPLES   5
 
#define HJ_NEED_NEW_BATCH   6
 
#define HJ_FILL_OUTER(hjstate)   ((hjstate)->hj_NullInnerTupleSlot != NULL)
 
#define HJ_FILL_INNER(hjstate)   ((hjstate)->hj_NullOuterTupleSlot != NULL)
 

Functions

static TupleTableSlot * ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
 
static bool ExecHashJoinNewBatch (HashJoinState *hjstate)
 
static bool ExecParallelHashJoinNewBatch (HashJoinState *hjstate)
 
static void ExecParallelHashJoinPartitionOuter (HashJoinState *hjstate)
 
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl (PlanState *pstate, bool parallel)
 
static TupleTableSlot * ExecHashJoin (PlanState *pstate)
 
static TupleTableSlot * ExecParallelHashJoin (PlanState *pstate)
 
HashJoinState * ExecInitHashJoin (HashJoin *node, EState *estate, int eflags)
 
void ExecEndHashJoin (HashJoinState *node)
 
void ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
 
void ExecReScanHashJoin (HashJoinState *node)
 
void ExecShutdownHashJoin (HashJoinState *node)
 
void ExecHashJoinEstimate (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinReInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeWorker (HashJoinState *state, ParallelWorkerContext *pwcxt)
 

Macro Definition Documentation

◆ HJ_BUILD_HASHTABLE

#define HJ_BUILD_HASHTABLE   1

Definition at line 180 of file nodeHashjoin.c.

◆ HJ_FILL_INNER

#define HJ_FILL_INNER (   hjstate)    ((hjstate)->hj_NullOuterTupleSlot != NULL)

Definition at line 190 of file nodeHashjoin.c.

◆ HJ_FILL_INNER_TUPLES

#define HJ_FILL_INNER_TUPLES   5

Definition at line 184 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER

#define HJ_FILL_OUTER (   hjstate)    ((hjstate)->hj_NullInnerTupleSlot != NULL)

Definition at line 188 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_TUPLE

#define HJ_FILL_OUTER_TUPLE   4

Definition at line 183 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_BATCH

#define HJ_NEED_NEW_BATCH   6

Definition at line 185 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_OUTER

#define HJ_NEED_NEW_OUTER   2

Definition at line 181 of file nodeHashjoin.c.

◆ HJ_SCAN_BUCKET

#define HJ_SCAN_BUCKET   3

Definition at line 182 of file nodeHashjoin.c.

Function Documentation

◆ ExecEndHashJoin()

void ExecEndHashJoin ( HashJoinState * node)

Definition at line 859 of file nodeHashjoin.c.

860 {
861  /*
862  * Free hash table
863  */
864  if (node->hj_HashTable)
865  {
866  ExecHashTableDestroy(node->hj_HashTable);
867  node->hj_HashTable = NULL;
868  }
869 
870  /*
871  * clean up subtrees
872  */
873  ExecEndNode(outerPlanState(node));
874  ExecEndNode(innerPlanState(node));
875 }
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
#define outerPlanState(node)
Definition: execnodes.h:1132
#define innerPlanState(node)
Definition: execnodes.h:1131
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:884
HashJoinTable hj_HashTable
Definition: execnodes.h:2109

References ExecEndNode(), ExecHashTableDestroy(), HashJoinState::hj_HashTable, innerPlanState, and outerPlanState.

Referenced by ExecEndNode().

◆ ExecHashJoin()

static TupleTableSlot* ExecHashJoin ( PlanState * pstate)
static

Definition at line 678 of file nodeHashjoin.c.

679 {
680  /*
681  * On sufficiently smart compilers this should be inlined with the
682  * parallel-aware branches removed.
683  */
684  return ExecHashJoinImpl(pstate, false);
685 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:221

References ExecHashJoinImpl().

Referenced by ExecInitHashJoin().

◆ ExecHashJoinEstimate()

void ExecHashJoinEstimate ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1544 of file nodeHashjoin.c.

1545 {
1546  shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelHashJoinState));
1547  shm_toc_estimate_keys(&pcxt->estimator, 1);
1548 }
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
shm_toc_estimator estimator
Definition: parallel.h:42

References ParallelContext::estimator, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

◆ ExecHashJoinGetSavedTuple()

static TupleTableSlot * ExecHashJoinGetSavedTuple ( HashJoinState * hjstate,
BufFile * file,
uint32 * hashvalue,
TupleTableSlot * tupleSlot 
)
static

Definition at line 1356 of file nodeHashjoin.c.

1360 {
1361  uint32 header[2];
1362  size_t nread;
1363  MinimalTuple tuple;
1364 
1365  /*
1366  * We check for interrupts here because this is typically taken as an
1367  * alternative code path to an ExecProcNode() call, which would include
1368  * such a check.
1369  */
1370  CHECK_FOR_INTERRUPTS();
1371 
1372  /*
1373  * Since both the hash value and the MinimalTuple length word are uint32,
1374  * we can read them both in one BufFileRead() call without any type
1375  * cheating.
1376  */
1377  nread = BufFileReadMaybeEOF(file, header, sizeof(header), true);
1378  if (nread == 0) /* end of file */
1379  {
1380  ExecClearTuple(tupleSlot);
1381  return NULL;
1382  }
1383  *hashvalue = header[0];
1384  tuple = (MinimalTuple) palloc(header[1]);
1385  tuple->t_len = header[1];
1386  BufFileReadExact(file,
1387  (char *) tuple + sizeof(uint32),
1388  header[1] - sizeof(uint32));
1389  ExecForceStoreMinimalTuple(tuple, tupleSlot, true);
1390  return tupleSlot;
1391 }
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition: buffile.c:664
unsigned int uint32
Definition: c.h:495
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1511
MinimalTupleData * MinimalTuple
Definition: htup.h:27
void * palloc(Size size)
Definition: mcxt.c:1226
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433

References BufFileReadExact(), BufFileReadMaybeEOF(), CHECK_FOR_INTERRUPTS, ExecClearTuple(), ExecForceStoreMinimalTuple(), palloc(), and MinimalTupleData::t_len.

Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().

◆ ExecHashJoinImpl()

static pg_attribute_always_inline TupleTableSlot* ExecHashJoinImpl ( PlanState * pstate,
bool  parallel 
)
static

Definition at line 221 of file nodeHashjoin.c.

222 {
223  HashJoinState *node = castNode(HashJoinState, pstate);
224  PlanState *outerNode;
225  HashState *hashNode;
226  ExprState *joinqual;
227  ExprState *otherqual;
228  ExprContext *econtext;
229  HashJoinTable hashtable;
230  TupleTableSlot *outerTupleSlot;
231  uint32 hashvalue;
232  int batchno;
233  ParallelHashJoinState *parallel_state;
234 
235  /*
236  * get information from HashJoin node
237  */
238  joinqual = node->js.joinqual;
239  otherqual = node->js.ps.qual;
240  hashNode = (HashState *) innerPlanState(node);
241  outerNode = outerPlanState(node);
242  hashtable = node->hj_HashTable;
243  econtext = node->js.ps.ps_ExprContext;
244  parallel_state = hashNode->parallel_state;
245 
246  /*
247  * Reset per-tuple memory context to free any expression evaluation
248  * storage allocated in the previous tuple cycle.
249  */
250  ResetExprContext(econtext);
251 
252  /*
253  * run the hash join state machine
254  */
255  for (;;)
256  {
257  /*
258  * It's possible to iterate this loop many times before returning a
259  * tuple, in some pathological cases such as needing to move much of
260  * the current batch to a later batch. So let's check for interrupts
261  * each time through.
262  */
263  CHECK_FOR_INTERRUPTS();
264 
265  switch (node->hj_JoinState)
266  {
267  case HJ_BUILD_HASHTABLE:
268 
269  /*
270  * First time through: build hash table for inner relation.
271  */
272  Assert(hashtable == NULL);
273 
274  /*
275  * If the outer relation is completely empty, and it's not
276  * right/right-anti/full join, we can quit without building
277  * the hash table. However, for an inner join it is only a
278  * win to check this when the outer relation's startup cost is
279  * less than the projected cost of building the hash table.
280  * Otherwise it's best to build the hash table first and see
281  * if the inner relation is empty. (When it's a left join, we
282  * should always make this check, since we aren't going to be
283  * able to skip the join on the strength of an empty inner
284  * relation anyway.)
285  *
286  * If we are rescanning the join, we make use of information
287  * gained on the previous scan: don't bother to try the
288  * prefetch if the previous scan found the outer relation
289  * nonempty. This is not 100% reliable since with new
290  * parameters the outer relation might yield different
291  * results, but it's a good heuristic.
292  *
293  * The only way to make the check is to try to fetch a tuple
294  * from the outer plan node. If we succeed, we have to stash
295  * it away for later consumption by ExecHashJoinOuterGetTuple.
296  */
297  if (HJ_FILL_INNER(node))
298  {
299  /* no chance to not build the hash table */
300  node->hj_FirstOuterTupleSlot = NULL;
301  }
302  else if (parallel)
303  {
304  /*
305  * The empty-outer optimization is not implemented for
306  * shared hash tables, because no one participant can
307  * determine that there are no outer tuples, and it's not
308  * yet clear that it's worth the synchronization overhead
309  * of reaching consensus to figure that out. So we have
310  * to build the hash table.
311  */
312  node->hj_FirstOuterTupleSlot = NULL;
313  }
314  else if (HJ_FILL_OUTER(node) ||
315  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
316  !node->hj_OuterNotEmpty))
317  {
318  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
319  if (TupIsNull(node->hj_FirstOuterTupleSlot))
320  {
321  node->hj_OuterNotEmpty = false;
322  return NULL;
323  }
324  else
325  node->hj_OuterNotEmpty = true;
326  }
327  else
328  node->hj_FirstOuterTupleSlot = NULL;
329 
330  /*
331  * Create the hash table. If using Parallel Hash, then
332  * whoever gets here first will create the hash table and any
333  * later arrivals will merely attach to it.
334  */
335  hashtable = ExecHashTableCreate(hashNode,
336  node->hj_HashOperators,
337  node->hj_Collations,
338  HJ_FILL_INNER(node));
339  node->hj_HashTable = hashtable;
340 
341  /*
342  * Execute the Hash node, to build the hash table. If using
343  * Parallel Hash, then we'll try to help hashing unless we
344  * arrived too late.
345  */
346  hashNode->hashtable = hashtable;
347  (void) MultiExecProcNode((PlanState *) hashNode);
348 
349  /*
350  * If the inner relation is completely empty, and we're not
351  * doing a left outer join, we can quit without scanning the
352  * outer relation.
353  */
354  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
355  {
356  if (parallel)
357  {
358  /*
359  * Advance the build barrier to PHJ_BUILD_RUN before
360  * proceeding so we can negotiate resource cleanup.
361  */
362  Barrier *build_barrier = &parallel_state->build_barrier;
363 
364  while (BarrierPhase(build_barrier) < PHJ_BUILD_RUN)
365  BarrierArriveAndWait(build_barrier, 0);
366  }
367  return NULL;
368  }
369 
370  /*
371  * need to remember whether nbatch has increased since we
372  * began scanning the outer relation
373  */
374  hashtable->nbatch_outstart = hashtable->nbatch;
375 
376  /*
377  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
378  * tuple above, because ExecHashJoinOuterGetTuple will
379  * immediately set it again.)
380  */
381  node->hj_OuterNotEmpty = false;
382 
383  if (parallel)
384  {
385  Barrier *build_barrier;
386 
387  build_barrier = &parallel_state->build_barrier;
388  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
389  BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
390  BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
391  if (BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER)
392  {
393  /*
394  * If multi-batch, we need to hash the outer relation
395  * up front.
396  */
397  if (hashtable->nbatch > 1)
398  ExecParallelHashJoinPartitionOuter(node);
399  BarrierArriveAndWait(build_barrier,
400  WAIT_EVENT_HASH_BUILD_HASH_OUTER);
401  }
402  else if (BarrierPhase(build_barrier) == PHJ_BUILD_FREE)
403  {
404  /*
405  * If we attached so late that the job is finished and
406  * the batch state has been freed, we can return
407  * immediately.
408  */
409  return NULL;
410  }
411 
412  /* Each backend should now select a batch to work on. */
413  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUN);
414  hashtable->curbatch = -1;
415  node->hj_JoinState = HJ_NEED_NEW_BATCH;
416 
417  continue;
418  }
419  else
420  node->hj_JoinState = HJ_NEED_NEW_OUTER;
421 
422  /* FALL THRU */
423 
424  case HJ_NEED_NEW_OUTER:
425 
426  /*
427  * We don't have an outer tuple, try to get the next one
428  */
429  if (parallel)
430  outerTupleSlot =
431  ExecParallelHashJoinOuterGetTuple(outerNode, node,
432  &hashvalue);
433  else
434  outerTupleSlot =
435  ExecHashJoinOuterGetTuple(outerNode, node, &hashvalue);
436 
437  if (TupIsNull(outerTupleSlot))
438  {
439  /* end of batch, or maybe whole join */
440  if (HJ_FILL_INNER(node))
441  {
442  /* set up to scan for unmatched inner tuples */
443  if (parallel)
444  {
445  /*
446  * Only one process is currently allow to handle
447  * each batch's unmatched tuples, in a parallel
448  * join.
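449  */
450  if (ExecParallelPrepHashTableForUnmatched(node))
451  node->hj_JoinState = HJ_FILL_INNER_TUPLES;
452  else
453  node->hj_JoinState = HJ_NEED_NEW_BATCH;
454  }
455  else
456  {
457  ExecPrepHashTableForUnmatched(node);
458  node->hj_JoinState = HJ_FILL_INNER_TUPLES;
459  }
460  }
461  else
462  node->hj_JoinState = HJ_NEED_NEW_BATCH;
463  continue;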
464  }
465 
466  econtext->ecxt_outertuple = outerTupleSlot;
467  node->hj_MatchedOuter = false;
468 
469  /*
470  * Find the corresponding bucket for this tuple in the main
471  * hash table or skew hash table.
472  */
473  node->hj_CurHashValue = hashvalue;
474  ExecHashGetBucketAndBatch(hashtable, hashvalue,
475  &node->hj_CurBucketNo, &batchno);
476  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
477  hashvalue);
478  node->hj_CurTuple = NULL;
479 
480  /*
481  * The tuple might not belong to the current batch (where
482  * "current batch" includes the skew buckets if any).
483  */
484  if (batchno != hashtable->curbatch &&
485  node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
486  {
487  bool shouldFree;
488  MinimalTuple mintuple = ExecFetchSlotMinimalTuple(outerTupleSlot,
489  &shouldFree);
490 
491  /*
492  * Need to postpone this outer tuple to a later batch.
493  * Save it in the corresponding outer-batch file.
494  */
495  Assert(parallel_state == NULL);
496  Assert(batchno > hashtable->curbatch);
497  ExecHashJoinSaveTuple(mintuple, hashvalue,
498  &hashtable->outerBatchFile[batchno],
499  hashtable);
500 
501  if (shouldFree)
502  heap_free_minimal_tuple(mintuple);
503 
504  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
505  continue;
506  }
507 
508  /* OK, let's scan the bucket for matches */
509  node->hj_JoinState = HJ_SCAN_BUCKET;
510 
511  /* FALL THRU */
512 
513  case HJ_SCAN_BUCKET:
514 
515  /*
516  * Scan the selected hash bucket for matches to current outer
517  */
518  if (parallel)
519  {
520  if (!ExecParallelScanHashBucket(node, econtext))
521  {
522  /* out of matches; check for possible outer-join fill */
523  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
524  continue;
525  }
526  }
527  else
528  {
529  if (!ExecScanHashBucket(node, econtext))
530  {
531  /* out of matches; check for possible outer-join fill */
532  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
533  continue;
534  }
535  }
536 
537  /*
538  * We've got a match, but still need to test non-hashed quals.
539  * ExecScanHashBucket already set up all the state needed to
540  * call ExecQual.
541  *
542  * If we pass the qual, then save state for next call and have
543  * ExecProject form the projection, store it in the tuple
544  * table, and return the slot.
545  *
546  * Only the joinquals determine tuple match status, but all
547  * quals must pass to actually return the tuple.
548  */
549  if (joinqual == NULL || ExecQual(joinqual, econtext))
550  {
551  node->hj_MatchedOuter = true;
552 
553 
554  /*
555  * This is really only needed if HJ_FILL_INNER(node), but
556  * we'll avoid the branch and just set it always.
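557  */
558  if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
559  HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
560 
561  /* In an antijoin, we never return a matched tuple */
562  if (node->js.jointype == JOIN_ANTI)
563  {
564  node->hj_JoinState = HJ_NEED_NEW_OUTER;
565  continue;
566  }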
567 
568  /*
569  * In a right-antijoin, we never return a matched tuple.
570  * And we need to stay on the current outer tuple to
571  * continue scanning the inner side for matches.
572  */
573  if (node->js.jointype == JOIN_RIGHT_ANTI)
574  continue;
575 
576  /*
577  * If we only need to join to the first matching inner
578  * tuple, then consider returning this one, but after that
579  * continue with next outer tuple.
580  */
581  if (node->js.single_match)
582  node->hj_JoinState = HJ_NEED_NEW_OUTER;
583 
584  if (otherqual == NULL || ExecQual(otherqual, econtext))
585  return ExecProject(node->js.ps.ps_ProjInfo);
586  else
587  InstrCountFiltered2(node, 1);
588  }
589  else
590  InstrCountFiltered1(node, 1);
591  break;
592 
593  case HJ_FILL_OUTER_TUPLE:
594 
595  /*
596  * The current outer tuple has run out of matches, so check
597  * whether to emit a dummy outer-join tuple. Whether we emit
598  * one or not, the next state is NEED_NEW_OUTER.
599  */
600  node->hj_JoinState = HJ_NEED_NEW_OUTER;
601 
602  if (!node->hj_MatchedOuter &&
603  HJ_FILL_OUTER(node))
604  {
605  /*
606  * Generate a fake join tuple with nulls for the inner
607  * tuple, and return it if it passes the non-join quals.
608  */
609  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
610 
611  if (otherqual == NULL || ExecQual(otherqual, econtext))
612  return ExecProject(node->js.ps.ps_ProjInfo);
613  else
614  InstrCountFiltered2(node, 1);
615  }
616  break;
617 
618  case HJ_FILL_INNER_TUPLES:
619 
620  /*
621  * We have finished a batch, but we are doing
622  * right/right-anti/full join, so any unmatched inner tuples
623  * in the hashtable have to be emitted before we continue to
624  * the next batch.
625  */
626  if (!(parallel ? ExecParallelScanHashTableForUnmatched(node, econtext)
627  : ExecScanHashTableForUnmatched(node, econtext)))
628  {
629  /* no more unmatched tuples */
630  node->hj_JoinState = HJ_NEED_NEW_BATCH;
631  continue;
632  }
633 
634  /*
635  * Generate a fake join tuple with nulls for the outer tuple,
636  * and return it if it passes the non-join quals.
637  */
638  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
639 
640  if (otherqual == NULL || ExecQual(otherqual, econtext))
641  return ExecProject(node->js.ps.ps_ProjInfo);
642  else
643  InstrCountFiltered2(node, 1);
644  break;
645 
646  case HJ_NEED_NEW_BATCH:
647 
648  /*
649  * Try to advance to next batch. Done if there are no more.
650  */
651  if (parallel)
652  {
653  if (!ExecParallelHashJoinNewBatch(node))
654  return NULL; /* end of parallel-aware join */
655  }
656  else
657  {
658  if (!ExecHashJoinNewBatch(node))
659  return NULL; /* end of parallel-oblivious join */
660  }
661  node->hj_JoinState = HJ_NEED_NEW_OUTER;
662  break;
663 
664  default:
665  elog(ERROR, "unrecognized hashjoin state: %d",
666  (int) node->hj_JoinState);
667  }
668  }
669 }
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:265
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
#define ERROR
Definition: elog.h:39
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:502
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
Definition: execTuples.c:1691
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:1140
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1145
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: executor.h:375
#define ResetExprContext(econtext)
Definition: executor.h:543
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:412
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:268
#define PHJ_BUILD_FREE
Definition: hashjoin.h:274
#define PHJ_BUILD_HASH_OUTER
Definition: hashjoin.h:272
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:91
#define PHJ_BUILD_RUN
Definition: hashjoin.h:273
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:120
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1524
#define HeapTupleHeaderHasMatch(tup)
Definition: htup_details.h:514
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:519
Assert(fmt[strlen(fmt) - 1] !='\n')
bool ExecParallelScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2017
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:2068
bool ExecParallelScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2228
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:2520
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2154
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1924
HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
Definition: nodeHash.c:433
bool ExecParallelPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:2089
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1956
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:185
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:182
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:183
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:964
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:190
static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:184
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:890
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:181
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:188
static void ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:180
#define castNode(_type_, nodeptr)
Definition: nodes.h:197
@ JOIN_RIGHT_ANTI
Definition: nodes.h:320
@ JOIN_ANTI
Definition: nodes.h:319
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:250
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:252
HashJoinTuple hj_CurTuple
Definition: execnodes.h:2113
int hj_CurSkewBucketNo
Definition: execnodes.h:2112
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:2116
bool hj_OuterNotEmpty
Definition: execnodes.h:2121
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:2117
List * hj_HashOperators
Definition: execnodes.h:2107
JoinState js
Definition: execnodes.h:2104
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:2118
bool hj_MatchedOuter
Definition: execnodes.h:2120
uint32 hj_CurHashValue
Definition: execnodes.h:2110
List * hj_Collations
Definition: execnodes.h:2108
int hj_CurBucketNo
Definition: execnodes.h:2111
double totalTuples
Definition: hashjoin.h:332
BufFile ** outerBatchFile
Definition: hashjoin.h:344
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2680
HashJoinTable hashtable
Definition: execnodes.h:2661
PlanState ps
Definition: execnodes.h:2660
JoinType jointype
Definition: execnodes.h:2002
PlanState ps
Definition: execnodes.h:2001
ExprState * joinqual
Definition: execnodes.h:2005
bool single_match
Definition: execnodes.h:2003
ExprState * qual
Definition: execnodes.h:1057
Plan * plan
Definition: execnodes.h:1036
ExprContext * ps_ExprContext
Definition: execnodes.h:1075
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1076
Cost total_cost
Definition: plannodes.h:129
Cost startup_cost
Definition: plannodes.h:128
#define TupIsNull(slot)
Definition: tuptable.h:300

References Assert(), BarrierArriveAndWait(), BarrierPhase(), ParallelHashJoinState::build_barrier, castNode, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog(), ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecParallelHashJoinNewBatch(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), ExecParallelPrepHashTableForUnmatched(), ExecParallelScanHashBucket(), ExecParallelScanHashTableForUnmatched(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), HashState::hashtable, heap_free_minimal_tuple(), HeapTupleHeaderHasMatch, HeapTupleHeaderSetMatch, HJ_BUILD_HASHTABLE, HashJoinState::hj_Collations, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterNotEmpty, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JOIN_RIGHT_ANTI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, HashJoinTableData::outerBatchFile, outerPlanState, HashState::parallel_state, PHJ_BUILD_FREE, PHJ_BUILD_HASH_OUTER, PHJ_BUILD_RUN, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, PlanState::qual, ResetExprContext, 
JoinState::single_match, Plan::startup_cost, Plan::total_cost, HashJoinTableData::totalTuples, and TupIsNull.

Referenced by ExecHashJoin(), and ExecParallelHashJoin().

◆ ExecHashJoinInitializeDSM()

void ExecHashJoinInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1551 of file nodeHashjoin.c.

1552 {
1553  int plan_node_id = state->js.ps.plan->plan_node_id;
1554  HashState *hashNode;
1555  ParallelHashJoinState *pstate;
1556 
1557  /*
1558  * Disable shared hash table mode if we failed to create a real DSM
1559  * segment, because that means that we don't have a DSA area to work with.
1560  */
1561  if (pcxt->seg == NULL)
1562  return;
1563 
1564  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1565 
1566  /*
1567  * Set up the state needed to coordinate access to the shared hash
1568  * table(s), using the plan node ID as the toc key.
1569  */
1570  pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelHashJoinState));
1571  shm_toc_insert(pcxt->toc, plan_node_id, pstate);
1572 
1573  /*
1574  * Set up the shared hash join state with no batches initially.
1575  * ExecHashTableCreate() will prepare at least one later and set nbatch
1576  * and space_allowed.
1577  */
1578  pstate->nbatch = 0;
1579  pstate->space_allowed = 0;
1580  pstate->batches = InvalidDsaPointer;
1581  pstate->old_batches = InvalidDsaPointer;
1582  pstate->nbuckets = 0;
1583  pstate->growth = PHJ_GROWTH_OK;
1584  pstate->chunk_work_queue = InvalidDsaPointer;
1585  pg_atomic_init_u32(&pstate->distributor, 0);
1586  pstate->nparticipants = pcxt->nworkers + 1;
1587  pstate->total_tuples = 0;
1588  LWLockInitialize(&pstate->lock,
1589  LWTRANCHE_PARALLEL_HASH_JOIN);
1590  BarrierInit(&pstate->build_barrier, 0);
1591  BarrierInit(&pstate->grow_batches_barrier, 0);
1592  BarrierInit(&pstate->grow_buckets_barrier, 0);
1593 
1594  /* Set up the space we'll use for shared temporary files. */
1595  SharedFileSetInit(&pstate->fileset, pcxt->seg);
1596 
1597  /* Initialize the shared state in the hash node. */
1598  hashNode = (HashState *) innerPlanState(state);
1599  hashNode->parallel_state = pstate;
1600 }
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:218
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
#define InvalidDsaPointer
Definition: dsa.h:78
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:425
@ PHJ_GROWTH_OK
Definition: hashjoin.h:233
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:730
@ LWTRANCHE_PARALLEL_HASH_JOIN
Definition: lwlock.h:196
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:694
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:44
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
dsm_segment * seg
Definition: parallel.h:43
shm_toc * toc
Definition: parallel.h:45
Barrier grow_batches_barrier
Definition: hashjoin.h:261
dsa_pointer old_batches
Definition: hashjoin.h:249
dsa_pointer chunk_work_queue
Definition: hashjoin.h:254
Barrier grow_buckets_barrier
Definition: hashjoin.h:262
ParallelHashGrowth growth
Definition: hashjoin.h:253
pg_atomic_uint32 distributor
Definition: hashjoin.h:263
SharedFileSet fileset
Definition: hashjoin.h:265
dsa_pointer batches
Definition: hashjoin.h:248
Definition: regguts.h:323

References BarrierInit(), ParallelHashJoinState::batches, ParallelHashJoinState::build_barrier, ParallelHashJoinState::chunk_work_queue, ParallelHashJoinState::distributor, ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, ParallelHashJoinState::grow_batches_barrier, ParallelHashJoinState::grow_buckets_barrier, ParallelHashJoinState::growth, innerPlanState, InvalidDsaPointer, ParallelHashJoinState::lock, LWLockInitialize(), LWTRANCHE_PARALLEL_HASH_JOIN, ParallelHashJoinState::nbatch, ParallelHashJoinState::nbuckets, ParallelHashJoinState::nparticipants, ParallelContext::nworkers, ParallelHashJoinState::old_batches, HashState::parallel_state, pg_atomic_init_u32(), PHJ_GROWTH_OK, ParallelContext::seg, SharedFileSetInit(), shm_toc_allocate(), shm_toc_insert(), ParallelHashJoinState::space_allowed, ParallelContext::toc, and ParallelHashJoinState::total_tuples.

Referenced by ExecParallelInitializeDSM().

◆ ExecHashJoinInitializeWorker()

void ExecHashJoinInitializeWorker ( HashJoinState * state,
ParallelWorkerContext * pwcxt 
)

Definition at line 1642 of file nodeHashjoin.c.

1644 {
1645  HashState *hashNode;
1646  int plan_node_id = state->js.ps.plan->plan_node_id;
1647  ParallelHashJoinState *pstate =
1648  shm_toc_lookup(pwcxt->toc, plan_node_id, false);
1649 
1650  /* Attach to the space for shared temporary files. */
1651  SharedFileSetAttach(&pstate->fileset, pwcxt->seg);
1652 
1653  /* Attach to the shared state in the hash node. */
1654  hashNode = (HashState *) innerPlanState(state);
1655  hashNode->parallel_state = pstate;
1656 
1657  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1658 }
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:62
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
dsm_segment * seg
Definition: parallel.h:53

References ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, innerPlanState, HashState::parallel_state, ParallelWorkerContext::seg, SharedFileSetAttach(), shm_toc_lookup(), and ParallelWorkerContext::toc.

Referenced by ExecParallelInitializeWorker().

◆ ExecHashJoinNewBatch()

static bool ExecHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1031 of file nodeHashjoin.c.

1032 {
1033  HashJoinTable hashtable = hjstate->hj_HashTable;
1034  int nbatch;
1035  int curbatch;
1036  BufFile *innerFile;
1037  TupleTableSlot *slot;
1038  uint32 hashvalue;
1039 
1040  nbatch = hashtable->nbatch;
1041  curbatch = hashtable->curbatch;
1042 
1043  if (curbatch > 0)
1044  {
1045  /*
1046  * We no longer need the previous outer batch file; close it right
1047  * away to free disk space.
1048  */
1049  if (hashtable->outerBatchFile[curbatch])
1050  BufFileClose(hashtable->outerBatchFile[curbatch]);
1051  hashtable->outerBatchFile[curbatch] = NULL;
1052  }
1053  else /* we just finished the first batch */
1054  {
1055  /*
1056  * Reset some of the skew optimization state variables, since we no
1057  * longer need to consider skew tuples after the first batch. The
1058  * memory context reset we are about to do will release the skew
1059  * hashtable itself.
1060  */
1061  hashtable->skewEnabled = false;
1062  hashtable->skewBucket = NULL;
1063  hashtable->skewBucketNums = NULL;
1064  hashtable->nSkewBuckets = 0;
1065  hashtable->spaceUsedSkew = 0;
1066  }
1067 
1068  /*
1069  * We can always skip over any batches that are completely empty on both
1070  * sides. We can sometimes skip over batches that are empty on only one
1071  * side, but there are exceptions:
1072  *
1073  * 1. In a left/full outer join, we have to process outer batches even if
1074  * the inner batch is empty. Similarly, in a right/right-anti/full outer
1075  * join, we have to process inner batches even if the outer batch is
1076  * empty.
1077  *
1078  * 2. If we have increased nbatch since the initial estimate, we have to
1079  * scan inner batches since they might contain tuples that need to be
1080  * reassigned to later inner batches.
1081  *
1082  * 3. Similarly, if we have increased nbatch since starting the outer
1083  * scan, we have to rescan outer batches in case they contain tuples that
1084  * need to be reassigned.
1085  */
1086  curbatch++;
1087  while (curbatch < nbatch &&
1088  (hashtable->outerBatchFile[curbatch] == NULL ||
1089  hashtable->innerBatchFile[curbatch] == NULL))
1090  {
1091  if (hashtable->outerBatchFile[curbatch] &&
1092  HJ_FILL_OUTER(hjstate))
1093  break; /* must process due to rule 1 */
1094  if (hashtable->innerBatchFile[curbatch] &&
1095  HJ_FILL_INNER(hjstate))
1096  break; /* must process due to rule 1 */
1097  if (hashtable->innerBatchFile[curbatch] &&
1098  nbatch != hashtable->nbatch_original)
1099  break; /* must process due to rule 2 */
1100  if (hashtable->outerBatchFile[curbatch] &&
1101  nbatch != hashtable->nbatch_outstart)
1102  break; /* must process due to rule 3 */
1103  /* We can ignore this batch. */
1104  /* Release associated temp files right away. */
1105  if (hashtable->innerBatchFile[curbatch])
1106  BufFileClose(hashtable->innerBatchFile[curbatch]);
1107  hashtable->innerBatchFile[curbatch] = NULL;
1108  if (hashtable->outerBatchFile[curbatch])
1109  BufFileClose(hashtable->outerBatchFile[curbatch]);
1110  hashtable->outerBatchFile[curbatch] = NULL;
1111  curbatch++;
1112  }
1113 
1114  if (curbatch >= nbatch)
1115  return false; /* no more batches */
1116 
1117  hashtable->curbatch = curbatch;
1118 
1119  /*
1120  * Reload the hash table with the new inner batch (which could be empty)
1121  */
1122  ExecHashTableReset(hashtable);
1123 
1124  innerFile = hashtable->innerBatchFile[curbatch];
1125 
1126  if (innerFile != NULL)
1127  {
1128  if (BufFileSeek(innerFile, 0, 0, SEEK_SET))
1129  ereport(ERROR,
1130  (errcode_for_file_access(),
1131  errmsg("could not rewind hash-join temporary file")));
1132 
1133  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
1134  innerFile,
1135  &hashvalue,
1136  hjstate->hj_HashTupleSlot)))
1137  {
1138  /*
1139  * NOTE: some tuples may be sent to future batches. Also, it is
1140  * possible for hashtable->nbatch to be increased here!
1141  */
1142  ExecHashTableInsert(hashtable, slot, hashvalue);
1143  }
1144 
1145  /*
1146  * after we build the hash table, the inner batch file is no longer
1147  * needed
1148  */
1149  BufFileClose(innerFile);
1150  hashtable->innerBatchFile[curbatch] = NULL;
1151  }
1152 
1153  /*
1154  * Rewind outer batch file (if present), so that we can start reading it.
1155  */
1156  if (hashtable->outerBatchFile[curbatch] != NULL)
1157  {
1158  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0, SEEK_SET))
1159  ereport(ERROR,
1160  (errcode_for_file_access(),
1161  errmsg("could not rewind hash-join temporary file")));
1162  }
1163 
1164  return true;
1165 }
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
void BufFileClose(BufFile *file)
Definition: buffile.c:412
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ereport(elevel,...)
Definition: elog.h:149
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:2291
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1616
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:2115
int * skewBucketNums
Definition: hashjoin.h:322
BufFile ** innerBatchFile
Definition: hashjoin.h:343
HashSkewBucket ** skewBucket
Definition: hashjoin.h:319

References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinOuterGetTuple()

static TupleTableSlot * ExecHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 890 of file nodeHashjoin.c.

893 {
894  HashJoinTable hashtable = hjstate->hj_HashTable;
895  int curbatch = hashtable->curbatch;
896  TupleTableSlot *slot;
897 
898  if (curbatch == 0) /* if it is the first pass */
899  {
900  /*
901  * Check to see if first outer tuple was already fetched by
902  * ExecHashJoin() and not used yet.
903  */
904  slot = hjstate->hj_FirstOuterTupleSlot;
905  if (!TupIsNull(slot))
906  hjstate->hj_FirstOuterTupleSlot = NULL;
907  else
908  slot = ExecProcNode(outerNode);
909 
910  while (!TupIsNull(slot))
911  {
912  /*
913  * We have to compute the tuple's hash value.
914  */
915  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
916 
917  econtext->ecxt_outertuple = slot;
918  if (ExecHashGetHashValue(hashtable, econtext,
919  hjstate->hj_OuterHashKeys,
920  true, /* outer tuple */
921  HJ_FILL_OUTER(hjstate),
922  hashvalue))
923  {
924  /* remember outer relation is not empty for possible rescan */
925  hjstate->hj_OuterNotEmpty = true;
926 
927  return slot;
928  }
929 
930  /*
931  * That tuple couldn't match because of a NULL, so discard it and
932  * continue with the next one.
933  */
934  slot = ExecProcNode(outerNode);
935  }
936  }
937  else if (curbatch < hashtable->nbatch)
938  {
939  BufFile *file = hashtable->outerBatchFile[curbatch];
940 
941  /*
942  * In outer-join cases, we could get here even though the batch file
943  * is empty.
944  */
945  if (file == NULL)
946  return NULL;
947 
948  slot = ExecHashJoinGetSavedTuple(hjstate,
949  file,
950  hashvalue,
951  hjstate->hj_OuterTupleSlot);
952  if (!TupIsNull(slot))
953  return slot;
954  }
955 
956  /* End of this batch */
957  return NULL;
958 }
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1816
List * hj_OuterHashKeys
Definition: execnodes.h:2106
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:2114

References HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecHashGetHashValue(), ExecHashJoinGetSavedTuple(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::outerBatchFile, JoinState::ps, PlanState::ps_ExprContext, and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinReInitializeDSM()

void ExecHashJoinReInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1609 of file nodeHashjoin.c.

1610 {
1611  int plan_node_id = state->js.ps.plan->plan_node_id;
1612  ParallelHashJoinState *pstate =
1613  shm_toc_lookup(pcxt->toc, plan_node_id, false);
1614 
1615  /*
1616  * It would be possible to reuse the shared hash table in single-batch
1617  * cases by resetting and then fast-forwarding build_barrier to
1618  * PHJ_BUILD_FREE and batch 0's batch_barrier to PHJ_BATCH_PROBE, but
1619  * currently shared hash tables are already freed by now (by the last
1620  * participant to detach from the batch). We could consider keeping it
1621  * around for single-batch joins. We'd also need to adjust
1622  * finalize_plan() so that it doesn't record a dummy dependency for
1623  * Parallel Hash nodes, preventing the rescan optimization. For now we
1624  * don't try.
1625  */
1626 
1627  /* Detach, freeing any remaining shared memory. */
1628  if (state->hj_HashTable != NULL)
1629  {
1630  ExecHashTableDetachBatch(state->hj_HashTable);
1631  ExecHashTableDetach(state->hj_HashTable);
1632  }
1633 
1634  /* Clear any shared batch files. */
1635  SharedFileSetDeleteAll(&pstate->fileset);
1636 
1637  /* Reset build_barrier to PHJ_BUILD_ELECT so we can go around again. */
1638  BarrierInit(&pstate->build_barrier, 0);
1639 }
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3274
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3366
void SharedFileSetDeleteAll(SharedFileSet *fileset)
Definition: sharedfileset.c:89

References BarrierInit(), ParallelHashJoinState::build_barrier, ExecHashTableDetach(), ExecHashTableDetachBatch(), ParallelHashJoinState::fileset, SharedFileSetDeleteAll(), shm_toc_lookup(), and ParallelContext::toc.

Referenced by ExecParallelReInitializeDSM().

◆ ExecHashJoinSaveTuple()

void ExecHashJoinSaveTuple ( MinimalTuple  tuple,
uint32  hashvalue,
BufFile **  fileptr,
HashJoinTable  hashtable 
)

Definition at line 1315 of file nodeHashjoin.c.

1317 {
1318  BufFile *file = *fileptr;
1319 
1320  /*
1321  * The batch file is lazily created. If this is the first tuple written to
1322  * this batch, the batch file is created and its buffer is allocated in
1323  * the spillCxt context, NOT in the batchCxt.
1324  *
1325  * During the build phase, buffered files are created for inner batches.
1326  * Each batch's buffered file is closed (and its buffer freed) after the
1327  * batch is loaded into memory during the outer side scan. Therefore, it
1328  * is necessary to allocate the batch file buffer in a memory context
1329  * which outlives the batch itself.
1330  *
1331  * Also, we use spillCxt instead of hashCxt for a better accounting of the
1332  * spilling memory consumption.
1333  */
1334  if (file == NULL)
1335  {
1336  MemoryContext oldctx = MemoryContextSwitchTo(hashtable->spillCxt);
1337 
1338  file = BufFileCreateTemp(false);
1339  *fileptr = file;
1340 
1341  MemoryContextSwitchTo(oldctx);
1342  }
1343 
1344  BufFileWrite(file, &hashvalue, sizeof(uint32));
1345  BufFileWrite(file, tuple, tuple->t_len);
1346 }
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
MemoryContext spillCxt
Definition: hashjoin.h:364

References BufFileCreateTemp(), BufFileWrite(), MemoryContextSwitchTo(), HashJoinTableData::spillCxt, and MinimalTupleData::t_len.

Referenced by ExecHashIncreaseNumBatches(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().

◆ ExecInitHashJoin()

HashJoinState* ExecInitHashJoin ( HashJoin * node,
EState * estate,
int  eflags 
)

Definition at line 710 of file nodeHashjoin.c.

711 {
712  HashJoinState *hjstate;
713  Plan *outerNode;
714  Hash *hashNode;
715  TupleDesc outerDesc,
716  innerDesc;
717  const TupleTableSlotOps *ops;
718 
719  /* check for unsupported flags */
720  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
721 
722  /*
723  * create state structure
724  */
725  hjstate = makeNode(HashJoinState);
726  hjstate->js.ps.plan = (Plan *) node;
727  hjstate->js.ps.state = estate;
728 
729  /*
730  * See ExecHashJoinInitializeDSM() and ExecHashJoinInitializeWorker()
731  * where this function may be replaced with a parallel version, if we
732  * managed to launch a parallel query.
733  */
734  hjstate->js.ps.ExecProcNode = ExecHashJoin;
735  hjstate->js.jointype = node->join.jointype;
736 
737  /*
738  * Miscellaneous initialization
739  *
740  * create expression context for node
741  */
742  ExecAssignExprContext(estate, &hjstate->js.ps);
743 
744  /*
745  * initialize child nodes
746  *
747  * Note: we could suppress the REWIND flag for the inner input, which
748  * would amount to betting that the hash will be a single batch. Not
749  * clear if this would be a win or not.
750  */
751  outerNode = outerPlan(node);
752  hashNode = (Hash *) innerPlan(node);
753 
754  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
755  outerDesc = ExecGetResultType(outerPlanState(hjstate));
756  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
757  innerDesc = ExecGetResultType(innerPlanState(hjstate));
758 
759  /*
760  * Initialize result slot, type and projection.
761  */
762  ExecInitResultTupleSlotTL(&hjstate->js.ps, &TTSOpsVirtual);
763  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
764 
765  /*
766  * tuple table initialization
767  */
768  ops = ExecGetResultSlotOps(outerPlanState(hjstate), NULL);
769  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate, outerDesc,
770  ops);
771 
772  /*
773  * detect whether we need only consider the first matching inner tuple
774  */
775  hjstate->js.single_match = (node->join.inner_unique ||
776  node->join.jointype == JOIN_SEMI);
777 
778  /* set up null tuples for outer joins, if needed */
779  switch (node->join.jointype)
780  {
781  case JOIN_INNER:
782  case JOIN_SEMI:
783  break;
784  case JOIN_LEFT:
785  case JOIN_ANTI:
786  hjstate->hj_NullInnerTupleSlot =
787  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
788  break;
789  case JOIN_RIGHT:
790  case JOIN_RIGHT_ANTI:
791  hjstate->hj_NullOuterTupleSlot =
792  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
793  break;
794  case JOIN_FULL:
795  hjstate->hj_NullOuterTupleSlot =
796  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
797  hjstate->hj_NullInnerTupleSlot =
798  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
799  break;
800  default:
801  elog(ERROR, "unrecognized join type: %d",
802  (int) node->join.jointype);
803  }
804 
805  /*
806  * now for some voodoo. our temporary tuple slot is actually the result
807  * tuple slot of the Hash node (which is our inner plan). we can do this
808  * because Hash nodes don't return tuples via ExecProcNode() -- instead
809  * the hash join node uses ExecScanHashBucket() to get at the contents of
810  * the hash table. -cim 6/9/91
811  */
812  {
813  HashState *hashstate = (HashState *) innerPlanState(hjstate);
814  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
815 
816  hjstate->hj_HashTupleSlot = slot;
817  }
818 
819  /*
820  * initialize child expressions
821  */
822  hjstate->js.ps.qual =
823  ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
824  hjstate->js.joinqual =
825  ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
826  hjstate->hashclauses =
827  ExecInitQual(node->hashclauses, (PlanState *) hjstate);
828 
829  /*
830  * initialize hash-specific info
831  */
832  hjstate->hj_HashTable = NULL;
833  hjstate->hj_FirstOuterTupleSlot = NULL;
834 
835  hjstate->hj_CurHashValue = 0;
836  hjstate->hj_CurBucketNo = 0;
837  hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
838  hjstate->hj_CurTuple = NULL;
839 
840  hjstate->hj_OuterHashKeys = ExecInitExprList(node->hashkeys,
841  (PlanState *) hjstate);
842  hjstate->hj_HashOperators = node->hashoperators;
843  hjstate->hj_Collations = node->hashcollations;
844 
845  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
846  hjstate->hj_MatchedOuter = false;
847  hjstate->hj_OuterNotEmpty = false;
848 
849  return hjstate;
850 }
List * ExecInitExprList(List *nodes, PlanState *parent)
Definition: execExpr.c:323
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:214
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:83
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1846
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1830
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1798
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:498
const TupleTableSlotOps * ExecGetResultSlotOps(PlanState *planstate, bool *isfixed)
Definition: execUtils.c:507
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:488
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:543
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
#define EXEC_FLAG_MARK
Definition: executor.h:69
static TupleTableSlot * ExecHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:678
#define makeNode(_type_)
Definition: nodes.h:176
@ JOIN_SEMI
Definition: nodes.h:318
@ JOIN_FULL
Definition: nodes.h:306
@ JOIN_INNER
Definition: nodes.h:304
@ JOIN_RIGHT
Definition: nodes.h:307
@ JOIN_LEFT
Definition: nodes.h:305
#define innerPlan(node)
Definition: plannodes.h:181
#define outerPlan(node)
Definition: plannodes.h:182
ExprState * hashclauses
Definition: execnodes.h:2105
List * hashcollations
Definition: plannodes.h:865
List * hashclauses
Definition: plannodes.h:863
List * hashoperators
Definition: plannodes.h:864
Join join
Definition: plannodes.h:862
List * hashkeys
Definition: plannodes.h:871
List * joinqual
Definition: plannodes.h:791
JoinType jointype
Definition: plannodes.h:789
bool inner_unique
Definition: plannodes.h:790
EState * state
Definition: execnodes.h:1038
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1074
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1042

References Assert(), elog(), ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecGetResultSlotOps(), ExecGetResultType(), ExecHashJoin(), ExecInitExprList(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitQual(), ExecInitResultTupleSlotTL(), PlanState::ExecProcNode, HashJoinState::hashclauses, HashJoin::hashclauses, HashJoin::hashcollations, HashJoin::hashkeys, HashJoin::hashoperators, HJ_BUILD_HASHTABLE, HashJoinState::hj_Collations, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, Join::inner_unique, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_RIGHT_ANTI, JOIN_SEMI, JoinState::joinqual, Join::joinqual, JoinState::jointype, Join::jointype, HashJoinState::js, makeNode, outerPlan, outerPlanState, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ResultTupleSlot, PlanState::qual, JoinState::single_match, PlanState::state, and TTSOpsVirtual.

Referenced by ExecInitNode().

◆ ExecParallelHashJoin()

static TupleTableSlot* ExecParallelHashJoin ( PlanState * pstate)
static

Definition at line 694 of file nodeHashjoin.c.

695 {
696  /*
697  * On sufficiently smart compilers this should be inlined with the
698  * parallel-oblivious branches removed.
699  */
700  return ExecHashJoinImpl(pstate, true);
701 }

References ExecHashJoinImpl().

Referenced by ExecHashJoinInitializeDSM(), and ExecHashJoinInitializeWorker().

◆ ExecParallelHashJoinNewBatch()

static bool ExecParallelHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1172 of file nodeHashjoin.c.

1173 {
1174  HashJoinTable hashtable = hjstate->hj_HashTable;
1175  int start_batchno;
1176  int batchno;
1177 
1178  /*
1179  * If we were already attached to a batch, remember not to bother checking
1180  * it again, and detach from it (possibly freeing the hash table if we are
1181  * last to detach).
1182  */
1183  if (hashtable->curbatch >= 0)
1184  {
1185  hashtable->batches[hashtable->curbatch].done = true;
1186  ExecHashTableDetachBatch(hashtable);
1187  }
1188 
1189  /*
1190  * Search for a batch that isn't done. We use an atomic counter to start
1191  * our search at a different batch in every participant when there are
1192  * more batches than participants.
1193  */
1194  batchno = start_batchno =
1195  pg_atomic_fetch_add_u32(&hashtable->parallel_state->distributor, 1) %
1196  hashtable->nbatch;
1197  do
1198  {
1199  uint32 hashvalue;
1200  MinimalTuple tuple;
1201  TupleTableSlot *slot;
1202 
1203  if (!hashtable->batches[batchno].done)
1204  {
1205  SharedTuplestoreAccessor *inner_tuples;
1206  Barrier *batch_barrier =
1207  &hashtable->batches[batchno].shared->batch_barrier;
1208 
1209  switch (BarrierAttach(batch_barrier))
1210  {
1211  case PHJ_BATCH_ELECT:
1212 
1213  /* One backend allocates the hash table. */
1214  if (BarrierArriveAndWait(batch_barrier,
1215  WAIT_EVENT_HASH_BATCH_ELECT))
1216  ExecParallelHashTableAlloc(hashtable, batchno);
1217  /* Fall through. */
1218 
1219  case PHJ_BATCH_ALLOCATE:
1220  /* Wait for allocation to complete. */
1221  BarrierArriveAndWait(batch_barrier,
1222  WAIT_EVENT_HASH_BATCH_ALLOCATE);
1223  /* Fall through. */
1224 
1225  case PHJ_BATCH_LOAD:
1226  /* Start (or join in) loading tuples. */
1227  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1228  inner_tuples = hashtable->batches[batchno].inner_tuples;
1229  sts_begin_parallel_scan(inner_tuples);
1230  while ((tuple = sts_parallel_scan_next(inner_tuples,
1231  &hashvalue)))
1232  {
1233  ExecForceStoreMinimalTuple(tuple,
1234  hjstate->hj_HashTupleSlot,
1235  false);
1236  slot = hjstate->hj_HashTupleSlot;
1237  ExecParallelHashTableInsertCurrentBatch(hashtable, slot,
1238  hashvalue);
1239  }
1240  sts_end_parallel_scan(inner_tuples);
1241  BarrierArriveAndWait(batch_barrier,
1242  WAIT_EVENT_HASH_BATCH_LOAD);
1243  /* Fall through. */
1244 
1245  case PHJ_BATCH_PROBE:
1246 
1247  /*
1248  * This batch is ready to probe. Return control to
1249  * caller. We stay attached to batch_barrier so that the
1250  * hash table stays alive until everyone's finished
1251  * probing it, but no participant is allowed to wait at
1252  * this barrier again (or else a deadlock could occur).
1253  * All attached participants must eventually detach from
1254  * the barrier and one worker must advance the phase so
1255  * that the final phase is reached.
1256  */
1257  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1258  sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
1259 
1260  return true;
1261  case PHJ_BATCH_SCAN:
1262 
1263  /*
1264  * In principle, we could help scan for unmatched tuples,
1265  * since that phase is already underway (the thing we
1266  * can't do under current deadlock-avoidance rules is wait
1267  * for others to arrive at PHJ_BATCH_SCAN, because
1268  * PHJ_BATCH_PROBE emits tuples, but in this case we just
1269  * got here without waiting). That is not yet done. For
1270  * now, we just detach and go around again. We have to
1271  * use ExecHashTableDetachBatch() because there's a small
1272  * chance we'll be the last to detach, and then we're
1273  * responsible for freeing memory.
1274  */
1275  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1276  hashtable->batches[batchno].done = true;
1277  ExecHashTableDetachBatch(hashtable);
1278  break;
1279 
1280  case PHJ_BATCH_FREE:
1281 
1282  /*
1283  * Already done. Detach and go around again (if any
1284  * remain).
1285  */
1286  BarrierDetach(batch_barrier);
1287  hashtable->batches[batchno].done = true;
1288  hashtable->curbatch = -1;
1289  break;
1290 
1291  default:
1292  elog(ERROR, "unexpected batch phase %d",
1293  BarrierPhase(batch_barrier));
1294  }
1295  }
1296  batchno = (batchno + 1) % hashtable->nbatch;
1297  } while (batchno != start_batchno);
1298 
1299  return false;
1300 }
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:323
int BarrierAttach(Barrier *barrier)
Definition: barrier.c:236
bool BarrierDetach(Barrier *barrier)
Definition: barrier.c:256
#define PHJ_BATCH_SCAN
Definition: hashjoin.h:281
#define PHJ_BATCH_PROBE
Definition: hashjoin.h:280
#define PHJ_BATCH_LOAD
Definition: hashjoin.h:279
#define PHJ_BATCH_ELECT
Definition: hashjoin.h:277
#define PHJ_BATCH_ALLOCATE
Definition: hashjoin.h:278
#define PHJ_BATCH_FREE
Definition: hashjoin.h:282
void ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3464
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3254
void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1772
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
void sts_begin_parallel_scan(SharedTuplestoreAccessor *accessor)
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:373
ParallelHashJoinState * parallel_state
Definition: hashjoin.h:372
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:221
ParallelHashJoinBatch * shared
Definition: hashjoin.h:209
SharedTuplestoreAccessor * inner_tuples
Definition: hashjoin.h:220

References BarrierArriveAndWait(), BarrierAttach(), BarrierDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, HashJoinTableData::curbatch, ParallelHashJoinState::distributor, ParallelHashJoinBatchAccessor::done, elog(), ERROR, ExecForceStoreMinimalTuple(), ExecHashTableDetachBatch(), ExecParallelHashTableAlloc(), ExecParallelHashTableInsertCurrentBatch(), ExecParallelHashTableSetCurrentBatch(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, ParallelHashJoinBatchAccessor::inner_tuples, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, pg_atomic_fetch_add_u32(), PHJ_BATCH_ALLOCATE, PHJ_BATCH_ELECT, PHJ_BATCH_FREE, PHJ_BATCH_LOAD, PHJ_BATCH_PROBE, PHJ_BATCH_SCAN, ParallelHashJoinBatchAccessor::shared, sts_begin_parallel_scan(), sts_end_parallel_scan(), and sts_parallel_scan_next().

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinOuterGetTuple()

static TupleTableSlot * ExecParallelHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 964 of file nodeHashjoin.c.

967 {
968  HashJoinTable hashtable = hjstate->hj_HashTable;
969  int curbatch = hashtable->curbatch;
970  TupleTableSlot *slot;
971 
972  /*
973  * In the Parallel Hash case we only run the outer plan directly for
974  * single-batch hash joins. Otherwise we have to go to batch files, even
975  * for batch 0.
976  */
977  if (curbatch == 0 && hashtable->nbatch == 1)
978  {
979  slot = ExecProcNode(outerNode);
980 
981  while (!TupIsNull(slot))
982  {
983  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
984 
985  econtext->ecxt_outertuple = slot;
986  if (ExecHashGetHashValue(hashtable, econtext,
987  hjstate->hj_OuterHashKeys,
988  true, /* outer tuple */
989  HJ_FILL_OUTER(hjstate),
990  hashvalue))
991  return slot;
992 
993  /*
994  * That tuple couldn't match because of a NULL, so discard it and
995  * continue with the next one.
996  */
997  slot = ExecProcNode(outerNode);
998  }
999  }
1000  else if (curbatch < hashtable->nbatch)
1001  {
1002  MinimalTuple tuple;
1003 
1004  tuple = sts_parallel_scan_next(hashtable->batches[curbatch].outer_tuples,
1005  hashvalue);
1006  if (tuple != NULL)
1007  {
1008  ExecForceStoreMinimalTuple(tuple,
1009  hjstate->hj_OuterTupleSlot,
1010  false);
1011  slot = hjstate->hj_OuterTupleSlot;
1012  return slot;
1013  }
1014  else
1015  ExecClearTuple(hjstate->hj_OuterTupleSlot);
1016  }
1017 
1018  /* End of this batch */
1019  hashtable->batches[curbatch].outer_eof = true;
1020 
1021  return NULL;
1022 }

References HashJoinTableData::batches, HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecClearTuple(), ExecForceStoreMinimalTuple(), ExecHashGetHashValue(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_eof, ParallelHashJoinBatchAccessor::outer_tuples, JoinState::ps, PlanState::ps_ExprContext, sts_parallel_scan_next(), and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinPartitionOuter()

static void ExecParallelHashJoinPartitionOuter ( HashJoinState * hjstate)
static

Definition at line 1498 of file nodeHashjoin.c.

1499 {
1500  PlanState *outerState = outerPlanState(hjstate);
1501  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1502  HashJoinTable hashtable = hjstate->hj_HashTable;
1503  TupleTableSlot *slot;
1504  uint32 hashvalue;
1505  int i;
1506 
1507  Assert(hjstate->hj_FirstOuterTupleSlot == NULL);
1508 
1509  /* Execute outer plan, writing all tuples to shared tuplestores. */
1510  for (;;)
1511  {
1512  slot = ExecProcNode(outerState);
1513  if (TupIsNull(slot))
1514  break;
1515  econtext->ecxt_outertuple = slot;
1516  if (ExecHashGetHashValue(hashtable, econtext,
1517  hjstate->hj_OuterHashKeys,
1518  true, /* outer tuple */
1519  HJ_FILL_OUTER(hjstate),
1520  &hashvalue))
1521  {
1522  int batchno;
1523  int bucketno;
1524  bool shouldFree;
1525  MinimalTuple mintup = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1526 
1527  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno,
1528  &batchno);
1529  sts_puttuple(hashtable->batches[batchno].outer_tuples,
1530  &hashvalue, mintup);
1531 
1532  if (shouldFree)
1533  heap_free_minimal_tuple(mintup);
1534  }
1535  CHECK_FOR_INTERRUPTS();
1536  }
1537 
1538  /* Make sure all outer partitions are readable by any backend. */
1539  for (i = 0; i < hashtable->nbatch; ++i)
1540  sts_end_write(hashtable->batches[i].outer_tuples);
1541 }
int i
Definition: isn.c:73
void sts_end_write(SharedTuplestoreAccessor *accessor)
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)

References Assert(), HashJoinTableData::batches, CHECK_FOR_INTERRUPTS, ExprContext::ecxt_outertuple, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetHashValue(), ExecProcNode(), heap_free_minimal_tuple(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, i, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, outerPlanState, JoinState::ps, PlanState::ps_ExprContext, sts_end_write(), sts_puttuple(), and TupIsNull.

Referenced by ExecHashJoinImpl().

◆ ExecReScanHashJoin()

void ExecReScanHashJoin ( HashJoinState *node)

Definition at line 1395 of file nodeHashjoin.c.

/*
 * Prepares the hash join node for a rescan.  If a hash table exists, it is
 * either kept (single batch and no changed parameters on the inner subnode)
 * or destroyed so that it will be rebuilt.  The per-tuple scan state is
 * always reset, and the outer subnode is rescanned here unless it has
 * changed parameters.
 */
1396 {
1397  PlanState *outerPlan = outerPlanState(node);
1398  PlanState *innerPlan = innerPlanState(node);
1399 
1400  /*
1401  * In a multi-batch join, we currently have to do rescans the hard way,
1402  * primarily because batch temp files may have already been released. But
1403  * if it's a single-batch join, and there is no parameter change for the
1404  * inner subnode, then we can just re-use the existing hash table without
1405  * rebuilding it.
1406  */
1407  if (node->hj_HashTable != NULL)
1408  {
1409  if (node->hj_HashTable->nbatch == 1 &&
1410  innerPlan->chgParam == NULL)
1411  {
1412  /*
1413  * Okay to reuse the hash table; needn't rescan inner, either.
1414  *
1415  * However, if it's a right/right-anti/full join, we'd better
1416  * reset the inner-tuple match flags contained in the table.
1417  */
1418  if (HJ_FILL_INNER(node))
1419  ExecHashTableResetMatchFlags(node->hj_HashTable);
1420 
1421  /*
1422  * Also, we need to reset our state about the emptiness of the
1423  * outer relation, so that the new scan of the outer will update
1424  * it correctly if it turns out to be empty this time. (There's no
1425  * harm in clearing it now because ExecHashJoin won't need the
1426  * info. In the other cases, where the hash table doesn't exist
1427  * or we are destroying it, we leave this state alone because
1428  * ExecHashJoin will need it the first time through.)
1429  */
1430  node->hj_OuterNotEmpty = false;
1431 
1432  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
1433  node->hj_JoinState = HJ_NEED_NEW_OUTER;
1434  }
1435  else
1436  {
1437  /* must destroy and rebuild hash table */
1438  HashState *hashNode = castNode(HashState, innerPlan);
1439 
1440  Assert(hashNode->hashtable == node->hj_HashTable);
1441  /* accumulate stats from old hash table, if wanted */
1442  /* (this should match ExecShutdownHash) */
1443  if (hashNode->ps.instrument && !hashNode->hinstrument)
1444  hashNode->hinstrument = (HashInstrumentation *)
1445  palloc0(sizeof(HashInstrumentation));
1446  if (hashNode->hinstrument)
1447  ExecHashAccumInstrumentation(hashNode->hinstrument,
1448  hashNode->hashtable);
1449  /* for safety, be sure to clear child plan node's pointer too */
1450  hashNode->hashtable = NULL;
1451 
1452  ExecHashTableDestroy(node->hj_HashTable);
1453  node->hj_HashTable = NULL;
1454  node->hj_JoinState = HJ_BUILD_HASHTABLE;
1455 
1456  /*
1457  * if chgParam of subnode is not null then plan will be re-scanned
1458  * by first ExecProcNode.
1459  */
1460  if (innerPlan->chgParam == NULL)
1461  ExecReScan(innerPlan);
1462  }
1463  }
1464 
1465  /* Always reset intra-tuple state */
1466  node->hj_CurHashValue = 0;
1467  node->hj_CurBucketNo = 0;
1468  node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
1469  node->hj_CurTuple = NULL;
1470 
1471  node->hj_MatchedOuter = false;
1472  node->hj_FirstOuterTupleSlot = NULL;
1473 
1474  /*
1475  * if chgParam of subnode is not null then plan will be re-scanned by
1476  * first ExecProcNode.
1477  */
1478  if (outerPlan->chgParam == NULL)
1479  ExecReScan(outerPlan);
1480 }
void ExecReScan(PlanState *node)
Definition: execAmi.c:78
void * palloc0(Size size)
Definition: mcxt.c:1257
void ExecHashAccumInstrumentation(HashInstrumentation *instrument, HashJoinTable hashtable)
Definition: nodeHash.c:2842
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:2319
HashInstrumentation * hinstrument
Definition: execnodes.h:2677
Instrumentation * instrument
Definition: execnodes.h:1046

References Assert(), castNode, ExecHashAccumInstrumentation(), ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), HashState::hashtable, HashState::hinstrument, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_OUTER, HashJoinState::hj_OuterNotEmpty, innerPlan, innerPlanState, PlanState::instrument, INVALID_SKEW_BUCKET_NO, HashJoinTableData::nbatch, outerPlan, outerPlanState, palloc0(), and HashState::ps.

Referenced by ExecReScan().

◆ ExecShutdownHashJoin()

void ExecShutdownHashJoin ( HashJoinState *node)

Definition at line 1483 of file nodeHashjoin.c.

/*
 * If the node has a hash table, detaches from its current batch and then
 * from the hash table itself.  Does nothing when no hash table exists.
 */
1484 {
1485  if (node->hj_HashTable)
1486  {
1487  /*
1488  * Detach from shared state before DSM memory goes away. This makes
1489  * sure that we don't have any pointers into DSM memory by the time
1490  * ExecEndHashJoin runs.
1491  */
1492  ExecHashTableDetachBatch(node->hj_HashTable);
1493  ExecHashTableDetach(node->hj_HashTable);
1494  }
1495 }

References ExecHashTableDetach(), ExecHashTableDetachBatch(), and HashJoinState::hj_HashTable.

Referenced by ExecShutdownNode_walker().