PostgreSQL Source Code  git master
nodeHashjoin.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/parallel.h"
#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/memutils.h"
#include "utils/sharedtuplestore.h"
Include dependency graph for nodeHashjoin.c:

Go to the source code of this file.

Macros

#define HJ_BUILD_HASHTABLE   1
 
#define HJ_NEED_NEW_OUTER   2
 
#define HJ_SCAN_BUCKET   3
 
#define HJ_FILL_OUTER_TUPLE   4
 
#define HJ_FILL_INNER_TUPLES   5
 
#define HJ_NEED_NEW_BATCH   6
 
#define HJ_FILL_OUTER(hjstate)   ((hjstate)->hj_NullInnerTupleSlot != NULL)
 
#define HJ_FILL_INNER(hjstate)   ((hjstate)->hj_NullOuterTupleSlot != NULL)
 

Functions

static TupleTableSlot * ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
 
static bool ExecHashJoinNewBatch (HashJoinState *hjstate)
 
static bool ExecParallelHashJoinNewBatch (HashJoinState *hjstate)
 
static void ExecParallelHashJoinPartitionOuter (HashJoinState *node)
 
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl (PlanState *pstate, bool parallel)
 
static TupleTableSlot * ExecHashJoin (PlanState *pstate)
 
static TupleTableSlot * ExecParallelHashJoin (PlanState *pstate)
 
HashJoinState * ExecInitHashJoin (HashJoin *node, EState *estate, int eflags)
 
void ExecEndHashJoin (HashJoinState *node)
 
void ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
 
void ExecReScanHashJoin (HashJoinState *node)
 
void ExecShutdownHashJoin (HashJoinState *node)
 
void ExecHashJoinEstimate (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinReInitializeDSM (HashJoinState *state, ParallelContext *cxt)
 
void ExecHashJoinInitializeWorker (HashJoinState *state, ParallelWorkerContext *pwcxt)
 

Macro Definition Documentation

◆ HJ_BUILD_HASHTABLE

#define HJ_BUILD_HASHTABLE   1

Definition at line 124 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), ExecInitHashJoin(), and ExecReScanHashJoin().

◆ HJ_FILL_INNER

#define HJ_FILL_INNER (   hjstate)    ((hjstate)->hj_NullOuterTupleSlot != NULL)

Definition at line 134 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), ExecHashJoinNewBatch(), and ExecReScanHashJoin().

◆ HJ_FILL_INNER_TUPLES

#define HJ_FILL_INNER_TUPLES   5

Definition at line 128 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_FILL_OUTER

#define HJ_FILL_OUTER (   hjstate)    ((hjstate)->hj_NullInnerTupleSlot != NULL)

Definition at line 132 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_TUPLE

#define HJ_FILL_OUTER_TUPLE   4

Definition at line 127 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_NEED_NEW_BATCH

#define HJ_NEED_NEW_BATCH   6

Definition at line 129 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_NEED_NEW_OUTER

#define HJ_NEED_NEW_OUTER   2

Definition at line 125 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), and ExecReScanHashJoin().

◆ HJ_SCAN_BUCKET

#define HJ_SCAN_BUCKET   3

Definition at line 126 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

Function Documentation

◆ ExecEndHashJoin()

void ExecEndHashJoin ( HashJoinState * node)

Definition at line 773 of file nodeHashjoin.c.

References ExecClearTuple(), ExecEndNode(), ExecFreeExprContext(), ExecHashTableDestroy(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_OuterTupleSlot, innerPlanState, HashJoinState::js, outerPlanState, JoinState::ps, and PlanState::ps_ResultTupleSlot.

Referenced by ExecEndNode().

774 {
775  /*
776  * Free hash table
777  */
778  if (node->hj_HashTable)
779  {
780  ExecHashTableDestroy(node->hj_HashTable);
781  node->hj_HashTable = NULL;
782  }
783 
784  /*
785  * Free the exprcontext
786  */
787  ExecFreeExprContext(&node->js.ps);
788 
789  /*
790  * clean out the tuple table
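791  */
792  ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
793  ExecClearTuple(node->hj_OuterTupleSlot);
794  ExecClearTuple(node->hj_HashTupleSlot);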
795 
796  /*
797  * clean up subtrees
798  */
799  ExecEndNode(outerPlanState(node));
800  ExecEndNode(innerPlanState(node));
801 }
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:404
PlanState ps
Definition: execnodes.h:1765
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:538
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1878
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:611
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:970
#define outerPlanState(node)
Definition: execnodes.h:1026
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1879
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
#define innerPlanState(node)
Definition: execnodes.h:1025
JoinState js
Definition: execnodes.h:1868
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:849

◆ ExecHashJoin()

static TupleTableSlot* ExecHashJoin ( PlanState * pstate)
static

Definition at line 565 of file nodeHashjoin.c.

References ExecHashJoinImpl().

Referenced by ExecInitHashJoin().

566 {
567  /*
568  * On sufficiently smart compilers this should be inlined with the
569  * parallel-aware branches removed.
570  */
571  return ExecHashJoinImpl(pstate, false);
572 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:165

◆ ExecHashJoinEstimate()

void ExecHashJoinEstimate ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1439 of file nodeHashjoin.c.

References ParallelContext::estimator, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

1440 {
1441  shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelHashJoinState));
1442  shm_toc_estimate_keys(&pcxt->estimator, 1);
1443 }
shm_toc_estimator estimator
Definition: parallel.h:41
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53

◆ ExecHashJoinGetSavedTuple()

static TupleTableSlot * ExecHashJoinGetSavedTuple ( HashJoinState * hjstate,
BufFile * file,
uint32 * hashvalue,
TupleTableSlot * tupleSlot 
)
static

Definition at line 1260 of file nodeHashjoin.c.

References BufFileRead(), CHECK_FOR_INTERRUPTS, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecClearTuple(), ExecForceStoreMinimalTuple(), header(), palloc(), and MinimalTupleData::t_len.

Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().

1264 {
1265  uint32 header[2];
1266  size_t nread;
1267  MinimalTuple tuple;
1268 
1269  /*
1270  * We check for interrupts here because this is typically taken as an
1271  * alternative code path to an ExecProcNode() call, which would include
1272  * such a check.
1273  */
1274  CHECK_FOR_INTERRUPTS();
1275 
1276  /*
1277  * Since both the hash value and the MinimalTuple length word are uint32,
1278  * we can read them both in one BufFileRead() call without any type
1279  * cheating.
1280  */
1281  nread = BufFileRead(file, (void *) header, sizeof(header));
1282  if (nread == 0) /* end of file */
1283  {
1284  ExecClearTuple(tupleSlot);
1285  return NULL;
1286  }
1287  if (nread != sizeof(header))
1288  ereport(ERROR,
1289  (errcode_for_file_access(),
1290  errmsg("could not read from hash-join temporary file: %m")));
1291  *hashvalue = header[0];
1292  tuple = (MinimalTuple) palloc(header[1]);
1293  tuple->t_len = header[1];
1294  nread = BufFileRead(file,
1295  (void *) ((char *) tuple + sizeof(uint32)),
1296  header[1] - sizeof(uint32));
1297  if (nread != header[1] - sizeof(uint32))
1298  ereport(ERROR,
1299  (errcode_for_file_access(),
1300  errmsg("could not read from hash-join temporary file: %m")));
1301  ExecForceStoreMinimalTuple(tuple, tupleSlot, true);
1302  return tupleSlot;
1303 }
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:404
#define ERROR
Definition: elog.h:43
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1392
MinimalTupleData * MinimalTuple
Definition: htup.h:27
int errcode_for_file_access(void)
Definition: elog.c:593
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:208
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:784
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:516
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99

◆ ExecHashJoinImpl()

static pg_attribute_always_inline TupleTableSlot* ExecHashJoinImpl ( PlanState * pstate,
bool  parallel 
)
static

Definition at line 165 of file nodeHashjoin.c.

References Assert, BarrierArriveAndWait(), BarrierPhase(), ParallelHashJoinState::build_barrier, castNode, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog, ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecParallelHashJoinNewBatch(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), ExecParallelScanHashBucket(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), HashState::hashtable, heap_free_minimal_tuple(), HeapTupleHeaderSetMatch, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterNotEmpty, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, HashJoinTableData::outerBatchFile, outerPlanState, HashState::parallel_state, PHJ_BUILD_DONE, PHJ_BUILD_HASHING_OUTER, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, PlanState::qual, ResetExprContext, JoinState::single_match, Plan::startup_cost, Plan::total_cost, HashJoinTableData::totalTuples, TupIsNull, and WAIT_EVENT_HASH_BUILD_HASHING_OUTER.

Referenced by ExecHashJoin(), and ExecParallelHashJoin().

166 {
167  HashJoinState *node = castNode(HashJoinState, pstate);
168  PlanState *outerNode;
169  HashState *hashNode;
170  ExprState *joinqual;
171  ExprState *otherqual;
172  ExprContext *econtext;
173  HashJoinTable hashtable;
174  TupleTableSlot *outerTupleSlot;
175  uint32 hashvalue;
176  int batchno;
177  ParallelHashJoinState *parallel_state;
178 
179  /*
180  * get information from HashJoin node
181  */
182  joinqual = node->js.joinqual;
183  otherqual = node->js.ps.qual;
184  hashNode = (HashState *) innerPlanState(node);
185  outerNode = outerPlanState(node);
186  hashtable = node->hj_HashTable;
187  econtext = node->js.ps.ps_ExprContext;
188  parallel_state = hashNode->parallel_state;
189 
190  /*
191  * Reset per-tuple memory context to free any expression evaluation
192  * storage allocated in the previous tuple cycle.
193  */
194  ResetExprContext(econtext);
195 
196  /*
197  * run the hash join state machine
198  */
199  for (;;)
200  {
201  /*
202  * It's possible to iterate this loop many times before returning a
203  * tuple, in some pathological cases such as needing to move much of
204  * the current batch to a later batch. So let's check for interrupts
205  * each time through.
206  */
207  CHECK_FOR_INTERRUPTS();
208 
209  switch (node->hj_JoinState)
210  {
211  case HJ_BUILD_HASHTABLE:
212 
213  /*
214  * First time through: build hash table for inner relation.
215  */
216  Assert(hashtable == NULL);
217 
218  /*
219  * If the outer relation is completely empty, and it's not
220  * right/full join, we can quit without building the hash
221  * table. However, for an inner join it is only a win to
222  * check this when the outer relation's startup cost is less
223  * than the projected cost of building the hash table.
224  * Otherwise it's best to build the hash table first and see
225  * if the inner relation is empty. (When it's a left join, we
226  * should always make this check, since we aren't going to be
227  * able to skip the join on the strength of an empty inner
228  * relation anyway.)
229  *
230  * If we are rescanning the join, we make use of information
231  * gained on the previous scan: don't bother to try the
232  * prefetch if the previous scan found the outer relation
233  * nonempty. This is not 100% reliable since with new
234  * parameters the outer relation might yield different
235  * results, but it's a good heuristic.
236  *
237  * The only way to make the check is to try to fetch a tuple
238  * from the outer plan node. If we succeed, we have to stash
239  * it away for later consumption by ExecHashJoinOuterGetTuple.
240  */
241  if (HJ_FILL_INNER(node))
242  {
243  /* no chance to not build the hash table */
244  node->hj_FirstOuterTupleSlot = NULL;
245  }
246  else if (parallel)
247  {
248  /*
249  * The empty-outer optimization is not implemented for
250  * shared hash tables, because no one participant can
251  * determine that there are no outer tuples, and it's not
252  * yet clear that it's worth the synchronization overhead
253  * of reaching consensus to figure that out. So we have
254  * to build the hash table.
255  */
256  node->hj_FirstOuterTupleSlot = NULL;
257  }
258  else if (HJ_FILL_OUTER(node) ||
259  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
260  !node->hj_OuterNotEmpty))
261  {
262  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
263  if (TupIsNull(node->hj_FirstOuterTupleSlot))
264  {
265  node->hj_OuterNotEmpty = false;
266  return NULL;
267  }
268  else
269  node->hj_OuterNotEmpty = true;
270  }
271  else
272  node->hj_FirstOuterTupleSlot = NULL;
273 
274  /*
275  * Create the hash table. If using Parallel Hash, then
276  * whoever gets here first will create the hash table and any
277  * later arrivals will merely attach to it.
278  */
279  hashtable = ExecHashTableCreate(hashNode,
280  node->hj_HashOperators,
281  HJ_FILL_INNER(node));
282  node->hj_HashTable = hashtable;
283 
284  /*
285  * Execute the Hash node, to build the hash table. If using
286  * Parallel Hash, then we'll try to help hashing unless we
287  * arrived too late.
288  */
289  hashNode->hashtable = hashtable;
290  (void) MultiExecProcNode((PlanState *) hashNode);
291 
292  /*
293  * If the inner relation is completely empty, and we're not
294  * doing a left outer join, we can quit without scanning the
295  * outer relation.
296  */
297  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
298  return NULL;
299 
300  /*
301  * need to remember whether nbatch has increased since we
302  * began scanning the outer relation
303  */
304  hashtable->nbatch_outstart = hashtable->nbatch;
305 
306  /*
307  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
308  * tuple above, because ExecHashJoinOuterGetTuple will
309  * immediately set it again.)
310  */
311  node->hj_OuterNotEmpty = false;
312 
313  if (parallel)
314  {
315  Barrier *build_barrier;
316 
317  build_barrier = &parallel_state->build_barrier;
318  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
319  BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
320  if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
321  {
322  /*
323  * If multi-batch, we need to hash the outer relation
324  * up front.
325  */
326  if (hashtable->nbatch > 1)
327  ExecParallelHashJoinPartitionOuter(node);
328  BarrierArriveAndWait(build_barrier,
329  WAIT_EVENT_HASH_BUILD_HASHING_OUTER);
330  }
331  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
332 
333  /* Each backend should now select a batch to work on. */
334  hashtable->curbatch = -1;
335  node->hj_JoinState = HJ_NEED_NEW_BATCH;
336 
337  continue;
338  }
339  else
340  node->hj_JoinState = HJ_NEED_NEW_OUTER;
341 
342  /* FALL THRU */
343 
344  case HJ_NEED_NEW_OUTER:
345 
346  /*
347  * We don't have an outer tuple, try to get the next one
348  */
349  if (parallel)
350  outerTupleSlot =
351  ExecParallelHashJoinOuterGetTuple(outerNode, node,
352  &hashvalue);
353  else
354  outerTupleSlot =
355  ExecHashJoinOuterGetTuple(outerNode, node, &hashvalue);
356 
357  if (TupIsNull(outerTupleSlot))
358  {
359  /* end of batch, or maybe whole join */
360  if (HJ_FILL_INNER(node))
361  {
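362  /* set up to scan for unmatched inner tuples */
363  ExecPrepHashTableForUnmatched(node);
364  node->hj_JoinState = HJ_FILL_INNER_TUPLES;
365  }
366  else
367  node->hj_JoinState = HJ_NEED_NEW_BATCH;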
368  continue;
369  }
370 
371  econtext->ecxt_outertuple = outerTupleSlot;
372  node->hj_MatchedOuter = false;
373 
374  /*
375  * Find the corresponding bucket for this tuple in the main
376  * hash table or skew hash table.
377  */
378  node->hj_CurHashValue = hashvalue;
379  ExecHashGetBucketAndBatch(hashtable, hashvalue,
380  &node->hj_CurBucketNo, &batchno);
381  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
382  hashvalue);
383  node->hj_CurTuple = NULL;
384 
385  /*
386  * The tuple might not belong to the current batch (where
387  * "current batch" includes the skew buckets if any).
388  */
389  if (batchno != hashtable->curbatch &&
390  node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
391  {
392  bool shouldFree;
393  MinimalTuple mintuple = ExecFetchSlotMinimalTuple(outerTupleSlot,
394  &shouldFree);
395 
396  /*
397  * Need to postpone this outer tuple to a later batch.
398  * Save it in the corresponding outer-batch file.
399  */
400  Assert(parallel_state == NULL);
401  Assert(batchno > hashtable->curbatch);
402  ExecHashJoinSaveTuple(mintuple, hashvalue,
403  &hashtable->outerBatchFile[batchno]);
404 
405  if (shouldFree)
406  heap_free_minimal_tuple(mintuple);
407 
408  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
409  continue;
410  }
411 
412  /* OK, let's scan the bucket for matches */
413  node->hj_JoinState = HJ_SCAN_BUCKET;
414 
415  /* FALL THRU */
416 
417  case HJ_SCAN_BUCKET:
418 
419  /*
420  * Scan the selected hash bucket for matches to current outer
421  */
422  if (parallel)
423  {
424  if (!ExecParallelScanHashBucket(node, econtext))
425  {
426  /* out of matches; check for possible outer-join fill */
427  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
428  continue;
429  }
430  }
431  else
432  {
433  if (!ExecScanHashBucket(node, econtext))
434  {
435  /* out of matches; check for possible outer-join fill */
436  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
437  continue;
438  }
439  }
440 
441  /*
442  * We've got a match, but still need to test non-hashed quals.
443  * ExecScanHashBucket already set up all the state needed to
444  * call ExecQual.
445  *
446  * If we pass the qual, then save state for next call and have
447  * ExecProject form the projection, store it in the tuple
448  * table, and return the slot.
449  *
450  * Only the joinquals determine tuple match status, but all
451  * quals must pass to actually return the tuple.
452  */
453  if (joinqual == NULL || ExecQual(joinqual, econtext))
454  {
455  node->hj_MatchedOuter = true;
456  HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
457 
458  /* In an antijoin, we never return a matched tuple */
459  if (node->js.jointype == JOIN_ANTI)
460  {
461  node->hj_JoinState = HJ_NEED_NEW_OUTER;
462  continue;
463  }
464 
465  /*
466  * If we only need to join to the first matching inner
467  * tuple, then consider returning this one, but after that
468  * continue with next outer tuple.
469  */
470  if (node->js.single_match)
471  node->hj_JoinState = HJ_NEED_NEW_OUTER;
472 
473  if (otherqual == NULL || ExecQual(otherqual, econtext))
474  return ExecProject(node->js.ps.ps_ProjInfo);
475  else
476  InstrCountFiltered2(node, 1);
477  }
478  else
479  InstrCountFiltered1(node, 1);
480  break;
481 
482  case HJ_FILL_OUTER_TUPLE:
483 
484  /*
485  * The current outer tuple has run out of matches, so check
486  * whether to emit a dummy outer-join tuple. Whether we emit
487  * one or not, the next state is NEED_NEW_OUTER.
488  */
489  node->hj_JoinState = HJ_NEED_NEW_OUTER;
490 
491  if (!node->hj_MatchedOuter &&
492  HJ_FILL_OUTER(node))
493  {
494  /*
495  * Generate a fake join tuple with nulls for the inner
496  * tuple, and return it if it passes the non-join quals.
497  */
498  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
499 
500  if (otherqual == NULL || ExecQual(otherqual, econtext))
501  return ExecProject(node->js.ps.ps_ProjInfo);
502  else
503  InstrCountFiltered2(node, 1);
504  }
505  break;
506 
507  case HJ_FILL_INNER_TUPLES:
508 
509  /*
510  * We have finished a batch, but we are doing right/full join,
511  * so any unmatched inner tuples in the hashtable have to be
512  * emitted before we continue to the next batch.
513  */
514  if (!ExecScanHashTableForUnmatched(node, econtext))
515  {
516  /* no more unmatched tuples */
517  node->hj_JoinState = HJ_NEED_NEW_BATCH;
518  continue;
519  }
520 
521  /*
522  * Generate a fake join tuple with nulls for the outer tuple,
523  * and return it if it passes the non-join quals.
524  */
525  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
526 
527  if (otherqual == NULL || ExecQual(otherqual, econtext))
528  return ExecProject(node->js.ps.ps_ProjInfo);
529  else
530  InstrCountFiltered2(node, 1);
531  break;
532 
533  case HJ_NEED_NEW_BATCH:
534 
535  /*
536  * Try to advance to next batch. Done if there are no more.
537  */
538  if (parallel)
539  {
540  if (!ExecParallelHashJoinNewBatch(node))
541  return NULL; /* end of parallel-aware join */
542  }
543  else
544  {
545  if (!ExecHashJoinNewBatch(node))
546  return NULL; /* end of parallel-oblivious join */
547  }
548  node->hj_JoinState = HJ_NEED_NEW_OUTER;
549  break;
550 
551  default:
552  elog(ERROR, "unrecognized hashjoin state: %d",
553  (int) node->hj_JoinState);
554  }
555  }
556 }
JoinType jointype
Definition: execnodes.h:1766
HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
Definition: nodeHash.c:428
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2215
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:129
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:126
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1881
ExprState * joinqual
Definition: execnodes.h:1769
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:972
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:890
PlanState ps
Definition: execnodes.h:1765
#define castNode(_type_, nodeptr)
Definition: nodes.h:586
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2055
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
Definition: execTuples.c:1540
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:2031
ExprContext * ps_ExprContext
Definition: execnodes.h:971
bool single_match
Definition: execnodes.h:1767
HashJoinTable hashtable
Definition: execnodes.h:2207
bool hj_MatchedOuter
Definition: execnodes.h:1884
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:816
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:354
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1882
#define PHJ_BUILD_HASHING_OUTER
Definition: hashjoin.h:260
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:2355
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1887
uint32 hj_CurHashValue
Definition: execnodes.h:1874
int hj_CurSkewBucketNo
Definition: execnodes.h:1876
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1880
static void ExecParallelHashJoinPartitionOuter(HashJoinState *node)
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1429
BufFile ** outerBatchFile
Definition: hashjoin.h:330
#define outerPlanState(node)
Definition: execnodes.h:1026
Cost startup_cost
Definition: plannodes.h:121
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1877
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1919
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:224
#define TupIsNull(slot)
Definition: tuptable.h:288
unsigned int uint32
Definition: c.h:358
PlanState ps
Definition: execnodes.h:2206
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:1034
List * hj_HashOperators
Definition: execnodes.h:1872
#define PHJ_BUILD_DONE
Definition: hashjoin.h:261
int hj_CurBucketNo
Definition: execnodes.h:1875
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:128
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:225
Plan * plan
Definition: execnodes.h:932
double totalTuples
Definition: hashjoin.h:318
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:80
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:226
bool hj_OuterNotEmpty
Definition: execnodes.h:1885
#define Assert(condition)
Definition: c.h:732
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1039
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:243
static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:125
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
ExprState * qual
Definition: execnodes.h:953
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:483
#define elog(elevel,...)
Definition: elog.h:226
Cost total_cost
Definition: plannodes.h:122
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:521
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:127
#define innerPlanState(node)
Definition: execnodes.h:1025
bool ExecParallelScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1980
JoinState js
Definition: execnodes.h:1868
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
Definition: nodeHashjoin.c:955
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: executor.h:317
#define ResetExprContext(econtext)
Definition: executor.h:484

◆ ExecHashJoinInitializeDSM()

void ExecHashJoinInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1446 of file nodeHashjoin.c.

References BarrierInit(), ParallelHashJoinState::batches, ParallelHashJoinState::build_barrier, ParallelHashJoinState::chunk_work_queue, ParallelHashJoinState::distributor, ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, ParallelHashJoinState::grow_batches_barrier, ParallelHashJoinState::grow_buckets_barrier, ParallelHashJoinState::growth, innerPlanState, InvalidDsaPointer, HashJoinState::js, ParallelHashJoinState::lock, LWLockInitialize(), LWTRANCHE_PARALLEL_HASH_JOIN, ParallelHashJoinState::nbatch, ParallelHashJoinState::nbuckets, ParallelHashJoinState::nparticipants, ParallelContext::nworkers, ParallelHashJoinState::old_batches, HashState::parallel_state, pg_atomic_init_u32(), PHJ_GROWTH_OK, PlanState::plan, Plan::plan_node_id, JoinState::ps, ParallelContext::seg, SharedFileSetInit(), shm_toc_allocate(), shm_toc_insert(), ParallelHashJoinState::space_allowed, ParallelContext::toc, and ParallelHashJoinState::total_tuples.

Referenced by ExecParallelInitializeDSM().

1447 {
1448  int plan_node_id = state->js.ps.plan->plan_node_id;
1449  HashState *hashNode;
1450  ParallelHashJoinState *pstate;
1451 
1452  /*
1453  * Disable shared hash table mode if we failed to create a real DSM
1454  * segment, because that means that we don't have a DSA area to work with.
1455  */
1456  if (pcxt->seg == NULL)
1457  return;
1458 
1459  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1460 
1461  /*
1462  * Set up the state needed to coordinate access to the shared hash
1463  * table(s), using the plan node ID as the toc key.
1464  */
1465  pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelHashJoinState));
1466  shm_toc_insert(pcxt->toc, plan_node_id, pstate);
1467 
1468  /*
1469  * Set up the shared hash join state with no batches initially.
1470  * ExecHashTableCreate() will prepare at least one later and set nbatch
1471  * and space_allowed.
1472  */
1473  pstate->nbatch = 0;
1474  pstate->space_allowed = 0;
1475  pstate->batches = InvalidDsaPointer;
1476  pstate->old_batches = InvalidDsaPointer;
1477  pstate->nbuckets = 0;
1478  pstate->growth = PHJ_GROWTH_OK;
1479  pstate->chunk_work_queue = InvalidDsaPointer;
1480  pg_atomic_init_u32(&pstate->distributor, 0);
1481  pstate->nparticipants = pcxt->nworkers + 1;
1482  pstate->total_tuples = 0;
1483  LWLockInitialize(&pstate->lock,
1484  LWTRANCHE_PARALLEL_HASH_JOIN);
1485  BarrierInit(&pstate->build_barrier, 0);
1486  BarrierInit(&pstate->grow_batches_barrier, 0);
1487  BarrierInit(&pstate->grow_buckets_barrier, 0);
1488 
1489  /* Set up the space we'll use for shared temporary files. */
1490  SharedFileSetInit(&pstate->fileset, pcxt->seg);
1491 
1492  /* Initialize the shared state in the hash node. */
1493  hashNode = (HashState *) innerPlanState(state);
1494  hashNode->parallel_state = pstate;
1495 }
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2215
dsa_pointer chunk_work_queue
Definition: hashjoin.h:242
#define InvalidDsaPointer
Definition: dsa.h:78
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:47
PlanState ps
Definition: execnodes.h:1765
dsm_segment * seg
Definition: parallel.h:42
int plan_node_id
Definition: plannodes.h:139
SharedFileSet fileset
Definition: hashjoin.h:253
Barrier grow_buckets_barrier
Definition: hashjoin.h:250
dsa_pointer batches
Definition: hashjoin.h:236
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:677
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:406
Plan * plan
Definition: execnodes.h:932
ParallelHashGrowth growth
Definition: hashjoin.h:241
dsa_pointer old_batches
Definition: hashjoin.h:237
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:581
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
pg_atomic_uint32 distributor
Definition: hashjoin.h:251
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:226
#define innerPlanState(node)
Definition: execnodes.h:1025
JoinState js
Definition: execnodes.h:1868
Barrier grow_batches_barrier
Definition: hashjoin.h:249
shm_toc * toc
Definition: parallel.h:44

◆ ExecHashJoinInitializeWorker()

void ExecHashJoinInitializeWorker ( HashJoinState * state,
ParallelWorkerContext * pwcxt 
)

Definition at line 1537 of file nodeHashjoin.c.

References ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, innerPlanState, HashJoinState::js, HashState::parallel_state, PlanState::plan, Plan::plan_node_id, JoinState::ps, ParallelWorkerContext::seg, SharedFileSetAttach(), shm_toc_lookup(), and ParallelWorkerContext::toc.

Referenced by ExecParallelInitializeWorker().

1539 {
1540  HashState *hashNode;
1541  int plan_node_id = state->js.ps.plan->plan_node_id;
1542  ParallelHashJoinState *pstate =
1543  shm_toc_lookup(pwcxt->toc, plan_node_id, false);
1544 
1545  /* Attach to the space for shared temporary files. */
1546  SharedFileSetAttach(&pstate->fileset, pwcxt->seg);
1547 
1548  /* Attach to the shared state in the hash node. */
1549  hashNode = (HashState *) innerPlanState(state);
1550  hashNode->parallel_state = pstate;
1551 
1552  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1553 }
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2215
PlanState ps
Definition: execnodes.h:1765
int plan_node_id
Definition: plannodes.h:139
SharedFileSet fileset
Definition: hashjoin.h:253
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:406
Plan * plan
Definition: execnodes.h:932
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:581
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:76
#define innerPlanState(node)
Definition: execnodes.h:1025
JoinState js
Definition: execnodes.h:1868
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
dsm_segment * seg
Definition: parallel.h:52

◆ ExecHashJoinNewBatch()

static bool ExecHashJoinNewBatch ( HashJoinState *  hjstate)
static

Definition at line 955 of file nodeHashjoin.c.

References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.

Referenced by ExecHashJoinImpl().

956 {
957  HashJoinTable hashtable = hjstate->hj_HashTable;
958  int nbatch;
959  int curbatch;
960  BufFile *innerFile;
961  TupleTableSlot *slot;
962  uint32 hashvalue;
963 
964  nbatch = hashtable->nbatch;
965  curbatch = hashtable->curbatch;
966 
967  if (curbatch > 0)
968  {
969  /*
970  * We no longer need the previous outer batch file; close it right
971  * away to free disk space.
972  */
973  if (hashtable->outerBatchFile[curbatch])
974  BufFileClose(hashtable->outerBatchFile[curbatch]);
975  hashtable->outerBatchFile[curbatch] = NULL;
976  }
977  else /* we just finished the first batch */
978  {
979  /*
980  * Reset some of the skew optimization state variables, since we no
981  * longer need to consider skew tuples after the first batch. The
982  * memory context reset we are about to do will release the skew
983  * hashtable itself.
984  */
985  hashtable->skewEnabled = false;
986  hashtable->skewBucket = NULL;
987  hashtable->skewBucketNums = NULL;
988  hashtable->nSkewBuckets = 0;
989  hashtable->spaceUsedSkew = 0;
990  }
991 
992  /*
993  * We can always skip over any batches that are completely empty on both
994  * sides. We can sometimes skip over batches that are empty on only one
995  * side, but there are exceptions:
996  *
997  * 1. In a left/full outer join, we have to process outer batches even if
998  * the inner batch is empty. Similarly, in a right/full outer join, we
999  * have to process inner batches even if the outer batch is empty.
1000  *
1001  * 2. If we have increased nbatch since the initial estimate, we have to
1002  * scan inner batches since they might contain tuples that need to be
1003  * reassigned to later inner batches.
1004  *
1005  * 3. Similarly, if we have increased nbatch since starting the outer
1006  * scan, we have to rescan outer batches in case they contain tuples that
1007  * need to be reassigned.
1008  */
1009  curbatch++;
1010  while (curbatch < nbatch &&
1011  (hashtable->outerBatchFile[curbatch] == NULL ||
1012  hashtable->innerBatchFile[curbatch] == NULL))
1013  {
1014  if (hashtable->outerBatchFile[curbatch] &&
1015  HJ_FILL_OUTER(hjstate))
1016  break; /* must process due to rule 1 */
1017  if (hashtable->innerBatchFile[curbatch] &&
1018  HJ_FILL_INNER(hjstate))
1019  break; /* must process due to rule 1 */
1020  if (hashtable->innerBatchFile[curbatch] &&
1021  nbatch != hashtable->nbatch_original)
1022  break; /* must process due to rule 2 */
1023  if (hashtable->outerBatchFile[curbatch] &&
1024  nbatch != hashtable->nbatch_outstart)
1025  break; /* must process due to rule 3 */
1026  /* We can ignore this batch. */
1027  /* Release associated temp files right away. */
1028  if (hashtable->innerBatchFile[curbatch])
1029  BufFileClose(hashtable->innerBatchFile[curbatch]);
1030  hashtable->innerBatchFile[curbatch] = NULL;
1031  if (hashtable->outerBatchFile[curbatch])
1032  BufFileClose(hashtable->outerBatchFile[curbatch]);
1033  hashtable->outerBatchFile[curbatch] = NULL;
1034  curbatch++;
1035  }
1036 
1037  if (curbatch >= nbatch)
1038  return false; /* no more batches */
1039 
1040  hashtable->curbatch = curbatch;
1041 
1042  /*
1043  * Reload the hash table with the new inner batch (which could be empty)
1044  */
1045  ExecHashTableReset(hashtable);
1046 
1047  innerFile = hashtable->innerBatchFile[curbatch];
1048 
1049  if (innerFile != NULL)
1050  {
1051  if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
1052  ereport(ERROR,
1053  (errcode_for_file_access(),
1054  errmsg("could not rewind hash-join temporary file: %m")));
1055 
1056  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
1057  innerFile,
1058  &hashvalue,
1059  hjstate->hj_HashTupleSlot)))
1060  {
1061  /*
1062  * NOTE: some tuples may be sent to future batches. Also, it is
1063  * possible for hashtable->nbatch to be increased here!
1064  */
1065  ExecHashTableInsert(hashtable, slot, hashvalue);
1066  }
1067 
1068  /*
1069  * after we build the hash table, the inner batch file is no longer
1070  * needed
1071  */
1072  BufFileClose(innerFile);
1073  hashtable->innerBatchFile[curbatch] = NULL;
1074  }
1075 
1076  /*
1077  * Rewind outer batch file (if present), so that we can start reading it.
1078  */
1079  if (hashtable->outerBatchFile[curbatch] != NULL)
1080  {
1081  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
1082  ereport(ERROR,
1083  (errcode_for_file_access(),
1084  errmsg("could not rewind hash-join temporary file: %m")));
1085  }
1086 
1087  return true;
1088 }
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:638
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:2126
void BufFileClose(BufFile *file)
Definition: buffile.c:379
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
int * skewBucketNums
Definition: hashjoin.h:308
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1589
#define ERROR
Definition: elog.h:43
BufFile ** outerBatchFile
Definition: hashjoin.h:330
int errcode_for_file_access(void)
Definition: elog.c:593
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
HashSkewBucket ** skewBucket
Definition: hashjoin.h:305
BufFile ** innerBatchFile
Definition: hashjoin.h:329
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1879
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
int errmsg(const char *fmt,...)
Definition: elog.c:784

◆ ExecHashJoinOuterGetTuple()

static TupleTableSlot * ExecHashJoinOuterGetTuple ( PlanState *  outerNode,
HashJoinState *  hjstate,
uint32 *  hashvalue 
)
static

Definition at line 816 of file nodeHashjoin.c.

References HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecHashGetHashValue(), ExecHashJoinGetSavedTuple(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::outerBatchFile, JoinState::ps, PlanState::ps_ExprContext, and TupIsNull.

Referenced by ExecHashJoinImpl().

819 {
820  HashJoinTable hashtable = hjstate->hj_HashTable;
821  int curbatch = hashtable->curbatch;
822  TupleTableSlot *slot;
823 
824  if (curbatch == 0) /* if it is the first pass */
825  {
826  /*
827  * Check to see if first outer tuple was already fetched by
828  * ExecHashJoin() and not used yet.
829  */
830  slot = hjstate->hj_FirstOuterTupleSlot;
831  if (!TupIsNull(slot))
832  hjstate->hj_FirstOuterTupleSlot = NULL;
833  else
834  slot = ExecProcNode(outerNode);
835 
836  while (!TupIsNull(slot))
837  {
838  /*
839  * We have to compute the tuple's hash value.
840  */
841  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
842 
843  econtext->ecxt_outertuple = slot;
844  if (ExecHashGetHashValue(hashtable, econtext,
845  hjstate->hj_OuterHashKeys,
846  true, /* outer tuple */
847  HJ_FILL_OUTER(hjstate),
848  hashvalue))
849  {
850  /* remember outer relation is not empty for possible rescan */
851  hjstate->hj_OuterNotEmpty = true;
852 
853  return slot;
854  }
855 
856  /*
857  * That tuple couldn't match because of a NULL, so discard it and
858  * continue with the next one.
859  */
860  slot = ExecProcNode(outerNode);
861  }
862  }
863  else if (curbatch < hashtable->nbatch)
864  {
865  BufFile *file = hashtable->outerBatchFile[curbatch];
866 
867  /*
868  * In outer-join cases, we could get here even though the batch file
869  * is empty.
870  */
871  if (file == NULL)
872  return NULL;
873 
874  slot = ExecHashJoinGetSavedTuple(hjstate,
875  file,
876  hashvalue,
877  hjstate->hj_OuterTupleSlot);
878  if (!TupIsNull(slot))
879  return slot;
880  }
881 
882  /* End of this batch */
883  return NULL;
884 }
PlanState ps
Definition: execnodes.h:1765
ExprContext * ps_ExprContext
Definition: execnodes.h:971
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1878
List * hj_OuterHashKeys
Definition: execnodes.h:1870
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1882
BufFile ** outerBatchFile
Definition: hashjoin.h:330
#define TupIsNull(slot)
Definition: tuptable.h:288
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:225
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:226
bool hj_OuterNotEmpty
Definition: execnodes.h:1885
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1783
JoinState js
Definition: execnodes.h:1868

◆ ExecHashJoinReInitializeDSM()

void ExecHashJoinReInitializeDSM ( HashJoinState *  state,
ParallelContext *  cxt 
)

Definition at line 1504 of file nodeHashjoin.c.

References BarrierInit(), ParallelHashJoinState::build_barrier, ExecHashTableDetach(), ExecHashTableDetachBatch(), ParallelHashJoinState::fileset, HashJoinState::hj_HashTable, HashJoinState::js, PlanState::plan, Plan::plan_node_id, JoinState::ps, SharedFileSetDeleteAll(), shm_toc_lookup(), and ParallelContext::toc.

Referenced by ExecParallelReInitializeDSM().

1505 {
1506  int plan_node_id = state->js.ps.plan->plan_node_id;
1507  ParallelHashJoinState *pstate =
1508  shm_toc_lookup(cxt->toc, plan_node_id, false);
1509 
1510  /*
1511  * It would be possible to reuse the shared hash table in single-batch
1512  * cases by resetting and then fast-forwarding build_barrier to
1513  * PHJ_BUILD_DONE and batch 0's batch_barrier to PHJ_BATCH_PROBING, but
1514  * currently shared hash tables are already freed by now (by the last
1515  * participant to detach from the batch). We could consider keeping it
1516  * around for single-batch joins. We'd also need to adjust
1517  * finalize_plan() so that it doesn't record a dummy dependency for
1518  * Parallel Hash nodes, preventing the rescan optimization. For now we
1519  * don't try.
1520  */
1521 
1522  /* Detach, freeing any remaining shared memory. */
1523  if (state->hj_HashTable != NULL)
1524  {
1525  ExecHashTableDetachBatch(state->hj_HashTable);
1526  ExecHashTableDetach(state->hj_HashTable);
1527  }
1528 
1529  /* Clear any shared batch files. */
1530  SharedFileSetDeleteAll(&pstate->fileset);
1531 
1532  /* Reset build_barrier to PHJ_BUILD_ELECTING so we can go around again. */
1533  BarrierInit(&pstate->build_barrier, 0);
1534 }
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
PlanState ps
Definition: execnodes.h:1765
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3075
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3132
int plan_node_id
Definition: plannodes.h:139
SharedFileSet fileset
Definition: hashjoin.h:253
Plan * plan
Definition: execnodes.h:932
void SharedFileSetDeleteAll(SharedFileSet *fileset)
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
JoinState js
Definition: execnodes.h:1868
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
shm_toc * toc
Definition: parallel.h:44

◆ ExecHashJoinSaveTuple()

void ExecHashJoinSaveTuple ( MinimalTuple  tuple,
uint32  hashvalue,
BufFile **  fileptr 
)

Definition at line 1226 of file nodeHashjoin.c.

References BufFileCreateTemp(), BufFileWrite(), ereport, errcode_for_file_access(), errmsg(), ERROR, and MinimalTupleData::t_len.

Referenced by ExecHashIncreaseNumBatches(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().

1228 {
1229  BufFile *file = *fileptr;
1230  size_t written;
1231 
1232  if (file == NULL)
1233  {
1234  /* First write to this batch file, so open it. */
1235  file = BufFileCreateTemp(false);
1236  *fileptr = file;
1237  }
1238 
1239  written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
1240  if (written != sizeof(uint32))
1241  ereport(ERROR,
1242  (errcode_for_file_access(),
1243  errmsg("could not write to hash-join temporary file: %m")));
1244 
1245  written = BufFileWrite(file, (void *) tuple, tuple->t_len);
1246  if (written != tuple->t_len)
1247  ereport(ERROR,
1248  (errcode_for_file_access(),
1249  errmsg("could not write to hash-join temporary file: %m")));
1250 }
#define ERROR
Definition: elog.h:43
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:183
int errcode_for_file_access(void)
Definition: elog.c:593
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
int errmsg(const char *fmt,...)
Definition: elog.c:784
size_t BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:563

◆ ExecInitHashJoin()

HashJoinState* ExecInitHashJoin ( HashJoin *  node,
EState *  estate,
int  eflags 
)

Definition at line 597 of file nodeHashjoin.c.

References OpExpr::args, Assert, elog, ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecGetResultSlotOps(), ExecGetResultType(), ExecHashJoin(), ExecInitExpr(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitQual(), ExecInitResultTupleSlotTL(), PlanState::ExecProcNode, HashJoin::hashclauses, HJ_BUILD_HASHTABLE, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterTupleSlot, Join::inner_unique, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, Join::joinqual, Join::jointype, JoinState::jointype, HashJoinState::js, lappend(), lappend_oid(), lfirst_node, linitial, lsecond, makeNode, NIL, OpExpr::opno, outerPlan, outerPlanState, Join::plan, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ResultTupleSlot, Plan::qual, PlanState::qual, JoinState::single_match, PlanState::state, and TTSOpsVirtual.

Referenced by ExecInitNode().

598 {
599  HashJoinState *hjstate;
600  Plan *outerNode;
601  Hash *hashNode;
602  List *lclauses;
603  List *rclauses;
604  List *rhclauses;
605  List *hoperators;
606  TupleDesc outerDesc,
607  innerDesc;
608  ListCell *l;
609  const TupleTableSlotOps *ops;
610 
611  /* check for unsupported flags */
612  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
613 
614  /*
615  * create state structure
616  */
617  hjstate = makeNode(HashJoinState);
618  hjstate->js.ps.plan = (Plan *) node;
619  hjstate->js.ps.state = estate;
620 
621  /*
622  * See ExecHashJoinInitializeDSM() and ExecHashJoinInitializeWorker()
623  * where this function may be replaced with a parallel version, if we
624  * managed to launch a parallel query.
625  */
626  hjstate->js.ps.ExecProcNode = ExecHashJoin;
627  hjstate->js.jointype = node->join.jointype;
628 
629  /*
630  * Miscellaneous initialization
631  *
632  * create expression context for node
633  */
634  ExecAssignExprContext(estate, &hjstate->js.ps);
635 
636  /*
637  * initialize child nodes
638  *
639  * Note: we could suppress the REWIND flag for the inner input, which
640  * would amount to betting that the hash will be a single batch. Not
641  * clear if this would be a win or not.
642  */
643  outerNode = outerPlan(node);
644  hashNode = (Hash *) innerPlan(node);
645 
646  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
647  outerDesc = ExecGetResultType(outerPlanState(hjstate));
648  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
649  innerDesc = ExecGetResultType(innerPlanState(hjstate));
650 
651  /*
652  * Initialize result slot, type and projection.
653  */
654  ExecInitResultTupleSlotTL(&hjstate->js.ps, &TTSOpsVirtual);
655  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
656 
657  /*
658  * tuple table initialization
659  */
660  ops = ExecGetResultSlotOps(outerPlanState(hjstate), NULL);
661  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate, outerDesc,
662  ops);
663 
664  /*
665  * detect whether we need only consider the first matching inner tuple
666  */
667  hjstate->js.single_match = (node->join.inner_unique ||
668  node->join.jointype == JOIN_SEMI);
669 
670  /* set up null tuples for outer joins, if needed */
671  switch (node->join.jointype)
672  {
673  case JOIN_INNER:
674  case JOIN_SEMI:
675  break;
676  case JOIN_LEFT:
677  case JOIN_ANTI:
678  hjstate->hj_NullInnerTupleSlot =
679  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
680  break;
681  case JOIN_RIGHT:
682  hjstate->hj_NullOuterTupleSlot =
683  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
684  break;
685  case JOIN_FULL:
686  hjstate->hj_NullOuterTupleSlot =
687  ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
688  hjstate->hj_NullInnerTupleSlot =
689  ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
690  break;
691  default:
692  elog(ERROR, "unrecognized join type: %d",
693  (int) node->join.jointype);
694  }
695 
696  /*
697  * now for some voodoo. our temporary tuple slot is actually the result
698  * tuple slot of the Hash node (which is our inner plan). we can do this
699  * because Hash nodes don't return tuples via ExecProcNode() -- instead
700  * the hash join node uses ExecScanHashBucket() to get at the contents of
701  * the hash table. -cim 6/9/91
702  */
703  {
704  HashState *hashstate = (HashState *) innerPlanState(hjstate);
705  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
706 
707  hjstate->hj_HashTupleSlot = slot;
708  }
709 
710  /*
711  * initialize child expressions
712  */
713  hjstate->js.ps.qual =
714  ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
715  hjstate->js.joinqual =
716  ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
717  hjstate->hashclauses =
718  ExecInitQual(node->hashclauses, (PlanState *) hjstate);
719 
720  /*
721  * initialize hash-specific info
722  */
723  hjstate->hj_HashTable = NULL;
724  hjstate->hj_FirstOuterTupleSlot = NULL;
725 
726  hjstate->hj_CurHashValue = 0;
727  hjstate->hj_CurBucketNo = 0;
728  hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
729  hjstate->hj_CurTuple = NULL;
730 
731  /*
732  * Deconstruct the hash clauses into outer and inner argument values, so
733  * that we can evaluate those subexpressions separately. Also make a list
734  * of the hash operator OIDs, in preparation for looking up the hash
735  * functions to use.
736  */
737  lclauses = NIL;
738  rclauses = NIL;
739  rhclauses = NIL;
740  hoperators = NIL;
741  foreach(l, node->hashclauses)
742  {
743  OpExpr *hclause = lfirst_node(OpExpr, l);
744 
745  lclauses = lappend(lclauses, ExecInitExpr(linitial(hclause->args),
746  (PlanState *) hjstate));
747  rclauses = lappend(rclauses, ExecInitExpr(lsecond(hclause->args),
748  (PlanState *) hjstate));
749  rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args),
750  innerPlanState(hjstate)));
751  hoperators = lappend_oid(hoperators, hclause->opno);
752  }
753  hjstate->hj_OuterHashKeys = lclauses;
754  hjstate->hj_InnerHashKeys = rclauses;
755  hjstate->hj_HashOperators = hoperators;
756  /* child Hash node needs to evaluate inner hash keys, too */
757  ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses;
758 
759  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
760  hjstate->hj_MatchedOuter = false;
761  hjstate->hj_OuterNotEmpty = false;
762 
763  return hjstate;
764 }
JoinType jointype
Definition: execnodes.h:1766
#define NIL
Definition: pg_list.h:69
List * qual
Definition: plannodes.h:141
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1881
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1680
ExprState * joinqual
Definition: execnodes.h:1769
const TupleTableSlotOps * ExecGetResultSlotOps(PlanState *planstate, bool *isfixed)
Definition: execUtils.c:460
PlanState ps
Definition: execnodes.h:1765
List * hashclauses
Definition: plannodes.h:737
bool single_match
Definition: execnodes.h:1767
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:80
bool hj_MatchedOuter
Definition: execnodes.h:1884
EState * state
Definition: execnodes.h:934
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1878
List * hj_OuterHashKeys
Definition: execnodes.h:1870
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1882
#define lsecond(l)
Definition: pg_list.h:116
Join join
Definition: plannodes.h:736
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:206
JoinType jointype
Definition: plannodes.h:678
uint32 hj_CurHashValue
Definition: execnodes.h:1874
int hj_CurSkewBucketNo
Definition: execnodes.h:1876
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:970
#define linitial(l)
Definition: pg_list.h:111
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1880
#define EXEC_FLAG_BACKWARD
Definition: executor.h:56
#define lfirst_node(type, lc)
Definition: pg_list.h:109
#define outerPlanState(node)
Definition: execnodes.h:1026
#define innerPlan(node)
Definition: plannodes.h:169
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:496
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1877
PlanState ps
Definition: execnodes.h:2206
List * hj_HashOperators
Definition: execnodes.h:1872
#define outerPlan(node)
Definition: plannodes.h:170
List * lappend(List *list, void *datum)
Definition: list.c:128
int hj_CurBucketNo
Definition: execnodes.h:1875
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:938
Plan * plan
Definition: execnodes.h:932
#define makeNode(_type_)
Definition: nodes.h:565
bool hj_OuterNotEmpty
Definition: execnodes.h:1885
#define Assert(condition)
Definition: c.h:732
#define EXEC_FLAG_MARK
Definition: executor.h:57
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1696
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:441
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1648
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:451
ExprState * qual
Definition: execnodes.h:953
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1879
List * hj_InnerHashKeys
Definition: execnodes.h:1871
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
#define elog(elevel,...)
Definition: elog.h:226
static TupleTableSlot * ExecHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:565
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:120
Oid opno
Definition: primnodes.h:499
bool inner_unique
Definition: plannodes.h:679
List * args
Definition: primnodes.h:505
#define innerPlanState(node)
Definition: execnodes.h:1025
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
Definition: pg_list.h:45
JoinState js
Definition: execnodes.h:1868
List * joinqual
Definition: plannodes.h:680
ExprState * hashclauses
Definition: execnodes.h:1869
Plan plan
Definition: plannodes.h:677

◆ ExecParallelHashJoin()

static TupleTableSlot* ExecParallelHashJoin ( PlanState *  pstate)
static

Definition at line 581 of file nodeHashjoin.c.

References ExecHashJoinImpl().

Referenced by ExecHashJoinInitializeDSM(), and ExecHashJoinInitializeWorker().

582 {
583  /*
584  * On sufficiently smart compilers this should be inlined with the
585  * parallel-oblivious branches removed.
586  */
587  return ExecHashJoinImpl(pstate, true);
588 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:165

◆ ExecParallelHashJoinNewBatch()

static bool ExecParallelHashJoinNewBatch ( HashJoinState *  hjstate)
static

Definition at line 1095 of file nodeHashjoin.c.

References BarrierArriveAndWait(), BarrierAttach(), BarrierDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, HashJoinTableData::curbatch, ParallelHashJoinState::distributor, ParallelHashJoinBatchAccessor::done, elog, ERROR, ExecForceStoreMinimalTuple(), ExecHashTableDetachBatch(), ExecParallelHashTableAlloc(), ExecParallelHashTableInsertCurrentBatch(), ExecParallelHashTableSetCurrentBatch(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, ParallelHashJoinBatchAccessor::inner_tuples, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, pg_atomic_fetch_add_u32(), PHJ_BATCH_ALLOCATING, PHJ_BATCH_DONE, PHJ_BATCH_ELECTING, PHJ_BATCH_LOADING, PHJ_BATCH_PROBING, ParallelHashJoinBatchAccessor::shared, sts_begin_parallel_scan(), sts_end_parallel_scan(), sts_parallel_scan_next(), WAIT_EVENT_HASH_BATCH_ALLOCATING, WAIT_EVENT_HASH_BATCH_ELECTING, and WAIT_EVENT_HASH_BATCH_LOADING.

Referenced by ExecHashJoinImpl().

1096 {
1097  HashJoinTable hashtable = hjstate->hj_HashTable;
1098  int start_batchno;
1099  int batchno;
1100 
1101  /*
1102  * If we started up so late that the batch tracking array has been freed
1103  * already by ExecHashTableDetach(), then we are finished. See also
1104  * ExecParallelHashEnsureBatchAccessors().
1105  */
1106  if (hashtable->batches == NULL)
1107  return false;
1108 
1109  /*
1110  * If we were already attached to a batch, remember not to bother checking
1111  * it again, and detach from it (possibly freeing the hash table if we are
1112  * last to detach).
1113  */
1114  if (hashtable->curbatch >= 0)
1115  {
1116  hashtable->batches[hashtable->curbatch].done = true;
1117  ExecHashTableDetachBatch(hashtable);
1118  }
1119 
1120  /*
1121  * Search for a batch that isn't done. We use an atomic counter to start
1122  * our search at a different batch in every participant when there are
1123  * more batches than participants.
1124  */
1125  batchno = start_batchno =
1126  pg_atomic_fetch_add_u32(&hashtable->parallel_state->distributor, 1) %
1127  hashtable->nbatch;
1128  do
1129  {
1130  uint32 hashvalue;
1131  MinimalTuple tuple;
1132  TupleTableSlot *slot;
1133 
1134  if (!hashtable->batches[batchno].done)
1135  {
1136  SharedTuplestoreAccessor *inner_tuples;
1137  Barrier *batch_barrier =
1138  &hashtable->batches[batchno].shared->batch_barrier;
1139 
1140  switch (BarrierAttach(batch_barrier))
1141  {
1142  case PHJ_BATCH_ELECTING:
1143 
1144  /* One backend allocates the hash table. */
1145  if (BarrierArriveAndWait(batch_barrier,
1146  WAIT_EVENT_HASH_BATCH_ELECTING))
1147  ExecParallelHashTableAlloc(hashtable, batchno);
1148  /* Fall through. */
1149 
1150  case PHJ_BATCH_ALLOCATING:
1151  /* Wait for allocation to complete. */
1152  BarrierArriveAndWait(batch_barrier,
1153  WAIT_EVENT_HASH_BATCH_ALLOCATING);
1154  /* Fall through. */
1155 
1156  case PHJ_BATCH_LOADING:
1157  /* Start (or join in) loading tuples. */
1158  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1159  inner_tuples = hashtable->batches[batchno].inner_tuples;
1160  sts_begin_parallel_scan(inner_tuples);
1161  while ((tuple = sts_parallel_scan_next(inner_tuples,
1162  &hashvalue)))
1163  {
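1164  ExecForceStoreMinimalTuple(tuple,
1165  hjstate->hj_HashTupleSlot,
1166  false);
1167  slot = hjstate->hj_HashTupleSlot;
1168  ExecParallelHashTableInsertCurrentBatch(hashtable, slot,
1169  hashvalue);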
1170  }
1171  sts_end_parallel_scan(inner_tuples);
1172  BarrierArriveAndWait(batch_barrier,
1173  WAIT_EVENT_HASH_BATCH_LOADING);
1174  /* Fall through. */
1175 
1176  case PHJ_BATCH_PROBING:
1177 
1178  /*
1179  * This batch is ready to probe. Return control to
1180  * caller. We stay attached to batch_barrier so that the
1181  * hash table stays alive until everyone's finished
1182  * probing it, but no participant is allowed to wait at
1183  * this barrier again (or else a deadlock could occur).
1184  * All attached participants must eventually call
1185  * BarrierArriveAndDetach() so that the final phase
1186  * PHJ_BATCH_DONE can be reached.
1187  */
1188  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1189  sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
1190  return true;
1191 
1192  case PHJ_BATCH_DONE:
1193 
1194  /*
1195  * Already done. Detach and go around again (if any
1196  * remain).
1197  */
1198  BarrierDetach(batch_barrier);
1199  hashtable->batches[batchno].done = true;
1200  hashtable->curbatch = -1;
1201  break;
1202 
1203  default:
1204  elog(ERROR, "unexpected batch phase %d",
1205  BarrierPhase(batch_barrier));
1206  }
1207  }
1208  batchno = (batchno + 1) % hashtable->nbatch;
1209  } while (batchno != start_batchno);
1210 
1211  return false;
1212 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
#define PHJ_BATCH_DONE
Definition: hashjoin.h:268
void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1743
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3075
#define PHJ_BATCH_ALLOCATING
Definition: hashjoin.h:265
void ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3217
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3055
#define PHJ_BATCH_LOADING
Definition: hashjoin.h:266
#define PHJ_BATCH_ELECTING
Definition: hashjoin.h:264
SharedTuplestoreAccessor * inner_tuples
Definition: hashjoin.h:208
#define ERROR
Definition: elog.h:43
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1392
unsigned int uint32
Definition: c.h:358
void sts_begin_parallel_scan(SharedTuplestoreAccessor *accessor)
int BarrierAttach(Barrier *barrier)
Definition: barrier.c:214
ParallelHashJoinState * parallel_state
Definition: hashjoin.h:356
#define PHJ_BATCH_PROBING
Definition: hashjoin.h:267
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:331
bool BarrierDetach(Barrier *barrier)
Definition: barrier.c:234
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:243
ParallelHashJoinBatch * shared
Definition: hashjoin.h:197
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1879
pg_atomic_uint32 distributor
Definition: hashjoin.h:251
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
#define elog(elevel,...)
Definition: elog.h:226
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)

◆ ExecParallelHashJoinOuterGetTuple()

static TupleTableSlot * ExecParallelHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 890 of file nodeHashjoin.c.

References HashJoinTableData::batches, HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecClearTuple(), ExecForceStoreMinimalTuple(), ExecHashGetHashValue(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, JoinState::ps, PlanState::ps_ExprContext, sts_parallel_scan_next(), and TupIsNull.

Referenced by ExecHashJoinImpl().

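890 ExecParallelHashJoinOuterGetTuple(PlanState *outerNode,
891  HashJoinState *hjstate,
892  uint32 *hashvalue)
893 {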
894  HashJoinTable hashtable = hjstate->hj_HashTable;
895  int curbatch = hashtable->curbatch;
896  TupleTableSlot *slot;
897 
898  /*
899  * In the Parallel Hash case we only run the outer plan directly for
900  * single-batch hash joins. Otherwise we have to go to batch files, even
901  * for batch 0.
902  */
903  if (curbatch == 0 && hashtable->nbatch == 1)
904  {
905  slot = ExecProcNode(outerNode);
906 
907  while (!TupIsNull(slot))
908  {
909  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
910 
911  econtext->ecxt_outertuple = slot;
912  if (ExecHashGetHashValue(hashtable, econtext,
913  hjstate->hj_OuterHashKeys,
914  true, /* outer tuple */
915  HJ_FILL_OUTER(hjstate),
916  hashvalue))
917  return slot;
918 
919  /*
920  * That tuple couldn't match because of a NULL, so discard it and
921  * continue with the next one.
922  */
923  slot = ExecProcNode(outerNode);
924  }
925  }
926  else if (curbatch < hashtable->nbatch)
927  {
928  MinimalTuple tuple;
929 
930  tuple = sts_parallel_scan_next(hashtable->batches[curbatch].outer_tuples,
931  hashvalue);
932  if (tuple != NULL)
933  {
934  ExecForceStoreMinimalTuple(tuple,
935  hjstate->hj_OuterTupleSlot,
936  false);
937  slot = hjstate->hj_OuterTupleSlot;
938  return slot;
939  }
940  else
941  ExecClearTuple(hjstate->hj_OuterTupleSlot);
942  }
943 
944  /* End of this batch */
945  return NULL;
946 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:404
PlanState ps
Definition: execnodes.h:1765
ExprContext * ps_ExprContext
Definition: execnodes.h:971
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1878
List * hj_OuterHashKeys
Definition: execnodes.h:1870
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1392
#define TupIsNull(slot)
Definition: tuptable.h:288
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:225
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:226
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1783
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
JoinState js
Definition: execnodes.h:1868

◆ ExecParallelHashJoinPartitionOuter()

static void ExecParallelHashJoinPartitionOuter ( HashJoinState * node)
static

Definition at line 1393 of file nodeHashjoin.c.

References Assert, HashJoinTableData::batches, CHECK_FOR_INTERRUPTS, ExprContext::ecxt_outertuple, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetHashValue(), ExecProcNode(), heap_free_minimal_tuple(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, i, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, outerPlanState, JoinState::ps, PlanState::ps_ExprContext, sts_end_write(), sts_puttuple(), and TupIsNull.

Referenced by ExecHashJoinImpl().

1393 ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
1394 {
1395  PlanState *outerState = outerPlanState(hjstate);
1396  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1397  HashJoinTable hashtable = hjstate->hj_HashTable;
1398  TupleTableSlot *slot;
1399  uint32 hashvalue;
1400  int i;
1401 
1402  Assert(hjstate->hj_FirstOuterTupleSlot == NULL);
1403 
1404  /* Execute outer plan, writing all tuples to shared tuplestores. */
1405  for (;;)
1406  {
1407  slot = ExecProcNode(outerState);
1408  if (TupIsNull(slot))
1409  break;
1410  econtext->ecxt_outertuple = slot;
1411  if (ExecHashGetHashValue(hashtable, econtext,
1412  hjstate->hj_OuterHashKeys,
1413  true, /* outer tuple */
1414  HJ_FILL_OUTER(hjstate),
1415  &hashvalue))
1416  {
1417  int batchno;
1418  int bucketno;
1419  bool shouldFree;
1420  MinimalTuple mintup = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1421 
1422  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno,
1423  &batchno);
1424  sts_puttuple(hashtable->batches[batchno].outer_tuples,
1425  &hashvalue, mintup);
1426 
1427  if (shouldFree)
1428  heap_free_minimal_tuple(mintup);
1429  }
1430  CHECK_FOR_INTERRUPTS();
1431  }
1432 
1433  /* Make sure all outer partitions are readable by any backend. */
1434  for (i = 0; i < hashtable->nbatch; ++i)
1435  sts_end_write(hashtable->batches[i].outer_tuples);
1436 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
Definition: execTuples.c:1540
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1887
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1429
#define outerPlanState(node)
Definition: execnodes.h:1026
#define TupIsNull(slot)
Definition: tuptable.h:288
unsigned int uint32
Definition: c.h:358
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:225
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:226
#define Assert(condition)
Definition: c.h:732
int i
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1783
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void sts_end_write(SharedTuplestoreAccessor *accessor)

◆ ExecReScanHashJoin()

void ExecReScanHashJoin ( HashJoinState * node)

Definition at line 1307 of file nodeHashjoin.c.

References PlanState::chgParam, ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_OUTER, HashJoinState::hj_OuterNotEmpty, INVALID_SKEW_BUCKET_NO, HashJoinState::js, PlanState::lefttree, HashJoinTableData::nbatch, JoinState::ps, and PlanState::righttree.

Referenced by ExecReScan().

1307 ExecReScanHashJoin(HashJoinState *node)
1308 {
1309  /*
1310  * In a multi-batch join, we currently have to do rescans the hard way,
1311  * primarily because batch temp files may have already been released. But
1312  * if it's a single-batch join, and there is no parameter change for the
1313  * inner subnode, then we can just re-use the existing hash table without
1314  * rebuilding it.
1315  */
1316  if (node->hj_HashTable != NULL)
1317  {
1318  if (node->hj_HashTable->nbatch == 1 &&
1319  node->js.ps.righttree->chgParam == NULL)
1320  {
1321  /*
1322  * Okay to reuse the hash table; needn't rescan inner, either.
1323  *
1324  * However, if it's a right/full join, we'd better reset the
1325  * inner-tuple match flags contained in the table.
1326  */
1327  if (HJ_FILL_INNER(node))
1328  ExecHashTableResetMatchFlags(node->hj_HashTable);
1329 
1330  /*
1331  * Also, we need to reset our state about the emptiness of the
1332  * outer relation, so that the new scan of the outer will update
1333  * it correctly if it turns out to be empty this time. (There's no
1334  * harm in clearing it now because ExecHashJoin won't need the
1335  * info. In the other cases, where the hash table doesn't exist
1336  * or we are destroying it, we leave this state alone because
1337  * ExecHashJoin will need it the first time through.)
1338  */
1339  node->hj_OuterNotEmpty = false;
1340 
1341  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
1342  node->hj_JoinState = HJ_NEED_NEW_OUTER;
1343  }
1344  else
1345  {
1346  /* must destroy and rebuild hash table */
1347  ExecHashTableDestroy(node->hj_HashTable);
1348  node->hj_HashTable = NULL;
1349  node->hj_JoinState = HJ_BUILD_HASHTABLE;
1350 
1351  /*
1352  * if chgParam of subnode is not null then plan will be re-scanned
1353  * by first ExecProcNode.
1354  */
1355  if (node->js.ps.righttree->chgParam == NULL)
1356  ExecReScan(node->js.ps.righttree);
1357  }
1358  }
1359 
1360  /* Always reset intra-tuple state */
1361  node->hj_CurHashValue = 0;
1362  node->hj_CurBucketNo = 0;
1363  node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
1364  node->hj_CurTuple = NULL;
1365 
1366  node->hj_MatchedOuter = false;
1367  node->hj_FirstOuterTupleSlot = NULL;
1368 
1369  /*
1370  * if chgParam of subnode is not null then plan will be re-scanned by
1371  * first ExecProcNode.
1372  */
1373  if (node->js.ps.lefttree->chgParam == NULL)
1374  ExecReScan(node->js.ps.lefttree);
1375 }
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
PlanState ps
Definition: execnodes.h:1765
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
bool hj_MatchedOuter
Definition: execnodes.h:1884
struct PlanState * righttree
Definition: execnodes.h:955
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1882
struct PlanState * lefttree
Definition: execnodes.h:954
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
uint32 hj_CurHashValue
Definition: execnodes.h:1874
int hj_CurSkewBucketNo
Definition: execnodes.h:1876
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1877
Bitmapset * chgParam
Definition: execnodes.h:964
int hj_CurBucketNo
Definition: execnodes.h:1875
bool hj_OuterNotEmpty
Definition: execnodes.h:1885
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:125
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1873
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:2155
JoinState js
Definition: execnodes.h:1868
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:849

◆ ExecShutdownHashJoin()

void ExecShutdownHashJoin ( HashJoinState * node)

Definition at line 1378 of file nodeHashjoin.c.

References ExecHashTableDetach(), ExecHashTableDetachBatch(), and HashJoinState::hj_HashTable.

Referenced by ExecShutdownNode().

1378 ExecShutdownHashJoin(HashJoinState *node)
1379 {
1380  if (node->hj_HashTable)
1381  {
1382  /*
1383  * Detach from shared state before DSM memory goes away. This makes
1384  * sure that we don't have any pointers into DSM memory by the time
1385  * ExecEndHashJoin runs.
1386  */
1387  ExecHashTableDetachBatch(node->hj_HashTable);
1388  ExecHashTableDetach(node->hj_HashTable);
1389  }
1390 }
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3075
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3132
HashJoinTable hj_HashTable
Definition: execnodes.h:1873