PostgreSQL Source Code  git master
nodeHashjoin.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "access/parallel.h"
#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/memutils.h"
#include "utils/sharedtuplestore.h"
Include dependency graph for nodeHashjoin.c:

Go to the source code of this file.

Macros

#define HJ_BUILD_HASHTABLE   1
 
#define HJ_NEED_NEW_OUTER   2
 
#define HJ_SCAN_BUCKET   3
 
#define HJ_FILL_OUTER_TUPLE   4
 
#define HJ_FILL_INNER_TUPLES   5
 
#define HJ_NEED_NEW_BATCH   6
 
#define HJ_FILL_OUTER(hjstate)   ((hjstate)->hj_NullInnerTupleSlot != NULL)
 
#define HJ_FILL_INNER(hjstate)   ((hjstate)->hj_NullOuterTupleSlot != NULL)
 

Functions

static TupleTableSlot * ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
 
static bool ExecHashJoinNewBatch (HashJoinState *hjstate)
 
static bool ExecParallelHashJoinNewBatch (HashJoinState *hjstate)
 
static void ExecParallelHashJoinPartitionOuter (HashJoinState *node)
 
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl (PlanState *pstate, bool parallel)
 
static TupleTableSlot * ExecHashJoin (PlanState *pstate)
 
static TupleTableSlot * ExecParallelHashJoin (PlanState *pstate)
 
HashJoinState * ExecInitHashJoin (HashJoin *node, EState *estate, int eflags)
 
void ExecEndHashJoin (HashJoinState *node)
 
void ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
 
void ExecReScanHashJoin (HashJoinState *node)
 
void ExecShutdownHashJoin (HashJoinState *node)
 
void ExecHashJoinEstimate (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinReInitializeDSM (HashJoinState *state, ParallelContext *cxt)
 
void ExecHashJoinInitializeWorker (HashJoinState *state, ParallelWorkerContext *pwcxt)
 

Macro Definition Documentation

◆ HJ_BUILD_HASHTABLE

#define HJ_BUILD_HASHTABLE   1

Definition at line 124 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), ExecInitHashJoin(), and ExecReScanHashJoin().

◆ HJ_FILL_INNER

#define HJ_FILL_INNER (   hjstate)    ((hjstate)->hj_NullOuterTupleSlot != NULL)

Definition at line 134 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), ExecHashJoinNewBatch(), and ExecReScanHashJoin().

◆ HJ_FILL_INNER_TUPLES

#define HJ_FILL_INNER_TUPLES   5

Definition at line 128 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_FILL_OUTER

#define HJ_FILL_OUTER (   hjstate)    ((hjstate)->hj_NullInnerTupleSlot != NULL)

Definition at line 132 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_TUPLE

#define HJ_FILL_OUTER_TUPLE   4

Definition at line 127 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_NEED_NEW_BATCH

#define HJ_NEED_NEW_BATCH   6

Definition at line 129 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

◆ HJ_NEED_NEW_OUTER

#define HJ_NEED_NEW_OUTER   2

Definition at line 125 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl(), and ExecReScanHashJoin().

◆ HJ_SCAN_BUCKET

#define HJ_SCAN_BUCKET   3

Definition at line 126 of file nodeHashjoin.c.

Referenced by ExecHashJoinImpl().

Function Documentation

◆ ExecEndHashJoin()

void ExecEndHashJoin ( HashJoinState * node)

Definition at line 760 of file nodeHashjoin.c.

References ExecClearTuple(), ExecEndNode(), ExecFreeExprContext(), ExecHashTableDestroy(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_OuterTupleSlot, innerPlanState, HashJoinState::js, outerPlanState, JoinState::ps, and PlanState::ps_ResultTupleSlot.

Referenced by ExecEndNode().

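761 {
762  /*
763  * Free hash table
764  */
765  if (node->hj_HashTable)
766  {
767  ExecHashTableDestroy(node->hj_HashTable);
768  node->hj_HashTable = NULL;
769  }
770 
771  /*
772  * Free the exprcontext
773  */
774  ExecFreeExprContext(&node->js.ps);
775 
776  /*
777  * clean out the tuple table
778  */
779  ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
780  ExecClearTuple(node->hj_OuterTupleSlot);
781  ExecClearTuple(node->hj_HashTupleSlot);
782 
783  /*
784  * clean up subtrees
785  */
786  ExecEndNode(outerPlanState(node));
787  ExecEndNode(innerPlanState(node));
788 }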
PlanState ps
Definition: execnodes.h:1687
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:538
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:475
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1800
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:566
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:946
#define outerPlanState(node)
Definition: execnodes.h:966
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1801
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
#define innerPlanState(node)
Definition: execnodes.h:965
JoinState js
Definition: execnodes.h:1790
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:849

◆ ExecHashJoin()

static TupleTableSlot* ExecHashJoin ( PlanState * pstate)
static

Definition at line 559 of file nodeHashjoin.c.

References ExecHashJoinImpl().

Referenced by ExecInitHashJoin().

560 {
561  /*
562  * On sufficiently smart compilers this should be inlined with the
563  * parallel-aware branches removed.
564  */
565  return ExecHashJoinImpl(pstate, false);
566 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:165

◆ ExecHashJoinEstimate()

void ExecHashJoinEstimate ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1418 of file nodeHashjoin.c.

References ParallelContext::estimator, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

1419 {
1420  shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelHashJoinState));
1421  shm_toc_estimate_keys(&pcxt->estimator, 1);
1422 }
shm_toc_estimator estimator
Definition: parallel.h:41
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53

◆ ExecHashJoinGetSavedTuple()

static TupleTableSlot * ExecHashJoinGetSavedTuple ( HashJoinState * hjstate,
BufFile * file,
uint32 * hashvalue,
TupleTableSlot * tupleSlot 
)
static

Definition at line 1245 of file nodeHashjoin.c.

References BufFileRead(), CHECK_FOR_INTERRUPTS, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecClearTuple(), ExecStoreMinimalTuple(), header(), palloc(), and MinimalTupleData::t_len.

Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().

1249 {
1250  uint32 header[2];
1251  size_t nread;
1252  MinimalTuple tuple;
1253 
1254  /*
1255  * We check for interrupts here because this is typically taken as an
1256  * alternative code path to an ExecProcNode() call, which would include
1257  * such a check.
1258  */
1259  CHECK_FOR_INTERRUPTS();
1260 
1261  /*
1262  * Since both the hash value and the MinimalTuple length word are uint32,
1263  * we can read them both in one BufFileRead() call without any type
1264  * cheating.
1265  */
1266  nread = BufFileRead(file, (void *) header, sizeof(header));
1267  if (nread == 0) /* end of file */
1268  {
1269  ExecClearTuple(tupleSlot);
1270  return NULL;
1271  }
1272  if (nread != sizeof(header))
1273  ereport(ERROR,
1274  (errcode_for_file_access(),
1275  errmsg("could not read from hash-join temporary file: %m")));
1276  *hashvalue = header[0];
1277  tuple = (MinimalTuple) palloc(header[1]);
1278  tuple->t_len = header[1];
1279  nread = BufFileRead(file,
1280  (void *) ((char *) tuple + sizeof(uint32)),
1281  header[1] - sizeof(uint32));
1282  if (nread != header[1] - sizeof(uint32))
1283  ereport(ERROR,
1284  (errcode_for_file_access(),
1285  errmsg("could not read from hash-join temporary file: %m")));
1286  return ExecStoreMinimalTuple(tuple, tupleSlot, true);
1287 }
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:420
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:475
#define ERROR
Definition: elog.h:43
MinimalTupleData * MinimalTuple
Definition: htup.h:27
int errcode_for_file_access(void)
Definition: elog.c:598
unsigned int uint32
Definition: c.h:325
#define ereport(elevel, rest)
Definition: elog.h:122
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:208
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:797
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:554
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98

◆ ExecHashJoinImpl()

static pg_attribute_always_inline TupleTableSlot* ExecHashJoinImpl ( PlanState * pstate,
bool  parallel 
)
static

Definition at line 165 of file nodeHashjoin.c.

References Assert, BarrierArriveAndWait(), BarrierPhase(), ParallelHashJoinState::build_barrier, castNode, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog, ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecParallelHashJoinNewBatch(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), ExecParallelScanHashBucket(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), HashState::hashtable, HeapTupleHeaderSetMatch, HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashOperators, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterNotEmpty, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, HashJoinTableData::outerBatchFile, outerPlanState, HashState::parallel_state, PHJ_BUILD_DONE, PHJ_BUILD_HASHING_OUTER, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, PlanState::qual, ResetExprContext, JoinState::single_match, Plan::startup_cost, Plan::total_cost, HashJoinTableData::totalTuples, TupIsNull, and WAIT_EVENT_HASH_BUILD_HASHING_OUTER.

Referenced by ExecHashJoin(), and ExecParallelHashJoin().

166 {
167  HashJoinState *node = castNode(HashJoinState, pstate);
168  PlanState *outerNode;
169  HashState *hashNode;
170  ExprState *joinqual;
171  ExprState *otherqual;
172  ExprContext *econtext;
173  HashJoinTable hashtable;
174  TupleTableSlot *outerTupleSlot;
175  uint32 hashvalue;
176  int batchno;
177  ParallelHashJoinState *parallel_state;
178 
179  /*
180  * get information from HashJoin node
181  */
182  joinqual = node->js.joinqual;
183  otherqual = node->js.ps.qual;
184  hashNode = (HashState *) innerPlanState(node);
185  outerNode = outerPlanState(node);
186  hashtable = node->hj_HashTable;
187  econtext = node->js.ps.ps_ExprContext;
188  parallel_state = hashNode->parallel_state;
189 
190  /*
191  * Reset per-tuple memory context to free any expression evaluation
192  * storage allocated in the previous tuple cycle.
193  */
194  ResetExprContext(econtext);
195 
196  /*
197  * run the hash join state machine
198  */
199  for (;;)
200  {
201  /*
202  * It's possible to iterate this loop many times before returning a
203  * tuple, in some pathological cases such as needing to move much of
204  * the current batch to a later batch. So let's check for interrupts
205  * each time through.
206  */
207  CHECK_FOR_INTERRUPTS();
208 
209  switch (node->hj_JoinState)
210  {
211  case HJ_BUILD_HASHTABLE:
212 
213  /*
214  * First time through: build hash table for inner relation.
215  */
216  Assert(hashtable == NULL);
217 
218  /*
219  * If the outer relation is completely empty, and it's not
220  * right/full join, we can quit without building the hash
221  * table. However, for an inner join it is only a win to
222  * check this when the outer relation's startup cost is less
223  * than the projected cost of building the hash table.
224  * Otherwise it's best to build the hash table first and see
225  * if the inner relation is empty. (When it's a left join, we
226  * should always make this check, since we aren't going to be
227  * able to skip the join on the strength of an empty inner
228  * relation anyway.)
229  *
230  * If we are rescanning the join, we make use of information
231  * gained on the previous scan: don't bother to try the
232  * prefetch if the previous scan found the outer relation
233  * nonempty. This is not 100% reliable since with new
234  * parameters the outer relation might yield different
235  * results, but it's a good heuristic.
236  *
237  * The only way to make the check is to try to fetch a tuple
238  * from the outer plan node. If we succeed, we have to stash
239  * it away for later consumption by ExecHashJoinOuterGetTuple.
240  */
241  if (HJ_FILL_INNER(node))
242  {
243  /* no chance to not build the hash table */
244  node->hj_FirstOuterTupleSlot = NULL;
245  }
246  else if (parallel)
247  {
248  /*
249  * The empty-outer optimization is not implemented for
250  * shared hash tables, because no one participant can
251  * determine that there are no outer tuples, and it's not
252  * yet clear that it's worth the synchronization overhead
253  * of reaching consensus to figure that out. So we have
254  * to build the hash table.
255  */
256  node->hj_FirstOuterTupleSlot = NULL;
257  }
258  else if (HJ_FILL_OUTER(node) ||
259  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
260  !node->hj_OuterNotEmpty))
261  {
262  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
263  if (TupIsNull(node->hj_FirstOuterTupleSlot))
264  {
265  node->hj_OuterNotEmpty = false;
266  return NULL;
267  }
268  else
269  node->hj_OuterNotEmpty = true;
270  }
271  else
272  node->hj_FirstOuterTupleSlot = NULL;
273 
274  /*
275  * Create the hash table. If using Parallel Hash, then
276  * whoever gets here first will create the hash table and any
277  * later arrivals will merely attach to it.
278  */
279  hashtable = ExecHashTableCreate(hashNode,
280  node->hj_HashOperators,
281  HJ_FILL_INNER(node));
282  node->hj_HashTable = hashtable;
283 
284  /*
285  * Execute the Hash node, to build the hash table. If using
286  * Parallel Hash, then we'll try to help hashing unless we
287  * arrived too late.
288  */
289  hashNode->hashtable = hashtable;
290  (void) MultiExecProcNode((PlanState *) hashNode);
291 
292  /*
293  * If the inner relation is completely empty, and we're not
294  * doing a left outer join, we can quit without scanning the
295  * outer relation.
296  */
297  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
298  return NULL;
299 
300  /*
301  * need to remember whether nbatch has increased since we
302  * began scanning the outer relation
303  */
304  hashtable->nbatch_outstart = hashtable->nbatch;
305 
306  /*
307  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
308  * tuple above, because ExecHashJoinOuterGetTuple will
309  * immediately set it again.)
310  */
311  node->hj_OuterNotEmpty = false;
312 
313  if (parallel)
314  {
315  Barrier *build_barrier;
316 
317  build_barrier = &parallel_state->build_barrier;
318  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
319  BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
320  if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
321  {
322  /*
323  * If multi-batch, we need to hash the outer relation
324  * up front.
325  */
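326  if (hashtable->nbatch > 1)
327  ExecParallelHashJoinPartitionOuter(node);
328  BarrierArriveAndWait(build_barrier,
329  WAIT_EVENT_HASH_BUILD_HASHING_OUTER);
330  }
331  Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
332 
333  /* Each backend should now select a batch to work on. */
334  hashtable->curbatch = -1;
335  node->hj_JoinState = HJ_NEED_NEW_BATCH;
336 
337  continue;
338  }
339  else
340  node->hj_JoinState = HJ_NEED_NEW_OUTER;
341 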
342  /* FALL THRU */
343 
344  case HJ_NEED_NEW_OUTER:
345 
346  /*
347  * We don't have an outer tuple, try to get the next one
348  */
349  if (parallel)
350  outerTupleSlot =
351  ExecParallelHashJoinOuterGetTuple(outerNode, node,
352  &hashvalue);
353  else
354  outerTupleSlot =
355  ExecHashJoinOuterGetTuple(outerNode, node, &hashvalue);
356 
357  if (TupIsNull(outerTupleSlot))
358  {
359  /* end of batch, or maybe whole join */
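360  if (HJ_FILL_INNER(node))
361  {
362  /* set up to scan for unmatched inner tuples */
363  ExecPrepHashTableForUnmatched(node);
364  node->hj_JoinState = HJ_FILL_INNER_TUPLES;
365  }
366  else
367  node->hj_JoinState = HJ_NEED_NEW_BATCH;
368  continue;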
369  }
370 
371  econtext->ecxt_outertuple = outerTupleSlot;
372  node->hj_MatchedOuter = false;
373 
374  /*
375  * Find the corresponding bucket for this tuple in the main
376  * hash table or skew hash table.
377  */
378  node->hj_CurHashValue = hashvalue;
379  ExecHashGetBucketAndBatch(hashtable, hashvalue,
380  &node->hj_CurBucketNo, &batchno);
381  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
382  hashvalue);
383  node->hj_CurTuple = NULL;
384 
385  /*
386  * The tuple might not belong to the current batch (where
387  * "current batch" includes the skew buckets if any).
388  */
389  if (batchno != hashtable->curbatch &&
390  node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
391  {
392  /*
393  * Need to postpone this outer tuple to a later batch.
394  * Save it in the corresponding outer-batch file.
395  */
396  Assert(parallel_state == NULL);
397  Assert(batchno > hashtable->curbatch);
398  ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
399  hashvalue,
400  &hashtable->outerBatchFile[batchno]);
401 
402  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
403  continue;
404  }
405 
406  /* OK, let's scan the bucket for matches */
407  node->hj_JoinState = HJ_SCAN_BUCKET;
408 
409  /* FALL THRU */
410 
411  case HJ_SCAN_BUCKET:
412 
413  /*
414  * Scan the selected hash bucket for matches to current outer
415  */
416  if (parallel)
417  {
418  if (!ExecParallelScanHashBucket(node, econtext))
419  {
420  /* out of matches; check for possible outer-join fill */
421  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
422  continue;
423  }
424  }
425  else
426  {
427  if (!ExecScanHashBucket(node, econtext))
428  {
429  /* out of matches; check for possible outer-join fill */
430  node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
431  continue;
432  }
433  }
434 
435  /*
436  * We've got a match, but still need to test non-hashed quals.
437  * ExecScanHashBucket already set up all the state needed to
438  * call ExecQual.
439  *
440  * If we pass the qual, then save state for next call and have
441  * ExecProject form the projection, store it in the tuple
442  * table, and return the slot.
443  *
444  * Only the joinquals determine tuple match status, but all
445  * quals must pass to actually return the tuple.
446  */
447  if (joinqual == NULL || ExecQual(joinqual, econtext))
448  {
449  node->hj_MatchedOuter = true;
450  HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
451 
452  /* In an antijoin, we never return a matched tuple */
453  if (node->js.jointype == JOIN_ANTI)
454  {
455  node->hj_JoinState = HJ_NEED_NEW_OUTER;
456  continue;
457  }
458 
459  /*
460  * If we only need to join to the first matching inner
461  * tuple, then consider returning this one, but after that
462  * continue with next outer tuple.
463  */
464  if (node->js.single_match)
465  node->hj_JoinState = HJ_NEED_NEW_OUTER;
466 
467  if (otherqual == NULL || ExecQual(otherqual, econtext))
468  return ExecProject(node->js.ps.ps_ProjInfo);
469  else
470  InstrCountFiltered2(node, 1);
471  }
472  else
473  InstrCountFiltered1(node, 1);
474  break;
475 
476  case HJ_FILL_OUTER_TUPLE:
477 
478  /*
479  * The current outer tuple has run out of matches, so check
480  * whether to emit a dummy outer-join tuple. Whether we emit
481  * one or not, the next state is NEED_NEW_OUTER.
482  */
483  node->hj_JoinState = HJ_NEED_NEW_OUTER;
484 
485  if (!node->hj_MatchedOuter &&
486  HJ_FILL_OUTER(node))
487  {
488  /*
489  * Generate a fake join tuple with nulls for the inner
490  * tuple, and return it if it passes the non-join quals.
491  */
492  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
493 
494  if (otherqual == NULL || ExecQual(otherqual, econtext))
495  return ExecProject(node->js.ps.ps_ProjInfo);
496  else
497  InstrCountFiltered2(node, 1);
498  }
499  break;
500 
501  case HJ_FILL_INNER_TUPLES:
502 
503  /*
504  * We have finished a batch, but we are doing right/full join,
505  * so any unmatched inner tuples in the hashtable have to be
506  * emitted before we continue to the next batch.
507  */
508  if (!ExecScanHashTableForUnmatched(node, econtext))
509  {
510  /* no more unmatched tuples */
511  node->hj_JoinState = HJ_NEED_NEW_BATCH;
512  continue;
513  }
514 
515  /*
516  * Generate a fake join tuple with nulls for the outer tuple,
517  * and return it if it passes the non-join quals.
518  */
519  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
520 
521  if (otherqual == NULL || ExecQual(otherqual, econtext))
522  return ExecProject(node->js.ps.ps_ProjInfo);
523  else
524  InstrCountFiltered2(node, 1);
525  break;
526 
527  case HJ_NEED_NEW_BATCH:
528 
529  /*
530  * Try to advance to next batch. Done if there are no more.
531  */
532  if (parallel)
533  {
534  if (!ExecParallelHashJoinNewBatch(node))
535  return NULL; /* end of parallel-aware join */
536  }
537  else
538  {
539  if (!ExecHashJoinNewBatch(node))
540  return NULL; /* end of parallel-oblivious join */
541  }
542  node->hj_JoinState = HJ_NEED_NEW_OUTER;
543  break;
544 
545  default:
546  elog(ERROR, "unrecognized hashjoin state: %d",
547  (int) node->hj_JoinState);
548  }
549  }
550 }
JoinType jointype
Definition: execnodes.h:1688
HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
Definition: nodeHash.c:428
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2137
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:129
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:126
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1803
ExprState * joinqual
Definition: execnodes.h:1691
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:948
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:877
PlanState ps
Definition: execnodes.h:1687
#define castNode(_type_, nodeptr)
Definition: nodes.h:586
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:2043
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
Definition: execTuples.c:708
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:2019
ExprContext * ps_ExprContext
Definition: execnodes.h:947
bool single_match
Definition: execnodes.h:1689
HashJoinTable hashtable
Definition: execnodes.h:2129
bool hj_MatchedOuter
Definition: execnodes.h:1806
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:803
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:361
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1804
#define PHJ_BUILD_HASHING_OUTER
Definition: hashjoin.h:260
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:2343
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1875
uint32 hj_CurHashValue
Definition: execnodes.h:1796
int hj_CurSkewBucketNo
Definition: execnodes.h:1798
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1802
static void ExecParallelHashJoinPartitionOuter(HashJoinState *node)
BufFile ** outerBatchFile
Definition: hashjoin.h:330
#define outerPlanState(node)
Definition: execnodes.h:966
Cost startup_cost
Definition: plannodes.h:127
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1799
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1907
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:220
#define TupIsNull(slot)
Definition: tuptable.h:146
unsigned int uint32
Definition: c.h:325
PlanState ps
Definition: execnodes.h:2128
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:974
List * hj_HashOperators
Definition: execnodes.h:1794
#define PHJ_BUILD_DONE
Definition: hashjoin.h:261
int hj_CurBucketNo
Definition: execnodes.h:1797
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:128
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:233
Plan * plan
Definition: execnodes.h:912
double totalTuples
Definition: hashjoin.h:318
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:80
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:222
bool hj_OuterNotEmpty
Definition: execnodes.h:1807
#define Assert(condition)
Definition: c.h:699
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:979
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:243
static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:125
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
ExprState * qual
Definition: execnodes.h:930
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:483
Cost total_cost
Definition: plannodes.h:128
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:534
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:127
#define elog
Definition: elog.h:219
#define innerPlanState(node)
Definition: execnodes.h:965
bool ExecParallelScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1968
JoinState js
Definition: execnodes.h:1790
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
Definition: nodeHashjoin.c:941
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: executor.h:324
#define ResetExprContext(econtext)
Definition: executor.h:483

◆ ExecHashJoinInitializeDSM()

void ExecHashJoinInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1425 of file nodeHashjoin.c.

References BarrierInit(), ParallelHashJoinState::batches, ParallelHashJoinState::build_barrier, ParallelHashJoinState::chunk_work_queue, ParallelHashJoinState::distributor, ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, ParallelHashJoinState::grow_batches_barrier, ParallelHashJoinState::grow_buckets_barrier, ParallelHashJoinState::growth, innerPlanState, InvalidDsaPointer, HashJoinState::js, ParallelHashJoinState::lock, LWLockInitialize(), LWTRANCHE_PARALLEL_HASH_JOIN, ParallelHashJoinState::nbatch, ParallelHashJoinState::nbuckets, ParallelHashJoinState::nparticipants, ParallelContext::nworkers, ParallelHashJoinState::old_batches, HashState::parallel_state, pg_atomic_init_u32(), PHJ_GROWTH_OK, PlanState::plan, Plan::plan_node_id, JoinState::ps, ParallelContext::seg, SharedFileSetInit(), shm_toc_allocate(), shm_toc_insert(), ParallelHashJoinState::space_allowed, ParallelContext::toc, and ParallelHashJoinState::total_tuples.

Referenced by ExecParallelInitializeDSM().

1426 {
1427  int plan_node_id = state->js.ps.plan->plan_node_id;
1428  HashState *hashNode;
1429  ParallelHashJoinState *pstate;
1430 
1431  /*
1432  * Disable shared hash table mode if we failed to create a real DSM
1433  * segment, because that means that we don't have a DSA area to work with.
1434  */
1435  if (pcxt->seg == NULL)
1436  return;
1437 
1438  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1439 
1440  /*
1441  * Set up the state needed to coordinate access to the shared hash
1442  * table(s), using the plan node ID as the toc key.
1443  */
1444  pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelHashJoinState));
1445  shm_toc_insert(pcxt->toc, plan_node_id, pstate);
1446 
1447  /*
1448  * Set up the shared hash join state with no batches initially.
1449  * ExecHashTableCreate() will prepare at least one later and set nbatch
1450  * and space_allowed.
1451  */
1452  pstate->nbatch = 0;
1453  pstate->space_allowed = 0;
1454  pstate->batches = InvalidDsaPointer;
1455  pstate->old_batches = InvalidDsaPointer;
1456  pstate->nbuckets = 0;
1457  pstate->growth = PHJ_GROWTH_OK;
1458  pstate->chunk_work_queue = InvalidDsaPointer;
1459  pg_atomic_init_u32(&pstate->distributor, 0);
1460  pstate->nparticipants = pcxt->nworkers + 1;
1461  pstate->total_tuples = 0;
1462  LWLockInitialize(&pstate->lock,
1463  LWTRANCHE_PARALLEL_HASH_JOIN);
1464  BarrierInit(&pstate->build_barrier, 0);
1465  BarrierInit(&pstate->grow_batches_barrier, 0);
1466  BarrierInit(&pstate->grow_buckets_barrier, 0);
1467 
1468  /* Set up the space we'll use for shared temporary files. */
1469  SharedFileSetInit(&pstate->fileset, pcxt->seg);
1470 
1471  /* Initialize the shared state in the hash node. */
1472  hashNode = (HashState *) innerPlanState(state);
1473  hashNode->parallel_state = pstate;
1474 }
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2137
dsa_pointer chunk_work_queue
Definition: hashjoin.h:242
#define InvalidDsaPointer
Definition: dsa.h:78
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:47
PlanState ps
Definition: execnodes.h:1687
dsm_segment * seg
Definition: parallel.h:42
int plan_node_id
Definition: plannodes.h:145
SharedFileSet fileset
Definition: hashjoin.h:253
Barrier grow_buckets_barrier
Definition: hashjoin.h:250
dsa_pointer batches
Definition: hashjoin.h:236
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:677
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:406
Plan * plan
Definition: execnodes.h:912
ParallelHashGrowth growth
Definition: hashjoin.h:241
dsa_pointer old_batches
Definition: hashjoin.h:237
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:575
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
pg_atomic_uint32 distributor
Definition: hashjoin.h:251
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:226
#define innerPlanState(node)
Definition: execnodes.h:965
JoinState js
Definition: execnodes.h:1790
Barrier grow_batches_barrier
Definition: hashjoin.h:249
shm_toc * toc
Definition: parallel.h:44

◆ ExecHashJoinInitializeWorker()

void ExecHashJoinInitializeWorker ( HashJoinState * state,
ParallelWorkerContext * pwcxt 
)

Definition at line 1516 of file nodeHashjoin.c.

References ExecParallelHashJoin(), ExecSetExecProcNode(), ParallelHashJoinState::fileset, innerPlanState, HashJoinState::js, HashState::parallel_state, PlanState::plan, Plan::plan_node_id, JoinState::ps, ParallelWorkerContext::seg, SharedFileSetAttach(), shm_toc_lookup(), and ParallelWorkerContext::toc.

Referenced by ExecParallelInitializeWorker().

1518 {
1519  HashState *hashNode;
1520  int plan_node_id = state->js.ps.plan->plan_node_id;
1521  ParallelHashJoinState *pstate =
1522  shm_toc_lookup(pwcxt->toc, plan_node_id, false);
1523 
1524  /* Attach to the space for shared temporary files. */
1525  SharedFileSetAttach(&pstate->fileset, pwcxt->seg);
1526 
1527  /* Attach to the shared state in the hash node. */
1528  hashNode = (HashState *) innerPlanState(state);
1529  hashNode->parallel_state = pstate;
1530 
1531  ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1532 }
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2137
PlanState ps
Definition: execnodes.h:1687
int plan_node_id
Definition: plannodes.h:145
SharedFileSet fileset
Definition: hashjoin.h:253
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
Definition: execProcnode.c:406
Plan * plan
Definition: execnodes.h:912
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:575
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
Definition: sharedfileset.c:76
#define innerPlanState(node)
Definition: execnodes.h:965
JoinState js
Definition: execnodes.h:1790
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
dsm_segment * seg
Definition: parallel.h:52

◆ ExecHashJoinNewBatch()

static bool ExecHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 941 of file nodeHashjoin.c.

References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg(), ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.

Referenced by ExecHashJoinImpl().

942 {
943  HashJoinTable hashtable = hjstate->hj_HashTable;
944  int nbatch;
945  int curbatch;
946  BufFile *innerFile;
947  TupleTableSlot *slot;
948  uint32 hashvalue;
949 
950  nbatch = hashtable->nbatch;
951  curbatch = hashtable->curbatch;
952 
953  if (curbatch > 0)
954  {
955  /*
956  * We no longer need the previous outer batch file; close it right
957  * away to free disk space.
958  */
959  if (hashtable->outerBatchFile[curbatch])
960  BufFileClose(hashtable->outerBatchFile[curbatch]);
961  hashtable->outerBatchFile[curbatch] = NULL;
962  }
963  else /* we just finished the first batch */
964  {
965  /*
966  * Reset some of the skew optimization state variables, since we no
967  * longer need to consider skew tuples after the first batch. The
968  * memory context reset we are about to do will release the skew
969  * hashtable itself.
970  */
971  hashtable->skewEnabled = false;
972  hashtable->skewBucket = NULL;
973  hashtable->skewBucketNums = NULL;
974  hashtable->nSkewBuckets = 0;
975  hashtable->spaceUsedSkew = 0;
976  }
977 
978  /*
979  * We can always skip over any batches that are completely empty on both
980  * sides. We can sometimes skip over batches that are empty on only one
981  * side, but there are exceptions:
982  *
983  * 1. In a left/full outer join, we have to process outer batches even if
984  * the inner batch is empty. Similarly, in a right/full outer join, we
985  * have to process inner batches even if the outer batch is empty.
986  *
987  * 2. If we have increased nbatch since the initial estimate, we have to
988  * scan inner batches since they might contain tuples that need to be
989  * reassigned to later inner batches.
990  *
991  * 3. Similarly, if we have increased nbatch since starting the outer
992  * scan, we have to rescan outer batches in case they contain tuples that
993  * need to be reassigned.
994  */
995  curbatch++;
996  while (curbatch < nbatch &&
997  (hashtable->outerBatchFile[curbatch] == NULL ||
998  hashtable->innerBatchFile[curbatch] == NULL))
999  {
1000  if (hashtable->outerBatchFile[curbatch] &&
1001  HJ_FILL_OUTER(hjstate))
1002  break; /* must process due to rule 1 */
1003  if (hashtable->innerBatchFile[curbatch] &&
1004  HJ_FILL_INNER(hjstate))
1005  break; /* must process due to rule 1 */
1006  if (hashtable->innerBatchFile[curbatch] &&
1007  nbatch != hashtable->nbatch_original)
1008  break; /* must process due to rule 2 */
1009  if (hashtable->outerBatchFile[curbatch] &&
1010  nbatch != hashtable->nbatch_outstart)
1011  break; /* must process due to rule 3 */
1012  /* We can ignore this batch. */
1013  /* Release associated temp files right away. */
1014  if (hashtable->innerBatchFile[curbatch])
1015  BufFileClose(hashtable->innerBatchFile[curbatch]);
1016  hashtable->innerBatchFile[curbatch] = NULL;
1017  if (hashtable->outerBatchFile[curbatch])
1018  BufFileClose(hashtable->outerBatchFile[curbatch]);
1019  hashtable->outerBatchFile[curbatch] = NULL;
1020  curbatch++;
1021  }
1022 
1023  if (curbatch >= nbatch)
1024  return false; /* no more batches */
1025 
1026  hashtable->curbatch = curbatch;
1027 
1028  /*
1029  * Reload the hash table with the new inner batch (which could be empty)
1030  */
1031  ExecHashTableReset(hashtable);
1032 
1033  innerFile = hashtable->innerBatchFile[curbatch];
1034 
1035  if (innerFile != NULL)
1036  {
1037  if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
1038  ereport(ERROR,
1039  (errcode_for_file_access(),
1040  errmsg("could not rewind hash-join temporary file: %m")));
1041 
1042  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
1043  innerFile,
1044  &hashvalue,
1045  hjstate->hj_HashTupleSlot)))
1046  {
1047  /*
1048  * NOTE: some tuples may be sent to future batches. Also, it is
1049  * possible for hashtable->nbatch to be increased here!
1050  */
1051  ExecHashTableInsert(hashtable, slot, hashvalue);
1052  }
1053 
1054  /*
1055  * after we build the hash table, the inner batch file is no longer
1056  * needed
1057  */
1058  BufFileClose(innerFile);
1059  hashtable->innerBatchFile[curbatch] = NULL;
1060  }
1061 
1062  /*
1063  * Rewind outer batch file (if present), so that we can start reading it.
1064  */
1065  if (hashtable->outerBatchFile[curbatch] != NULL)
1066  {
1067  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
1068  ereport(ERROR,
1069  (errcode_for_file_access(),
1070  errmsg("could not rewind hash-join temporary file: %m")));
1071  }
1072 
1073  return true;
1074 }
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:676
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:2114
void BufFileClose(BufFile *file)
Definition: buffile.c:397
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
int * skewBucketNums
Definition: hashjoin.h:308
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1589
#define ERROR
Definition: elog.h:43
BufFile ** outerBatchFile
Definition: hashjoin.h:330
int errcode_for_file_access(void)
Definition: elog.c:598
unsigned int uint32
Definition: c.h:325
#define ereport(elevel, rest)
Definition: elog.h:122
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
HashSkewBucket ** skewBucket
Definition: hashjoin.h:305
BufFile ** innerBatchFile
Definition: hashjoin.h:329
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1801
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
int errmsg(const char *fmt,...)
Definition: elog.c:797

◆ ExecHashJoinOuterGetTuple()

static TupleTableSlot * ExecHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 803 of file nodeHashjoin.c.

References HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecHashGetHashValue(), ExecHashJoinGetSavedTuple(), ExecProcNode(), HJ_FILL_OUTER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::outerBatchFile, JoinState::ps, PlanState::ps_ExprContext, and TupIsNull.

Referenced by ExecHashJoinImpl().

806 {
807  HashJoinTable hashtable = hjstate->hj_HashTable;
808  int curbatch = hashtable->curbatch;
809  TupleTableSlot *slot;
810 
811  if (curbatch == 0) /* if it is the first pass */
812  {
813  /*
814  * Check to see if first outer tuple was already fetched by
815  * ExecHashJoin() and not used yet.
816  */
817  slot = hjstate->hj_FirstOuterTupleSlot;
818  if (!TupIsNull(slot))
819  hjstate->hj_FirstOuterTupleSlot = NULL;
820  else
821  slot = ExecProcNode(outerNode);
822 
823  while (!TupIsNull(slot))
824  {
825  /*
826  * We have to compute the tuple's hash value.
827  */
828  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
829 
830  econtext->ecxt_outertuple = slot;
831  if (ExecHashGetHashValue(hashtable, econtext,
832  hjstate->hj_OuterHashKeys,
833  true, /* outer tuple */
834  HJ_FILL_OUTER(hjstate),
835  hashvalue))
836  {
837  /* remember outer relation is not empty for possible rescan */
838  hjstate->hj_OuterNotEmpty = true;
839 
840  return slot;
841  }
842 
843  /*
844  * That tuple couldn't match because of a NULL, so discard it and
845  * continue with the next one.
846  */
847  slot = ExecProcNode(outerNode);
848  }
849  }
850  else if (curbatch < hashtable->nbatch)
851  {
852  BufFile *file = hashtable->outerBatchFile[curbatch];
853 
854  /*
855  * In outer-join cases, we could get here even though the batch file
856  * is empty.
857  */
858  if (file == NULL)
859  return NULL;
860 
861  slot = ExecHashJoinGetSavedTuple(hjstate,
862  file,
863  hashvalue,
864  hjstate->hj_OuterTupleSlot);
865  if (!TupIsNull(slot))
866  return slot;
867  }
868 
869  /* End of this batch */
870  return NULL;
871 }
PlanState ps
Definition: execnodes.h:1687
ExprContext * ps_ExprContext
Definition: execnodes.h:947
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1800
List * hj_OuterHashKeys
Definition: execnodes.h:1792
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1804
BufFile ** outerBatchFile
Definition: hashjoin.h:330
#define TupIsNull(slot)
Definition: tuptable.h:146
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:233
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:222
bool hj_OuterNotEmpty
Definition: execnodes.h:1807
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1771
JoinState js
Definition: execnodes.h:1790

◆ ExecHashJoinReInitializeDSM()

void ExecHashJoinReInitializeDSM ( HashJoinState * state,
ParallelContext * cxt 
)

Definition at line 1483 of file nodeHashjoin.c.

References BarrierInit(), ParallelHashJoinState::build_barrier, ExecHashTableDetach(), ExecHashTableDetachBatch(), ParallelHashJoinState::fileset, HashJoinState::hj_HashTable, HashJoinState::js, PlanState::plan, Plan::plan_node_id, JoinState::ps, SharedFileSetDeleteAll(), shm_toc_lookup(), and ParallelContext::toc.

Referenced by ExecParallelReInitializeDSM().

1484 {
1485  int plan_node_id = state->js.ps.plan->plan_node_id;
1486  ParallelHashJoinState *pstate =
1487  shm_toc_lookup(cxt->toc, plan_node_id, false);
1488 
1489  /*
1490  * It would be possible to reuse the shared hash table in single-batch
1491  * cases by resetting and then fast-forwarding build_barrier to
1492  * PHJ_BUILD_DONE and batch 0's batch_barrier to PHJ_BATCH_PROBING, but
1493  * currently shared hash tables are already freed by now (by the last
1494  * participant to detach from the batch). We could consider keeping it
1495  * around for single-batch joins. We'd also need to adjust
1496  * finalize_plan() so that it doesn't record a dummy dependency for
1497  * Parallel Hash nodes, preventing the rescan optimization. For now we
1498  * don't try.
1499  */
1500 
1501  /* Detach, freeing any remaining shared memory. */
1502  if (state->hj_HashTable != NULL)
1503  {
1504  ExecHashTableDetachBatch(state->hj_HashTable);
1505  ExecHashTableDetach(state->hj_HashTable);
1506  }
1507 
1508  /* Clear any shared batch files. */
1509  SharedFileSetDeleteAll(&pstate->fileset);
1510 
1511  /* Reset build_barrier to PHJ_BUILD_ELECTING so we can go around again. */
1512  BarrierInit(&pstate->build_barrier, 0);
1513 }
void BarrierInit(Barrier *barrier, int participants)
Definition: barrier.c:100
PlanState ps
Definition: execnodes.h:1687
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3056
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3113
int plan_node_id
Definition: plannodes.h:145
SharedFileSet fileset
Definition: hashjoin.h:253
Plan * plan
Definition: execnodes.h:912
void SharedFileSetDeleteAll(SharedFileSet *fileset)
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
JoinState js
Definition: execnodes.h:1790
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
shm_toc * toc
Definition: parallel.h:44

◆ ExecHashJoinSaveTuple()

void ExecHashJoinSaveTuple ( MinimalTuple  tuple,
uint32  hashvalue,
BufFile **  fileptr 
)

Definition at line 1211 of file nodeHashjoin.c.

References BufFileCreateTemp(), BufFileWrite(), ereport, errcode_for_file_access(), errmsg(), ERROR, and MinimalTupleData::t_len.

Referenced by ExecHashIncreaseNumBatches(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().

1213 {
1214  BufFile *file = *fileptr;
1215  size_t written;
1216 
1217  if (file == NULL)
1218  {
1219  /* First write to this batch file, so open it. */
1220  file = BufFileCreateTemp(false);
1221  *fileptr = file;
1222  }
1223 
1224  written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
1225  if (written != sizeof(uint32))
1226  ereport(ERROR,
1227  (errcode_for_file_access(),
1228  errmsg("could not write to hash-join temporary file: %m")));
1229 
1230  written = BufFileWrite(file, (void *) tuple, tuple->t_len);
1231  if (written != tuple->t_len)
1232  ereport(ERROR,
1233  (errcode_for_file_access(),
1234  errmsg("could not write to hash-join temporary file: %m")));
1235 }
#define ERROR
Definition: elog.h:43
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:182
int errcode_for_file_access(void)
Definition: elog.c:598
unsigned int uint32
Definition: c.h:325
#define ereport(elevel, rest)
Definition: elog.h:122
int errmsg(const char *fmt,...)
Definition: elog.c:797
size_t BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:601

◆ ExecInitHashJoin()

HashJoinState* ExecInitHashJoin ( HashJoin * node,
EState * estate,
int  eflags 
)

Definition at line 591 of file nodeHashjoin.c.

References OpExpr::args, Assert, elog, ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecGetResultType(), ExecHashJoin(), ExecInitExpr(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitQual(), ExecInitResultTupleSlotTL(), PlanState::ExecProcNode, HashJoin::hashclauses, HJ_BUILD_HASHTABLE, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_OuterTupleSlot, Join::inner_unique, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_SEMI, Join::joinqual, Join::jointype, JoinState::jointype, HashJoinState::js, lappend(), lappend_oid(), lfirst_node, linitial, lsecond, makeNode, NIL, OpExpr::opno, outerPlan, outerPlanState, Join::plan, PlanState::plan, JoinState::ps, HashState::ps, PlanState::ps_ResultTupleSlot, Plan::qual, PlanState::qual, JoinState::single_match, and PlanState::state.

Referenced by ExecInitNode().

592 {
593  HashJoinState *hjstate;
594  Plan *outerNode;
595  Hash *hashNode;
596  List *lclauses;
597  List *rclauses;
598  List *hoperators;
599  TupleDesc outerDesc,
600  innerDesc;
601  ListCell *l;
602 
603  /* check for unsupported flags */
604  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
605 
606  /*
607  * create state structure
608  */
609  hjstate = makeNode(HashJoinState);
610  hjstate->js.ps.plan = (Plan *) node;
611  hjstate->js.ps.state = estate;
612 
613  /*
614  * See ExecHashJoinInitializeDSM() and ExecHashJoinInitializeWorker()
615  * where this function may be replaced with a parallel version, if we
616  * managed to launch a parallel query.
617  */
618  hjstate->js.ps.ExecProcNode = ExecHashJoin;
619  hjstate->js.jointype = node->join.jointype;
620 
621  /*
622  * Miscellaneous initialization
623  *
624  * create expression context for node
625  */
626  ExecAssignExprContext(estate, &hjstate->js.ps);
627 
628  /*
629  * initialize child nodes
630  *
631  * Note: we could suppress the REWIND flag for the inner input, which
632  * would amount to betting that the hash will be a single batch. Not
633  * clear if this would be a win or not.
634  */
635  outerNode = outerPlan(node);
636  hashNode = (Hash *) innerPlan(node);
637 
638  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
639  outerDesc = ExecGetResultType(outerPlanState(hjstate));
640  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
641  innerDesc = ExecGetResultType(innerPlanState(hjstate));
642 
643  /*
644  * Initialize result slot, type and projection.
645  */
646  ExecInitResultTupleSlotTL(estate, &hjstate->js.ps);
647  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
648 
649  /*
650  * tuple table initialization
651  */
652  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate, outerDesc);
653 
654  /*
655  * detect whether we need only consider the first matching inner tuple
656  */
657  hjstate->js.single_match = (node->join.inner_unique ||
658  node->join.jointype == JOIN_SEMI);
659 
660  /* set up null tuples for outer joins, if needed */
661  switch (node->join.jointype)
662  {
663  case JOIN_INNER:
664  case JOIN_SEMI:
665  break;
666  case JOIN_LEFT:
667  case JOIN_ANTI:
668  hjstate->hj_NullInnerTupleSlot =
669  ExecInitNullTupleSlot(estate, innerDesc);
670  break;
671  case JOIN_RIGHT:
672  hjstate->hj_NullOuterTupleSlot =
673  ExecInitNullTupleSlot(estate, outerDesc);
674  break;
675  case JOIN_FULL:
676  hjstate->hj_NullOuterTupleSlot =
677  ExecInitNullTupleSlot(estate, outerDesc);
678  hjstate->hj_NullInnerTupleSlot =
679  ExecInitNullTupleSlot(estate, innerDesc);
680  break;
681  default:
682  elog(ERROR, "unrecognized join type: %d",
683  (int) node->join.jointype);
684  }
685 
686  /*
687  * now for some voodoo. our temporary tuple slot is actually the result
688  * tuple slot of the Hash node (which is our inner plan). we can do this
689  * because Hash nodes don't return tuples via ExecProcNode() -- instead
690  * the hash join node uses ExecScanHashBucket() to get at the contents of
691  * the hash table. -cim 6/9/91
692  */
693  {
694  HashState *hashstate = (HashState *) innerPlanState(hjstate);
695  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
696 
697  hjstate->hj_HashTupleSlot = slot;
698  }
699 
700  /*
701  * initialize child expressions
702  */
703  hjstate->js.ps.qual =
704  ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
705  hjstate->js.joinqual =
706  ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
707  hjstate->hashclauses =
708  ExecInitQual(node->hashclauses, (PlanState *) hjstate);
709 
710  /*
711  * initialize hash-specific info
712  */
713  hjstate->hj_HashTable = NULL;
714  hjstate->hj_FirstOuterTupleSlot = NULL;
715 
716  hjstate->hj_CurHashValue = 0;
717  hjstate->hj_CurBucketNo = 0;
718  hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
719  hjstate->hj_CurTuple = NULL;
720 
721  /*
722  * Deconstruct the hash clauses into outer and inner argument values, so
723  * that we can evaluate those subexpressions separately. Also make a list
724  * of the hash operator OIDs, in preparation for looking up the hash
725  * functions to use.
726  */
727  lclauses = NIL;
728  rclauses = NIL;
729  hoperators = NIL;
730  foreach(l, node->hashclauses)
731  {
732  OpExpr *hclause = lfirst_node(OpExpr, l);
733 
734  lclauses = lappend(lclauses, ExecInitExpr(linitial(hclause->args),
735  (PlanState *) hjstate));
736  rclauses = lappend(rclauses, ExecInitExpr(lsecond(hclause->args),
737  (PlanState *) hjstate));
738  hoperators = lappend_oid(hoperators, hclause->opno);
739  }
740  hjstate->hj_OuterHashKeys = lclauses;
741  hjstate->hj_InnerHashKeys = rclauses;
742  hjstate->hj_HashOperators = hoperators;
743  /* child Hash node needs to evaluate inner hash keys, too */
744  ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
745 
746  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
747  hjstate->hj_MatchedOuter = false;
748  hjstate->hj_OuterNotEmpty = false;
749 
750  return hjstate;
751 }
JoinType jointype
Definition: execnodes.h:1688
#define NIL
Definition: pg_list.h:69
List * qual
Definition: plannodes.h:147
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1803
ExprState * joinqual
Definition: execnodes.h:1691
PlanState ps
Definition: execnodes.h:1687
List * hashclauses
Definition: plannodes.h:740
bool single_match
Definition: execnodes.h:1689
bool hj_MatchedOuter
Definition: execnodes.h:1806
EState * state
Definition: execnodes.h:914
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1800
List * hj_OuterHashKeys
Definition: execnodes.h:1792
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1804
#define lsecond(l)
Definition: pg_list.h:116
Join join
Definition: plannodes.h:739
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc)
Definition: execTuples.c:931
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:205
JoinType jointype
Definition: plannodes.h:681
uint32 hj_CurHashValue
Definition: execnodes.h:1796
int hj_CurSkewBucketNo
Definition: execnodes.h:1798
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:946
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType)
Definition: execTuples.c:945
#define linitial(l)
Definition: pg_list.h:111
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1802
#define EXEC_FLAG_BACKWARD
Definition: executor.h:60
#define lfirst_node(type, lc)
Definition: pg_list.h:109
#define outerPlanState(node)
Definition: execnodes.h:966
#define innerPlan(node)
Definition: plannodes.h:175
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:456
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1799
PlanState ps
Definition: execnodes.h:2128
List * hj_HashOperators
Definition: execnodes.h:1794
#define outerPlan(node)
Definition: plannodes.h:176
List * lappend(List *list, void *datum)
Definition: list.c:128
int hj_CurBucketNo
Definition: execnodes.h:1797
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:918
void ExecInitResultTupleSlotTL(EState *estate, PlanState *planstate)
Definition: execTuples.c:890
Plan * plan
Definition: execnodes.h:912
#define makeNode(_type_)
Definition: nodes.h:565
bool hj_OuterNotEmpty
Definition: execnodes.h:1807
#define Assert(condition)
Definition: c.h:699
#define EXEC_FLAG_MARK
Definition: executor.h:61
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:428
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:438
ExprState * qual
Definition: execnodes.h:930
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1801
List * hj_InnerHashKeys
Definition: execnodes.h:1793
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
static TupleTableSlot * ExecHashJoin(PlanState *pstate)
Definition: nodeHashjoin.c:559
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:119
Oid opno
Definition: primnodes.h:497
#define elog
Definition: elog.h:219
bool inner_unique
Definition: plannodes.h:682
List * args
Definition: primnodes.h:503
#define innerPlanState(node)
Definition: execnodes.h:965
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
List
Definition: pg_list.h:45
JoinState js
Definition: execnodes.h:1790
List * joinqual
Definition: plannodes.h:683
ExprState * hashclauses
Definition: execnodes.h:1791
Plan plan
Definition: plannodes.h:680

◆ ExecParallelHashJoin()

static TupleTableSlot* ExecParallelHashJoin ( PlanState * pstate)
static

Definition at line 575 of file nodeHashjoin.c.

References ExecHashJoinImpl().

Referenced by ExecHashJoinInitializeDSM(), and ExecHashJoinInitializeWorker().

576 {
577  /*
578  * On sufficiently smart compilers this should be inlined with the
579  * parallel-oblivious branches removed.
580  */
581  return ExecHashJoinImpl(pstate, true);
582 }
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)
Definition: nodeHashjoin.c:165

◆ ExecParallelHashJoinNewBatch()

static bool ExecParallelHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1081 of file nodeHashjoin.c.

References BarrierArriveAndWait(), BarrierAttach(), BarrierDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, HashJoinTableData::curbatch, ParallelHashJoinState::distributor, ParallelHashJoinBatchAccessor::done, elog, ERROR, ExecHashTableDetachBatch(), ExecParallelHashTableAlloc(), ExecParallelHashTableInsertCurrentBatch(), ExecParallelHashTableSetCurrentBatch(), ExecStoreMinimalTuple(), HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, ParallelHashJoinBatchAccessor::inner_tuples, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, pg_atomic_fetch_add_u32(), PHJ_BATCH_ALLOCATING, PHJ_BATCH_DONE, PHJ_BATCH_ELECTING, PHJ_BATCH_LOADING, PHJ_BATCH_PROBING, ParallelHashJoinBatchAccessor::shared, sts_begin_parallel_scan(), sts_end_parallel_scan(), sts_parallel_scan_next(), WAIT_EVENT_HASH_BATCH_ALLOCATING, WAIT_EVENT_HASH_BATCH_ELECTING, and WAIT_EVENT_HASH_BATCH_LOADING.

Referenced by ExecHashJoinImpl().

1082 {
1083  HashJoinTable hashtable = hjstate->hj_HashTable;
1084  int start_batchno;
1085  int batchno;
1086 
1087  /*
1088  * If we started up so late that the batch tracking array has been freed
1089  * already by ExecHashTableDetach(), then we are finished. See also
1090  * ExecParallelHashEnsureBatchAccessors().
1091  */
1092  if (hashtable->batches == NULL)
1093  return false;
1094 
1095  /*
1096  * If we were already attached to a batch, remember not to bother checking
1097  * it again, and detach from it (possibly freeing the hash table if we are
1098  * last to detach).
1099  */
1100  if (hashtable->curbatch >= 0)
1101  {
1102  hashtable->batches[hashtable->curbatch].done = true;
1103  ExecHashTableDetachBatch(hashtable);
1104  }
1105 
1106  /*
1107  * Search for a batch that isn't done. We use an atomic counter to start
1108  * our search at a different batch in every participant when there are
1109  * more batches than participants.
1110  */
1111  batchno = start_batchno =
1112  pg_atomic_fetch_add_u32(&hashtable->parallel_state->distributor, 1) %
1113  hashtable->nbatch;
1114  do
1115  {
1116  uint32 hashvalue;
1117  MinimalTuple tuple;
1118  TupleTableSlot *slot;
1119 
1120  if (!hashtable->batches[batchno].done)
1121  {
1122  SharedTuplestoreAccessor *inner_tuples;
1123  Barrier *batch_barrier =
1124  &hashtable->batches[batchno].shared->batch_barrier;
1125 
1126  switch (BarrierAttach(batch_barrier))
1127  {
1128  case PHJ_BATCH_ELECTING:
1129 
1130  /* One backend allocates the hash table. */
1131  if (BarrierArriveAndWait(batch_barrier,
1132  WAIT_EVENT_HASH_BATCH_ELECTING))
1133  ExecParallelHashTableAlloc(hashtable, batchno);
1134  /* Fall through. */
1135 
1136  case PHJ_BATCH_ALLOCATING:
1137  /* Wait for allocation to complete. */
1138  BarrierArriveAndWait(batch_barrier,
1139  WAIT_EVENT_HASH_BATCH_ALLOCATING);
1140  /* Fall through. */
1141 
1142  case PHJ_BATCH_LOADING:
1143  /* Start (or join in) loading tuples. */
1144  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1145  inner_tuples = hashtable->batches[batchno].inner_tuples;
1146  sts_begin_parallel_scan(inner_tuples);
1147  while ((tuple = sts_parallel_scan_next(inner_tuples,
1148  &hashvalue)))
1149  {
1150  slot = ExecStoreMinimalTuple(tuple,
1151  hjstate->hj_HashTupleSlot,
1152  false);
1153  ExecParallelHashTableInsertCurrentBatch(hashtable, slot,
1154  hashvalue);
1155  }
1156  sts_end_parallel_scan(inner_tuples);
1157  BarrierArriveAndWait(batch_barrier,
1158  WAIT_EVENT_HASH_BATCH_LOADING);
1159  /* Fall through. */
1160 
1161  case PHJ_BATCH_PROBING:
1162 
1163  /*
1164  * This batch is ready to probe. Return control to
1165  * caller. We stay attached to batch_barrier so that the
1166  * hash table stays alive until everyone's finished
1167  * probing it, but no participant is allowed to wait at
1168  * this barrier again (or else a deadlock could occur).
1169  * All attached participants must eventually call
1170  * BarrierArriveAndDetach() so that the final phase
1171  * PHJ_BATCH_DONE can be reached.
1172  */
1173  ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1174  sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
1175  return true;
1176 
1177  case PHJ_BATCH_DONE:
1178 
1179  /*
1180  * Already done. Detach and go around again (if any
1181  * remain).
1182  */
1183  BarrierDetach(batch_barrier);
1184  hashtable->batches[batchno].done = true;
1185  hashtable->curbatch = -1;
1186  break;
1187 
1188  default:
1189  elog(ERROR, "unexpected batch phase %d",
1190  BarrierPhase(batch_barrier));
1191  }
1192  }
1193  batchno = (batchno + 1) % hashtable->nbatch;
1194  } while (batchno != start_batchno);
1195 
1196  return false;
1197 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
#define PHJ_BATCH_DONE
Definition: hashjoin.h:268
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:420
void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:1735
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3056
#define PHJ_BATCH_ALLOCATING
Definition: hashjoin.h:265
void ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3198
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3036
#define PHJ_BATCH_LOADING
Definition: hashjoin.h:266
#define PHJ_BATCH_ELECTING
Definition: hashjoin.h:264
SharedTuplestoreAccessor * inner_tuples
Definition: hashjoin.h:208
#define ERROR
Definition: elog.h:43
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
unsigned int uint32
Definition: c.h:325
void sts_begin_parallel_scan(SharedTuplestoreAccessor *accessor)
int BarrierAttach(Barrier *barrier)
Definition: barrier.c:214
ParallelHashJoinState * parallel_state
Definition: hashjoin.h:356
#define PHJ_BATCH_PROBING
Definition: hashjoin.h:267
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:331
bool BarrierDetach(Barrier *barrier)
Definition: barrier.c:234
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:243
ParallelHashJoinBatch * shared
Definition: hashjoin.h:197
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1801
pg_atomic_uint32 distributor
Definition: hashjoin.h:251
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
#define elog
Definition: elog.h:219

◆ ExecParallelHashJoinOuterGetTuple()

static TupleTableSlot * ExecParallelHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 877 of file nodeHashjoin.c.

References HashJoinTableData::batches, HashJoinTableData::curbatch, ExprContext::ecxt_outertuple, ExecClearTuple(), ExecHashGetHashValue(), ExecProcNode(), ExecStoreMinimalTuple(), HJ_FILL_OUTER, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, HashJoinState::hj_OuterTupleSlot, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, JoinState::ps, PlanState::ps_ExprContext, sts_parallel_scan_next(), and TupIsNull.

Referenced by ExecHashJoinImpl().

880 {
881  HashJoinTable hashtable = hjstate->hj_HashTable;
882  int curbatch = hashtable->curbatch;
883  TupleTableSlot *slot;
884 
885  /*
886  * In the Parallel Hash case we only run the outer plan directly for
887  * single-batch hash joins. Otherwise we have to go to batch files, even
888  * for batch 0.
889  */
890  if (curbatch == 0 && hashtable->nbatch == 1)
891  {
892  slot = ExecProcNode(outerNode);
893 
894  while (!TupIsNull(slot))
895  {
896  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
897 
898  econtext->ecxt_outertuple = slot;
899  if (ExecHashGetHashValue(hashtable, econtext,
900  hjstate->hj_OuterHashKeys,
901  true, /* outer tuple */
902  HJ_FILL_OUTER(hjstate),
903  hashvalue))
904  return slot;
905 
906  /*
907  * That tuple couldn't match because of a NULL, so discard it and
908  * continue with the next one.
909  */
910  slot = ExecProcNode(outerNode);
911  }
912  }
913  else if (curbatch < hashtable->nbatch)
914  {
915  MinimalTuple tuple;
916 
917  tuple = sts_parallel_scan_next(hashtable->batches[curbatch].outer_tuples,
918  hashvalue);
919  if (tuple != NULL)
920  {
921  slot = ExecStoreMinimalTuple(tuple,
922  hjstate->hj_OuterTupleSlot,
923  false);
924  return slot;
925  }
926  else
927  ExecClearTuple(hjstate->hj_OuterTupleSlot);
928  }
929 
930  /* End of this batch */
931  return NULL;
932 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:420
PlanState ps
Definition: execnodes.h:1687
ExprContext * ps_ExprContext
Definition: execnodes.h:947
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:475
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1800
List * hj_OuterHashKeys
Definition: execnodes.h:1792
#define TupIsNull(slot)
Definition: tuptable.h:146
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:132
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:233
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:222
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1771
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
JoinState js
Definition: execnodes.h:1790

◆ ExecParallelHashJoinPartitionOuter()

static void ExecParallelHashJoinPartitionOuter ( HashJoinState * node)
static

Definition at line 1377 of file nodeHashjoin.c.

References Assert, HashJoinTableData::batches, CHECK_FOR_INTERRUPTS, ExprContext::ecxt_outertuple, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetHashValue(), ExecProcNode(), HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_OuterHashKeys, i, HashJoinState::js, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, outerPlanState, JoinState::ps, PlanState::ps_ExprContext, sts_end_write(), sts_puttuple(), and TupIsNull.

Referenced by ExecHashJoinImpl().

1378 {
1379  PlanState *outerState = outerPlanState(hjstate);
1380  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1381  HashJoinTable hashtable = hjstate->hj_HashTable;
1382  TupleTableSlot *slot;
1383  uint32 hashvalue;
1384  int i;
1385 
1386  Assert(hjstate->hj_FirstOuterTupleSlot == NULL);
1387 
1388  /* Execute outer plan, writing all tuples to shared tuplestores. */
1389  for (;;)
1390  {
1391  slot = ExecProcNode(outerState);
1392  if (TupIsNull(slot))
1393  break;
1394  econtext->ecxt_outertuple = slot;
1395  if (ExecHashGetHashValue(hashtable, econtext,
1396  hjstate->hj_OuterHashKeys,
1397  true, /* outer tuple */
1398  false, /* outer join, currently unsupported */
1399  &hashvalue))
1400  {
1401  int batchno;
1402  int bucketno;
1403 
1404  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno,
1405  &batchno);
1406  sts_puttuple(hashtable->batches[batchno].outer_tuples,
1407  &hashvalue, ExecFetchSlotMinimalTuple(slot));
1408  }
1409  CHECK_FOR_INTERRUPTS();
1410  }
1411 
1412  /* Make sure all outer partitions are readable by any backend. */
1413  for (i = 0; i < hashtable->nbatch; ++i)
1414  sts_end_write(hashtable->batches[i].outer_tuples);
1415 }
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
Definition: execTuples.c:708
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1875
#define outerPlanState(node)
Definition: execnodes.h:966
#define TupIsNull(slot)
Definition: tuptable.h:146
unsigned int uint32
Definition: c.h:325
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition: executor.h:233
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:357
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:222
#define Assert(condition)
Definition: c.h:699
int i
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:1771
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
void sts_end_write(SharedTuplestoreAccessor *accessor)

◆ ExecReScanHashJoin()

void ExecReScanHashJoin ( HashJoinState * node)

Definition at line 1291 of file nodeHashjoin.c.

References PlanState::chgParam, ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_OUTER, HashJoinState::hj_OuterNotEmpty, INVALID_SKEW_BUCKET_NO, HashJoinState::js, PlanState::lefttree, HashJoinTableData::nbatch, JoinState::ps, and PlanState::righttree.

Referenced by ExecReScan().

1292 {
1293  /*
1294  * In a multi-batch join, we currently have to do rescans the hard way,
1295  * primarily because batch temp files may have already been released. But
1296  * if it's a single-batch join, and there is no parameter change for the
1297  * inner subnode, then we can just re-use the existing hash table without
1298  * rebuilding it.
1299  */
1300  if (node->hj_HashTable != NULL)
1301  {
1302  if (node->hj_HashTable->nbatch == 1 &&
1303  node->js.ps.righttree->chgParam == NULL)
1304  {
1305  /*
1306  * Okay to reuse the hash table; needn't rescan inner, either.
1307  *
1308  * However, if it's a right/full join, we'd better reset the
1309  * inner-tuple match flags contained in the table.
1310  */
1311  if (HJ_FILL_INNER(node))
1312  ExecHashTableResetMatchFlags(node->hj_HashTable);
1313 
1314  /*
1315  * Also, we need to reset our state about the emptiness of the
1316  * outer relation, so that the new scan of the outer will update
1317  * it correctly if it turns out to be empty this time. (There's no
1318  * harm in clearing it now because ExecHashJoin won't need the
1319  * info. In the other cases, where the hash table doesn't exist
1320  * or we are destroying it, we leave this state alone because
1321  * ExecHashJoin will need it the first time through.)
1322  */
1323  node->hj_OuterNotEmpty = false;
1324 
1325  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
1326  node->hj_JoinState = HJ_NEED_NEW_OUTER;
1327  }
1328  else
1329  {
1330  /* must destroy and rebuild hash table */
1331  ExecHashTableDestroy(node->hj_HashTable);
1332  node->hj_HashTable = NULL;
1333  node->hj_JoinState = HJ_BUILD_HASHTABLE;
1334 
1335  /*
1336  * if chgParam of subnode is not null then plan will be re-scanned
1337  * by first ExecProcNode.
1338  */
1339  if (node->js.ps.righttree->chgParam == NULL)
1340  ExecReScan(node->js.ps.righttree);
1341  }
1342  }
1343 
1344  /* Always reset intra-tuple state */
1345  node->hj_CurHashValue = 0;
1346  node->hj_CurBucketNo = 0;
1347  node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
1348  node->hj_CurTuple = NULL;
1349 
1350  node->hj_MatchedOuter = false;
1351  node->hj_FirstOuterTupleSlot = NULL;
1352 
1353  /*
1354  * if chgParam of subnode is not null then plan will be re-scanned by
1355  * first ExecProcNode.
1356  */
1357  if (node->js.ps.lefttree->chgParam == NULL)
1358  ExecReScan(node->js.ps.lefttree);
1359 }
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
PlanState ps
Definition: execnodes.h:1687
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
bool hj_MatchedOuter
Definition: execnodes.h:1806
struct PlanState * righttree
Definition: execnodes.h:932
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1804
struct PlanState * lefttree
Definition: execnodes.h:931
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:134
uint32 hj_CurHashValue
Definition: execnodes.h:1796
int hj_CurSkewBucketNo
Definition: execnodes.h:1798
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1799
Bitmapset * chgParam
Definition: execnodes.h:941
int hj_CurBucketNo
Definition: execnodes.h:1797
bool hj_OuterNotEmpty
Definition: execnodes.h:1807
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:125
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:124
HashJoinTable hj_HashTable
Definition: execnodes.h:1795
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:2143
JoinState js
Definition: execnodes.h:1790
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:849

◆ ExecShutdownHashJoin()

void ExecShutdownHashJoin ( HashJoinState * node)

Definition at line 1362 of file nodeHashjoin.c.

References ExecHashTableDetach(), ExecHashTableDetachBatch(), and HashJoinState::hj_HashTable.

Referenced by ExecShutdownNode().

1363 {
1364  if (node->hj_HashTable)
1365  {
1366  /*
1367  * Detach from shared state before DSM memory goes away. This makes
1368  * sure that we don't have any pointers into DSM memory by the time
1369  * ExecEndHashJoin runs.
1370  */
1371  ExecHashTableDetachBatch(node->hj_HashTable);
1372  ExecHashTableDetach(node->hj_HashTable);
1373  }
1374 }
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition: nodeHash.c:3056
void ExecHashTableDetach(HashJoinTable hashtable)
Definition: nodeHash.c:3113
HashJoinTable hj_HashTable
Definition: execnodes.h:1795