PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nodeHashjoin.c File Reference
Include dependency graph for nodeHashjoin.c:

Go to the source code of this file.

Macros

#define HJ_BUILD_HASHTABLE   1
 
#define HJ_NEED_NEW_OUTER   2
 
#define HJ_SCAN_BUCKET   3
 
#define HJ_FILL_OUTER_TUPLE   4
 
#define HJ_FILL_INNER_TUPLES   5
 
#define HJ_FILL_OUTER_NULL_TUPLES   6
 
#define HJ_FILL_INNER_NULL_TUPLES   7
 
#define HJ_NEED_NEW_BATCH   8
 
#define HJ_FILL_OUTER(hjstate)   ((hjstate)->hj_NullInnerTupleSlot != NULL)
 
#define HJ_FILL_INNER(hjstate)   ((hjstate)->hj_NullOuterTupleSlot != NULL)
 

Functions

static TupleTableSlot * ExecHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple (PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
 
static TupleTableSlot * ExecHashJoinGetSavedTuple (HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
 
static bool ExecHashJoinNewBatch (HashJoinState *hjstate)
 
static bool ExecParallelHashJoinNewBatch (HashJoinState *hjstate)
 
static void ExecParallelHashJoinPartitionOuter (HashJoinState *hjstate)
 
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl (PlanState *pstate, bool parallel)
 
static TupleTableSlot * ExecHashJoin (PlanState *pstate)
 
static TupleTableSlot * ExecParallelHashJoin (PlanState *pstate)
 
HashJoinState * ExecInitHashJoin (HashJoin *node, EState *estate, int eflags)
 
void ExecEndHashJoin (HashJoinState *node)
 
void ExecHashJoinSaveTuple (MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
 
void ExecReScanHashJoin (HashJoinState *node)
 
void ExecShutdownHashJoin (HashJoinState *node)
 
void ExecHashJoinEstimate (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinReInitializeDSM (HashJoinState *state, ParallelContext *pcxt)
 
void ExecHashJoinInitializeWorker (HashJoinState *state, ParallelWorkerContext *pwcxt)
 

Macro Definition Documentation

◆ HJ_BUILD_HASHTABLE

#define HJ_BUILD_HASHTABLE   1

Definition at line 182 of file nodeHashjoin.c.

◆ HJ_FILL_INNER

#define HJ_FILL_INNER (   hjstate)    ((hjstate)->hj_NullOuterTupleSlot != NULL)

Definition at line 194 of file nodeHashjoin.c.

◆ HJ_FILL_INNER_NULL_TUPLES

#define HJ_FILL_INNER_NULL_TUPLES   7

Definition at line 188 of file nodeHashjoin.c.

◆ HJ_FILL_INNER_TUPLES

#define HJ_FILL_INNER_TUPLES   5

Definition at line 186 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER

#define HJ_FILL_OUTER (   hjstate)    ((hjstate)->hj_NullInnerTupleSlot != NULL)

Definition at line 192 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_NULL_TUPLES

#define HJ_FILL_OUTER_NULL_TUPLES   6

Definition at line 187 of file nodeHashjoin.c.

◆ HJ_FILL_OUTER_TUPLE

#define HJ_FILL_OUTER_TUPLE   4

Definition at line 185 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_BATCH

#define HJ_NEED_NEW_BATCH   8

Definition at line 189 of file nodeHashjoin.c.

◆ HJ_NEED_NEW_OUTER

#define HJ_NEED_NEW_OUTER   2

Definition at line 183 of file nodeHashjoin.c.

◆ HJ_SCAN_BUCKET

#define HJ_SCAN_BUCKET   3

Definition at line 184 of file nodeHashjoin.c.

Function Documentation

◆ ExecEndHashJoin()

void ExecEndHashJoin ( HashJoinState * node)

Definition at line 1062 of file nodeHashjoin.c.

1063{
1065
1066 /*
1067 * Free tuple stores if we made them (must do this before
1068 * ExecHashTableDestroy deletes hashCxt)
1069 */
1070 if (node->hj_NullOuterTupleStore)
1071 {
1074 }
1075 if (hashNode->null_tuple_store)
1076 {
1077 tuplestore_end(hashNode->null_tuple_store);
1078 hashNode->null_tuple_store = NULL;
1079 }
1080
1081 /*
1082 * Free hash table
1083 */
1084 if (node->hj_HashTable)
1085 {
1087 node->hj_HashTable = NULL;
1088 }
1089
1090 /*
1091 * clean up subtrees
1092 */
1095}
void ExecEndNode(PlanState *node)
#define outerPlanState(node)
Definition execnodes.h:1273
#define innerPlanState(node)
Definition execnodes.h:1272
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition nodeHash.c:981
#define castNode(_type_, nodeptr)
Definition nodes.h:182
static int fb(int x)
Tuplestorestate * hj_NullOuterTupleStore
Definition execnodes.h:2232
HashJoinTable hj_HashTable
Definition execnodes.h:2223
void tuplestore_end(Tuplestorestate *state)
Definition tuplestore.c:493

References castNode, ExecEndNode(), ExecHashTableDestroy(), fb(), HashJoinState::hj_HashTable, HashJoinState::hj_NullOuterTupleStore, innerPlanState, outerPlanState, and tuplestore_end().

Referenced by ExecEndNode().

◆ ExecHashJoin()

static TupleTableSlot * ExecHashJoin ( PlanState * pstate)
static

Definition at line 802 of file nodeHashjoin.c.

803{
804 /*
805 * On sufficiently smart compilers this should be inlined with the
806 * parallel-aware branches removed.
807 */
808 return ExecHashJoinImpl(pstate, false);
809}
static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl(PlanState *pstate, bool parallel)

References ExecHashJoinImpl().

Referenced by ExecInitHashJoin().

◆ ExecHashJoinEstimate()

void ExecHashJoinEstimate ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1843 of file nodeHashjoin.c.

1844{
1847}
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
shm_toc_estimator estimator
Definition parallel.h:43

References ParallelContext::estimator, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

◆ ExecHashJoinGetSavedTuple()

static TupleTableSlot * ExecHashJoinGetSavedTuple ( HashJoinState * hjstate,
BufFile * file,
uint32 * hashvalue,
TupleTableSlot * tupleSlot 
)
static

Definition at line 1612 of file nodeHashjoin.c.

1616{
1617 uint32 header[2];
1618 size_t nread;
1619 MinimalTuple tuple;
1620
1621 /*
1622 * We check for interrupts here because this is typically taken as an
1623 * alternative code path to an ExecProcNode() call, which would include
1624 * such a check.
1625 */
1627
1628 /*
1629 * Since both the hash value and the MinimalTuple length word are uint32,
1630 * we can read them both in one BufFileRead() call without any type
1631 * cheating.
1632 */
1633 nread = BufFileReadMaybeEOF(file, header, sizeof(header), true);
1634 if (nread == 0) /* end of file */
1635 {
1637 return NULL;
1638 }
1639 *hashvalue = header[0];
1640 tuple = (MinimalTuple) palloc(header[1]);
1641 tuple->t_len = header[1];
1642 BufFileReadExact(file,
1643 (char *) tuple + sizeof(uint32),
1644 header[1] - sizeof(uint32));
1646 return tupleSlot;
1647}
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition buffile.c:655
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition buffile.c:665
uint32_t uint32
Definition c.h:618
void ExecForceStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
MinimalTupleData * MinimalTuple
Definition htup.h:27
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476

References BufFileReadExact(), BufFileReadMaybeEOF(), CHECK_FOR_INTERRUPTS, ExecClearTuple(), ExecForceStoreMinimalTuple(), fb(), palloc(), and MinimalTupleData::t_len.

Referenced by ExecHashJoinNewBatch(), and ExecHashJoinOuterGetTuple().

◆ ExecHashJoinImpl()

static pg_attribute_always_inline TupleTableSlot * ExecHashJoinImpl ( PlanState * pstate,
bool  parallel 
)
static

Definition at line 225 of file nodeHashjoin.c.

226{
227 HashJoinState *node = castNode(HashJoinState, pstate);
230 ExprState *joinqual;
232 ExprContext *econtext;
233 HashJoinTable hashtable;
235 uint32 hashvalue;
236 int batchno;
237 ParallelHashJoinState *parallel_state;
238
239 /*
240 * get information from HashJoin node
241 */
242 joinqual = node->js.joinqual;
243 otherqual = node->js.ps.qual;
246 hashtable = node->hj_HashTable;
247 econtext = node->js.ps.ps_ExprContext;
248 parallel_state = hashNode->parallel_state;
249
250 /*
251 * Reset per-tuple memory context to free any expression evaluation
252 * storage allocated in the previous tuple cycle.
253 */
254 ResetExprContext(econtext);
255
256 /*
257 * run the hash join state machine
258 */
259 for (;;)
260 {
261 /*
262 * It's possible to iterate this loop many times before returning a
263 * tuple, in some pathological cases such as needing to move much of
264 * the current batch to a later batch. So let's check for interrupts
265 * each time through.
266 */
268
269 switch (node->hj_JoinState)
270 {
272
273 /*
274 * First time through: build hash table for inner relation.
275 */
276 Assert(hashtable == NULL);
277
278 /*
279 * If the outer relation is completely empty, and it's not
280 * right/right-anti/full join, we can quit without building
281 * the hash table. However, for an inner join it is only a
282 * win to check this when the outer relation's startup cost is
283 * less than the projected cost of building the hash table.
284 * Otherwise it's best to build the hash table first and see
285 * if the inner relation is empty. (When it's a left join, we
286 * should always make this check, since we aren't going to be
287 * able to skip the join on the strength of an empty inner
288 * relation anyway.)
289 *
290 * If we are rescanning the join, we make use of information
291 * gained on the previous scan: don't bother to try the
292 * prefetch if the previous scan found the outer relation
293 * nonempty. This is not 100% reliable since with new
294 * parameters the outer relation might yield different
295 * results, but it's a good heuristic.
296 *
297 * The only way to make the check is to try to fetch a tuple
298 * from the outer plan node. If we succeed, we have to stash
299 * it away for later consumption by ExecHashJoinOuterGetTuple.
300 */
301 if (HJ_FILL_INNER(node))
302 {
303 /* no chance to not build the hash table */
305 }
306 else if (parallel)
307 {
308 /*
309 * The empty-outer optimization is not implemented for
310 * shared hash tables, because no one participant can
311 * determine that there are no outer tuples, and it's not
312 * yet clear that it's worth the synchronization overhead
313 * of reaching consensus to figure that out. So we have
314 * to build the hash table.
315 */
317 }
318 else if (HJ_FILL_OUTER(node) ||
319 (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
320 !node->hj_OuterNotEmpty))
321 {
324 {
325 node->hj_OuterNotEmpty = false;
326 return NULL;
327 }
328 else
329 node->hj_OuterNotEmpty = true;
330 }
331 else
333
334 /*
335 * Create the hash table. If using Parallel Hash, then
336 * whoever gets here first will create the hash table and any
337 * later arrivals will merely attach to it.
338 */
339 hashtable = ExecHashTableCreate(hashNode);
340 node->hj_HashTable = hashtable;
341
342 /*
343 * Execute the Hash node, to build the hash table. If using
344 * Parallel Hash, then we'll try to help hashing unless we
345 * arrived too late.
346 */
347 hashNode->hashtable = hashtable;
349
350 /*
351 * If the inner relation is completely empty, and we're not
352 * doing a left outer join, we can quit without scanning the
353 * outer relation. (If the inner relation contains only
354 * null-keyed tuples that we need to emit, we'll fall through
355 * and do the outer-relation scan. In principle we could go
356 * emit those tuples then quit, but it would complicate the
357 * state machine logic. The case seems rare enough to not be
358 * worth optimizing.)
359 */
360 if (hashtable->totalTuples == 0 &&
361 hashNode->null_tuple_store == NULL &&
362 !HJ_FILL_OUTER(node))
363 {
364 if (parallel)
365 {
366 /*
367 * Advance the build barrier to PHJ_BUILD_RUN before
368 * proceeding so we can negotiate resource cleanup.
369 */
370 Barrier *build_barrier = &parallel_state->build_barrier;
371
372 while (BarrierPhase(build_barrier) < PHJ_BUILD_RUN)
373 BarrierArriveAndWait(build_barrier, 0);
374 }
375 return NULL;
376 }
377
378 /*
379 * need to remember whether nbatch has increased since we
380 * began scanning the outer relation
381 */
382 hashtable->nbatch_outstart = hashtable->nbatch;
383
384 /*
385 * Reset OuterNotEmpty for scan. (It's OK if we fetched a
386 * tuple above, because ExecHashJoinOuterGetTuple will
387 * immediately set it again.)
388 */
389 node->hj_OuterNotEmpty = false;
390
391 if (parallel)
392 {
393 Barrier *build_barrier;
394
395 build_barrier = &parallel_state->build_barrier;
396 Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER ||
397 BarrierPhase(build_barrier) == PHJ_BUILD_RUN ||
398 BarrierPhase(build_barrier) == PHJ_BUILD_FREE);
399 if (BarrierPhase(build_barrier) == PHJ_BUILD_HASH_OUTER)
400 {
401 /*
402 * If multi-batch, we need to hash the outer relation
403 * up front.
404 */
405 if (hashtable->nbatch > 1)
407 BarrierArriveAndWait(build_barrier,
409 Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUN);
410 }
411
412 /*
413 * Each backend should now select a batch to work on.
414 * However, if we've already collected some null-keyed
415 * tuples, dump them first. (That is critical when we
416 * arrive late enough that no more batches are available;
417 * otherwise we'd fail to dump those tuples at all.)
418 */
419 hashtable->curbatch = -1;
420
421 if (node->hj_NullOuterTupleStore)
423 else if (hashNode->null_tuple_store)
425 else
427
428 continue;
429 }
430 else
432
434
436
437 /*
438 * We don't have an outer tuple, try to get the next one
439 */
440 if (parallel)
443 &hashvalue);
444 else
446 ExecHashJoinOuterGetTuple(outerNode, node, &hashvalue);
447
449 {
450 /* end of batch, or maybe whole join */
451 if (HJ_FILL_INNER(node))
452 {
453 /* set up to scan for unmatched inner tuples */
454 if (parallel)
455 {
456 /*
457 * Only one process is currently allowed to handle
458 * each batch's unmatched tuples, in a parallel
459 * join. However, each process must deal with any
460 * null-keyed tuples it found.
461 */
464 else if (node->hj_NullOuterTupleStore)
466 else if (hashNode->null_tuple_store)
468 else
470 }
471 else
472 {
475 }
476 }
477 else
478 {
479 /* might have outer null-keyed tuples to fill */
480 Assert(hashNode->null_tuple_store == NULL);
481 if (node->hj_NullOuterTupleStore)
483 else
485 }
486 continue;
487 }
488
490 node->hj_MatchedOuter = false;
491
492 /*
493 * Find the corresponding bucket for this tuple in the main
494 * hash table or skew hash table.
495 */
496 node->hj_CurHashValue = hashvalue;
497 ExecHashGetBucketAndBatch(hashtable, hashvalue,
498 &node->hj_CurBucketNo, &batchno);
500 hashvalue);
501 node->hj_CurTuple = NULL;
502
503 /*
504 * The tuple might not belong to the current batch (where
505 * "current batch" includes the skew buckets if any).
506 */
507 if (batchno != hashtable->curbatch &&
509 {
510 bool shouldFree;
512 &shouldFree);
513
514 /*
515 * Need to postpone this outer tuple to a later batch.
516 * Save it in the corresponding outer-batch file.
517 */
518 Assert(parallel_state == NULL);
519 Assert(batchno > hashtable->curbatch);
520 ExecHashJoinSaveTuple(mintuple, hashvalue,
521 &hashtable->outerBatchFile[batchno],
522 hashtable);
523
524 if (shouldFree)
525 heap_free_minimal_tuple(mintuple);
526
527 /* Loop around, staying in HJ_NEED_NEW_OUTER state */
528 continue;
529 }
530
531 /* OK, let's scan the bucket for matches */
533
535
536 case HJ_SCAN_BUCKET:
537
538 /*
539 * Scan the selected hash bucket for matches to current outer
540 */
541 if (parallel)
542 {
543 if (!ExecParallelScanHashBucket(node, econtext))
544 {
545 /* out of matches; check for possible outer-join fill */
547 continue;
548 }
549 }
550 else
551 {
552 if (!ExecScanHashBucket(node, econtext))
553 {
554 /* out of matches; check for possible outer-join fill */
556 continue;
557 }
558 }
559
560 /*
561 * In a right-semijoin, we only need the first match for each
562 * inner tuple.
563 */
564 if (node->js.jointype == JOIN_RIGHT_SEMI &&
566 continue;
567
568 /*
569 * We've got a match, but still need to test non-hashed quals.
570 * ExecScanHashBucket already set up all the state needed to
571 * call ExecQual.
572 *
573 * If we pass the qual, then save state for next call and have
574 * ExecProject form the projection, store it in the tuple
575 * table, and return the slot.
576 *
577 * Only the joinquals determine tuple match status, but all
578 * quals must pass to actually return the tuple.
579 */
580 if (joinqual == NULL || ExecQual(joinqual, econtext))
581 {
582 node->hj_MatchedOuter = true;
583
584 /*
585 * This is really only needed if HJ_FILL_INNER(node) or if
586 * we are in a right-semijoin, but we'll avoid the branch
587 * and just set it always.
588 */
591
592 /* In an antijoin, we never return a matched tuple */
593 if (node->js.jointype == JOIN_ANTI)
594 {
596 continue;
597 }
598
599 /*
600 * If we only need to consider the first matching inner
601 * tuple, then advance to next outer tuple after we've
602 * processed this one.
603 */
604 if (node->js.single_match)
606
607 /*
608 * In a right-antijoin, we never return a matched tuple.
609 * If it's not an inner_unique join, we need to stay on
610 * the current outer tuple to continue scanning the inner
611 * side for matches.
612 */
613 if (node->js.jointype == JOIN_RIGHT_ANTI)
614 continue;
615
616 if (otherqual == NULL || ExecQual(otherqual, econtext))
617 return ExecProject(node->js.ps.ps_ProjInfo);
618 else
619 InstrCountFiltered2(node, 1);
620 }
621 else
622 InstrCountFiltered1(node, 1);
623 break;
624
626
627 /*
628 * The current outer tuple has run out of matches, so check
629 * whether to emit a dummy outer-join tuple. Whether we emit
630 * one or not, the next state is NEED_NEW_OUTER.
631 */
633
634 if (!node->hj_MatchedOuter &&
635 HJ_FILL_OUTER(node))
636 {
637 /*
638 * Generate a fake join tuple with nulls for the inner
639 * tuple, and return it if it passes the non-join quals.
640 */
641 econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
642
643 if (otherqual == NULL || ExecQual(otherqual, econtext))
644 return ExecProject(node->js.ps.ps_ProjInfo);
645 else
646 InstrCountFiltered2(node, 1);
647 }
648 break;
649
651
652 /*
653 * We have finished a batch, but we are doing
654 * right/right-anti/full join, so any unmatched inner tuples
655 * in the hashtable have to be emitted before we continue to
656 * the next batch.
657 */
658 if (!(parallel ? ExecParallelScanHashTableForUnmatched(node, econtext)
659 : ExecScanHashTableForUnmatched(node, econtext)))
660 {
661 /* no more unmatched tuples, but maybe there are nulls */
662 if (node->hj_NullOuterTupleStore)
664 else if (hashNode->null_tuple_store)
666 else
668 continue;
669 }
670
671 /*
672 * Generate a fake join tuple with nulls for the outer tuple,
673 * and return it if it passes the non-join quals.
674 */
675 econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
676
677 if (otherqual == NULL || ExecQual(otherqual, econtext))
678 return ExecProject(node->js.ps.ps_ProjInfo);
679 else
680 InstrCountFiltered2(node, 1);
681 break;
682
684
685 /*
686 * We have finished a batch, but we are doing left/full join,
687 * so any null-keyed outer tuples have to be emitted before we
688 * continue to the next batch.
689 *
690 * (We could delay this till the end of the join, but there
691 * seems little percentage in that.)
692 *
693 * We have to use tuplestore_gettupleslot_force because
694 * hj_OuterTupleSlot may not be able to store a MinimalTuple.
695 */
697 true, false,
698 node->hj_OuterTupleSlot))
699 {
700 /*
701 * Generate a fake join tuple with nulls for the inner
702 * tuple, and return it if it passes the non-join quals.
703 */
704 econtext->ecxt_outertuple = node->hj_OuterTupleSlot;
705 econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
706
707 if (otherqual == NULL || ExecQual(otherqual, econtext))
708 return ExecProject(node->js.ps.ps_ProjInfo);
709 else
710 InstrCountFiltered2(node, 1);
711
712 ResetExprContext(econtext);
713
714 /* allow this loop to be cancellable */
716 }
717
718 /* We don't need the tuplestore any more, so discard it. */
721
722 /* Fill inner tuples too if it's a full join, else advance. */
723 if (hashNode->null_tuple_store)
725 else
727 break;
728
730
731 /*
732 * We have finished a batch, but we are doing
733 * right/right-anti/full join, so any null-keyed inner tuples
734 * have to be emitted before we continue to the next batch.
735 *
736 * (We could delay this till the end of the join, but there
737 * seems little percentage in that.)
738 */
739 while (tuplestore_gettupleslot(hashNode->null_tuple_store,
740 true, false,
741 node->hj_HashTupleSlot))
742 {
743 /*
744 * Generate a fake join tuple with nulls for the outer
745 * tuple, and return it if it passes the non-join quals.
746 */
747 econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
748 econtext->ecxt_innertuple = node->hj_HashTupleSlot;
749
750 if (otherqual == NULL || ExecQual(otherqual, econtext))
751 return ExecProject(node->js.ps.ps_ProjInfo);
752 else
753 InstrCountFiltered2(node, 1);
754
755 ResetExprContext(econtext);
756
757 /* allow this loop to be cancellable */
759 }
760
761 /*
762 * Ideally we'd discard the tuplestore now, but we can't
763 * because we might need it for rescans.
764 */
765
766 /* Now we can advance to the next batch. */
768 break;
769
771
772 /*
773 * Try to advance to next batch. Done if there are no more.
774 */
775 if (parallel)
776 {
778 return NULL; /* end of parallel-aware join */
779 }
780 else
781 {
782 if (!ExecHashJoinNewBatch(node))
783 return NULL; /* end of parallel-oblivious join */
784 }
786 break;
787
788 default:
789 elog(ERROR, "unrecognized hashjoin state: %d",
790 (int) node->hj_JoinState);
791 }
792 }
793}
int BarrierPhase(Barrier *barrier)
Definition barrier.c:265
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition barrier.c:125
#define Assert(condition)
Definition c.h:945
#define pg_fallthrough
Definition c.h:152
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
Node * MultiExecProcNode(PlanState *node)
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
#define InstrCountFiltered1(node, delta)
Definition execnodes.h:1281
#define InstrCountFiltered2(node, delta)
Definition execnodes.h:1286
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition executor.h:486
#define ResetExprContext(econtext)
Definition executor.h:654
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition executor.h:522
static TupleTableSlot * ExecProcNode(PlanState *node)
Definition executor.h:315
#define PHJ_BUILD_FREE
Definition hashjoin.h:285
#define PHJ_BUILD_HASH_OUTER
Definition hashjoin.h:283
#define HJTUPLE_MINTUPLE(hjtup)
Definition hashjoin.h:102
#define PHJ_BUILD_RUN
Definition hashjoin.h:284
#define INVALID_SKEW_BUCKET_NO
Definition hashjoin.h:131
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition heaptuple.c:1478
static void HeapTupleHeaderSetMatch(MinimalTupleData *tup)
static bool HeapTupleHeaderHasMatch(const MinimalTupleData *tup)
bool ExecParallelScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition nodeHash.c:2079
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition nodeHash.c:2130
bool ExecParallelScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition nodeHash.c:2290
HashJoinTable ExecHashTableCreate(HashState *state)
Definition nodeHash.c:471
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition nodeHash.c:2581
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition nodeHash.c:2216
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition nodeHash.c:1986
bool ExecParallelPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition nodeHash.c:2151
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition nodeHash.c:2018
#define HJ_NEED_NEW_BATCH
#define HJ_SCAN_BUCKET
#define HJ_FILL_OUTER_TUPLE
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
static TupleTableSlot * ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
#define HJ_FILL_INNER(hjstate)
static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
#define HJ_FILL_OUTER_NULL_TUPLES
#define HJ_FILL_INNER_TUPLES
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr, HashJoinTable hashtable)
#define HJ_NEED_NEW_OUTER
#define HJ_FILL_OUTER(hjstate)
#define HJ_FILL_INNER_NULL_TUPLES
static void ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
#define HJ_BUILD_HASHTABLE
@ JOIN_RIGHT_SEMI
Definition nodes.h:319
@ JOIN_RIGHT_ANTI
Definition nodes.h:320
@ JOIN_ANTI
Definition nodes.h:318
TupleTableSlot * ecxt_innertuple
Definition execnodes.h:286
TupleTableSlot * ecxt_outertuple
Definition execnodes.h:288
HashJoinTuple hj_CurTuple
Definition execnodes.h:2227
int hj_CurSkewBucketNo
Definition execnodes.h:2226
TupleTableSlot * hj_NullOuterTupleSlot
Definition execnodes.h:2230
TupleTableSlot * hj_OuterTupleSlot
Definition execnodes.h:2228
bool hj_OuterNotEmpty
Definition execnodes.h:2237
TupleTableSlot * hj_NullInnerTupleSlot
Definition execnodes.h:2231
JoinState js
Definition execnodes.h:2220
TupleTableSlot * hj_FirstOuterTupleSlot
Definition execnodes.h:2233
bool hj_MatchedOuter
Definition execnodes.h:2236
uint32 hj_CurHashValue
Definition execnodes.h:2224
TupleTableSlot * hj_HashTupleSlot
Definition execnodes.h:2229
BufFile ** outerBatchFile
Definition hashjoin.h:360
JoinType jointype
Definition execnodes.h:2116
PlanState ps
Definition execnodes.h:2115
ExprState * joinqual
Definition execnodes.h:2119
bool single_match
Definition execnodes.h:2117
ExprState * qual
Definition execnodes.h:1198
ExprContext * ps_ExprContext
Definition execnodes.h:1216
ProjectionInfo * ps_ProjInfo
Definition execnodes.h:1217
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
bool tuplestore_gettupleslot_force(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
#define TupIsNull(slot)
Definition tuptable.h:325

References Assert, BarrierArriveAndWait(), BarrierPhase(), ParallelHashJoinState::build_barrier, castNode, CHECK_FOR_INTERRUPTS, HashJoinTableData::curbatch, ExprContext::ecxt_innertuple, ExprContext::ecxt_outertuple, elog, ERROR, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashGetSkewBucket(), ExecHashJoinNewBatch(), ExecHashJoinOuterGetTuple(), ExecHashJoinSaveTuple(), ExecHashTableCreate(), ExecParallelHashJoinNewBatch(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), ExecParallelPrepHashTableForUnmatched(), ExecParallelScanHashBucket(), ExecParallelScanHashTableForUnmatched(), ExecPrepHashTableForUnmatched(), ExecProcNode(), ExecProject(), ExecQual(), ExecScanHashBucket(), ExecScanHashTableForUnmatched(), fb(), heap_free_minimal_tuple(), HeapTupleHeaderHasMatch(), HeapTupleHeaderSetMatch(), HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HJ_FILL_INNER_NULL_TUPLES, HJ_FILL_INNER_TUPLES, HJ_FILL_OUTER, HJ_FILL_OUTER_NULL_TUPLES, HJ_FILL_OUTER_TUPLE, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_BATCH, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullInnerTupleSlot, HashJoinState::hj_NullOuterTupleSlot, HashJoinState::hj_NullOuterTupleStore, HashJoinState::hj_OuterNotEmpty, HashJoinState::hj_OuterTupleSlot, HJ_SCAN_BUCKET, HJTUPLE_MINTUPLE, innerPlanState, InstrCountFiltered1, InstrCountFiltered2, INVALID_SKEW_BUCKET_NO, JOIN_ANTI, JOIN_RIGHT_ANTI, JOIN_RIGHT_SEMI, JoinState::joinqual, JoinState::jointype, HashJoinState::js, MultiExecProcNode(), HashJoinTableData::nbatch, HashJoinTableData::nbatch_outstart, HashJoinTableData::outerBatchFile, outerPlanState, pg_fallthrough, PHJ_BUILD_FREE, PHJ_BUILD_HASH_OUTER, PHJ_BUILD_RUN, JoinState::ps, PlanState::ps_ExprContext, PlanState::ps_ProjInfo, 
PlanState::qual, ResetExprContext, JoinState::single_match, HashJoinTableData::totalTuples, TupIsNull, tuplestore_end(), tuplestore_gettupleslot(), and tuplestore_gettupleslot_force().

Referenced by ExecHashJoin(), and ExecParallelHashJoin().

◆ ExecHashJoinInitializeDSM()

void ExecHashJoinInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1850 of file nodeHashjoin.c.

1851{
1852 int plan_node_id = state->js.ps.plan->plan_node_id;
1854 ParallelHashJoinState *pstate;
1855
1856 /*
1857 * Disable shared hash table mode if we failed to create a real DSM
1858 * segment, because that means that we don't have a DSA area to work with.
1859 */
1860 if (pcxt->seg == NULL)
1861 return;
1862
1864
1865 /*
1866 * Set up the state needed to coordinate access to the shared hash
1867 * table(s), using the plan node ID as the toc key.
1868 */
1869 pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelHashJoinState));
1870 shm_toc_insert(pcxt->toc, plan_node_id, pstate);
1871
1872 /*
1873 * Set up the shared hash join state with no batches initially.
1874 * ExecHashTableCreate() will prepare at least one later and set nbatch
1875 * and space_allowed.
1876 */
1877 pstate->nbatch = 0;
1878 pstate->space_allowed = 0;
1879 pstate->batches = InvalidDsaPointer;
1881 pstate->nbuckets = 0;
1882 pstate->growth = PHJ_GROWTH_OK;
1884 pg_atomic_init_u32(&pstate->distributor, 0);
1885 pstate->nparticipants = pcxt->nworkers + 1;
1886 pstate->total_tuples = 0;
1887 LWLockInitialize(&pstate->lock,
1889 BarrierInit(&pstate->build_barrier, 0);
1890 BarrierInit(&pstate->grow_batches_barrier, 0);
1891 BarrierInit(&pstate->grow_buckets_barrier, 0);
1892
1893 /* Set up the space we'll use for shared temporary files. */
1894 SharedFileSetInit(&pstate->fileset, pcxt->seg);
1895
1896 /* Initialize the shared state in the hash node. */
1898 hashNode->parallel_state = pstate;
1899}
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition atomics.h:219
void BarrierInit(Barrier *barrier, int participants)
Definition barrier.c:100
#define InvalidDsaPointer
Definition dsa.h:78
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
@ PHJ_GROWTH_OK
Definition hashjoin.h:244
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:699
static TupleTableSlot * ExecParallelHashJoin(PlanState *pstate)
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
dsm_segment * seg
Definition parallel.h:44
shm_toc * toc
Definition parallel.h:46
Barrier grow_batches_barrier
Definition hashjoin.h:272
dsa_pointer old_batches
Definition hashjoin.h:260
dsa_pointer chunk_work_queue
Definition hashjoin.h:265
Barrier grow_buckets_barrier
Definition hashjoin.h:273
ParallelHashGrowth growth
Definition hashjoin.h:264
pg_atomic_uint32 distributor
Definition hashjoin.h:274
SharedFileSet fileset
Definition hashjoin.h:276
dsa_pointer batches
Definition hashjoin.h:259

References BarrierInit(), ParallelHashJoinState::batches, ParallelHashJoinState::build_barrier, ParallelHashJoinState::chunk_work_queue, ParallelHashJoinState::distributor, ExecParallelHashJoin(), ExecSetExecProcNode(), fb(), ParallelHashJoinState::fileset, ParallelHashJoinState::grow_batches_barrier, ParallelHashJoinState::grow_buckets_barrier, ParallelHashJoinState::growth, innerPlanState, InvalidDsaPointer, ParallelHashJoinState::lock, LWLockInitialize(), ParallelHashJoinState::nbatch, ParallelHashJoinState::nbuckets, ParallelHashJoinState::nparticipants, ParallelContext::nworkers, ParallelHashJoinState::old_batches, pg_atomic_init_u32(), PHJ_GROWTH_OK, ParallelContext::seg, SharedFileSetInit(), shm_toc_allocate(), shm_toc_insert(), ParallelHashJoinState::space_allowed, ParallelContext::toc, and ParallelHashJoinState::total_tuples.

Referenced by ExecParallelInitializeDSM().

◆ ExecHashJoinInitializeWorker()

void ExecHashJoinInitializeWorker ( HashJoinState * state,
ParallelWorkerContext * pwcxt 
)

Definition at line 1961 of file nodeHashjoin.c.

1963{
1964 HashState *hashNode;
1965 int plan_node_id = state->js.ps.plan->plan_node_id;
1966 ParallelHashJoinState *pstate =
1967 shm_toc_lookup(pwcxt->toc, plan_node_id, false);
1968
1969 /* Attach to the space for shared temporary files. */
1970 SharedFileSetAttach(&pstate->fileset, pwcxt->seg);
1971
1972 /* Attach to the shared state in the hash node. */
1973 hashNode = (HashState *) innerPlanState(state);
1974 hashNode->parallel_state = pstate;
1975
1976 ExecSetExecProcNode(&state->js.ps, ExecParallelHashJoin);
1977}
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232

References ExecParallelHashJoin(), ExecSetExecProcNode(), fb(), ParallelHashJoinState::fileset, innerPlanState, SharedFileSetAttach(), and shm_toc_lookup().

Referenced by ExecParallelInitializeWorker().

◆ ExecHashJoinNewBatch()

static bool ExecHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1279 of file nodeHashjoin.c.

1280{
1281 HashJoinTable hashtable = hjstate->hj_HashTable;
1282 int nbatch;
1283 int curbatch;
1284 BufFile *innerFile;
1285 TupleTableSlot *slot;
1286 uint32 hashvalue;
1287
1288 nbatch = hashtable->nbatch;
1289 curbatch = hashtable->curbatch;
1290
1291 if (curbatch > 0)
1292 {
1293 /*
1294 * We no longer need the previous outer batch file; close it right
1295 * away to free disk space.
1296 */
1297 if (hashtable->outerBatchFile[curbatch])
1298 BufFileClose(hashtable->outerBatchFile[curbatch]);
1299 hashtable->outerBatchFile[curbatch] = NULL;
1300 }
1301 else /* we just finished the first batch */
1302 {
1303 /*
1304 * Reset some of the skew optimization state variables, since we no
1305 * longer need to consider skew tuples after the first batch. The
1306 * memory context reset we are about to do will release the skew
1307 * hashtable itself.
1308 */
1309 hashtable->skewEnabled = false;
1310 hashtable->skewBucket = NULL;
1311 hashtable->skewBucketNums = NULL;
1312 hashtable->nSkewBuckets = 0;
1313 hashtable->spaceUsedSkew = 0;
1314 }
1315
1316 /*
1317 * We can always skip over any batches that are completely empty on both
1318 * sides. We can sometimes skip over batches that are empty on only one
1319 * side, but there are exceptions:
1320 *
1321 * 1. In a left/full outer join, we have to process outer batches even if
1322 * the inner batch is empty. Similarly, in a right/right-anti/full outer
1323 * join, we have to process inner batches even if the outer batch is
1324 * empty.
1325 *
1326 * 2. If we have increased nbatch since the initial estimate, we have to
1327 * scan inner batches since they might contain tuples that need to be
1328 * reassigned to later inner batches.
1329 *
1330 * 3. Similarly, if we have increased nbatch since starting the outer
1331 * scan, we have to rescan outer batches in case they contain tuples that
1332 * need to be reassigned.
1333 */
1334 curbatch++;
1335 while (curbatch < nbatch &&
1336 (hashtable->outerBatchFile[curbatch] == NULL ||
1337 hashtable->innerBatchFile[curbatch] == NULL))
1338 {
1339 if (hashtable->outerBatchFile[curbatch] &&
1340 HJ_FILL_OUTER(hjstate))
1341 break; /* must process due to rule 1 */
1342 if (hashtable->innerBatchFile[curbatch] &&
1343 HJ_FILL_INNER(hjstate))
1344 break; /* must process due to rule 1 */
1345 if (hashtable->innerBatchFile[curbatch] &&
1346 nbatch != hashtable->nbatch_original)
1347 break; /* must process due to rule 2 */
1348 if (hashtable->outerBatchFile[curbatch] &&
1349 nbatch != hashtable->nbatch_outstart)
1350 break; /* must process due to rule 3 */
1351 /* We can ignore this batch. */
1352 /* Release associated temp files right away. */
1353 if (hashtable->innerBatchFile[curbatch])
1354 BufFileClose(hashtable->innerBatchFile[curbatch]);
1355 hashtable->innerBatchFile[curbatch] = NULL;
1356 if (hashtable->outerBatchFile[curbatch])
1357 BufFileClose(hashtable->outerBatchFile[curbatch]);
1358 hashtable->outerBatchFile[curbatch] = NULL;
1359 curbatch++;
1360 }
1361
1362 if (curbatch >= nbatch)
1363 return false; /* no more batches */
1364
1365 hashtable->curbatch = curbatch;
1366
1367 /*
1368 * Reload the hash table with the new inner batch (which could be empty)
1369 */
1370 ExecHashTableReset(hashtable);
1371
1372 innerFile = hashtable->innerBatchFile[curbatch];
1373
1374 if (innerFile != NULL)
1375 {
1376 if (BufFileSeek(innerFile, 0, 0, SEEK_SET))
1377 ereport(ERROR,
1378 (errcode_for_file_access(),
1379 errmsg("could not rewind hash-join temporary file")));
1380
1381 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
1382 innerFile,
1383 &hashvalue,
1384 hjstate->hj_HashTupleSlot)))
1385 {
1386 /*
1387 * NOTE: some tuples may be sent to future batches. Also, it is
1388 * possible for hashtable->nbatch to be increased here!
1389 */
1390 ExecHashTableInsert(hashtable, slot, hashvalue);
1391 }
1392
1393 /*
1394 * after we build the hash table, the inner batch file is no longer
1395 * needed
1396 */
1397 BufFileClose(innerFile);
1398 hashtable->innerBatchFile[curbatch] = NULL;
1399 }
1400
1401 /*
1402 * Rewind outer batch file (if present), so that we can start reading it.
1403 */
1404 if (hashtable->outerBatchFile[curbatch] != NULL)
1405 {
1406 if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0, SEEK_SET))
1407 ereport(ERROR,
1408 (errcode_for_file_access(),
1409 errmsg("could not rewind hash-join temporary file")));
1410 }
1411
1412 return true;
1413}
int BufFileSeek(BufFile *file, int fileno, pgoff_t offset, int whence)
Definition buffile.c:741
void BufFileClose(BufFile *file)
Definition buffile.c:413
int errcode_for_file_access(void)
Definition elog.c:897
#define ereport(elevel,...)
Definition elog.h:150
void ExecHashTableReset(HashJoinTable hashtable)
Definition nodeHash.c:2353
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition nodeHash.c:1774
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
static char * errmsg
BufFile ** innerBatchFile
Definition hashjoin.h:359
HashSkewBucket ** skewBucket
Definition hashjoin.h:328

References BufFileClose(), BufFileSeek(), HashJoinTableData::curbatch, ereport, errcode_for_file_access(), errmsg, ERROR, ExecHashJoinGetSavedTuple(), ExecHashTableInsert(), ExecHashTableReset(), fb(), HJ_FILL_INNER, HJ_FILL_OUTER, HashJoinTableData::innerBatchFile, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, HashJoinTableData::nSkewBuckets, HashJoinTableData::outerBatchFile, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, and HashJoinTableData::spaceUsedSkew.

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinOuterGetTuple()

static TupleTableSlot * ExecHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 1110 of file nodeHashjoin.c.

1113{
1114 HashJoinTable hashtable = hjstate->hj_HashTable;
1115 int curbatch = hashtable->curbatch;
1116 TupleTableSlot *slot;
1117
1118 if (curbatch == 0) /* if it is the first pass */
1119 {
1120 /*
1121 * Check to see if first outer tuple was already fetched by
1122 * ExecHashJoin() and not used yet.
1123 */
1124 slot = hjstate->hj_FirstOuterTupleSlot;
1125 if (!TupIsNull(slot))
1126 hjstate->hj_FirstOuterTupleSlot = NULL;
1127 else
1128 slot = ExecProcNode(outerNode);
1129
1130 while (!TupIsNull(slot))
1131 {
1132 bool isnull;
1133
1134 /*
1135 * We have to compute the tuple's hash value.
1136 */
1137 ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1138
1139 econtext->ecxt_outertuple = slot;
1140
1141 ResetExprContext(econtext);
1142
1143 *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1144 econtext,
1145 &isnull));
1146
1147 if (!isnull)
1148 {
1149 /* normal case with a non-null join key */
1150 /* remember outer relation is not empty for possible rescan */
1151 hjstate->hj_OuterNotEmpty = true;
1152
1153 return slot;
1154 }
1155 else if (hjstate->hj_KeepNullTuples)
1156 {
1157 /* null join key, but we must save tuple to be emitted later */
1158 if (hjstate->hj_NullOuterTupleStore == NULL)
1159 hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable);
1160 tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot);
1161 }
1162
1163 /*
1164 * That tuple couldn't match because of a NULL, so discard it and
1165 * continue with the next one.
1166 */
1167 slot = ExecProcNode(outerNode);
1168 }
1169 }
1170 else if (curbatch < hashtable->nbatch)
1171 {
1172 BufFile *file = hashtable->outerBatchFile[curbatch];
1173
1174 /*
1175 * In outer-join cases, we could get here even though the batch file
1176 * is empty.
1177 */
1178 if (file == NULL)
1179 return NULL;
1180
1181 slot = ExecHashJoinGetSavedTuple(hjstate,
1182 file,
1183 hashvalue,
1184 hjstate->hj_OuterTupleSlot);
1185 if (!TupIsNull(slot))
1186 return slot;
1187 }
1188
1189 /* End of this batch */
1190 return NULL;
1191}
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition executor.h:439
Tuplestorestate * ExecHashBuildNullTupleStore(HashJoinTable hashtable)
Definition nodeHash.c:2799
static uint32 DatumGetUInt32(Datum X)
Definition postgres.h:222
void tuplestore_puttupleslot(Tuplestorestate *state, TupleTableSlot *slot)
Definition tuplestore.c:743

References HashJoinTableData::curbatch, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecEvalExprSwitchContext(), ExecHashBuildNullTupleStore(), ExecHashJoinGetSavedTuple(), ExecProcNode(), fb(), HashJoinTableData::outerBatchFile, ResetExprContext, TupIsNull, and tuplestore_puttupleslot().

Referenced by ExecHashJoinImpl().

◆ ExecHashJoinReInitializeDSM()

void ExecHashJoinReInitializeDSM ( HashJoinState * state,
ParallelContext * pcxt 
)

Definition at line 1908 of file nodeHashjoin.c.

1909{
1910 int plan_node_id = state->js.ps.plan->plan_node_id;
1911 ParallelHashJoinState *pstate;
1912 HashState *hashNode;
1913
1914 /* Nothing to do if we failed to create a DSM segment. */
1915 if (pcxt->seg == NULL)
1916 return;
1917
1918 pstate = shm_toc_lookup(pcxt->toc, plan_node_id, false);
1919
1920 /*
1921 * It would be possible to reuse the shared hash table in single-batch
1922 * cases by resetting and then fast-forwarding build_barrier to
1923 * PHJ_BUILD_FREE and batch 0's batch_barrier to PHJ_BATCH_PROBE, but
1924 * currently shared hash tables are already freed by now (by the last
1925 * participant to detach from the batch). We could consider keeping it
1926 * around for single-batch joins. We'd also need to adjust
1927 * finalize_plan() so that it doesn't record a dummy dependency for
1928 * Parallel Hash nodes, preventing the rescan optimization. For now we
1929 * don't try.
1930 */
1931
1932 /* Detach, freeing any remaining shared memory. */
1933 if (state->hj_HashTable != NULL)
1934 {
1935 ExecHashTableDetachBatch(state->hj_HashTable);
1936 ExecHashTableDetach(state->hj_HashTable);
1937 }
1938
1939 /* Clear any shared batch files. */
1940 SharedFileSetDeleteAll(&pstate->fileset);
1941
1942 /* We'd better clear our local null-key tuplestores, too. */
1943 if (state->hj_NullOuterTupleStore)
1944 {
1945 tuplestore_end(state->hj_NullOuterTupleStore);
1946 state->hj_NullOuterTupleStore = NULL;
1947 }
1948 hashNode = (HashState *) innerPlanState(state);
1949 if (hashNode->null_tuple_store)
1950 {
1951 tuplestore_end(hashNode->null_tuple_store);
1952 hashNode->null_tuple_store = NULL;
1953 }
1954
1955
1956 /* Reset build_barrier to PHJ_BUILD_ELECT so we can go around again. */
1957 BarrierInit(&pstate->build_barrier, 0);
1958}
void ExecHashTableDetachBatch(HashJoinTable hashtable)
Definition nodeHash.c:3367
void ExecHashTableDetach(HashJoinTable hashtable)
Definition nodeHash.c:3459
void SharedFileSetDeleteAll(SharedFileSet *fileset)

References BarrierInit(), ParallelHashJoinState::build_barrier, ExecHashTableDetach(), ExecHashTableDetachBatch(), fb(), ParallelHashJoinState::fileset, innerPlanState, ParallelContext::seg, SharedFileSetDeleteAll(), shm_toc_lookup(), ParallelContext::toc, and tuplestore_end().

Referenced by ExecParallelReInitializeDSM().

◆ ExecHashJoinSaveTuple()

void ExecHashJoinSaveTuple ( MinimalTuple  tuple,
uint32  hashvalue,
BufFile **  fileptr,
HashJoinTable  hashtable 
)

Definition at line 1571 of file nodeHashjoin.c.

1573{
1574 BufFile *file = *fileptr;
1575
1576 /*
1577 * The batch file is lazily created. If this is the first tuple written to
1578 * this batch, the batch file is created and its buffer is allocated in
1579 * the spillCxt context, NOT in the batchCxt.
1580 *
1581 * During the build phase, buffered files are created for inner batches.
1582 * Each batch's buffered file is closed (and its buffer freed) after the
1583 * batch is loaded into memory during the outer side scan. Therefore, it
1584 * is necessary to allocate the batch file buffer in a memory context
1585 * which outlives the batch itself.
1586 *
1587 * Also, we use spillCxt instead of hashCxt for a better accounting of the
1588 * spilling memory consumption.
1589 */
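1590 if (file == NULL)
1591 {
1592 MemoryContext oldctx = MemoryContextSwitchTo(hashtable->spillCxt);
1593
1594 file = BufFileCreateTemp(false);
1595 *fileptr = file;
1596
1597 MemoryContextSwitchTo(oldctx);
1598 }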
1599
1600 BufFileWrite(file, &hashvalue, sizeof(uint32));
1601 BufFileWrite(file, tuple, tuple->t_len);
1602}
BufFile * BufFileCreateTemp(bool interXact)
Definition buffile.c:194
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition buffile.c:677
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
MemoryContext spillCxt
Definition hashjoin.h:370

References BufFileCreateTemp(), BufFileWrite(), fb(), MemoryContextSwitchTo(), HashJoinTableData::spillCxt, and MinimalTupleData::t_len.

Referenced by ExecHashIncreaseNumBatches(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), and ExecHashTableInsert().

◆ ExecInitHashJoin()

HashJoinState * ExecInitHashJoin ( HashJoin * node,
EState * estate,
int  eflags 
)

Definition at line 834 of file nodeHashjoin.c.

835{
838 Hash *hashNode;
840 innerDesc;
841 const TupleTableSlotOps *ops;
842
843 /* check for unsupported flags */
845
846 /*
847 * create state structure
848 */
850 hjstate->js.ps.plan = (Plan *) node;
851 hjstate->js.ps.state = estate;
852
853 /*
854 * See ExecHashJoinInitializeDSM() and ExecHashJoinInitializeWorker()
855 * where this function may be replaced with a parallel version, if we
856 * managed to launch a parallel query.
857 */
858 hjstate->js.ps.ExecProcNode = ExecHashJoin;
859 hjstate->js.jointype = node->join.jointype;
860
861 /*
862 * Miscellaneous initialization
863 *
864 * create expression context for node
865 */
866 ExecAssignExprContext(estate, &hjstate->js.ps);
867
868 /*
869 * initialize child nodes
870 *
871 * Note: we could suppress the REWIND flag for the inner input, which
872 * would amount to betting that the hash will be a single batch. Not
873 * clear if this would be a win or not.
874 */
875 outerNode = outerPlan(node);
876 hashNode = (Hash *) innerPlan(node);
877
878 outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
880 innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
882
883 /*
884 * Initialize result slot, type and projection.
885 */
888
889 /*
890 * tuple table initialization
891 */
893 hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate, outerDesc,
894 ops);
895
896 /*
897 * detect whether we need only consider the first matching inner tuple
898 */
899 hjstate->js.single_match = (node->join.inner_unique ||
900 node->join.jointype == JOIN_SEMI);
901
902 /* set up null tuples for outer joins, if needed */
903 switch (node->join.jointype)
904 {
905 case JOIN_INNER:
906 case JOIN_SEMI:
907 case JOIN_RIGHT_SEMI:
908 break;
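909 case JOIN_LEFT:
910 case JOIN_ANTI:
911 hjstate->hj_NullInnerTupleSlot =
912 ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
913 break;
914 case JOIN_RIGHT:
915 case JOIN_RIGHT_ANTI:
916 hjstate->hj_NullOuterTupleSlot =
917 ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
918 break;
919 case JOIN_FULL:
920 hjstate->hj_NullOuterTupleSlot =
921 ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
922 hjstate->hj_NullInnerTupleSlot =
923 ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
924 break;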
925 default:
926 elog(ERROR, "unrecognized join type: %d",
927 (int) node->join.jointype);
928 }
929
930 /*
931 * now for some voodoo. our temporary tuple slot is actually the result
932 * tuple slot of the Hash node (which is our inner plan). we can do this
933 * because Hash nodes don't return tuples via ExecProcNode() -- instead
934 * the hash join node uses ExecScanHashBucket() to get at the contents of
935 * the hash table. -cim 6/9/91
936 */
937 {
939 Hash *hash = (Hash *) hashstate->ps.plan;
940 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
943 bool *hash_strict;
944 ListCell *lc;
945 int nkeys;
946
947
948 hjstate->hj_HashTupleSlot = slot;
949
950 /*
951 * Build ExprStates to obtain hash values for either side of the join.
952 * Note: must build the ExprStates before ExecHashTableCreate() so we
953 * properly attribute any SubPlans that exist in the hash expressions
954 * to the correct PlanState.
955 */
956 nkeys = list_length(node->hashoperators);
957
960 hash_strict = palloc_array(bool, nkeys);
961
962 /*
963 * Determine the hash function for each side of the join for the given
964 * join operator, and detect whether the join operator is strict.
965 */
966 foreach(lc, node->hashoperators)
967 {
970
974 elog(ERROR,
975 "could not find hash function for hash operator %u",
976 hashop);
978 }
979
980 /*
981 * Build an ExprState to generate the hash value for the expressions
982 * on the outer side of the join.
983 */
984 hjstate->hj_OuterHash =
985 ExecBuildHash32Expr(hjstate->js.ps.ps_ResultTupleDesc,
986 hjstate->js.ps.resultops,
988 node->hashcollations,
989 node->hashkeys,
991 &hjstate->js.ps,
992 0);
993
994 /* As above, but for the inner side of the join */
995 hashstate->hash_expr =
996 ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
997 hashstate->ps.resultops,
999 node->hashcollations,
1000 hash->hashkeys,
1002 &hashstate->ps,
1003 0);
1004
1005 /* Remember whether we need to save tuples with null join keys */
1006 hjstate->hj_KeepNullTuples = HJ_FILL_OUTER(hjstate);
1007 hashstate->keep_null_tuples = HJ_FILL_INNER(hjstate);
1008
1009 /*
1010 * Set up the skew table hash function while we have a record of the
1011 * first key's hash function Oid.
1012 */
1013 if (OidIsValid(hash->skewTable))
1014 {
1015 hashstate->skew_hashfunction = palloc0_object(FmgrInfo);
1016 hashstate->skew_collation = linitial_oid(node->hashcollations);
1017 fmgr_info(outer_hashfuncid[0], hashstate->skew_hashfunction);
1018 }
1019
1020 /* no need to keep these */
1024 }
1025
1026 /*
1027 * initialize child expressions
1028 */
1029 hjstate->js.ps.qual =
1030 ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
1031 hjstate->js.joinqual =
1033 hjstate->hashclauses =
1035
1036 /*
1037 * initialize hash-specific info
1038 */
1039 hjstate->hj_HashTable = NULL;
1040 hjstate->hj_NullOuterTupleStore = NULL;
1041 hjstate->hj_FirstOuterTupleSlot = NULL;
1042
1043 hjstate->hj_CurHashValue = 0;
1044 hjstate->hj_CurBucketNo = 0;
1045 hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
1046 hjstate->hj_CurTuple = NULL;
1047
1048 hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
1049 hjstate->hj_MatchedOuter = false;
1050 hjstate->hj_OuterNotEmpty = false;
1051
1052 return hjstate;
1053}
#define OidIsValid(objectId)
Definition c.h:860
ExprState * ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, const Oid *hashfunc_oids, const List *collations, const List *hash_exprs, const bool *opstrict, PlanState *parent, uint32 init_value)
Definition execExpr.c:4329
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:250
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
const TupleTableSlotOps TTSOpsVirtual
Definition execTuples.c:84
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType, const TupleTableSlotOps *tts_ops)
TupleDesc ExecGetResultType(PlanState *planstate)
Definition execUtils.c:500
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition execUtils.c:490
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition execUtils.c:588
const TupleTableSlotOps * ExecGetResultSlotOps(PlanState *planstate, bool *isfixed)
Definition execUtils.c:509
#define EXEC_FLAG_BACKWARD
Definition executor.h:70
#define EXEC_FLAG_MARK
Definition executor.h:71
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition fmgr.c:129
int i
Definition isn.c:77
bool op_strict(Oid opno)
Definition lsyscache.c:1697
bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno)
Definition lsyscache.c:577
void pfree(void *pointer)
Definition mcxt.c:1616
static TupleTableSlot * ExecHashJoin(PlanState *pstate)
#define makeNode(_type_)
Definition nodes.h:161
@ JOIN_SEMI
Definition nodes.h:317
@ JOIN_FULL
Definition nodes.h:305
@ JOIN_INNER
Definition nodes.h:303
@ JOIN_RIGHT
Definition nodes.h:306
@ JOIN_LEFT
Definition nodes.h:304
static int list_length(const List *l)
Definition pg_list.h:152
#define foreach_current_index(var_or_cell)
Definition pg_list.h:403
#define linitial_oid(l)
Definition pg_list.h:180
#define lfirst_oid(lc)
Definition pg_list.h:174
#define innerPlan(node)
Definition plannodes.h:264
#define outerPlan(node)
Definition plannodes.h:265
unsigned int Oid
static unsigned hash(unsigned *uv, int n)
Definition rege_dfa.c:715
List * hashcollations
Definition plannodes.h:1065
List * hashclauses
Definition plannodes.h:1063
List * hashoperators
Definition plannodes.h:1064
Join join
Definition plannodes.h:1062
List * hashkeys
Definition plannodes.h:1071
Plan plan
Definition plannodes.h:1419
List * joinqual
Definition plannodes.h:988
JoinType jointype
Definition plannodes.h:985
bool inner_unique
Definition plannodes.h:986

References Assert, elog, ERROR, EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecAssignProjectionInfo(), ExecBuildHash32Expr(), ExecGetResultSlotOps(), ExecGetResultType(), ExecHashJoin(), ExecInitExtraTupleSlot(), ExecInitNode(), ExecInitNullTupleSlot(), ExecInitQual(), ExecInitResultTupleSlotTL(), fb(), fmgr_info(), foreach_current_index, get_op_hash_functions(), hash(), HashJoin::hashclauses, HashJoin::hashcollations, HashJoin::hashkeys, HashJoin::hashoperators, HJ_BUILD_HASHTABLE, HJ_FILL_INNER, HJ_FILL_OUTER, i, Join::inner_unique, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, HashJoin::join, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_RIGHT, JOIN_RIGHT_ANTI, JOIN_RIGHT_SEMI, JOIN_SEMI, Join::joinqual, Join::jointype, lfirst_oid, linitial_oid, list_length(), makeNode, OidIsValid, op_strict(), outerPlan, outerPlanState, palloc0_object, palloc_array, pfree(), Hash::plan, and TTSOpsVirtual.

Referenced by ExecInitNode().

◆ ExecParallelHashJoin()

static TupleTableSlot * ExecParallelHashJoin ( PlanState * pstate)
static

Definition at line 818 of file nodeHashjoin.c.

819{
820 /*
821 * On sufficiently smart compilers this should be inlined with the
822 * parallel-oblivious branches removed.
823 */
824 return ExecHashJoinImpl(pstate, true);
825}

References ExecHashJoinImpl().

Referenced by ExecHashJoinInitializeDSM(), and ExecHashJoinInitializeWorker().

◆ ExecParallelHashJoinNewBatch()

static bool ExecParallelHashJoinNewBatch ( HashJoinState * hjstate)
static

Definition at line 1420 of file nodeHashjoin.c.

1421{
1422 HashJoinTable hashtable = hjstate->hj_HashTable;
1423 int start_batchno;
1424 int batchno;
1425
1426 /*
1427 * If we are a very slow worker, MultiExecParallelHash could have observed
1428 * build_barrier phase PHJ_BUILD_FREE and not bothered to set up batch
1429 * accessors. In that case we must be done.
1430 */
1431 if (hashtable->batches == NULL)
1432 return false;
1433
1434 /*
1435 * If we were already attached to a batch, remember not to bother checking
1436 * it again, and detach from it (possibly freeing the hash table if we are
1437 * last to detach).
1438 */
1439 if (hashtable->curbatch >= 0)
1440 {
1441 hashtable->batches[hashtable->curbatch].done = true;
1442 ExecHashTableDetachBatch(hashtable);
1443 }
1444
1445 /*
1446 * Search for a batch that isn't done. We use an atomic counter to start
1447 * our search at a different batch in every participant when there are
1448 * more batches than participants.
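1449 */
1450 batchno = start_batchno =
1451 pg_atomic_fetch_add_u32(&hashtable->parallel_state->distributor, 1) %
1452 hashtable->nbatch;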
1453 do
1454 {
1455 uint32 hashvalue;
1456 MinimalTuple tuple;
1457 TupleTableSlot *slot;
1458
1459 if (!hashtable->batches[batchno].done)
1460 {
1461 SharedTuplestoreAccessor *inner_tuples;
1462 Barrier *batch_barrier =
1463 &hashtable->batches[batchno].shared->batch_barrier;
1464
1465 switch (BarrierAttach(batch_barrier))
1466 {
1467 case PHJ_BATCH_ELECT:
1468
1469 /* One backend allocates the hash table. */
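1470 if (BarrierArriveAndWait(batch_barrier,
1471 WAIT_EVENT_HASH_BATCH_ELECT))
1472 ExecParallelHashTableAlloc(hashtable, batchno);
1473 pg_fallthrough;
1474
1475 case PHJ_BATCH_ALLOCATE:
1476 /* Wait for allocation to complete. */
1477 BarrierArriveAndWait(batch_barrier,
1478 WAIT_EVENT_HASH_BATCH_ALLOCATE);
1479 pg_fallthrough;
1480
1481 case PHJ_BATCH_LOAD:
1482 /* Start (or join in) loading tuples. */
1483 ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1484 inner_tuples = hashtable->batches[batchno].inner_tuples;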
1485 sts_begin_parallel_scan(inner_tuples);
1486 while ((tuple = sts_parallel_scan_next(inner_tuples,
1487 &hashvalue)))
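1488 {
1489 ExecForceStoreMinimalTuple(tuple,
1490 hjstate->hj_HashTupleSlot,
1491 false);
1492 slot = hjstate->hj_HashTupleSlot;
1493 ExecParallelHashTableInsertCurrentBatch(hashtable, slot,
1494 hashvalue);
1495 }
1496 sts_end_parallel_scan(inner_tuples);
1497 BarrierArriveAndWait(batch_barrier,
1498 WAIT_EVENT_HASH_BATCH_LOAD);
1499 pg_fallthrough;
1500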
1501 case PHJ_BATCH_PROBE:
1502
1503 /*
1504 * This batch is ready to probe. Return control to
1505 * caller. We stay attached to batch_barrier so that the
1506 * hash table stays alive until everyone's finished
1507 * probing it, but no participant is allowed to wait at
1508 * this barrier again (or else a deadlock could occur).
1509 * All attached participants must eventually detach from
1510 * the barrier and one worker must advance the phase so
1511 * that the final phase is reached.
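1512 */
1513 ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1514 sts_begin_parallel_scan(hashtable->batches[batchno].outer_tuples);
1515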
1516 return true;
1517 case PHJ_BATCH_SCAN:
1518
1519 /*
1520 * In principle, we could help scan for unmatched tuples,
1521 * since that phase is already underway (the thing we
1522 * can't do under current deadlock-avoidance rules is wait
1523 * for others to arrive at PHJ_BATCH_SCAN, because
1524 * PHJ_BATCH_PROBE emits tuples, but in this case we just
1525 * got here without waiting). That is not yet done. For
1526 * now, we just detach and go around again. We have to
1527 * use ExecHashTableDetachBatch() because there's a small
1528 * chance we'll be the last to detach, and then we're
1529 * responsible for freeing memory.
1530 */
1531 ExecParallelHashTableSetCurrentBatch(hashtable, batchno);
1532 hashtable->batches[batchno].done = true;
1533 ExecHashTableDetachBatch(hashtable);
1534 break;
1535
1536 case PHJ_BATCH_FREE:
1537
1538 /*
1539 * Already done. Detach and go around again (if any
1540 * remain).
1541 */
1542 BarrierDetach(batch_barrier);
1543 hashtable->batches[batchno].done = true;
1544 hashtable->curbatch = -1;
1545 break;
1546
1547 default:
1548 elog(ERROR, "unexpected batch phase %d",
1549 BarrierPhase(batch_barrier));
1550 }
1551 }
1552 batchno = (batchno + 1) % hashtable->nbatch;
1553 } while (batchno != start_batchno);
1554
1555 return false;
1556}
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition atomics.h:366
int BarrierAttach(Barrier *barrier)
Definition barrier.c:236
bool BarrierDetach(Barrier *barrier)
Definition barrier.c:256
#define PHJ_BATCH_SCAN
Definition hashjoin.h:292
#define PHJ_BATCH_PROBE
Definition hashjoin.h:291
#define PHJ_BATCH_LOAD
Definition hashjoin.h:290
#define PHJ_BATCH_ELECT
Definition hashjoin.h:288
#define PHJ_BATCH_ALLOCATE
Definition hashjoin.h:289
#define PHJ_BATCH_FREE
Definition hashjoin.h:293
void ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno)
Definition nodeHash.c:3557
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition nodeHash.c:3347
void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition nodeHash.c:1931
MinimalTuple sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data)
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
void sts_begin_parallel_scan(SharedTuplestoreAccessor *accessor)
ParallelHashJoinBatchAccessor * batches
Definition hashjoin.h:379
ParallelHashJoinState * parallel_state
Definition hashjoin.h:378
SharedTuplestoreAccessor * outer_tuples
Definition hashjoin.h:232
ParallelHashJoinBatch * shared
Definition hashjoin.h:220
SharedTuplestoreAccessor * inner_tuples
Definition hashjoin.h:231

References BarrierArriveAndWait(), BarrierAttach(), BarrierDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, HashJoinTableData::curbatch, ParallelHashJoinState::distributor, ParallelHashJoinBatchAccessor::done, elog, ERROR, ExecForceStoreMinimalTuple(), ExecHashTableDetachBatch(), ExecParallelHashTableAlloc(), ExecParallelHashTableInsertCurrentBatch(), ExecParallelHashTableSetCurrentBatch(), fb(), ParallelHashJoinBatchAccessor::inner_tuples, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, pg_atomic_fetch_add_u32(), pg_fallthrough, PHJ_BATCH_ALLOCATE, PHJ_BATCH_ELECT, PHJ_BATCH_FREE, PHJ_BATCH_LOAD, PHJ_BATCH_PROBE, PHJ_BATCH_SCAN, ParallelHashJoinBatchAccessor::shared, sts_begin_parallel_scan(), sts_end_parallel_scan(), and sts_parallel_scan_next().

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinOuterGetTuple()

static TupleTableSlot * ExecParallelHashJoinOuterGetTuple ( PlanState * outerNode,
HashJoinState * hjstate,
uint32 * hashvalue 
)
static

Definition at line 1197 of file nodeHashjoin.c.

1200{
1201 HashJoinTable hashtable = hjstate->hj_HashTable;
1202 int curbatch = hashtable->curbatch;
1203 TupleTableSlot *slot;
1204
1205 /*
1206 * In the Parallel Hash case we only run the outer plan directly for
1207 * single-batch hash joins. Otherwise we have to go to batch files, even
1208 * for batch 0.
1209 */
1210 if (curbatch == 0 && hashtable->nbatch == 1)
1211 {
1212 slot = ExecProcNode(outerNode);
1213
1214 while (!TupIsNull(slot))
1215 {
1216 bool isnull;
1217
1218 ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1219
1220 econtext->ecxt_outertuple = slot;
1221
1222 ResetExprContext(econtext);
1223
1224 *hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1225 econtext,
1226 &isnull));
1227
1228 if (!isnull)
1229 {
1230 /* normal case with a non-null join key */
1231 return slot;
1232 }
1233 else if (hjstate->hj_KeepNullTuples)
1234 {
1235 /* null join key, but we must save tuple to be emitted later */
1236 if (hjstate->hj_NullOuterTupleStore == NULL)
1237 hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable);
1238 tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot);
1239 }
1240
1241 /*
1242 * That tuple couldn't match because of a NULL, so discard it and
1243 * continue with the next one.
1244 */
1245 slot = ExecProcNode(outerNode);
1246 }
1247 }
1248 else if (curbatch < hashtable->nbatch)
1249 {
1250 MinimalTuple tuple;
1251
1252 tuple = sts_parallel_scan_next(hashtable->batches[curbatch].outer_tuples,
1253 hashvalue);
1254 if (tuple != NULL)
1255 {
1256 ExecForceStoreMinimalTuple(tuple,
1257 hjstate->hj_OuterTupleSlot,
1258 false);
1259 slot = hjstate->hj_OuterTupleSlot;
1260 return slot;
1261 }
1262 else
1263 ExecClearTuple(hjstate->hj_OuterTupleSlot);
1264 }
1265
1266 /* End of this batch */
1267 hashtable->batches[curbatch].outer_eof = true;
1268
1269 return NULL;
1270}

References HashJoinTableData::batches, HashJoinTableData::curbatch, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecClearTuple(), ExecEvalExprSwitchContext(), ExecForceStoreMinimalTuple(), ExecHashBuildNullTupleStore(), ExecProcNode(), fb(), HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_eof, ParallelHashJoinBatchAccessor::outer_tuples, ResetExprContext, sts_parallel_scan_next(), TupIsNull, and tuplestore_puttupleslot().

Referenced by ExecHashJoinImpl().

◆ ExecParallelHashJoinPartitionOuter()

static void ExecParallelHashJoinPartitionOuter ( HashJoinState * hjstate)
static

Definition at line 1782 of file nodeHashjoin.c.

1783{
1785 ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
1786 HashJoinTable hashtable = hjstate->hj_HashTable;
1787 TupleTableSlot *slot;
1788 int i;
1789
1790 Assert(hjstate->hj_FirstOuterTupleSlot == NULL);
1791
1792 /* Execute outer plan, writing all tuples to shared tuplestores. */
1793 for (;;)
1794 {
1795 bool isnull;
1796 uint32 hashvalue;
1797
1798 slot = ExecProcNode(outerState);
1799 if (TupIsNull(slot))
1800 break;
1801 econtext->ecxt_outertuple = slot;
1802
1803 ResetExprContext(econtext);
1804
1805 hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
1806 econtext,
1807 &isnull));
1808
1809 if (!isnull)
1810 {
1811 /* normal case with a non-null join key */
1812 int batchno;
1813 int bucketno;
1814 bool shouldFree;
1816
1817 ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno,
1818 &batchno);
1820 &hashvalue, mintup);
1821
1822 if (shouldFree)
1824 }
1825 else if (hjstate->hj_KeepNullTuples)
1826 {
1827 /* null join key, but we must save tuple to be emitted later */
1828 if (hjstate->hj_NullOuterTupleStore == NULL)
1829 hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable);
1830 tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot);
1831 }
1832 /* else we can just discard the tuple immediately */
1833
1835 }
1836
1837 /* Make sure all outer partitions are readable by any backend. */
1838 for (i = 0; i < hashtable->nbatch; ++i)
1839 sts_end_write(hashtable->batches[i].outer_tuples);
1840}
void sts_end_write(SharedTuplestoreAccessor *accessor)
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)

References Assert, HashJoinTableData::batches, CHECK_FOR_INTERRUPTS, DatumGetUInt32(), ExprContext::ecxt_outertuple, ExecEvalExprSwitchContext(), ExecFetchSlotMinimalTuple(), ExecHashBuildNullTupleStore(), ExecHashGetBucketAndBatch(), ExecProcNode(), fb(), heap_free_minimal_tuple(), i, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, outerPlanState, ResetExprContext, sts_end_write(), sts_puttuple(), TupIsNull, and tuplestore_puttupleslot().

Referenced by ExecHashJoinImpl().

◆ ExecReScanHashJoin()

void ExecReScanHashJoin ( HashJoinState * node)

Definition at line 1651 of file nodeHashjoin.c.

1652{
1655
1656 /*
1657 * We're always going to rescan the outer rel, so drop the associated
1658 * null-keys tuplestore; we'll rebuild it during the rescan. (Must do
1659 * this before ExecHashTableDestroy deletes hashCxt.)
1660 */
1661 if (node->hj_NullOuterTupleStore)
1662 {
1665 }
1666
1667 /*
1668 * In a multi-batch join, we currently have to do rescans the hard way,
1669 * primarily because batch temp files may have already been released. But
1670 * if it's a single-batch join, and there is no parameter change for the
1671 * inner subnode, then we can just re-use the existing hash table without
1672 * rebuilding it.
1673 */
1674 if (node->hj_HashTable != NULL)
1675 {
1677
1678 Assert(hashNode->hashtable == node->hj_HashTable);
1679
1680 if (node->hj_HashTable->nbatch == 1 &&
1681 innerPlan->chgParam == NULL)
1682 {
1683 /*
1684 * Okay to reuse the hash table; needn't rescan inner, either.
1685 *
1686 * However, if it's a right/right-anti/right-semi/full join, we'd
1687 * better reset the inner-tuple match flags contained in the
1688 * table.
1689 */
1690 if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
1692
1693 /*
1694 * Also, we need to reset our state about the emptiness of the
1695 * outer relation, so that the new scan of the outer will update
1696 * it correctly if it turns out to be empty this time. (There's no
1697 * harm in clearing it now because ExecHashJoin won't need the
1698 * info. In the other cases, where the hash table doesn't exist
1699 * or we are destroying it, we leave this state alone because
1700 * ExecHashJoin will need it the first time through.)
1701 */
1702 node->hj_OuterNotEmpty = false;
1703
1704 /*
1705 * Also, rewind inner null-key tuplestore so that we can return
1706 * those tuples again.
1707 */
1708 if (hashNode->null_tuple_store)
1709 tuplestore_rescan(hashNode->null_tuple_store);
1710
1711 /* ExecHashJoin can skip the BUILD_HASHTABLE step */
1713 }
1714 else
1715 {
1716 /* must destroy and rebuild hash table */
1717
1718 /* accumulate stats from old hash table, if wanted */
1719 /* (this should match ExecShutdownHash) */
1720 if (hashNode->ps.instrument && !hashNode->hinstrument)
1722 if (hashNode->hinstrument)
1724 hashNode->hashtable);
1725
1726 /* free inner null-key tuplestore before ExecHashTableDestroy */
1727 if (hashNode->null_tuple_store)
1728 {
1729 tuplestore_end(hashNode->null_tuple_store);
1730 hashNode->null_tuple_store = NULL;
1731 }
1732
1733 /* for safety, be sure to clear child plan node's pointer too */
1734 hashNode->hashtable = NULL;
1735
1737 node->hj_HashTable = NULL;
1739
1740 /*
1741 * if chgParam of subnode is not null then plan will be re-scanned
1742 * by first ExecProcNode.
1743 */
1744 if (innerPlan->chgParam == NULL)
1746 }
1747 }
1748
1749 /* Always reset intra-tuple state */
1750 node->hj_CurHashValue = 0;
1751 node->hj_CurBucketNo = 0;
1753 node->hj_CurTuple = NULL;
1754
1755 node->hj_MatchedOuter = false;
1757
1758 /*
1759 * if chgParam of subnode is not null then plan will be re-scanned by
1760 * first ExecProcNode.
1761 */
1762 if (outerPlan->chgParam == NULL)
1764}
void ExecReScan(PlanState *node)
Definition execAmi.c:78
void ExecHashAccumInstrumentation(HashInstrumentation *instrument, HashJoinTable hashtable)
Definition nodeHash.c:2935
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition nodeHash.c:2381
void tuplestore_rescan(Tuplestorestate *state)

References Assert, castNode, ExecHashAccumInstrumentation(), ExecHashTableDestroy(), ExecHashTableResetMatchFlags(), ExecReScan(), fb(), HJ_BUILD_HASHTABLE, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HJ_FILL_INNER, HashJoinState::hj_FirstOuterTupleSlot, HashJoinState::hj_HashTable, HashJoinState::hj_JoinState, HashJoinState::hj_MatchedOuter, HJ_NEED_NEW_OUTER, HashJoinState::hj_NullOuterTupleStore, HashJoinState::hj_OuterNotEmpty, innerPlan, innerPlanState, INVALID_SKEW_BUCKET_NO, JOIN_RIGHT_SEMI, JoinState::jointype, HashJoinState::js, HashJoinTableData::nbatch, outerPlan, outerPlanState, palloc0_object, tuplestore_end(), and tuplestore_rescan().

Referenced by ExecReScan().

◆ ExecShutdownHashJoin()

void ExecShutdownHashJoin ( HashJoinState * node)

Definition at line 1767 of file nodeHashjoin.c.

1768{
1769 if (node->hj_HashTable)
1770 {
1771 /*
1772 * Detach from shared state before DSM memory goes away. This makes
1773 * sure that we don't have any pointers into DSM memory by the time
1774 * ExecEndHashJoin runs.
1775 */
1778 }
1779}

References ExecHashTableDetach(), ExecHashTableDetachBatch(), and HashJoinState::hj_HashTable.

Referenced by ExecShutdownNode_walker().