PostgreSQL Source Code  git master
nodeHash.h File Reference
#include "access/parallel.h"
#include "nodes/execnodes.h"


Functions

HashState * ExecInitHash (Hash *node, EState *estate, int eflags)
 
Node * MultiExecHash (HashState *node)
 
void ExecEndHash (HashState *node)
 
void ExecReScanHash (HashState *node)
 
HashJoinTable ExecHashTableCreate (HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
 
void ExecParallelHashTableAlloc (HashJoinTable hashtable, int batchno)
 
void ExecHashTableDestroy (HashJoinTable hashtable)
 
void ExecHashTableDetach (HashJoinTable hashtable)
 
void ExecHashTableDetachBatch (HashJoinTable hashtable)
 
void ExecParallelHashTableSetCurrentBatch (HashJoinTable hashtable, int batchno)
 
void ExecHashTableInsert (HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
 
void ExecParallelHashTableInsert (HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
 
void ExecParallelHashTableInsertCurrentBatch (HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
 
bool ExecHashGetHashValue (HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
 
void ExecHashGetBucketAndBatch (HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
 
bool ExecScanHashBucket (HashJoinState *hjstate, ExprContext *econtext)
 
bool ExecParallelScanHashBucket (HashJoinState *hjstate, ExprContext *econtext)
 
void ExecPrepHashTableForUnmatched (HashJoinState *hjstate)
 
bool ExecScanHashTableForUnmatched (HashJoinState *hjstate, ExprContext *econtext)
 
void ExecHashTableReset (HashJoinTable hashtable)
 
void ExecHashTableResetMatchFlags (HashJoinTable hashtable)
 
void ExecChooseHashTableSize (double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, size_t *space_allowed, int *numbuckets, int *numbatches, int *num_skew_mcvs)
 
int ExecHashGetSkewBucket (HashJoinTable hashtable, uint32 hashvalue)
 
void ExecHashEstimate (HashState *node, ParallelContext *pcxt)
 
void ExecHashInitializeDSM (HashState *node, ParallelContext *pcxt)
 
void ExecHashInitializeWorker (HashState *node, ParallelWorkerContext *pwcxt)
 
void ExecHashRetrieveInstrumentation (HashState *node)
 
void ExecShutdownHash (HashState *node)
 
void ExecHashAccumInstrumentation (HashInstrumentation *instrument, HashJoinTable hashtable)
 

Function Documentation

◆ ExecChooseHashTableSize()

void ExecChooseHashTableSize ( double  ntuples,
int  tupwidth,
bool  useskew,
bool  try_combined_hash_mem,
int  parallel_workers,
size_t *  space_allowed,
int *  numbuckets,
int *  numbatches,
int *  num_skew_mcvs 
)

Definition at line 663 of file nodeHash.c.

670 {
671  int tupsize;
672  double inner_rel_bytes;
673  size_t hash_table_bytes;
674  size_t bucket_bytes;
675  size_t max_pointers;
676  int nbatch = 1;
677  int nbuckets;
678  double dbuckets;
679 
680  /* Force a plausible relation size if no info */
681  if (ntuples <= 0.0)
682  ntuples = 1000.0;
683 
684  /*
685  * Estimate tupsize based on footprint of tuple in hashtable... note this
686  * does not allow for any palloc overhead. The manipulations of spaceUsed
687  * don't count palloc overhead either.
688  */
689  tupsize = HJTUPLE_OVERHEAD +
690  MAXALIGN(SizeofMinimalTupleHeader) +
691  MAXALIGN(tupwidth);
692  inner_rel_bytes = ntuples * tupsize;
693 
694  /*
695  * Compute in-memory hashtable size limit from GUCs.
696  */
697  hash_table_bytes = get_hash_memory_limit();
698 
699  /*
700  * Parallel Hash tries to use the combined hash_mem of all workers to
701  * avoid the need to batch. If that won't work, it falls back to hash_mem
702  * per worker and tries to process batches in parallel.
703  */
704  if (try_combined_hash_mem)
705  {
706  /* Careful, this could overflow size_t */
707  double newlimit;
708 
709  newlimit = (double) hash_table_bytes * (double) (parallel_workers + 1);
710  newlimit = Min(newlimit, (double) SIZE_MAX);
711  hash_table_bytes = (size_t) newlimit;
712  }
713 
714  *space_allowed = hash_table_bytes;
715 
716  /*
717  * If skew optimization is possible, estimate the number of skew buckets
718  * that will fit in the memory allowed, and decrement the assumed space
719  * available for the main hash table accordingly.
720  *
721  * We make the optimistic assumption that each skew bucket will contain
722  * one inner-relation tuple. If that turns out to be low, we will recover
723  * at runtime by reducing the number of skew buckets.
724  *
725  * hashtable->skewBucket will have up to 8 times as many HashSkewBucket
726  * pointers as the number of MCVs we allow, since ExecHashBuildSkewHash
727  * will round up to the next power of 2 and then multiply by 4 to reduce
728  * collisions.
729  */
730  if (useskew)
731  {
732  size_t bytes_per_mcv;
733  size_t skew_mcvs;
734 
735  /*----------
736  * Compute number of MCVs we could hold in hash_table_bytes
737  *
738  * Divisor is:
739  * size of a hash tuple +
740  * worst-case size of skewBucket[] per MCV +
741  * size of skewBucketNums[] entry +
742  * size of skew bucket struct itself
743  *----------
744  */
745  bytes_per_mcv = tupsize +
746  (8 * sizeof(HashSkewBucket *)) +
747  sizeof(int) +
748  SKEW_BUCKET_OVERHEAD;
749  skew_mcvs = hash_table_bytes / bytes_per_mcv;
750 
751  /*
752  * Now scale by SKEW_HASH_MEM_PERCENT (we do it in this order so as
753  * not to worry about size_t overflow in the multiplication)
754  */
755  skew_mcvs = (skew_mcvs * SKEW_HASH_MEM_PERCENT) / 100;
756 
757  /* Now clamp to integer range */
758  skew_mcvs = Min(skew_mcvs, INT_MAX);
759 
760  *num_skew_mcvs = (int) skew_mcvs;
761 
762  /* Reduce hash_table_bytes by the amount needed for the skew table */
763  if (skew_mcvs > 0)
764  hash_table_bytes -= skew_mcvs * bytes_per_mcv;
765  }
766  else
767  *num_skew_mcvs = 0;
768 
769  /*
770  * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
771  * memory is filled, assuming a single batch; but limit the value so that
772  * the pointer arrays we'll try to allocate do not exceed hash_table_bytes
773  * nor MaxAllocSize.
774  *
775  * Note that both nbuckets and nbatch must be powers of 2 to make
776  * ExecHashGetBucketAndBatch fast.
777  */
778  max_pointers = hash_table_bytes / sizeof(HashJoinTuple);
779  max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
780  /* If max_pointers isn't a power of 2, must round it down to one */
781  max_pointers = pg_prevpower2_size_t(max_pointers);
782 
783  /* Also ensure we avoid integer overflow in nbatch and nbuckets */
784  /* (this step is redundant given the current value of MaxAllocSize) */
785  max_pointers = Min(max_pointers, INT_MAX / 2 + 1);
786 
787  dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
788  dbuckets = Min(dbuckets, max_pointers);
789  nbuckets = (int) dbuckets;
790  /* don't let nbuckets be really small, though ... */
791  nbuckets = Max(nbuckets, 1024);
792  /* ... and force it to be a power of 2. */
793  nbuckets = pg_nextpower2_32(nbuckets);
794 
795  /*
796  * If there's not enough space to store the projected number of tuples and
797  * the required bucket headers, we will need multiple batches.
798  */
799  bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
800  if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
801  {
802  /* We'll need multiple batches */
803  size_t sbuckets;
804  double dbatch;
805  int minbatch;
806  size_t bucket_size;
807 
808  /*
809  * If Parallel Hash with combined hash_mem would still need multiple
810  * batches, we'll have to fall back to regular hash_mem budget.
811  */
812  if (try_combined_hash_mem)
813  {
814  ExecChooseHashTableSize(ntuples, tupwidth, useskew,
815  false, parallel_workers,
816  space_allowed,
817  numbuckets,
818  numbatches,
819  num_skew_mcvs);
820  return;
821  }
822 
823  /*
824  * Estimate the number of buckets we'll want to have when hash_mem is
825  * entirely full. Each bucket will contain a bucket pointer plus
826  * NTUP_PER_BUCKET tuples, whose projected size already includes
827  * overhead for the hash code, pointer to the next tuple, etc.
828  */
829  bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
830  if (hash_table_bytes <= bucket_size)
831  sbuckets = 1; /* avoid pg_nextpower2_size_t(0) */
832  else
833  sbuckets = pg_nextpower2_size_t(hash_table_bytes / bucket_size);
834  sbuckets = Min(sbuckets, max_pointers);
835  nbuckets = (int) sbuckets;
836  nbuckets = pg_nextpower2_32(nbuckets);
837  bucket_bytes = nbuckets * sizeof(HashJoinTuple);
838 
839  /*
840  * Buckets are simple pointers to hashjoin tuples, while tupsize
841  * includes the pointer, hash code, and MinimalTupleData. So buckets
842  * should never really exceed 25% of hash_mem (even for
843  * NTUP_PER_BUCKET=1); except maybe for hash_mem values that are not
844  * 2^N bytes, where we might get more because of doubling. So let's
845  * look for 50% here.
846  */
847  Assert(bucket_bytes <= hash_table_bytes / 2);
848 
849  /* Calculate required number of batches. */
850  dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
851  dbatch = Min(dbatch, max_pointers);
852  minbatch = (int) dbatch;
853  nbatch = pg_nextpower2_32(Max(2, minbatch));
854  }
855 
856  Assert(nbuckets > 0);
857  Assert(nbatch > 0);
858 
859  *numbuckets = nbuckets;
860  *numbatches = nbatch;
861 }
#define Min(x, y)
Definition: c.h:937
#define MAXALIGN(LEN)
Definition: c.h:747
#define Max(x, y)
Definition: c.h:931
struct HashJoinTupleData * HashJoinTuple
Definition: execnodes.h:2080
#define HJTUPLE_OVERHEAD
Definition: hashjoin.h:79
#define SKEW_BUCKET_OVERHEAD
Definition: hashjoin.h:108
#define SKEW_HASH_MEM_PERCENT
Definition: hashjoin.h:110
#define SizeofMinimalTupleHeader
Definition: htup_details.h:643
Assert(fmt[strlen(fmt) - 1] !='\n')
#define MaxAllocSize
Definition: memutils.h:40
void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, size_t *space_allowed, int *numbuckets, int *numbatches, int *num_skew_mcvs)
Definition: nodeHash.c:663
#define NTUP_PER_BUCKET
Definition: nodeHash.c:660
size_t get_hash_memory_limit(void)
Definition: nodeHash.c:3390
static uint32 pg_nextpower2_32(uint32 num)
Definition: pg_bitutils.h:140
#define pg_nextpower2_size_t
Definition: pg_bitutils.h:290
#define pg_prevpower2_size_t
Definition: pg_bitutils.h:291

References Assert(), get_hash_memory_limit(), HJTUPLE_OVERHEAD, Max, MAXALIGN, MaxAllocSize, Min, NTUP_PER_BUCKET, pg_nextpower2_32(), pg_nextpower2_size_t, pg_prevpower2_size_t, SizeofMinimalTupleHeader, SKEW_BUCKET_OVERHEAD, and SKEW_HASH_MEM_PERCENT.

Referenced by ExecHashTableCreate(), and initial_cost_hashjoin().
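The sizing logic is easiest to see from a call site. The following is a hypothetical usage sketch, not code taken from PostgreSQL: the tuple count and width are made-up estimates, and the helper name size_hash_table_example() is invented for illustration. It sizes a serial (non-parallel) hash table and reports the resulting power-of-2 bucket and batch counts.

#include "postgres.h"
#include "executor/nodeHash.h"

/* Hypothetical example: size a serial hash table for ~1M 64-byte tuples. */
static void
size_hash_table_example(void)
{
    size_t  space_allowed;
    int     numbuckets;
    int     numbatches;
    int     num_skew_mcvs;

    ExecChooseHashTableSize(1000000.0,  /* estimated inner tuples */
                            64,         /* estimated tuple width */
                            true,       /* useskew */
                            false,      /* try_combined_hash_mem */
                            0,          /* parallel_workers */
                            &space_allowed,
                            &numbuckets,
                            &numbatches,
                            &num_skew_mcvs);

    /* Both outputs are powers of 2, as ExecHashGetBucketAndBatch requires. */
    elog(DEBUG1, "space_allowed=%zu nbuckets=%d nbatch=%d skew_mcvs=%d",
         space_allowed, numbuckets, numbatches, num_skew_mcvs);
}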

◆ ExecEndHash()

void ExecEndHash ( HashState *  node)

Definition at line 407 of file nodeHash.c.

408 {
409  PlanState *outerPlan;
410 
411  /*
412  * free exprcontext
413  */
414  ExecFreeExprContext(&node->ps);
415 
416  /*
417  * shut down the subplan
418  */
419  outerPlan = outerPlanState(node);
420  ExecEndNode(outerPlan);
421 }
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:654
#define outerPlanState(node)
Definition: execnodes.h:1126
#define outerPlan(node)
Definition: plannodes.h:186
PlanState ps
Definition: execnodes.h:2641

References ExecEndNode(), ExecFreeExprContext(), outerPlan, outerPlanState, and HashState::ps.

Referenced by ExecEndNode().

◆ ExecHashAccumInstrumentation()

void ExecHashAccumInstrumentation ( HashInstrumentation *  instrument,
HashJoinTable  hashtable 
)

Definition at line 2697 of file nodeHash.c.

2699 {
2700  instrument->nbuckets = Max(instrument->nbuckets,
2701  hashtable->nbuckets);
2702  instrument->nbuckets_original = Max(instrument->nbuckets_original,
2703  hashtable->nbuckets_original);
2704  instrument->nbatch = Max(instrument->nbatch,
2705  hashtable->nbatch);
2706  instrument->nbatch_original = Max(instrument->nbatch_original,
2707  hashtable->nbatch_original);
2708  instrument->space_peak = Max(instrument->space_peak,
2709  hashtable->spacePeak);
2710 }

References Max, HashJoinTableData::nbatch, HashInstrumentation::nbatch, HashJoinTableData::nbatch_original, HashInstrumentation::nbatch_original, HashJoinTableData::nbuckets, HashInstrumentation::nbuckets, HashJoinTableData::nbuckets_original, HashInstrumentation::nbuckets_original, HashInstrumentation::space_peak, and HashJoinTableData::spacePeak.

Referenced by ExecReScanHashJoin(), and ExecShutdownHash().

◆ ExecHashEstimate()

void ExecHashEstimate ( HashState *  node,
ParallelContext *  pcxt 
)

Definition at line 2581 of file nodeHash.c.

2582 {
2583  size_t size;
2584 
2585  /* don't need this if not instrumenting or no workers */
2586  if (!node->ps.instrument || pcxt->nworkers == 0)
2587  return;
2588 
2589  size = mul_size(pcxt->nworkers, sizeof(HashInstrumentation));
2590  size = add_size(size, offsetof(SharedHashInfo, hinstrument));
2591  shm_toc_estimate_chunk(&pcxt->estimator, size);
2592  shm_toc_estimate_keys(&pcxt->estimator, 1);
2593 }
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519
shm_toc_estimator estimator
Definition: parallel.h:42
Instrumentation * instrument
Definition: execnodes.h:1040

References add_size(), ParallelContext::estimator, PlanState::instrument, mul_size(), ParallelContext::nworkers, HashState::ps, shm_toc_estimate_chunk, and shm_toc_estimate_keys.

Referenced by ExecParallelEstimate().

◆ ExecHashGetBucketAndBatch()

void ExecHashGetBucketAndBatch ( HashJoinTable  hashtable,
uint32  hashvalue,
int *  bucketno,
int *  batchno 
)

Definition at line 1908 of file nodeHash.c.

1912 {
1913  uint32 nbuckets = (uint32) hashtable->nbuckets;
1914  uint32 nbatch = (uint32) hashtable->nbatch;
1915 
1916  if (nbatch > 1)
1917  {
1918  *bucketno = hashvalue & (nbuckets - 1);
1919  *batchno = pg_rotate_right32(hashvalue,
1920  hashtable->log2_nbuckets) & (nbatch - 1);
1921  }
1922  else
1923  {
1924  *bucketno = hashvalue & (nbuckets - 1);
1925  *batchno = 0;
1926  }
1927 }
unsigned int uint32
Definition: c.h:442
static uint32 pg_rotate_right32(uint32 word, int n)
Definition: pg_bitutils.h:271

References HashJoinTableData::log2_nbuckets, HashJoinTableData::nbatch, HashJoinTableData::nbuckets, and pg_rotate_right32().

Referenced by ExecHashIncreaseNumBatches(), ExecHashIncreaseNumBuckets(), ExecHashJoinImpl(), ExecHashRemoveNextSkewBucket(), ExecHashTableInsert(), ExecParallelHashIncreaseNumBuckets(), ExecParallelHashJoinPartitionOuter(), ExecParallelHashRepartitionFirst(), ExecParallelHashRepartitionRest(), ExecParallelHashTableInsert(), and ExecParallelHashTableInsertCurrentBatch().
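To make the bit layout concrete, the following standalone C program (an illustration only, not executor code; rotate_right32() is a local stand-in for pg_rotate_right32()) shows how the low log2(nbuckets) bits of the hash select the bucket, while the bits just above them, exposed by the rotation, select the batch. The nbuckets/nbatch values are arbitrary powers of 2.

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for pg_rotate_right32(); valid for 0 < n < 32. */
static uint32_t
rotate_right32(uint32_t word, int n)
{
    return (word >> n) | (word << (32 - n));
}

int
main(void)
{
    uint32_t hashvalue = 0xDEADBEEF;
    uint32_t nbuckets = 1024;        /* 2^10 */
    uint32_t nbatch = 8;             /* 2^3 */
    int      log2_nbuckets = 10;

    /* Low log2(nbuckets) bits pick the bucket... */
    uint32_t bucketno = hashvalue & (nbuckets - 1);
    /* ...and the bits just above them pick the batch. */
    uint32_t batchno = rotate_right32(hashvalue, log2_nbuckets) & (nbatch - 1);

    printf("bucket %u, batch %u\n", bucketno, batchno);
    return 0;
}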

◆ ExecHashGetHashValue()

bool ExecHashGetHashValue ( HashJoinTable  hashtable,
ExprContext *  econtext,
List *  hashkeys,
bool  outer_tuple,
bool  keep_nulls,
uint32 *  hashvalue 
)

Definition at line 1800 of file nodeHash.c.

1806 {
1807  uint32 hashkey = 0;
1808  FmgrInfo *hashfunctions;
1809  ListCell *hk;
1810  int i = 0;
1811  MemoryContext oldContext;
1812 
1813  /*
1814  * We reset the eval context each time to reclaim any memory leaked in the
1815  * hashkey expressions.
1816  */
1817  ResetExprContext(econtext);
1818 
1819  oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
1820 
1821  if (outer_tuple)
1822  hashfunctions = hashtable->outer_hashfunctions;
1823  else
1824  hashfunctions = hashtable->inner_hashfunctions;
1825 
1826  foreach(hk, hashkeys)
1827  {
1828  ExprState *keyexpr = (ExprState *) lfirst(hk);
1829  Datum keyval;
1830  bool isNull;
1831 
1832  /* combine successive hashkeys by rotating */
1833  hashkey = pg_rotate_left32(hashkey, 1);
1834 
1835  /*
1836  * Get the join attribute value of the tuple
1837  */
1838  keyval = ExecEvalExpr(keyexpr, econtext, &isNull);
1839 
1840  /*
1841  * If the attribute is NULL, and the join operator is strict, then
1842  * this tuple cannot pass the join qual so we can reject it
1843  * immediately (unless we're scanning the outside of an outer join, in
1844  * which case we must not reject it). Otherwise we act like the
1845  * hashcode of NULL is zero (this will support operators that act like
1846  * IS NOT DISTINCT, though not any more-random behavior). We treat
1847  * the hash support function as strict even if the operator is not.
1848  *
1849  * Note: currently, all hashjoinable operators must be strict since
1850  * the hash index AM assumes that. However, it takes so little extra
1851  * code here to allow non-strict that we may as well do it.
1852  */
1853  if (isNull)
1854  {
1855  if (hashtable->hashStrict[i] && !keep_nulls)
1856  {
1857  MemoryContextSwitchTo(oldContext);
1858  return false; /* cannot match */
1859  }
1860  /* else, leave hashkey unmodified, equivalent to hashcode 0 */
1861  }
1862  else
1863  {
1864  /* Compute the hash function */
1865  uint32 hkey;
1866 
1867  hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
1868  hashkey ^= hkey;
1869  }
1870 
1871  i++;
1872  }
1873 
1874  MemoryContextSwitchTo(oldContext);
1875 
1876  *hashvalue = hashkey;
1877  return true;
1878 }
#define ResetExprContext(econtext)
Definition: executor.h:532
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:321
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1114
int i
Definition: isn.c:73
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:135
static uint32 pg_rotate_left32(uint32 word, int n)
Definition: pg_bitutils.h:277
#define lfirst(lc)
Definition: pg_list.h:170
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:570
uintptr_t Datum
Definition: postgres.h:412
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:255
Definition: fmgr.h:57
FmgrInfo * outer_hashfunctions
Definition: hashjoin.h:337
bool * hashStrict
Definition: hashjoin.h:339
FmgrInfo * inner_hashfunctions
Definition: hashjoin.h:338

References HashJoinTableData::collations, DatumGetUInt32(), ExprContext::ecxt_per_tuple_memory, ExecEvalExpr(), FunctionCall1Coll(), HashJoinTableData::hashStrict, i, HashJoinTableData::inner_hashfunctions, lfirst, MemoryContextSwitchTo(), HashJoinTableData::outer_hashfunctions, pg_rotate_left32(), and ResetExprContext.

Referenced by ExecHashJoinOuterGetTuple(), ExecParallelHashJoinOuterGetTuple(), ExecParallelHashJoinPartitionOuter(), MultiExecParallelHash(), and MultiExecPrivateHash().
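The key-combining step in the loop above (rotate the running hash left one bit, then XOR in the current key's hash, with a NULL key contributing zero) can be sketched in isolation. This is an illustration with invented helper names; it assumes the per-column hash values have already been computed, whereas the real function evaluates each key expression and calls its hash support function.

#include <stdbool.h>
#include <stdint.h>

/* Local stand-in for pg_rotate_left32(); valid for 0 < n < 32. */
uint32_t
rotate_left32(uint32_t word, int n)
{
    return (word << n) | (word >> (32 - n));
}

/*
 * Combine precomputed per-column hashes of one tuple into a single join
 * hash value, mirroring the loop in ExecHashGetHashValue.
 */
uint32_t
combine_hashkeys(const uint32_t *key_hashes, const bool *key_is_null, int nkeys)
{
    uint32_t hashkey = 0;

    for (int i = 0; i < nkeys; i++)
    {
        hashkey = rotate_left32(hashkey, 1);
        if (!key_is_null[i])
            hashkey ^= key_hashes[i];   /* NULL behaves like hash value 0 */
    }
    return hashkey;
}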

◆ ExecHashGetSkewBucket()

int ExecHashGetSkewBucket ( HashJoinTable  hashtable,
uint32  hashvalue 
)

Definition at line 2376 of file nodeHash.c.

2377 {
2378  int bucket;
2379 
2380  /*
2381  * Always return INVALID_SKEW_BUCKET_NO if not doing skew optimization (in
2382  * particular, this happens after the initial batch is done).
2383  */
2384  if (!hashtable->skewEnabled)
2385  return INVALID_SKEW_BUCKET_NO;
2386 
2387  /*
2388  * Since skewBucketLen is a power of 2, we can do a modulo by ANDing.
2389  */
2390  bucket = hashvalue & (hashtable->skewBucketLen - 1);
2391 
2392  /*
2393  * While we have not hit a hole in the hashtable and have not hit the
2394  * desired bucket, we have collided with some other hash value, so try the
2395  * next bucket location.
2396  */
2397  while (hashtable->skewBucket[bucket] != NULL &&
2398  hashtable->skewBucket[bucket]->hashvalue != hashvalue)
2399  bucket = (bucket + 1) & (hashtable->skewBucketLen - 1);
2400 
2401  /*
2402  * Found the desired bucket?
2403  */
2404  if (hashtable->skewBucket[bucket] != NULL)
2405  return bucket;
2406 
2407  /*
2408  * There must not be any hashtable entry for this hash value.
2409  */
2410  return INVALID_SKEW_BUCKET_NO;
2411 }
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:109
HashSkewBucket ** skewBucket
Definition: hashjoin.h:305
uint32 hashvalue
Definition: hashjoin.h:104

References HashSkewBucket::hashvalue, INVALID_SKEW_BUCKET_NO, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketLen, and HashJoinTableData::skewEnabled.

Referenced by ExecHashJoinImpl(), and MultiExecPrivateHash().
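The loop above is open addressing with linear probing over a power-of-two table, so the modulo reduces to an AND with (length - 1). Below is a simplified standalone sketch of the same lookup, with invented types in place of the real HashSkewBucket machinery.

#include <stdint.h>

typedef struct SkewSlotExample
{
    int      used;          /* is this slot occupied? */
    uint32_t hashvalue;     /* hash value that owns the slot */
} SkewSlotExample;

/* Return the slot index holding hashvalue, or -1 if there is no entry. */
int
skew_lookup_example(const SkewSlotExample *slots, uint32_t len, uint32_t hashvalue)
{
    uint32_t bucket = hashvalue & (len - 1);    /* len is a power of 2 */

    /* Step to the next slot on collision; stop at an empty slot. */
    while (slots[bucket].used && slots[bucket].hashvalue != hashvalue)
        bucket = (bucket + 1) & (len - 1);

    return slots[bucket].used ? (int) bucket : -1;
}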

◆ ExecHashInitializeDSM()

void ExecHashInitializeDSM ( HashState *  node,
ParallelContext *  pcxt 
)

Definition at line 2600 of file nodeHash.c.

2601 {
2602  size_t size;
2603 
2604  /* don't need this if not instrumenting or no workers */
2605  if (!node->ps.instrument || pcxt->nworkers == 0)
2606  return;
2607 
2608  size = offsetof(SharedHashInfo, hinstrument) +
2609  pcxt->nworkers * sizeof(HashInstrumentation);
2610  node->shared_info = (SharedHashInfo *) shm_toc_allocate(pcxt->toc, size);
2611 
2612  /* Each per-worker area must start out as zeroes. */
2613  memset(node->shared_info, 0, size);
2614 
2615  node->shared_info->num_workers = pcxt->nworkers;
2616  shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id,
2617  node->shared_info);
2618 }
struct HashInstrumentation HashInstrumentation
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
SharedHashInfo * shared_info
Definition: execnodes.h:2651
shm_toc * toc
Definition: parallel.h:45
Plan * plan
Definition: execnodes.h:1030
int plan_node_id
Definition: plannodes.h:155

References PlanState::instrument, SharedHashInfo::num_workers, ParallelContext::nworkers, PlanState::plan, Plan::plan_node_id, HashState::ps, HashState::shared_info, shm_toc_allocate(), shm_toc_insert(), and ParallelContext::toc.

Referenced by ExecParallelInitializeDSM().

◆ ExecHashInitializeWorker()

void ExecHashInitializeWorker ( HashState *  node,
ParallelWorkerContext *  pwcxt 
)

Definition at line 2625 of file nodeHash.c.

2626 {
2627  SharedHashInfo *shared_info;
2628 
2629  /* don't need this if not instrumenting */
2630  if (!node->ps.instrument)
2631  return;
2632 
2633  /*
2634  * Find our entry in the shared area, and set up a pointer to it so that
2635  * we'll accumulate stats there when shutting down or rebuilding the hash
2636  * table.
2637  */
2638  shared_info = (SharedHashInfo *)
2639  shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
2640  node->hinstrument = &shared_info->hinstrument[ParallelWorkerNumber];
2641 }
int ParallelWorkerNumber
Definition: parallel.c:113
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
HashInstrumentation * hinstrument
Definition: execnodes.h:2658
HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:2632

References SharedHashInfo::hinstrument, HashState::hinstrument, PlanState::instrument, ParallelWorkerNumber, PlanState::plan, Plan::plan_node_id, HashState::ps, shm_toc_lookup(), and ParallelWorkerContext::toc.

Referenced by ExecParallelInitializeWorker().

◆ ExecHashRetrieveInstrumentation()

void ExecHashRetrieveInstrumentation ( HashState *  node)

Definition at line 2666 of file nodeHash.c.

2667 {
2668  SharedHashInfo *shared_info = node->shared_info;
2669  size_t size;
2670 
2671  if (shared_info == NULL)
2672  return;
2673 
2674  /* Replace node->shared_info with a copy in backend-local memory. */
2675  size = offsetof(SharedHashInfo, hinstrument) +
2676  shared_info->num_workers * sizeof(HashInstrumentation);
2677  node->shared_info = palloc(size);
2678  memcpy(node->shared_info, shared_info, size);
2679 }
void * palloc(Size size)
Definition: mcxt.c:1199

References SharedHashInfo::num_workers, palloc(), and HashState::shared_info.

Referenced by ExecParallelRetrieveInstrumentation().

◆ ExecHashTableCreate()

HashJoinTable ExecHashTableCreate ( HashState *  state,
List *  hashOperators,
List *  hashCollations,
bool  keepNulls 
)

Definition at line 431 of file nodeHash.c.

432 {
433  Hash *node;
434  HashJoinTable hashtable;
435  Plan *outerNode;
436  size_t space_allowed;
437  int nbuckets;
438  int nbatch;
439  double rows;
440  int num_skew_mcvs;
441  int log2_nbuckets;
442  int nkeys;
443  int i;
444  ListCell *ho;
445  ListCell *hc;
446  MemoryContext oldcxt;
447 
448  /*
449  * Get information about the size of the relation to be hashed (it's the
450  * "outer" subtree of this node, but the inner relation of the hashjoin).
451  * Compute the appropriate size of the hash table.
452  */
453  node = (Hash *) state->ps.plan;
454  outerNode = outerPlan(node);
455 
456  /*
457  * If this is shared hash table with a partial plan, then we can't use
458  * outerNode->plan_rows to estimate its size. We need an estimate of the
459  * total number of rows across all copies of the partial plan.
460  */
461  rows = node->plan.parallel_aware ? node->rows_total : outerNode->plan_rows;
462 
463  ExecChooseHashTableSize(rows, outerNode->plan_width,
464  OidIsValid(node->skewTable),
465  state->parallel_state != NULL,
466  state->parallel_state != NULL ?
467  state->parallel_state->nparticipants - 1 : 0,
468  &space_allowed,
469  &nbuckets, &nbatch, &num_skew_mcvs);
470 
471  /* nbuckets must be a power of 2 */
472  log2_nbuckets = my_log2(nbuckets);
473  Assert(nbuckets == (1 << log2_nbuckets));
474 
475  /*
476  * Initialize the hash table control block.
477  *
478  * The hashtable control block is just palloc'd from the executor's
479  * per-query memory context. Everything else should be kept inside the
480  * subsidiary hashCxt or batchCxt.
481  */
482  hashtable = palloc_object(HashJoinTableData);
483  hashtable->nbuckets = nbuckets;
484  hashtable->nbuckets_original = nbuckets;
485  hashtable->nbuckets_optimal = nbuckets;
486  hashtable->log2_nbuckets = log2_nbuckets;
487  hashtable->log2_nbuckets_optimal = log2_nbuckets;
488  hashtable->buckets.unshared = NULL;
489  hashtable->keepNulls = keepNulls;
490  hashtable->skewEnabled = false;
491  hashtable->skewBucket = NULL;
492  hashtable->skewBucketLen = 0;
493  hashtable->nSkewBuckets = 0;
494  hashtable->skewBucketNums = NULL;
495  hashtable->nbatch = nbatch;
496  hashtable->curbatch = 0;
497  hashtable->nbatch_original = nbatch;
498  hashtable->nbatch_outstart = nbatch;
499  hashtable->growEnabled = true;
500  hashtable->totalTuples = 0;
501  hashtable->partialTuples = 0;
502  hashtable->skewTuples = 0;
503  hashtable->innerBatchFile = NULL;
504  hashtable->outerBatchFile = NULL;
505  hashtable->spaceUsed = 0;
506  hashtable->spacePeak = 0;
507  hashtable->spaceAllowed = space_allowed;
508  hashtable->spaceUsedSkew = 0;
509  hashtable->spaceAllowedSkew =
510  hashtable->spaceAllowed * SKEW_HASH_MEM_PERCENT / 100;
511  hashtable->chunks = NULL;
512  hashtable->current_chunk = NULL;
513  hashtable->parallel_state = state->parallel_state;
514  hashtable->area = state->ps.state->es_query_dsa;
515  hashtable->batches = NULL;
516 
517 #ifdef HJDEBUG
518  printf("Hashjoin %p: initial nbatch = %d, nbuckets = %d\n",
519  hashtable, nbatch, nbuckets);
520 #endif
521 
522  /*
523  * Create temporary memory contexts in which to keep the hashtable working
524  * storage. See notes in executor/hashjoin.h.
525  */
526  hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
527  "HashTableContext",
528  ALLOCSET_DEFAULT_SIZES);
529 
530  hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
531  "HashBatchContext",
532  ALLOCSET_DEFAULT_SIZES);
533 
534  /* Allocate data that will live for the life of the hashjoin */
535 
536  oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
537 
538  /*
539  * Get info about the hash functions to be used for each hash key. Also
540  * remember whether the join operators are strict.
541  */
542  nkeys = list_length(hashOperators);
543  hashtable->outer_hashfunctions = palloc_array(FmgrInfo, nkeys);
544  hashtable->inner_hashfunctions = palloc_array(FmgrInfo, nkeys);
545  hashtable->hashStrict = palloc_array(bool, nkeys);
546  hashtable->collations = palloc_array(Oid, nkeys);
547  i = 0;
548  forboth(ho, hashOperators, hc, hashCollations)
549  {
550  Oid hashop = lfirst_oid(ho);
551  Oid left_hashfn;
552  Oid right_hashfn;
553 
554  if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
555  elog(ERROR, "could not find hash function for hash operator %u",
556  hashop);
557  fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
558  fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
559  hashtable->hashStrict[i] = op_strict(hashop);
560  hashtable->collations[i] = lfirst_oid(hc);
561  i++;
562  }
563 
564  if (nbatch > 1 && hashtable->parallel_state == NULL)
565  {
566  /*
567  * allocate and initialize the file arrays in hashCxt (not needed for
568  * parallel case which uses shared tuplestores instead of raw files)
569  */
570  hashtable->innerBatchFile = palloc0_array(BufFile *, nbatch);
571  hashtable->outerBatchFile = palloc0_array(BufFile *, nbatch);
572  /* The files will not be opened until needed... */
573  /* ... but make sure we have temp tablespaces established for them */
574  PrepareTempTablespaces();
575  }
576 
577  MemoryContextSwitchTo(oldcxt);
578 
579  if (hashtable->parallel_state)
580  {
581  ParallelHashJoinState *pstate = hashtable->parallel_state;
582  Barrier *build_barrier;
583 
584  /*
585  * Attach to the build barrier. The corresponding detach operation is
586  * in ExecHashTableDetach. Note that we won't attach to the
587  * batch_barrier for batch 0 yet. We'll attach later and start it out
588  * in PHJ_BATCH_PROBING phase, because batch 0 is allocated up front
589  * and then loaded while hashing (the standard hybrid hash join
590  * algorithm), and we'll coordinate that using build_barrier.
591  */
592  build_barrier = &pstate->build_barrier;
593  BarrierAttach(build_barrier);
594 
595  /*
596  * So far we have no idea whether there are any other participants,
597  * and if so, what phase they are working on. The only thing we care
598  * about at this point is whether someone has already created the
599  * SharedHashJoinBatch objects and the hash table for batch 0. One
600  * backend will be elected to do that now if necessary.
601  */
602  if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECTING &&
603  BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ELECT))
604  {
605  pstate->nbatch = nbatch;
606  pstate->space_allowed = space_allowed;
607  pstate->growth = PHJ_GROWTH_OK;
608 
609  /* Set up the shared state for coordinating batches. */
610  ExecParallelHashJoinSetUpBatches(hashtable, nbatch);
611 
612  /*
613  * Allocate batch 0's hash table up front so we can load it
614  * directly while hashing.
615  */
616  pstate->nbuckets = nbuckets;
617  ExecParallelHashTableAlloc(hashtable, 0);
618  }
619 
620  /*
621  * The next Parallel Hash synchronization point is in
622  * MultiExecParallelHash(), which will progress it all the way to
623  * PHJ_BUILD_DONE. The caller must not return control from this
624  * executor node between now and then.
625  */
626  }
627  else
628  {
629  /*
630  * Prepare context for the first-scan space allocations; allocate the
631  * hashbucket array therein, and set each bucket "empty".
632  */
633  MemoryContextSwitchTo(hashtable->batchCxt);
634 
635  hashtable->buckets.unshared = palloc0_array(HashJoinTuple, nbuckets);
636 
637  /*
638  * Set up for skew optimization, if possible and there's a need for
639  * more than one batch. (In a one-batch join, there's no point in
640  * it.)
641  */
642  if (nbatch > 1)
643  ExecHashBuildSkewHash(hashtable, node, num_skew_mcvs);
644 
645  MemoryContextSwitchTo(oldcxt);
646  }
647 
648  return hashtable;
649 }
void PrepareTempTablespaces(void)
Definition: tablespace.c:1337
int BarrierAttach(Barrier *barrier)
Definition: barrier.c:236
int BarrierPhase(Barrier *barrier)
Definition: barrier.c:265
bool BarrierArriveAndWait(Barrier *barrier, uint32 wait_event_info)
Definition: barrier.c:125
#define OidIsValid(objectId)
Definition: c.h:711
int my_log2(long num)
Definition: dynahash.c:1760
#define ERROR
Definition: elog.h:35
#define palloc_object(type)
Definition: fe_memutils.h:62
#define palloc_array(type, count)
Definition: fe_memutils.h:64
#define palloc0_array(type, count)
Definition: fe_memutils.h:65
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
#define PHJ_BUILD_ELECTING
Definition: hashjoin.h:257
@ PHJ_GROWTH_OK
Definition: hashjoin.h:221
bool op_strict(Oid opno)
Definition: lsyscache.c:1459
bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno)
Definition: lsyscache.c:509
MemoryContext CurrentMemoryContext
Definition: mcxt.c:124
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
Definition: nodeHash.c:2223
static void ExecParallelHashJoinSetUpBatches(HashJoinTable hashtable, int nbatch)
Definition: nodeHash.c:2944
void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno)
Definition: nodeHash.c:3105
static int list_length(const List *l)
Definition: pg_list.h:150
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:465
#define lfirst_oid(lc)
Definition: pg_list.h:172
#define printf(...)
Definition: port.h:244
unsigned int Oid
Definition: postgres_ext.h:31
struct HashJoinTupleData ** unshared
Definition: hashjoin.h:297
union HashJoinTableData::@94 buckets
HashMemoryChunk chunks
Definition: hashjoin.h:352
ParallelHashJoinBatchAccessor * batches
Definition: hashjoin.h:358
MemoryContext hashCxt
Definition: hashjoin.h:348
double totalTuples
Definition: hashjoin.h:318
double partialTuples
Definition: hashjoin.h:319
ParallelHashJoinState * parallel_state
Definition: hashjoin.h:357
HashMemoryChunk current_chunk
Definition: hashjoin.h:355
Size spaceAllowedSkew
Definition: hashjoin.h:346
int * skewBucketNums
Definition: hashjoin.h:308
BufFile ** innerBatchFile
Definition: hashjoin.h:329
int log2_nbuckets_optimal
Definition: hashjoin.h:291
dsa_area * area
Definition: hashjoin.h:356
BufFile ** outerBatchFile
Definition: hashjoin.h:330
MemoryContext batchCxt
Definition: hashjoin.h:349
double skewTuples
Definition: hashjoin.h:320
Oid skewTable
Definition: plannodes.h:1205
Cardinality rows_total
Definition: plannodes.h:1209
Plan plan
Definition: plannodes.h:1198
ParallelHashGrowth growth
Definition: hashjoin.h:241
bool parallel_aware
Definition: plannodes.h:144
int plan_width
Definition: plannodes.h:139
Cardinality plan_rows
Definition: plannodes.h:138
Definition: regguts.h:318
@ WAIT_EVENT_HASH_BUILD_ELECT
Definition: wait_event.h:97

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, HashJoinTableData::area, Assert(), BarrierArriveAndWait(), BarrierAttach(), BarrierPhase(), HashJoinTableData::batchCxt, HashJoinTableData::batches, HashJoinTableData::buckets, ParallelHashJoinState::build_barrier, HashJoinTableData::chunks, HashJoinTableData::collations, HashJoinTableData::curbatch, HashJoinTableData::current_chunk, CurrentMemoryContext, elog(), ERROR, ExecChooseHashTableSize(), ExecHashBuildSkewHash(), ExecParallelHashJoinSetUpBatches(), ExecParallelHashTableAlloc(), fmgr_info(), forboth, get_op_hash_functions(), HashJoinTableData::growEnabled, ParallelHashJoinState::growth, HashJoinTableData::hashCxt, HashJoinTableData::hashStrict, i, HashJoinTableData::inner_hashfunctions, HashJoinTableData::innerBatchFile, HashJoinTableData::keepNulls, lfirst_oid, list_length(), HashJoinTableData::log2_nbuckets, HashJoinTableData::log2_nbuckets_optimal, MemoryContextSwitchTo(), my_log2(), ParallelHashJoinState::nbatch, HashJoinTableData::nbatch, HashJoinTableData::nbatch_original, HashJoinTableData::nbatch_outstart, ParallelHashJoinState::nbuckets, HashJoinTableData::nbuckets, HashJoinTableData::nbuckets_optimal, HashJoinTableData::nbuckets_original, HashJoinTableData::nSkewBuckets, OidIsValid, op_strict(), HashJoinTableData::outer_hashfunctions, HashJoinTableData::outerBatchFile, outerPlan, palloc0_array, palloc_array, palloc_object, Plan::parallel_aware, HashJoinTableData::parallel_state, HashJoinTableData::partialTuples, PHJ_BUILD_ELECTING, PHJ_GROWTH_OK, Hash::plan, Plan::plan_rows, Plan::plan_width, PrepareTempTablespaces(), printf, Hash::rows_total, SKEW_HASH_MEM_PERCENT, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketLen, HashJoinTableData::skewBucketNums, HashJoinTableData::skewEnabled, Hash::skewTable, HashJoinTableData::skewTuples, ParallelHashJoinState::space_allowed, HashJoinTableData::spaceAllowed, HashJoinTableData::spaceAllowedSkew, HashJoinTableData::spacePeak, HashJoinTableData::spaceUsed, HashJoinTableData::spaceUsedSkew, HashJoinTableData::totalTuples, HashJoinTableData::unshared, and WAIT_EVENT_HASH_BUILD_ELECT.

Referenced by ExecHashJoinImpl().

◆ ExecHashTableDestroy()

void ExecHashTableDestroy ( HashJoinTable  hashtable)

Definition at line 871 of file nodeHash.c.

872 {
873  int i;
874 
875  /*
876  * Make sure all the temp files are closed. We skip batch 0, since it
877  * can't have any temp files (and the arrays might not even exist if
878  * nbatch is only 1). Parallel hash joins don't use these files.
879  */
880  if (hashtable->innerBatchFile != NULL)
881  {
882  for (i = 1; i < hashtable->nbatch; i++)
883  {
884  if (hashtable->innerBatchFile[i])
885  BufFileClose(hashtable->innerBatchFile[i]);
886  if (hashtable->outerBatchFile[i])
887  BufFileClose(hashtable->outerBatchFile[i]);
888  }
889  }
890 
891  /* Release working memory (batchCxt is a child, so it goes away too) */
892  MemoryContextDelete(hashtable->hashCxt);
893 
894  /* And drop the control block */
895  pfree(hashtable);
896 }
void BufFileClose(BufFile *file)
Definition: buffile.c:407
void pfree(void *pointer)
Definition: mcxt.c:1306
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:376

References BufFileClose(), HashJoinTableData::hashCxt, i, HashJoinTableData::innerBatchFile, MemoryContextDelete(), HashJoinTableData::nbatch, HashJoinTableData::outerBatchFile, and pfree().

Referenced by ExecEndHashJoin(), and ExecReScanHashJoin().

◆ ExecHashTableDetach()

void ExecHashTableDetach ( HashJoinTable  hashtable)

Definition at line 3182 of file nodeHash.c.

3183 {
3184  if (hashtable->parallel_state)
3185  {
3186  ParallelHashJoinState *pstate = hashtable->parallel_state;
3187  int i;
3188 
3189  /* Make sure any temporary files are closed. */
3190  if (hashtable->batches)
3191  {
3192  for (i = 0; i < hashtable->nbatch; ++i)
3193  {
3194  sts_end_write(hashtable->batches[i].inner_tuples);
3195  sts_end_write(hashtable->batches[i].outer_tuples);
3196  sts_end_parallel_scan(hashtable->batches[i].inner_tuples);
3197  sts_end_parallel_scan(hashtable->batches[i].outer_tuples);
3198  }
3199  }
3200 
3201  /* If we're last to detach, clean up shared memory. */
3202  if (BarrierDetach(&pstate->build_barrier))
3203  {
3204  if (DsaPointerIsValid(pstate->batches))
3205  {
3206  dsa_free(hashtable->area, pstate->batches);
3207  pstate->batches = InvalidDsaPointer;
3208  }
3209  }
3210 
3211  hashtable->parallel_state = NULL;
3212  }
3213 }
bool BarrierDetach(Barrier *barrier)
Definition: barrier.c:256
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:832
#define InvalidDsaPointer
Definition: dsa.h:78
#define DsaPointerIsValid(x)
Definition: dsa.h:81
void sts_end_write(SharedTuplestoreAccessor *accessor)
void sts_end_parallel_scan(SharedTuplestoreAccessor *accessor)
SharedTuplestoreAccessor * outer_tuples
Definition: hashjoin.h:209
SharedTuplestoreAccessor * inner_tuples
Definition: hashjoin.h:208
dsa_pointer batches
Definition: hashjoin.h:236

References HashJoinTableData::area, BarrierDetach(), ParallelHashJoinState::batches, HashJoinTableData::batches, ParallelHashJoinState::build_barrier, dsa_free(), DsaPointerIsValid, i, ParallelHashJoinBatchAccessor::inner_tuples, InvalidDsaPointer, HashJoinTableData::nbatch, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, sts_end_parallel_scan(), and sts_end_write().

Referenced by ExecHashJoinReInitializeDSM(), and ExecShutdownHashJoin().

◆ ExecHashTableDetachBatch()

void ExecHashTableDetachBatch ( HashJoinTable  hashtable)

Definition at line 3125 of file nodeHash.c.

3126 {
3127  if (hashtable->parallel_state != NULL &&
3128  hashtable->curbatch >= 0)
3129  {
3130  int curbatch = hashtable->curbatch;
3131  ParallelHashJoinBatch *batch = hashtable->batches[curbatch].shared;
3132 
3133  /* Make sure any temporary files are closed. */
3134  sts_end_parallel_scan(hashtable->batches[curbatch].inner_tuples);
3135  sts_end_parallel_scan(hashtable->batches[curbatch].outer_tuples);
3136 
3137  /* Detach from the batch we were last working on. */
3138  if (BarrierArriveAndDetach(&batch->batch_barrier))
3139  {
3140  /*
3141  * Technically we shouldn't access the barrier because we're no
3142  * longer attached, but since there is no way it's moving after
3143  * this point it seems safe to make the following assertion.
3144  */
3145  Assert(BarrierPhase(&batch->batch_barrier) == PHJ_BATCH_DONE);
3146 
3147  /* Free shared chunks and buckets. */
3148  while (DsaPointerIsValid(batch->chunks))
3149  {
3150  HashMemoryChunk chunk =
3151  dsa_get_address(hashtable->area, batch->chunks);
3152  dsa_pointer next = chunk->next.shared;
3153 
3154  dsa_free(hashtable->area, batch->chunks);
3155  batch->chunks = next;
3156  }
3157  if (DsaPointerIsValid(batch->buckets))
3158  {
3159  dsa_free(hashtable->area, batch->buckets);
3160  batch->buckets = InvalidDsaPointer;
3161  }
3162  }
3163 
3164  /*
3165  * Track the largest batch we've been attached to. Though each
3166  * backend might see a different subset of batches, explain.c will
3167  * scan the results from all backends to find the largest value.
3168  */
3169  hashtable->spacePeak =
3170  Max(hashtable->spacePeak,
3171  batch->size + sizeof(dsa_pointer_atomic) * hashtable->nbuckets);
3172 
3173  /* Remember that we are not attached to a batch. */
3174  hashtable->curbatch = -1;
3175  }
3176 }
bool BarrierArriveAndDetach(Barrier *barrier)
Definition: barrier.c:203
static int32 next
Definition: blutils.c:219
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:944
uint64 dsa_pointer
Definition: dsa.h:62
#define PHJ_BATCH_DONE
Definition: hashjoin.h:268
dsa_pointer shared
Definition: hashjoin.h:127
union HashMemoryChunkData::@93 next
ParallelHashJoinBatch * shared
Definition: hashjoin.h:197
dsa_pointer chunks
Definition: hashjoin.h:156
dsa_pointer buckets
Definition: hashjoin.h:153

References HashJoinTableData::area, Assert(), BarrierArriveAndDetach(), BarrierPhase(), ParallelHashJoinBatch::batch_barrier, HashJoinTableData::batches, ParallelHashJoinBatch::buckets, ParallelHashJoinBatch::chunks, HashJoinTableData::curbatch, dsa_free(), dsa_get_address(), DsaPointerIsValid, ParallelHashJoinBatchAccessor::inner_tuples, InvalidDsaPointer, Max, HashJoinTableData::nbuckets, next, HashMemoryChunkData::next, ParallelHashJoinBatchAccessor::outer_tuples, HashJoinTableData::parallel_state, PHJ_BATCH_DONE, HashMemoryChunkData::shared, ParallelHashJoinBatchAccessor::shared, ParallelHashJoinBatch::size, HashJoinTableData::spacePeak, and sts_end_parallel_scan().

Referenced by ExecHashJoinReInitializeDSM(), ExecParallelHashJoinNewBatch(), and ExecShutdownHashJoin().

◆ ExecHashTableInsert()

void ExecHashTableInsert ( HashJoinTable  hashtable,
TupleTableSlot *  slot,
uint32  hashvalue 
)

Definition at line 1602 of file nodeHash.c.

1605 {
1606  bool shouldFree;
1607  MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1608  int bucketno;
1609  int batchno;
1610 
1611  ExecHashGetBucketAndBatch(hashtable, hashvalue,
1612  &bucketno, &batchno);
1613 
1614  /*
1615  * decide whether to put the tuple in the hash table or a temp file
1616  */
1617  if (batchno == hashtable->curbatch)
1618  {
1619  /*
1620  * put the tuple in hash table
1621  */
1622  HashJoinTuple hashTuple;
1623  int hashTupleSize;
1624  double ntuples = (hashtable->totalTuples - hashtable->skewTuples);
1625 
1626  /* Create the HashJoinTuple */
1627  hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
1628  hashTuple = (HashJoinTuple) dense_alloc(hashtable, hashTupleSize);
1629 
1630  hashTuple->hashvalue = hashvalue;
1631  memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
1632 
1633  /*
1634  * We always reset the tuple-matched flag on insertion. This is okay
1635  * even when reloading a tuple from a batch file, since the tuple
1636  * could not possibly have been matched to an outer tuple before it
1637  * went into the batch file.
1638  */
1639  HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
1640 
1641  /* Push it onto the front of the bucket's list */
1642  hashTuple->next.unshared = hashtable->buckets.unshared[bucketno];
1643  hashtable->buckets.unshared[bucketno] = hashTuple;
1644 
1645  /*
1646  * Increase the (optimal) number of buckets if we just exceeded the
1647  * NTUP_PER_BUCKET threshold, but only when there's still a single
1648  * batch.
1649  */
1650  if (hashtable->nbatch == 1 &&
1651  ntuples > (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))
1652  {
1653  /* Guard against integer overflow and alloc size overflow */
1654  if (hashtable->nbuckets_optimal <= INT_MAX / 2 &&
1655  hashtable->nbuckets_optimal * 2 <= MaxAllocSize / sizeof(HashJoinTuple))
1656  {
1657  hashtable->nbuckets_optimal *= 2;
1658  hashtable->log2_nbuckets_optimal += 1;
1659  }
1660  }
1661 
1662  /* Account for space used, and back off if we've used too much */
1663  hashtable->spaceUsed += hashTupleSize;
1664  if (hashtable->spaceUsed > hashtable->spacePeak)
1665  hashtable->spacePeak = hashtable->spaceUsed;
1666  if (hashtable->spaceUsed +
1667  hashtable->nbuckets_optimal * sizeof(HashJoinTuple)
1668  > hashtable->spaceAllowed)
1669  ExecHashIncreaseNumBatches(hashtable);
1670  }
1671  else
1672  {
1673  /*
1674  * put the tuple into a temp file for later batches
1675  */
1676  Assert(batchno > hashtable->curbatch);
1677  ExecHashJoinSaveTuple(tuple,
1678  hashvalue,
1679  &hashtable->innerBatchFile[batchno]);
1680  }
1681 
1682  if (shouldFree)
1683  heap_free_minimal_tuple(tuple);
1684 }
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot, bool *shouldFree)
Definition: execTuples.c:1692
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:80
void heap_free_minimal_tuple(MinimalTuple mtup)
Definition: heaptuple.c:1427
#define HeapTupleHeaderClearMatch(tup)
Definition: htup_details.h:520
static void * dense_alloc(HashJoinTable hashtable, Size size)
Definition: nodeHash.c:2716
static void ExecHashIncreaseNumBatches(HashJoinTable hashtable)
Definition: nodeHash.c:904
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1908
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
uint32 hashvalue
Definition: hashjoin.h:75
union HashJoinTupleData::@92 next
struct HashJoinTupleData * unshared
Definition: hashjoin.h:72

References Assert(), HashJoinTableData::buckets, HashJoinTableData::curbatch, dense_alloc(), ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecHashIncreaseNumBatches(), ExecHashJoinSaveTuple(), HashJoinTupleData::hashvalue, heap_free_minimal_tuple(), HeapTupleHeaderClearMatch, HJTUPLE_MINTUPLE, HJTUPLE_OVERHEAD, HashJoinTableData::innerBatchFile, HashJoinTableData::log2_nbuckets_optimal, MaxAllocSize, HashJoinTableData::nbatch, HashJoinTableData::nbuckets_optimal, HashJoinTupleData::next, NTUP_PER_BUCKET, HashJoinTableData::skewTuples, HashJoinTableData::spaceAllowed, HashJoinTableData::spacePeak, HashJoinTableData::spaceUsed, MinimalTupleData::t_len, HashJoinTableData::totalTuples, HashJoinTupleData::unshared, and HashJoinTableData::unshared.

Referenced by ExecHashJoinNewBatch(), and MultiExecPrivateHash().
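The following is a purely numerical illustration of the two growth checks performed after an in-memory insertion above: doubling nbuckets_optimal once the load factor passes NTUP_PER_BUCKET, and requesting more batches once the stored tuples plus the bucket array would exceed the space allowance. The numbers are invented; it assumes NTUP_PER_BUCKET is 1 and 8-byte bucket pointers.

#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    /* Invented state after some number of single-batch insertions. */
    double ntuples = 5000.0;             /* tuples currently in memory */
    long   nbuckets_optimal = 4096;      /* current "optimal" bucket count */
    long   space_used = 900 * 1024;      /* bytes used by stored tuples */
    long   space_allowed = 1024 * 1024;  /* in-memory budget (hash_mem) */

    /* Check 1: double the optimal bucket count once the load factor passes 1. */
    if (ntuples > (double) nbuckets_optimal)
        nbuckets_optimal *= 2;

    /* Check 2: another batch is needed once tuples + bucket array overflow. */
    bool need_more_batches =
        space_used + nbuckets_optimal * 8L > space_allowed;

    printf("nbuckets_optimal=%ld need_more_batches=%d\n",
           nbuckets_optimal, (int) need_more_batches);
    return 0;
}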

◆ ExecHashTableReset()

void ExecHashTableReset ( HashJoinTable  hashtable)

Definition at line 2147 of file nodeHash.c.

2148 {
2149  MemoryContext oldcxt;
2150  int nbuckets = hashtable->nbuckets;
2151 
2152  /*
2153  * Release all the hash buckets and tuples acquired in the prior pass, and
2154  * reinitialize the context for a new pass.
2155  */
2156  MemoryContextReset(hashtable->batchCxt);
2157  oldcxt = MemoryContextSwitchTo(hashtable->batchCxt);
2158 
2159  /* Reallocate and reinitialize the hash bucket headers. */
2160  hashtable->buckets.unshared = palloc0_array(HashJoinTuple, nbuckets);
2161 
2162  hashtable->spaceUsed = 0;
2163 
2164  MemoryContextSwitchTo(oldcxt);
2165 
2166  /* Forget the chunks (the memory was freed by the context reset above). */
2167  hashtable->chunks = NULL;
2168 }
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:303

References HashJoinTableData::batchCxt, HashJoinTableData::buckets, HashJoinTableData::chunks, MemoryContextReset(), MemoryContextSwitchTo(), HashJoinTableData::nbuckets, palloc0_array, HashJoinTableData::spaceUsed, and HashJoinTableData::unshared.

Referenced by ExecHashJoinNewBatch().

◆ ExecHashTableResetMatchFlags()

void ExecHashTableResetMatchFlags ( HashJoinTable  hashtable)

Definition at line 2175 of file nodeHash.c.

2176 {
2177  HashJoinTuple tuple;
2178  int i;
2179 
2180  /* Reset all flags in the main table ... */
2181  for (i = 0; i < hashtable->nbuckets; i++)
2182  {
2183  for (tuple = hashtable->buckets.unshared[i]; tuple != NULL;
2184  tuple = tuple->next.unshared)
2185  HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
2186  }
2187 
2188  /* ... and the same for the skew buckets, if any */
2189  for (i = 0; i < hashtable->nSkewBuckets; i++)
2190  {
2191  int j = hashtable->skewBucketNums[i];
2192  HashSkewBucket *skewBucket = hashtable->skewBucket[j];
2193 
2194  for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next.unshared)
2195  HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
2196  }
2197 }
int j
Definition: isn.c:74
HashJoinTuple tuples
Definition: hashjoin.h:105

References HashJoinTableData::buckets, HeapTupleHeaderClearMatch, HJTUPLE_MINTUPLE, i, j, HashJoinTableData::nbuckets, HashJoinTupleData::next, HashJoinTableData::nSkewBuckets, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashSkewBucket::tuples, HashJoinTupleData::unshared, and HashJoinTableData::unshared.

Referenced by ExecReScanHashJoin().

◆ ExecInitHash()

HashState* ExecInitHash ( Hash *  node,
EState *  estate,
int  eflags 
)

Definition at line 354 of file nodeHash.c.

355 {
356  HashState *hashstate;
357 
358  /* check for unsupported flags */
359  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
360 
361  /*
362  * create state structure
363  */
364  hashstate = makeNode(HashState);
365  hashstate->ps.plan = (Plan *) node;
366  hashstate->ps.state = estate;
367  hashstate->ps.ExecProcNode = ExecHash;
368  hashstate->hashtable = NULL;
369  hashstate->hashkeys = NIL; /* will be set by parent HashJoin */
370 
371  /*
372  * Miscellaneous initialization
373  *
374  * create expression context for node
375  */
376  ExecAssignExprContext(estate, &hashstate->ps);
377 
378  /*
379  * initialize child nodes
380  */
381  outerPlanState(hashstate) = ExecInitNode(outerPlan(node), estate, eflags);
382 
383  /*
384  * initialize our result slot and type. No need to build projection
385  * because this node doesn't do projections.
386  */
387  ExecInitResultTupleSlotTL(&hashstate->ps, &TTSOpsMinimalTuple);
388  hashstate->ps.ps_ProjInfo = NULL;
389 
390  /*
391  * initialize child expressions
392  */
393  Assert(node->plan.qual == NIL);
394  hashstate->hashkeys =
395  ExecInitExprList(node->hashkeys, (PlanState *) hashstate);
396 
397  return hashstate;
398 }
List * ExecInitExprList(List *nodes, PlanState *parent)
Definition: execExpr.c:319
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
void ExecInitResultTupleSlotTL(PlanState *planstate, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1799
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:85
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:484
#define EXEC_FLAG_BACKWARD
Definition: executor.h:58
#define EXEC_FLAG_MARK
Definition: executor.h:59
static TupleTableSlot * ExecHash(PlanState *pstate)
Definition: nodeHash.c:92
#define makeNode(_type_)
Definition: nodes.h:165
#define NIL
Definition: pg_list.h:66
HashJoinTable hashtable
Definition: execnodes.h:2642
List * hashkeys
Definition: execnodes.h:2643
List * hashkeys
Definition: plannodes.h:1204
EState * state
Definition: execnodes.h:1032
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1070
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1036
List * qual
Definition: plannodes.h:157

References Assert(), EXEC_FLAG_BACKWARD, EXEC_FLAG_MARK, ExecAssignExprContext(), ExecHash(), ExecInitExprList(), ExecInitNode(), ExecInitResultTupleSlotTL(), PlanState::ExecProcNode, HashState::hashkeys, Hash::hashkeys, HashState::hashtable, makeNode, NIL, outerPlan, outerPlanState, PlanState::plan, Hash::plan, HashState::ps, PlanState::ps_ProjInfo, Plan::qual, PlanState::state, and TTSOpsMinimalTuple.

Referenced by ExecInitNode().

◆ ExecParallelHashTableAlloc()

void ExecParallelHashTableAlloc ( HashJoinTable  hashtable,
int  batchno 
)

Definition at line 3105 of file nodeHash.c.

3106 {
3107  ParallelHashJoinBatch *batch = hashtable->batches[batchno].shared;
3108  dsa_pointer_atomic *buckets;
3109  int nbuckets = hashtable->parallel_state->nbuckets;
3110  int i;
3111 
3112  batch->buckets =
3113  dsa_allocate(hashtable->area, sizeof(dsa_pointer_atomic) * nbuckets);
3114  buckets = (dsa_pointer_atomic *)
3115  dsa_get_address(hashtable->area, batch->buckets);
3116  for (i = 0; i < nbuckets; ++i)
3117  dsa_pointer_atomic_init(&buckets[i], InvalidDsaPointer);
3118 }
#define dsa_pointer_atomic_init
Definition: dsa.h:64
#define dsa_allocate(area, size)
Definition: dsa.h:84

References HashJoinTableData::area, HashJoinTableData::batches, ParallelHashJoinBatch::buckets, dsa_allocate, dsa_get_address(), dsa_pointer_atomic_init, i, InvalidDsaPointer, ParallelHashJoinState::nbuckets, HashJoinTableData::parallel_state, and ParallelHashJoinBatchAccessor::shared.

Referenced by ExecHashTableCreate(), and ExecParallelHashJoinNewBatch().

◆ ExecParallelHashTableInsert()

void ExecParallelHashTableInsert ( HashJoinTable  hashtable,
TupleTableSlot *  slot,
uint32  hashvalue 
)

Definition at line 1691 of file nodeHash.c.

1694 {
1695  bool shouldFree;
1696  MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1697  dsa_pointer shared;
1698  int bucketno;
1699  int batchno;
1700 
1701 retry:
1702  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno, &batchno);
1703 
1704  if (batchno == 0)
1705  {
1706  HashJoinTuple hashTuple;
1707 
1708  /* Try to load it into memory. */
1709  Assert(BarrierPhase(&hashtable->parallel_state->build_barrier) ==
1710  PHJ_BUILD_HASHING_INNER);
1711  hashTuple = ExecParallelHashTupleAlloc(hashtable,
1712  HJTUPLE_OVERHEAD + tuple->t_len,
1713  &shared);
1714  if (hashTuple == NULL)
1715  goto retry;
1716 
1717  /* Store the hash value in the HashJoinTuple header. */
1718  hashTuple->hashvalue = hashvalue;
1719  memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
1720 
1721  /* Push it onto the front of the bucket's list */
1722  ExecParallelHashPushTuple(&hashtable->buckets.shared[bucketno],
1723  hashTuple, shared);
1724  }
1725  else
1726  {
1727  size_t tuple_size = MAXALIGN(HJTUPLE_OVERHEAD + tuple->t_len);
1728 
1729  Assert(batchno > 0);
1730 
1731  /* Try to preallocate space in the batch if necessary. */
1732  if (hashtable->batches[batchno].preallocated < tuple_size)
1733  {
1734  if (!ExecParallelHashTuplePrealloc(hashtable, batchno, tuple_size))
1735  goto retry;
1736  }
1737 
1738  Assert(hashtable->batches[batchno].preallocated >= tuple_size);
1739  hashtable->batches[batchno].preallocated -= tuple_size;
1740  sts_puttuple(hashtable->batches[batchno].inner_tuples, &hashvalue,
1741  tuple);
1742  }
1743  ++hashtable->batches[batchno].ntuples;
1744 
1745  if (shouldFree)
1746  heap_free_minimal_tuple(tuple);
1747 }
#define PHJ_BUILD_HASHING_INNER
Definition: hashjoin.h:259
static bool ExecParallelHashTuplePrealloc(HashJoinTable hashtable, int batchno, size_t size)
Definition: nodeHash.c:3329
static HashJoinTuple ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size, dsa_pointer *shared)
Definition: nodeHash.c:2796
static void ExecParallelHashPushTuple(dsa_pointer_atomic *head, HashJoinTuple tuple, dsa_pointer tuple_shared)
Definition: nodeHash.c:3249
void sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data, MinimalTuple tuple)
dsa_pointer_atomic * shared
Definition: hashjoin.h:299

References Assert(), BarrierPhase(), HashJoinTableData::batches, HashJoinTableData::buckets, ParallelHashJoinState::build_barrier, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecParallelHashPushTuple(), ExecParallelHashTupleAlloc(), ExecParallelHashTuplePrealloc(), HashJoinTupleData::hashvalue, heap_free_minimal_tuple(), HJTUPLE_MINTUPLE, HJTUPLE_OVERHEAD, ParallelHashJoinBatchAccessor::inner_tuples, MAXALIGN, ParallelHashJoinBatchAccessor::ntuples, HashJoinTableData::parallel_state, PHJ_BUILD_HASHING_INNER, ParallelHashJoinBatchAccessor::preallocated, HashJoinTableData::shared, sts_puttuple(), and MinimalTupleData::t_len.

Referenced by MultiExecParallelHash().

◆ ExecParallelHashTableInsertCurrentBatch()

void ExecParallelHashTableInsertCurrentBatch ( HashJoinTable  hashtable,
TupleTableSlot *  slot,
uint32  hashvalue 
)

Definition at line 1756 of file nodeHash.c.

1759 {
1760  bool shouldFree;
1761  MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree);
1762  HashJoinTuple hashTuple;
1763  dsa_pointer shared;
1764  int batchno;
1765  int bucketno;
1766 
1767  ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno, &batchno);
1768  Assert(batchno == hashtable->curbatch);
1769  hashTuple = ExecParallelHashTupleAlloc(hashtable,
1770  HJTUPLE_OVERHEAD + tuple->t_len,
1771  &shared);
1772  hashTuple->hashvalue = hashvalue;
1773  memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
1774  HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
1775  ExecParallelHashPushTuple(&hashtable->buckets.shared[bucketno],
1776  hashTuple, shared);
1777 
1778  if (shouldFree)
1779  heap_free_minimal_tuple(tuple);
1780 }

References Assert(), HashJoinTableData::buckets, HashJoinTableData::curbatch, ExecFetchSlotMinimalTuple(), ExecHashGetBucketAndBatch(), ExecParallelHashPushTuple(), ExecParallelHashTupleAlloc(), HashJoinTupleData::hashvalue, heap_free_minimal_tuple(), HeapTupleHeaderClearMatch, HJTUPLE_MINTUPLE, HJTUPLE_OVERHEAD, HashJoinTableData::shared, and MinimalTupleData::t_len.

Referenced by ExecParallelHashJoinNewBatch().
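
For illustration, a hedged sketch of the reload loop a caller such as ExecParallelHashJoinNewBatch() runs for the current batch; slot and hashtable are assumed to be the worker's hash-tuple slot and hash table.

/* hypothetical reload of the current batch from its shared tuplestore */
SharedTuplestoreAccessor *inner_tuples =
    hashtable->batches[hashtable->curbatch].inner_tuples;
MinimalTuple tuple;
uint32      hashvalue;

sts_begin_parallel_scan(inner_tuples);
while ((tuple = sts_parallel_scan_next(inner_tuples, &hashvalue)) != NULL)
{
    ExecForceStoreMinimalTuple(tuple, slot, false);
    ExecParallelHashTableInsertCurrentBatch(hashtable, slot, hashvalue);
}
sts_end_parallel_scan(inner_tuples);

Unlike ExecParallelHashTableInsert(), this variant assumes the tuple already belongs to the current batch, so there is no retry path, and the copied tuple's match flag is cleared.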

◆ ExecParallelHashTableSetCurrentBatch()

void ExecParallelHashTableSetCurrentBatch ( HashJoinTable  hashtable,
int  batchno 
)

Definition at line 3267 of file nodeHash.c.

3268 {
3269  Assert(hashtable->batches[batchno].shared->buckets != InvalidDsaPointer);
3270 
3271  hashtable->curbatch = batchno;
3272  hashtable->buckets.shared = (dsa_pointer_atomic *)
3273  dsa_get_address(hashtable->area,
3274  hashtable->batches[batchno].shared->buckets);
3275  hashtable->nbuckets = hashtable->parallel_state->nbuckets;
3276  hashtable->log2_nbuckets = my_log2(hashtable->nbuckets);
3277  hashtable->current_chunk = NULL;
3278  hashtable->current_chunk_shared = InvalidDsaPointer;
3279  hashtable->batches[batchno].at_least_one_chunk = false;
3280 }
dsa_pointer current_chunk_shared
Definition: hashjoin.h:359

References HashJoinTableData::area, Assert(), ParallelHashJoinBatchAccessor::at_least_one_chunk, HashJoinTableData::batches, ParallelHashJoinBatch::buckets, HashJoinTableData::buckets, HashJoinTableData::curbatch, HashJoinTableData::current_chunk, HashJoinTableData::current_chunk_shared, dsa_get_address(), InvalidDsaPointer, HashJoinTableData::log2_nbuckets, my_log2(), ParallelHashJoinState::nbuckets, HashJoinTableData::nbuckets, HashJoinTableData::parallel_state, ParallelHashJoinBatchAccessor::shared, and HashJoinTableData::shared.

Referenced by ExecParallelHashIncreaseNumBatches(), ExecParallelHashIncreaseNumBuckets(), ExecParallelHashJoinNewBatch(), and MultiExecParallelHash().
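
A brief sketch of the sequence a backend might follow when switching to a batch (in the real code only one elected participant performs the allocation step):

/* hypothetical batch switch */
ExecParallelHashTableAlloc(hashtable, batchno);           /* create the bucket array in DSA */
ExecParallelHashTableSetCurrentBatch(hashtable, batchno); /* map it into this backend */
/* hashtable->curbatch, buckets.shared and nbuckets now describe this batch */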

◆ ExecParallelScanHashBucket()

bool ExecParallelScanHashBucket ( HashJoinState *  hjstate,
ExprContext *  econtext 
)

Definition at line 2001 of file nodeHash.c.

2003 {
2004  ExprState *hjclauses = hjstate->hashclauses;
2005  HashJoinTable hashtable = hjstate->hj_HashTable;
2006  HashJoinTuple hashTuple = hjstate->hj_CurTuple;
2007  uint32 hashvalue = hjstate->hj_CurHashValue;
2008 
2009  /*
2010  * hj_CurTuple is the address of the tuple last returned from the current
2011  * bucket, or NULL if it's time to start scanning a new bucket.
2012  */
2013  if (hashTuple != NULL)
2014  hashTuple = ExecParallelHashNextTuple(hashtable, hashTuple);
2015  else
2016  hashTuple = ExecParallelHashFirstTuple(hashtable,
2017  hjstate->hj_CurBucketNo);
2018 
2019  while (hashTuple != NULL)
2020  {
2021  if (hashTuple->hashvalue == hashvalue)
2022  {
2023  TupleTableSlot *inntuple;
2024 
2025  /* insert hashtable's tuple into exec slot so ExecQual sees it */
2026  inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
2027  hjstate->hj_HashTupleSlot,
2028  false); /* do not pfree */
2029  econtext->ecxt_innertuple = inntuple;
2030 
2031  if (ExecQualAndReset(hjclauses, econtext))
2032  {
2033  hjstate->hj_CurTuple = hashTuple;
2034  return true;
2035  }
2036  }
2037 
2038  hashTuple = ExecParallelHashNextTuple(hashtable, hashTuple);
2039  }
2040 
2041  /*
2042  * no match
2043  */
2044  return false;
2045 }
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1446
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:428
static HashJoinTuple ExecParallelHashFirstTuple(HashJoinTable hashtable, int bucketno)
Definition: nodeHash.c:3219
static HashJoinTuple ExecParallelHashNextTuple(HashJoinTable hashtable, HashJoinTuple tuple)
Definition: nodeHash.c:3235
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:249
HashJoinTuple hj_CurTuple
Definition: execnodes.h:2094
ExprState * hashclauses
Definition: execnodes.h:2086
uint32 hj_CurHashValue
Definition: execnodes.h:2091
int hj_CurBucketNo
Definition: execnodes.h:2092
HashJoinTable hj_HashTable
Definition: execnodes.h:2090
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:2096

References ExprContext::ecxt_innertuple, ExecParallelHashFirstTuple(), ExecParallelHashNextTuple(), ExecQualAndReset(), ExecStoreMinimalTuple(), HashJoinState::hashclauses, HashJoinTupleData::hashvalue, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurTuple, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, and HJTUPLE_MINTUPLE.

Referenced by ExecHashJoinImpl().
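
A simplified usage sketch; the real caller is the state machine in ExecHashJoinImpl(), and the joinqual evaluation shown here is an assumption about what the caller does next, not part of this function:

/* hypothetical probe loop for one outer tuple */
while (ExecParallelScanHashBucket(hjstate, econtext))
{
    /* econtext->ecxt_innertuple holds a tuple that passed the hash clauses */
    if (ExecQual(hjstate->js.joinqual, econtext))
    {
        /* project and return the joined tuple */
    }
}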

◆ ExecPrepHashTableForUnmatched()

void ExecPrepHashTableForUnmatched ( HashJoinState *  hjstate)

Definition at line 2052 of file nodeHash.c.

2053 {
2054  /*----------
2055  * During this scan we use the HashJoinState fields as follows:
2056  *
2057  * hj_CurBucketNo: next regular bucket to scan
2058  * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
2059  * hj_CurTuple: last tuple returned, or NULL to start next bucket
2060  *----------
2061  */
2062  hjstate->hj_CurBucketNo = 0;
2063  hjstate->hj_CurSkewBucketNo = 0;
2064  hjstate->hj_CurTuple = NULL;
2065 }
int hj_CurSkewBucketNo
Definition: execnodes.h:2093

References HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurSkewBucketNo, and HashJoinState::hj_CurTuple.

Referenced by ExecHashJoinImpl().
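
Combined with ExecScanHashTableForUnmatched() below, the emit-unmatched phase of a right or full hash join might look roughly like this sketch (projection of the null-extended rows omitted):

/* hypothetical emit-unmatched phase */
ExecPrepHashTableForUnmatched(hjstate);
while (ExecScanHashTableForUnmatched(hjstate, econtext))
{
    /*
     * econtext->ecxt_innertuple is an inner tuple whose match flag was never
     * set; the caller pairs it with nulls on the outer side and projects it.
     */
}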

◆ ExecReScanHash()

void ExecReScanHash ( HashState *  node)

Definition at line 2201 of file nodeHash.c.

2202 {
2203  PlanState *outerPlan = outerPlanState(node);
2204 
2205  /*
2206  * if chgParam of subnode is not null then plan will be re-scanned by
2207  * first ExecProcNode.
2208  */
2209  if (outerPlan->chgParam == NULL)
2210  ExecReScan(outerPlan);
2211 }
void ExecReScan(PlanState *node)
Definition: execAmi.c:78

References ExecReScan(), outerPlan, and outerPlanState.

Referenced by ExecReScan().

◆ ExecScanHashBucket()

bool ExecScanHashBucket ( HashJoinState *  hjstate,
ExprContext *  econtext 
)

Definition at line 1940 of file nodeHash.c.

1942 {
1943  ExprState *hjclauses = hjstate->hashclauses;
1944  HashJoinTable hashtable = hjstate->hj_HashTable;
1945  HashJoinTuple hashTuple = hjstate->hj_CurTuple;
1946  uint32 hashvalue = hjstate->hj_CurHashValue;
1947 
1948  /*
1949  * hj_CurTuple is the address of the tuple last returned from the current
1950  * bucket, or NULL if it's time to start scanning a new bucket.
1951  *
1952  * If the tuple hashed to a skew bucket then scan the skew bucket
1953  * otherwise scan the standard hashtable bucket.
1954  */
1955  if (hashTuple != NULL)
1956  hashTuple = hashTuple->next.unshared;
1957  else if (hjstate->hj_CurSkewBucketNo != INVALID_SKEW_BUCKET_NO)
1958  hashTuple = hashtable->skewBucket[hjstate->hj_CurSkewBucketNo]->tuples;
1959  else
1960  hashTuple = hashtable->buckets.unshared[hjstate->hj_CurBucketNo];
1961 
1962  while (hashTuple != NULL)
1963  {
1964  if (hashTuple->hashvalue == hashvalue)
1965  {
1966  TupleTableSlot *inntuple;
1967 
1968  /* insert hashtable's tuple into exec slot so ExecQual sees it */
1969  inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
1970  hjstate->hj_HashTupleSlot,
1971  false); /* do not pfree */
1972  econtext->ecxt_innertuple = inntuple;
1973 
1974  if (ExecQualAndReset(hjclauses, econtext))
1975  {
1976  hjstate->hj_CurTuple = hashTuple;
1977  return true;
1978  }
1979  }
1980 
1981  hashTuple = hashTuple->next.unshared;
1982  }
1983 
1984  /*
1985  * no match
1986  */
1987  return false;
1988 }

References HashJoinTableData::buckets, ExprContext::ecxt_innertuple, ExecQualAndReset(), ExecStoreMinimalTuple(), HashJoinState::hashclauses, HashJoinTupleData::hashvalue, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurHashValue, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HJTUPLE_MINTUPLE, INVALID_SKEW_BUCKET_NO, HashJoinTupleData::next, HashJoinTableData::skewBucket, HashSkewBucket::tuples, HashJoinTupleData::unshared, and HashJoinTableData::unshared.

Referenced by ExecHashJoinImpl().
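
For context, a hedged sketch of the per-outer-tuple setup the caller performs before the first call, loosely following the HJ_NEED_NEW_OUTER step of ExecHashJoinImpl() (batch routing omitted):

/* hypothetical setup, then scan for the first match */
hjstate->hj_CurHashValue = hashvalue;
ExecHashGetBucketAndBatch(hashtable, hashvalue,
                          &hjstate->hj_CurBucketNo, &batchno);
hjstate->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable, hashvalue);
hjstate->hj_CurTuple = NULL;

if (ExecScanHashBucket(hjstate, econtext))
{
    /* first matching inner tuple is in econtext->ecxt_innertuple */
}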

◆ ExecScanHashTableForUnmatched()

bool ExecScanHashTableForUnmatched ( HashJoinState *  hjstate,
ExprContext *  econtext 
)

Definition at line 2076 of file nodeHash.c.

2077 {
2078  HashJoinTable hashtable = hjstate->hj_HashTable;
2079  HashJoinTuple hashTuple = hjstate->hj_CurTuple;
2080 
2081  for (;;)
2082  {
2083  /*
2084  * hj_CurTuple is the address of the tuple last returned from the
2085  * current bucket, or NULL if it's time to start scanning a new
2086  * bucket.
2087  */
2088  if (hashTuple != NULL)
2089  hashTuple = hashTuple->next.unshared;
2090  else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
2091  {
2092  hashTuple = hashtable->buckets.unshared[hjstate->hj_CurBucketNo];
2093  hjstate->hj_CurBucketNo++;
2094  }
2095  else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
2096  {
2097  int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
2098 
2099  hashTuple = hashtable->skewBucket[j]->tuples;
2100  hjstate->hj_CurSkewBucketNo++;
2101  }
2102  else
2103  break; /* finished all buckets */
2104 
2105  while (hashTuple != NULL)
2106  {
2107  if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
2108  {
2109  TupleTableSlot *inntuple;
2110 
2111  /* insert hashtable's tuple into exec slot */
2112  inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
2113  hjstate->hj_HashTupleSlot,
2114  false); /* do not pfree */
2115  econtext->ecxt_innertuple = inntuple;
2116 
2117  /*
2118  * Reset temp memory each time; although this function doesn't
2119  * do any qual eval, the caller will, so let's keep it
2120  * parallel to ExecScanHashBucket.
2121  */
2122  ResetExprContext(econtext);
2123 
2124  hjstate->hj_CurTuple = hashTuple;
2125  return true;
2126  }
2127 
2128  hashTuple = hashTuple->next.unshared;
2129  }
2130 
2131  /* allow this loop to be cancellable */
2132  CHECK_FOR_INTERRUPTS();
2133  }
2134 
2135  /*
2136  * no more unmatched tuples
2137  */
2138  return false;
2139 }
#define HeapTupleHeaderHasMatch(tup)
Definition: htup_details.h:510
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121

References HashJoinTableData::buckets, CHECK_FOR_INTERRUPTS, ExprContext::ecxt_innertuple, ExecStoreMinimalTuple(), HeapTupleHeaderHasMatch, HashJoinState::hj_CurBucketNo, HashJoinState::hj_CurSkewBucketNo, HashJoinState::hj_CurTuple, HashJoinState::hj_HashTable, HashJoinState::hj_HashTupleSlot, HJTUPLE_MINTUPLE, j, HashJoinTableData::nbuckets, HashJoinTupleData::next, HashJoinTableData::nSkewBuckets, ResetExprContext, HashJoinTableData::skewBucket, HashJoinTableData::skewBucketNums, HashSkewBucket::tuples, HashJoinTupleData::unshared, and HashJoinTableData::unshared.

Referenced by ExecHashJoinImpl().

◆ ExecShutdownHash()

void ExecShutdownHash ( HashState *  node)

Definition at line 2651 of file nodeHash.c.

2652 {
2653  /* Allocate save space if EXPLAIN'ing and we didn't do so already */
2654  if (node->ps.instrument && !node->hinstrument)
2655  node->hinstrument = palloc0_object(HashInstrumentation);
2656  /* Now accumulate data for the current (final) hash table */
2657  if (node->hinstrument && node->hashtable)
2658  ExecHashAccumInstrumentation(node->hinstrument, node->hashtable);
2659 }
#define palloc0_object(type)
Definition: fe_memutils.h:63
void ExecHashAccumInstrumentation(HashInstrumentation *instrument, HashJoinTable hashtable)
Definition: nodeHash.c:2697

References ExecHashAccumInstrumentation(), HashState::hashtable, HashState::hinstrument, PlanState::instrument, palloc0_object, and HashState::ps.

Referenced by ExecShutdownNode_walker().
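
The accumulated figures live in HashInstrumentation (nbuckets, nbuckets_original, nbatch, nbatch_original, space_peak). A hypothetical consumer, purely for illustration (EXPLAIN's show_hash_info() is the real one):

/* hypothetical: report the stats gathered for EXPLAIN */
HashInstrumentation *hi = node->hinstrument;

if (hi != NULL)
    elog(LOG, "buckets=%d (originally %d), batches=%d (originally %d), peak memory=%zu kB",
         hi->nbuckets, hi->nbuckets_original,
         hi->nbatch, hi->nbatch_original,
         hi->space_peak / 1024);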

◆ MultiExecHash()

Node* MultiExecHash ( HashState *  node)

Definition at line 106 of file nodeHash.c.

107 {
108  /* must provide our own instrumentation support */
109  if (node->ps.instrument)
110  InstrStartNode(node->ps.instrument);
111 
112  if (node->parallel_state != NULL)
113  MultiExecParallelHash(node);
114  else
115  MultiExecPrivateHash(node);
116 
117  /* must provide our own instrumentation support */
118  if (node->ps.instrument)
119  InstrStopNode(node->ps.instrument, node->hashtable->partialTuples);
120 
121  /*
122  * We do not return the hash table directly because it's not a subtype of
123  * Node, and so would violate the MultiExecProcNode API. Instead, our
124  * parent Hashjoin node is expected to know how to fish it out of our node
125  * state. Ugly but not really worth cleaning up, since Hashjoin knows
126  * quite a bit more about Hash besides that.
127  */
128  return NULL;
129 }
void InstrStartNode(Instrumentation *instr)
Definition: instrument.c:68
void InstrStopNode(Instrumentation *instr, double nTuples)
Definition: instrument.c:84
static void MultiExecParallelHash(HashState *node)
Definition: nodeHash.c:215
static void MultiExecPrivateHash(HashState *node)
Definition: nodeHash.c:139
struct ParallelHashJoinState * parallel_state
Definition: execnodes.h:2661

References HashState::hashtable, InstrStartNode(), InstrStopNode(), PlanState::instrument, MultiExecParallelHash(), MultiExecPrivateHash(), HashState::parallel_state, HashJoinTableData::partialTuples, and HashState::ps.

Referenced by MultiExecProcNode().
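
As the comment says, the parent HashJoin retrieves the table from the Hash node's state; a hedged sketch of that hand-off (the real logic lives in ExecHashJoinImpl()):

/* hypothetical: build the table, then fish it out of the node state */
HashState    *hashNode = (HashState *) innerPlanState(hjstate);
HashJoinTable hashtable;

(void) MultiExecProcNode((PlanState *) hashNode);  /* returns NULL by design */
hashtable = hashNode->hashtable;
hjstate->hj_HashTable = hashtable;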