PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nodeHashjoin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  * Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeHashjoin.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/htup_details.h"
19 #include "executor/executor.h"
20 #include "executor/hashjoin.h"
21 #include "executor/nodeHash.h"
22 #include "executor/nodeHashjoin.h"
23 #include "miscadmin.h"
24 #include "utils/memutils.h"
25 
26 
27 /*
28  * States of the ExecHashJoin state machine
29  */
30 #define HJ_BUILD_HASHTABLE 1
31 #define HJ_NEED_NEW_OUTER 2
32 #define HJ_SCAN_BUCKET 3
33 #define HJ_FILL_OUTER_TUPLE 4
34 #define HJ_FILL_INNER_TUPLES 5
35 #define HJ_NEED_NEW_BATCH 6
36 
37 /* Returns true if doing null-fill on outer relation */
38 #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
39 /* Returns true if doing null-fill on inner relation */
40 #define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
41 
43  HashJoinState *hjstate,
44  uint32 *hashvalue);
46  BufFile *file,
47  uint32 *hashvalue,
48  TupleTableSlot *tupleSlot);
49 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
50 
51 
52 /* ----------------------------------------------------------------
53  * ExecHashJoin
54  *
55  * This function implements the Hybrid Hashjoin algorithm.
56  *
57  * Note: the relation we build hash table on is the "inner"
58  * the other one is "outer".
59  * ----------------------------------------------------------------
60  */
61 TupleTableSlot * /* return: a tuple or NULL */
63 {
64  PlanState *outerNode;
65  HashState *hashNode;
66  List *joinqual;
67  List *otherqual;
68  ExprContext *econtext;
69  HashJoinTable hashtable;
70  TupleTableSlot *outerTupleSlot;
71  uint32 hashvalue;
72  int batchno;
73 
74  /*
75  * get information from HashJoin node
76  */
77  joinqual = node->js.joinqual;
78  otherqual = node->js.ps.qual;
79  hashNode = (HashState *) innerPlanState(node);
80  outerNode = outerPlanState(node);
81  hashtable = node->hj_HashTable;
82  econtext = node->js.ps.ps_ExprContext;
83 
84  /*
85  * Reset per-tuple memory context to free any expression evaluation
86  * storage allocated in the previous tuple cycle.
87  */
88  ResetExprContext(econtext);
89 
90  /*
91  * run the hash join state machine
92  */
93  for (;;)
94  {
95  switch (node->hj_JoinState)
96  {
97  case HJ_BUILD_HASHTABLE:
98 
99  /*
100  * First time through: build hash table for inner relation.
101  */
102  Assert(hashtable == NULL);
103 
104  /*
105  * If the outer relation is completely empty, and it's not
106  * right/full join, we can quit without building the hash
107  * table. However, for an inner join it is only a win to
108  * check this when the outer relation's startup cost is less
109  * than the projected cost of building the hash table.
110  * Otherwise it's best to build the hash table first and see
111  * if the inner relation is empty. (When it's a left join, we
112  * should always make this check, since we aren't going to be
113  * able to skip the join on the strength of an empty inner
114  * relation anyway.)
115  *
116  * If we are rescanning the join, we make use of information
117  * gained on the previous scan: don't bother to try the
118  * prefetch if the previous scan found the outer relation
119  * nonempty. This is not 100% reliable since with new
120  * parameters the outer relation might yield different
121  * results, but it's a good heuristic.
122  *
123  * The only way to make the check is to try to fetch a tuple
124  * from the outer plan node. If we succeed, we have to stash
125  * it away for later consumption by ExecHashJoinOuterGetTuple.
126  */
127  if (HJ_FILL_INNER(node))
128  {
129  /* no chance to not build the hash table */
131  }
132  else if (HJ_FILL_OUTER(node) ||
133  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
134  !node->hj_OuterNotEmpty))
135  {
136  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
138  {
139  node->hj_OuterNotEmpty = false;
140  return NULL;
141  }
142  else
143  node->hj_OuterNotEmpty = true;
144  }
145  else
147 
148  /*
149  * create the hash table
150  */
151  hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
152  node->hj_HashOperators,
153  HJ_FILL_INNER(node));
154  node->hj_HashTable = hashtable;
155 
156  /*
157  * execute the Hash node, to build the hash table
158  */
159  hashNode->hashtable = hashtable;
160  (void) MultiExecProcNode((PlanState *) hashNode);
161 
162  /*
163  * If the inner relation is completely empty, and we're not
164  * doing a left outer join, we can quit without scanning the
165  * outer relation.
166  */
167  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
168  return NULL;
169 
170  /*
171  * need to remember whether nbatch has increased since we
172  * began scanning the outer relation
173  */
174  hashtable->nbatch_outstart = hashtable->nbatch;
175 
176  /*
177  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
178  * tuple above, because ExecHashJoinOuterGetTuple will
179  * immediately set it again.)
180  */
181  node->hj_OuterNotEmpty = false;
182 
184 
185  /* FALL THRU */
186 
187  case HJ_NEED_NEW_OUTER:
188 
189  /*
190  * We don't have an outer tuple, try to get the next one
191  */
192  outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
193  node,
194  &hashvalue);
195  if (TupIsNull(outerTupleSlot))
196  {
197  /* end of batch, or maybe whole join */
198  if (HJ_FILL_INNER(node))
199  {
200  /* set up to scan for unmatched inner tuples */
203  }
204  else
206  continue;
207  }
208 
209  econtext->ecxt_outertuple = outerTupleSlot;
210  node->hj_MatchedOuter = false;
211 
212  /*
213  * Find the corresponding bucket for this tuple in the main
214  * hash table or skew hash table.
215  */
216  node->hj_CurHashValue = hashvalue;
217  ExecHashGetBucketAndBatch(hashtable, hashvalue,
218  &node->hj_CurBucketNo, &batchno);
219  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
220  hashvalue);
221  node->hj_CurTuple = NULL;
222 
223  /*
224  * The tuple might not belong to the current batch (where
225  * "current batch" includes the skew buckets if any).
226  */
227  if (batchno != hashtable->curbatch &&
229  {
230  /*
231  * Need to postpone this outer tuple to a later batch.
232  * Save it in the corresponding outer-batch file.
233  */
234  Assert(batchno > hashtable->curbatch);
236  hashvalue,
237  &hashtable->outerBatchFile[batchno]);
238  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
239  continue;
240  }
241 
242  /* OK, let's scan the bucket for matches */
244 
245  /* FALL THRU */
246 
247  case HJ_SCAN_BUCKET:
248 
249  /*
250  * We check for interrupts here because this corresponds to
251  * where we'd fetch a row from a child plan node in other join
252  * types.
253  */
255 
256  /*
257  * Scan the selected hash bucket for matches to current outer
258  */
259  if (!ExecScanHashBucket(node, econtext))
260  {
261  /* out of matches; check for possible outer-join fill */
263  continue;
264  }
265 
266  /*
267  * We've got a match, but still need to test non-hashed quals.
268  * ExecScanHashBucket already set up all the state needed to
269  * call ExecQual.
270  *
271  * If we pass the qual, then save state for next call and have
272  * ExecProject form the projection, store it in the tuple
273  * table, and return the slot.
274  *
275  * Only the joinquals determine tuple match status, but all
276  * quals must pass to actually return the tuple.
277  */
278  if (joinqual == NIL || ExecQual(joinqual, econtext, false))
279  {
280  node->hj_MatchedOuter = true;
282 
283  /* In an antijoin, we never return a matched tuple */
284  if (node->js.jointype == JOIN_ANTI)
285  {
287  continue;
288  }
289 
290  /*
291  * In a semijoin, we'll consider returning the first
292  * match, but after that we're done with this outer tuple.
293  */
294  if (node->js.jointype == JOIN_SEMI)
296 
297  if (otherqual == NIL ||
298  ExecQual(otherqual, econtext, false))
299  return ExecProject(node->js.ps.ps_ProjInfo);
300  else
301  InstrCountFiltered2(node, 1);
302  }
303  else
304  InstrCountFiltered1(node, 1);
305  break;
306 
307  case HJ_FILL_OUTER_TUPLE:
308 
309  /*
310  * The current outer tuple has run out of matches, so check
311  * whether to emit a dummy outer-join tuple. Whether we emit
312  * one or not, the next state is NEED_NEW_OUTER.
313  */
315 
316  if (!node->hj_MatchedOuter &&
317  HJ_FILL_OUTER(node))
318  {
319  /*
320  * Generate a fake join tuple with nulls for the inner
321  * tuple, and return it if it passes the non-join quals.
322  */
323  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
324 
325  if (otherqual == NIL ||
326  ExecQual(otherqual, econtext, false))
327  return ExecProject(node->js.ps.ps_ProjInfo);
328  else
329  InstrCountFiltered2(node, 1);
330  }
331  break;
332 
334 
335  /*
336  * We have finished a batch, but we are doing right/full join,
337  * so any unmatched inner tuples in the hashtable have to be
338  * emitted before we continue to the next batch.
339  */
340  if (!ExecScanHashTableForUnmatched(node, econtext))
341  {
342  /* no more unmatched tuples */
344  continue;
345  }
346 
347  /*
348  * Generate a fake join tuple with nulls for the outer tuple,
349  * and return it if it passes the non-join quals.
350  */
351  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
352 
353  if (otherqual == NIL ||
354  ExecQual(otherqual, econtext, false))
355  return ExecProject(node->js.ps.ps_ProjInfo);
356  else
357  InstrCountFiltered2(node, 1);
358  break;
359 
360  case HJ_NEED_NEW_BATCH:
361 
362  /*
363  * Try to advance to next batch. Done if there are no more.
364  */
365  if (!ExecHashJoinNewBatch(node))
366  return NULL; /* end of join */
368  break;
369 
370  default:
371  elog(ERROR, "unrecognized hashjoin state: %d",
372  (int) node->hj_JoinState);
373  }
374  }
375 }
376 
377 /* ----------------------------------------------------------------
378  * ExecInitHashJoin
379  *
380  * Init routine for HashJoin node.
381  * ----------------------------------------------------------------
382  */
384 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
385 {
386  HashJoinState *hjstate;
387  Plan *outerNode;
388  Hash *hashNode;
389  List *lclauses;
390  List *rclauses;
391  List *hoperators;
392  ListCell *l;
393 
394  /* check for unsupported flags */
395  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
396 
397  /*
398  * create state structure
399  */
400  hjstate = makeNode(HashJoinState);
401  hjstate->js.ps.plan = (Plan *) node;
402  hjstate->js.ps.state = estate;
403 
404  /*
405  * Miscellaneous initialization
406  *
407  * create expression context for node
408  */
409  ExecAssignExprContext(estate, &hjstate->js.ps);
410 
411  /*
412  * initialize child expressions
413  */
414  hjstate->js.ps.targetlist = (List *)
416  (PlanState *) hjstate);
417  hjstate->js.ps.qual = (List *)
418  ExecInitExpr((Expr *) node->join.plan.qual,
419  (PlanState *) hjstate);
420  hjstate->js.jointype = node->join.jointype;
421  hjstate->js.joinqual = (List *)
422  ExecInitExpr((Expr *) node->join.joinqual,
423  (PlanState *) hjstate);
424  hjstate->hashclauses = (List *)
425  ExecInitExpr((Expr *) node->hashclauses,
426  (PlanState *) hjstate);
427 
428  /*
429  * initialize child nodes
430  *
431  * Note: we could suppress the REWIND flag for the inner input, which
432  * would amount to betting that the hash will be a single batch. Not
433  * clear if this would be a win or not.
434  */
435  outerNode = outerPlan(node);
436  hashNode = (Hash *) innerPlan(node);
437 
438  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
439  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
440 
441  /*
442  * tuple table initialization
443  */
444  ExecInitResultTupleSlot(estate, &hjstate->js.ps);
445  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
446 
447  /* set up null tuples for outer joins, if needed */
448  switch (node->join.jointype)
449  {
450  case JOIN_INNER:
451  case JOIN_SEMI:
452  break;
453  case JOIN_LEFT:
454  case JOIN_ANTI:
455  hjstate->hj_NullInnerTupleSlot =
456  ExecInitNullTupleSlot(estate,
458  break;
459  case JOIN_RIGHT:
460  hjstate->hj_NullOuterTupleSlot =
461  ExecInitNullTupleSlot(estate,
463  break;
464  case JOIN_FULL:
465  hjstate->hj_NullOuterTupleSlot =
466  ExecInitNullTupleSlot(estate,
468  hjstate->hj_NullInnerTupleSlot =
469  ExecInitNullTupleSlot(estate,
471  break;
472  default:
473  elog(ERROR, "unrecognized join type: %d",
474  (int) node->join.jointype);
475  }
476 
477  /*
478  * now for some voodoo. our temporary tuple slot is actually the result
479  * tuple slot of the Hash node (which is our inner plan). we can do this
480  * because Hash nodes don't return tuples via ExecProcNode() -- instead
481  * the hash join node uses ExecScanHashBucket() to get at the contents of
482  * the hash table. -cim 6/9/91
483  */
484  {
485  HashState *hashstate = (HashState *) innerPlanState(hjstate);
486  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
487 
488  hjstate->hj_HashTupleSlot = slot;
489  }
490 
491  /*
492  * initialize tuple type and projection info
493  */
494  ExecAssignResultTypeFromTL(&hjstate->js.ps);
495  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
496 
499 
500  /*
501  * initialize hash-specific info
502  */
503  hjstate->hj_HashTable = NULL;
504  hjstate->hj_FirstOuterTupleSlot = NULL;
505 
506  hjstate->hj_CurHashValue = 0;
507  hjstate->hj_CurBucketNo = 0;
509  hjstate->hj_CurTuple = NULL;
510 
511  /*
512  * Deconstruct the hash clauses into outer and inner argument values, so
513  * that we can evaluate those subexpressions separately. Also make a list
514  * of the hash operator OIDs, in preparation for looking up the hash
515  * functions to use.
516  */
517  lclauses = NIL;
518  rclauses = NIL;
519  hoperators = NIL;
520  foreach(l, hjstate->hashclauses)
521  {
523  OpExpr *hclause = castNode(OpExpr, fstate->xprstate.expr);
524 
525  lclauses = lappend(lclauses, linitial(fstate->args));
526  rclauses = lappend(rclauses, lsecond(fstate->args));
527  hoperators = lappend_oid(hoperators, hclause->opno);
528  }
529  hjstate->hj_OuterHashKeys = lclauses;
530  hjstate->hj_InnerHashKeys = rclauses;
531  hjstate->hj_HashOperators = hoperators;
532  /* child Hash node needs to evaluate inner hash keys, too */
533  ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
534 
535  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
536  hjstate->hj_MatchedOuter = false;
537  hjstate->hj_OuterNotEmpty = false;
538 
539  return hjstate;
540 }
541 
542 /* ----------------------------------------------------------------
543  * ExecEndHashJoin
544  *
545  * clean up routine for HashJoin node
546  * ----------------------------------------------------------------
547  */
548 void
550 {
551  /*
552  * Free hash table
553  */
554  if (node->hj_HashTable)
555  {
557  node->hj_HashTable = NULL;
558  }
559 
560  /*
561  * Free the exprcontext
562  */
563  ExecFreeExprContext(&node->js.ps);
564 
565  /*
566  * clean out the tuple table
567  */
571 
572  /*
573  * clean up subtrees
574  */
577 }
578 
579 /*
580  * ExecHashJoinOuterGetTuple
581  *
582  * get the next outer tuple for hashjoin: either by
583  * executing the outer plan node in the first pass, or from
584  * the temp files for the hashjoin batches.
585  *
586  * Returns a null slot if no more outer tuples (within the current batch).
587  *
588  * On success, the tuple's hash value is stored at *hashvalue --- this is
589  * either originally computed, or re-read from the temp file.
590  */
591 static TupleTableSlot *
593  HashJoinState *hjstate,
594  uint32 *hashvalue)
595 {
596  HashJoinTable hashtable = hjstate->hj_HashTable;
597  int curbatch = hashtable->curbatch;
598  TupleTableSlot *slot;
599 
600  if (curbatch == 0) /* if it is the first pass */
601  {
602  /*
603  * Check to see if first outer tuple was already fetched by
604  * ExecHashJoin() and not used yet.
605  */
606  slot = hjstate->hj_FirstOuterTupleSlot;
607  if (!TupIsNull(slot))
608  hjstate->hj_FirstOuterTupleSlot = NULL;
609  else
610  slot = ExecProcNode(outerNode);
611 
612  while (!TupIsNull(slot))
613  {
614  /*
615  * We have to compute the tuple's hash value.
616  */
617  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
618 
619  econtext->ecxt_outertuple = slot;
620  if (ExecHashGetHashValue(hashtable, econtext,
621  hjstate->hj_OuterHashKeys,
622  true, /* outer tuple */
623  HJ_FILL_OUTER(hjstate),
624  hashvalue))
625  {
626  /* remember outer relation is not empty for possible rescan */
627  hjstate->hj_OuterNotEmpty = true;
628 
629  return slot;
630  }
631 
632  /*
633  * That tuple couldn't match because of a NULL, so discard it and
634  * continue with the next one.
635  */
636  slot = ExecProcNode(outerNode);
637  }
638  }
639  else if (curbatch < hashtable->nbatch)
640  {
641  BufFile *file = hashtable->outerBatchFile[curbatch];
642 
643  /*
644  * In outer-join cases, we could get here even though the batch file
645  * is empty.
646  */
647  if (file == NULL)
648  return NULL;
649 
650  slot = ExecHashJoinGetSavedTuple(hjstate,
651  file,
652  hashvalue,
653  hjstate->hj_OuterTupleSlot);
654  if (!TupIsNull(slot))
655  return slot;
656  }
657 
658  /* End of this batch */
659  return NULL;
660 }
661 
662 /*
663  * ExecHashJoinNewBatch
664  * switch to a new hashjoin batch
665  *
666  * Returns true if successful, false if there are no more batches.
667  */
668 static bool
670 {
671  HashJoinTable hashtable = hjstate->hj_HashTable;
672  int nbatch;
673  int curbatch;
674  BufFile *innerFile;
675  TupleTableSlot *slot;
676  uint32 hashvalue;
677 
678  nbatch = hashtable->nbatch;
679  curbatch = hashtable->curbatch;
680 
681  if (curbatch > 0)
682  {
683  /*
684  * We no longer need the previous outer batch file; close it right
685  * away to free disk space.
686  */
687  if (hashtable->outerBatchFile[curbatch])
688  BufFileClose(hashtable->outerBatchFile[curbatch]);
689  hashtable->outerBatchFile[curbatch] = NULL;
690  }
691  else /* we just finished the first batch */
692  {
693  /*
694  * Reset some of the skew optimization state variables, since we no
695  * longer need to consider skew tuples after the first batch. The
696  * memory context reset we are about to do will release the skew
697  * hashtable itself.
698  */
699  hashtable->skewEnabled = false;
700  hashtable->skewBucket = NULL;
701  hashtable->skewBucketNums = NULL;
702  hashtable->nSkewBuckets = 0;
703  hashtable->spaceUsedSkew = 0;
704  }
705 
706  /*
707  * We can always skip over any batches that are completely empty on both
708  * sides. We can sometimes skip over batches that are empty on only one
709  * side, but there are exceptions:
710  *
711  * 1. In a left/full outer join, we have to process outer batches even if
712  * the inner batch is empty. Similarly, in a right/full outer join, we
713  * have to process inner batches even if the outer batch is empty.
714  *
715  * 2. If we have increased nbatch since the initial estimate, we have to
716  * scan inner batches since they might contain tuples that need to be
717  * reassigned to later inner batches.
718  *
719  * 3. Similarly, if we have increased nbatch since starting the outer
720  * scan, we have to rescan outer batches in case they contain tuples that
721  * need to be reassigned.
722  */
723  curbatch++;
724  while (curbatch < nbatch &&
725  (hashtable->outerBatchFile[curbatch] == NULL ||
726  hashtable->innerBatchFile[curbatch] == NULL))
727  {
728  if (hashtable->outerBatchFile[curbatch] &&
729  HJ_FILL_OUTER(hjstate))
730  break; /* must process due to rule 1 */
731  if (hashtable->innerBatchFile[curbatch] &&
732  HJ_FILL_INNER(hjstate))
733  break; /* must process due to rule 1 */
734  if (hashtable->innerBatchFile[curbatch] &&
735  nbatch != hashtable->nbatch_original)
736  break; /* must process due to rule 2 */
737  if (hashtable->outerBatchFile[curbatch] &&
738  nbatch != hashtable->nbatch_outstart)
739  break; /* must process due to rule 3 */
740  /* We can ignore this batch. */
741  /* Release associated temp files right away. */
742  if (hashtable->innerBatchFile[curbatch])
743  BufFileClose(hashtable->innerBatchFile[curbatch]);
744  hashtable->innerBatchFile[curbatch] = NULL;
745  if (hashtable->outerBatchFile[curbatch])
746  BufFileClose(hashtable->outerBatchFile[curbatch]);
747  hashtable->outerBatchFile[curbatch] = NULL;
748  curbatch++;
749  }
750 
751  if (curbatch >= nbatch)
752  return false; /* no more batches */
753 
754  hashtable->curbatch = curbatch;
755 
756  /*
757  * Reload the hash table with the new inner batch (which could be empty)
758  */
759  ExecHashTableReset(hashtable);
760 
761  innerFile = hashtable->innerBatchFile[curbatch];
762 
763  if (innerFile != NULL)
764  {
765  if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
766  ereport(ERROR,
768  errmsg("could not rewind hash-join temporary file: %m")));
769 
770  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
771  innerFile,
772  &hashvalue,
773  hjstate->hj_HashTupleSlot)))
774  {
775  /*
776  * NOTE: some tuples may be sent to future batches. Also, it is
777  * possible for hashtable->nbatch to be increased here!
778  */
779  ExecHashTableInsert(hashtable, slot, hashvalue);
780  }
781 
782  /*
783  * after we build the hash table, the inner batch file is no longer
784  * needed
785  */
786  BufFileClose(innerFile);
787  hashtable->innerBatchFile[curbatch] = NULL;
788  }
789 
790  /*
791  * Rewind outer batch file (if present), so that we can start reading it.
792  */
793  if (hashtable->outerBatchFile[curbatch] != NULL)
794  {
795  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
796  ereport(ERROR,
798  errmsg("could not rewind hash-join temporary file: %m")));
799  }
800 
801  return true;
802 }
803 
804 /*
805  * ExecHashJoinSaveTuple
806  * save a tuple to a batch file.
807  *
808  * The data recorded in the file for each tuple is its hash value,
809  * then the tuple in MinimalTuple format.
810  *
811  * Note: it is important always to call this in the regular executor
812  * context, not in a shorter-lived context; else the temp file buffers
813  * will get messed up.
814  */
815 void
817  BufFile **fileptr)
818 {
819  BufFile *file = *fileptr;
820  size_t written;
821 
822  if (file == NULL)
823  {
824  /* First write to this batch file, so open it. */
825  file = BufFileCreateTemp(false);
826  *fileptr = file;
827  }
828 
829  written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
830  if (written != sizeof(uint32))
831  ereport(ERROR,
833  errmsg("could not write to hash-join temporary file: %m")));
834 
835  written = BufFileWrite(file, (void *) tuple, tuple->t_len);
836  if (written != tuple->t_len)
837  ereport(ERROR,
839  errmsg("could not write to hash-join temporary file: %m")));
840 }
841 
842 /*
843  * ExecHashJoinGetSavedTuple
844  * read the next tuple from a batch file. Return NULL if no more.
845  *
846  * On success, *hashvalue is set to the tuple's hash value, and the tuple
847  * itself is stored in the given slot.
848  */
849 static TupleTableSlot *
851  BufFile *file,
852  uint32 *hashvalue,
853  TupleTableSlot *tupleSlot)
854 {
855  uint32 header[2];
856  size_t nread;
857  MinimalTuple tuple;
858 
859  /*
860  * We check for interrupts here because this is typically taken as an
861  * alternative code path to an ExecProcNode() call, which would include
862  * such a check.
863  */
865 
866  /*
867  * Since both the hash value and the MinimalTuple length word are uint32,
868  * we can read them both in one BufFileRead() call without any type
869  * cheating.
870  */
871  nread = BufFileRead(file, (void *) header, sizeof(header));
872  if (nread == 0) /* end of file */
873  {
874  ExecClearTuple(tupleSlot);
875  return NULL;
876  }
877  if (nread != sizeof(header))
878  ereport(ERROR,
880  errmsg("could not read from hash-join temporary file: %m")));
881  *hashvalue = header[0];
882  tuple = (MinimalTuple) palloc(header[1]);
883  tuple->t_len = header[1];
884  nread = BufFileRead(file,
885  (void *) ((char *) tuple + sizeof(uint32)),
886  header[1] - sizeof(uint32));
887  if (nread != header[1] - sizeof(uint32))
888  ereport(ERROR,
890  errmsg("could not read from hash-join temporary file: %m")));
891  return ExecStoreMinimalTuple(tuple, tupleSlot, true);
892 }
893 
894 
895 void
897 {
898  /*
899  * In a multi-batch join, we currently have to do rescans the hard way,
900  * primarily because batch temp files may have already been released. But
901  * if it's a single-batch join, and there is no parameter change for the
902  * inner subnode, then we can just re-use the existing hash table without
903  * rebuilding it.
904  */
905  if (node->hj_HashTable != NULL)
906  {
907  if (node->hj_HashTable->nbatch == 1 &&
908  node->js.ps.righttree->chgParam == NULL)
909  {
910  /*
911  * Okay to reuse the hash table; needn't rescan inner, either.
912  *
913  * However, if it's a right/full join, we'd better reset the
914  * inner-tuple match flags contained in the table.
915  */
916  if (HJ_FILL_INNER(node))
918 
919  /*
920  * Also, we need to reset our state about the emptiness of the
921  * outer relation, so that the new scan of the outer will update
922  * it correctly if it turns out to be empty this time. (There's no
923  * harm in clearing it now because ExecHashJoin won't need the
924  * info. In the other cases, where the hash table doesn't exist
925  * or we are destroying it, we leave this state alone because
926  * ExecHashJoin will need it the first time through.)
927  */
928  node->hj_OuterNotEmpty = false;
929 
930  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
932  }
933  else
934  {
935  /* must destroy and rebuild hash table */
937  node->hj_HashTable = NULL;
939 
940  /*
941  * if chgParam of subnode is not null then plan will be re-scanned
942  * by first ExecProcNode.
943  */
944  if (node->js.ps.righttree->chgParam == NULL)
945  ExecReScan(node->js.ps.righttree);
946  }
947  }
948 
949  /* Always reset intra-tuple state */
950  node->hj_CurHashValue = 0;
951  node->hj_CurBucketNo = 0;
953  node->hj_CurTuple = NULL;
954 
955  node->hj_MatchedOuter = false;
957 
958  /*
959  * if chgParam of subnode is not null then plan will be re-scanned by
960  * first ExecProcNode.
961  */
962  if (node->js.ps.lefttree->chgParam == NULL)
963  ExecReScan(node->js.ps.lefttree);
964 }
JoinType jointype
Definition: execnodes.h:1761
#define NIL
Definition: pg_list.h:69
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:35
List * qual
Definition: plannodes.h:133
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:101
TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: execQual.c:5215
TupleTableSlot * ExecProcNode(PlanState *node)
Definition: execProcnode.c:392
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:32
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate)
Definition: execTuples.c:852
HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
Definition: nodeHash.c:246
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:384
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1872
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1081
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:485
PlanState ps
Definition: execnodes.h:1760
#define castNode(_type_, nodeptr)
Definition: nodes.h:588
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:644
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1145
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
Definition: execTuples.c:652
List * hashclauses
Definition: plannodes.h:697
List * args
Definition: execnodes.h:699
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:1121
ExprContext * ps_ExprContext
Definition: execnodes.h:1080
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:1213
HashJoinTable hashtable
Definition: execnodes.h:2133
void ExecReScan(PlanState *node)
Definition: execAmi.c:74
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:439
List * qual
Definition: execnodes.h:1064
bool hj_MatchedOuter
Definition: execnodes.h:1875
List * targetlist
Definition: execnodes.h:1063
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:592
EState * state
Definition: execnodes.h:1051
List * joinqual
Definition: execnodes.h:1762
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1869
struct PlanState * righttree
Definition: execnodes.h:1066
List * hj_OuterHashKeys
Definition: execnodes.h:1861
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1873
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:686
#define lsecond(l)
Definition: pg_list.h:114
Join join
Definition: plannodes.h:696
void BufFileClose(BufFile *file)
Definition: buffile.c:203
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:1448
void ExecAssignResultTypeFromTL(PlanState *planstate)
Definition: execUtils.c:431
struct PlanState * lefttree
Definition: execnodes.h:1065
void ExecEndHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:549
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:40
int * skewBucketNums
Definition: hashjoin.h:146
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:833
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1031
JoinType jointype
Definition: plannodes.h:640
uint32 hj_CurHashValue
Definition: execnodes.h:1865
int hj_CurSkewBucketNo
Definition: execnodes.h:1867
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1079
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execQual.c:4267
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType)
Definition: execTuples.c:866
#define linitial(l)
Definition: pg_list.h:110
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1871
Expr * expr
Definition: execnodes.h:600
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:167
void ExecInitResultTupleSlot(EState *estate, PlanState *planstate)
Definition: execTuples.c:832
#define EXEC_FLAG_BACKWARD
Definition: executor.h:60
BufFile ** outerBatchFile
Definition: hashjoin.h:167
#define outerPlanState(node)
Definition: execnodes.h:1092
#define innerPlan(node)
Definition: plannodes.h:161
Cost startup_cost
Definition: plannodes.h:114
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:658
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1868
MinimalTupleData * MinimalTuple
Definition: htup.h:27
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1063
int errcode_for_file_access(void)
Definition: elog.c:598
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:131
bool ExecQual(List *qual, ExprContext *econtext, bool resultForNull)
Definition: execQual.c:5056
#define TupIsNull(slot)
Definition: tuptable.h:138
unsigned int uint32
Definition: c.h:268
PlanState ps
Definition: execnodes.h:2132
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:1095
#define ereport(elevel, rest)
Definition: elog.h:122
List * hj_HashOperators
Definition: execnodes.h:1863
Bitmapset * chgParam
Definition: execnodes.h:1074
#define outerPlan(node)
Definition: plannodes.h:162
List * lappend(List *list, void *datum)
Definition: list.c:128
int hj_CurBucketNo
Definition: execnodes.h:1866
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:34
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:38
HashSkewBucket ** skewBucket
Definition: hashjoin.h:143
HashJoinState * ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
Definition: nodeHashjoin.c:384
void ExecSetSlotDescriptor(TupleTableSlot *slot, TupleDesc tupdesc)
Definition: execTuples.c:247
Plan * plan
Definition: execnodes.h:1049
double totalTuples
Definition: hashjoin.h:156
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:72
#define makeNode(_type_)
Definition: nodes.h:567
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:132
bool hj_OuterNotEmpty
Definition: execnodes.h:1876
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
#define EXEC_FLAG_MARK
Definition: executor.h:61
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1100
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:409
BufFile ** innerBatchFile
Definition: hashjoin.h:166
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
Definition: nodeHashjoin.c:850
ExprState xprstate
Definition: execnodes.h:698
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:207
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:460
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:31
List * targetlist
Definition: plannodes.h:132
List * hashclauses
Definition: execnodes.h:1860
TupleTableSlot * ExecHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:62
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1870
List * hj_InnerHashKeys
Definition: execnodes.h:1862
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:30
void * palloc(Size size)
Definition: mcxt.c:849
HashJoinTable hj_HashTable
Definition: execnodes.h:1864
int errmsg(const char *fmt,...)
Definition: elog.c:797
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:591
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:365
Cost total_cost
Definition: plannodes.h:115
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:522
size_t BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:412
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:1242
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:927
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
Oid opno
Definition: primnodes.h:495
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:33
#define elog
Definition: elog.h:219
#define innerPlanState(node)
Definition: execnodes.h:1091
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
Definition: pg_list.h:45
JoinState js
Definition: execnodes.h:1859
List * joinqual
Definition: plannodes.h:641
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
Definition: nodeHashjoin.c:669
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
Definition: nodeHashjoin.c:816
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:563
#define ResetExprContext(econtext)
Definition: executor.h:333
Plan plan
Definition: plannodes.h:639
void ExecReScanHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:896