PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nodeHashjoin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  * Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeHashjoin.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/htup_details.h"
19 #include "executor/executor.h"
20 #include "executor/hashjoin.h"
21 #include "executor/nodeHash.h"
22 #include "executor/nodeHashjoin.h"
23 #include "miscadmin.h"
24 #include "utils/memutils.h"
25 
26 
27 /*
28  * States of the ExecHashJoin state machine
 *
 * The value is kept in node->hj_JoinState and dispatched on by the switch
 * in ExecHashJoin:
 *
 * HJ_BUILD_HASHTABLE	first call: build the hash table on the inner side
 * HJ_NEED_NEW_OUTER	no current outer tuple: fetch the next one
 * HJ_SCAN_BUCKET		scan the selected bucket for matches to the outer
 * HJ_FILL_OUTER_TUPLE	outer tuple ran out of matches: maybe emit a
 *						null-extended row for it
 * HJ_FILL_INNER_TUPLES	batch finished in a right/full join: emit the
 *						unmatched inner tuples
 * HJ_NEED_NEW_BATCH	try to advance to the next batch
29  */
30 #define HJ_BUILD_HASHTABLE 1
31 #define HJ_NEED_NEW_OUTER 2
32 #define HJ_SCAN_BUCKET 3
33 #define HJ_FILL_OUTER_TUPLE 4
34 #define HJ_FILL_INNER_TUPLES 5
35 #define HJ_NEED_NEW_BATCH 6
36 
/*
 * Note the crossed naming in the two tests below: null-filling the OUTER
 * relation means pairing an outer tuple with an all-null INNER tuple, so
 * the test looks at hj_NullInnerTupleSlot, and vice versa.  The slots are
 * only set up for the join types that need them (see ExecInitHashJoin).
 */
37 /* Returns true if doing null-fill on outer relation */
38 #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
39 /* Returns true if doing null-fill on inner relation */
40 #define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
41 
/*
 * Forward declarations of the file-local helpers.
 *
 * NOTE(review): listing lines 42 and 45 are missing from this extract.
 * Going by the parameter lists that follow and by the static definitions
 * later in the file, they are presumably the opening lines of the
 * prototypes for ExecHashJoinOuterGetTuple and ExecHashJoinGetSavedTuple
 * -- confirm against the upstream file.
 */
43  HashJoinState *hjstate,
44  uint32 *hashvalue);
46  BufFile *file,
47  uint32 *hashvalue,
48  TupleTableSlot *tupleSlot);
49 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
50 
51 
52 /* ----------------------------------------------------------------
53  * ExecHashJoin
54  *
55  * This function implements the Hybrid Hashjoin algorithm.
56  *
57  * Note: the relation we build hash table on is the "inner"
58  * the other one is "outer".
 *
 * The routine is a state machine keyed on node->hj_JoinState.  Each call
 * loops over the states until it can return one projected join tuple
 * (via ExecProject), or returns NULL when the join is finished.  Case
 * bodies that end in "continue" have already stored (or are meant to
 * store) the next state; bodies that end in "break" simply re-enter the
 * switch with whatever state is current.
 *
 * node: the HashJoinState for this plan node.
 *
 * NOTE(review): this copy of the function is incomplete.  Listing lines
 * 62, 130, 137, 146, 183, 201-202, 205, 228, 235, 243, 254, 262, 281,
 * 286, 296, 314, 332, 342 and 365 are absent; most of the visible gaps
 * sit where a state transition or a helper call would be expected.  Each
 * gap is flagged inline below.  Do not treat the visible text as the
 * complete control flow.
59  * ----------------------------------------------------------------
60  */
61 TupleTableSlot * /* return: a tuple or NULL */
/* NOTE(review): listing line 62 (the function name and parameter list) is missing. */
63 {
64  PlanState *outerNode;
65  HashState *hashNode;
66  ExprState *joinqual;
67  ExprState *otherqual;
68  ExprContext *econtext;
69  HashJoinTable hashtable;
70  TupleTableSlot *outerTupleSlot;
71  uint32 hashvalue;
72  int batchno;
73 
74  /*
75  * get information from HashJoin node
76  */
77  joinqual = node->js.joinqual;
78  otherqual = node->js.ps.qual;
79  hashNode = (HashState *) innerPlanState(node);
80  outerNode = outerPlanState(node);
81  hashtable = node->hj_HashTable;
82  econtext = node->js.ps.ps_ExprContext;
83 
84  /*
85  * Reset per-tuple memory context to free any expression evaluation
86  * storage allocated in the previous tuple cycle.
87  */
88  ResetExprContext(econtext);
89 
90  /*
91  * run the hash join state machine
92  */
93  for (;;)
94  {
95  switch (node->hj_JoinState)
96  {
97  case HJ_BUILD_HASHTABLE:
98 
99  /*
100  * First time through: build hash table for inner relation.
101  */
102  Assert(hashtable == NULL);
103 
104  /*
105  * If the outer relation is completely empty, and it's not
106  * right/full join, we can quit without building the hash
107  * table. However, for an inner join it is only a win to
108  * check this when the outer relation's startup cost is less
109  * than the projected cost of building the hash table.
110  * Otherwise it's best to build the hash table first and see
111  * if the inner relation is empty. (When it's a left join, we
112  * should always make this check, since we aren't going to be
113  * able to skip the join on the strength of an empty inner
114  * relation anyway.)
115  *
116  * If we are rescanning the join, we make use of information
117  * gained on the previous scan: don't bother to try the
118  * prefetch if the previous scan found the outer relation
119  * nonempty. This is not 100% reliable since with new
120  * parameters the outer relation might yield different
121  * results, but it's a good heuristic.
122  *
123  * The only way to make the check is to try to fetch a tuple
124  * from the outer plan node. If we succeed, we have to stash
125  * it away for later consumption by ExecHashJoinOuterGetTuple.
126  */
127  if (HJ_FILL_INNER(node))
128  {
129  /* no chance to not build the hash table */
 /* NOTE(review): listing line 130 (the body of this branch) is missing. */
131  }
132  else if (HJ_FILL_OUTER(node) ||
133  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
134  !node->hj_OuterNotEmpty))
135  {
 /* prefetch one outer tuple to find out whether the outer side is empty */
136  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
 /*
  * NOTE(review): listing line 137 is missing; presumably the test on
  * the fetched slot that guards the "outer is empty" block below.
  */
138  {
139  node->hj_OuterNotEmpty = false;
140  return NULL;
141  }
142  else
143  node->hj_OuterNotEmpty = true;
144  }
145  else
 /* NOTE(review): listing line 146 (the statement of this else) is missing. */
147 
148  /*
149  * create the hash table
150  */
151  hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
152  node->hj_HashOperators,
153  HJ_FILL_INNER(node));
154  node->hj_HashTable = hashtable;
155 
156  /*
157  * execute the Hash node, to build the hash table
158  */
159  hashNode->hashtable = hashtable;
160  (void) MultiExecProcNode((PlanState *) hashNode);
161 
162  /*
163  * If the inner relation is completely empty, and we're not
164  * doing a left outer join, we can quit without scanning the
165  * outer relation.
166  */
167  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
168  return NULL;
169 
170  /*
171  * need to remember whether nbatch has increased since we
172  * began scanning the outer relation
173  */
174  hashtable->nbatch_outstart = hashtable->nbatch;
175 
176  /*
177  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
178  * tuple above, because ExecHashJoinOuterGetTuple will
179  * immediately set it again.)
180  */
181  node->hj_OuterNotEmpty = false;
182 
 /*
  * NOTE(review): listing line 183 is missing; presumably the state
  * change that goes with the fall-through into HJ_NEED_NEW_OUTER.
  */
184 
185  /* FALL THRU */
186 
187  case HJ_NEED_NEW_OUTER:
188 
189  /*
190  * We don't have an outer tuple, try to get the next one
191  */
192  outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
193  node,
194  &hashvalue);
195  if (TupIsNull(outerTupleSlot))
196  {
197  /* end of batch, or maybe whole join */
198  if (HJ_FILL_INNER(node))
199  {
200  /* set up to scan for unmatched inner tuples */
 /* NOTE(review): listing lines 201-202 (the body of this branch) are missing. */
203  }
204  else
 /* NOTE(review): listing line 205 (the statement of this else) is missing. */
206  continue;
207  }
208 
 /* make the outer tuple visible to qual/hash-key evaluation */
209  econtext->ecxt_outertuple = outerTupleSlot;
210  node->hj_MatchedOuter = false;
211 
212  /*
213  * Find the corresponding bucket for this tuple in the main
214  * hash table or skew hash table.
215  */
216  node->hj_CurHashValue = hashvalue;
217  ExecHashGetBucketAndBatch(hashtable, hashvalue,
218  &node->hj_CurBucketNo, &batchno);
219  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
220  hashvalue);
221  node->hj_CurTuple = NULL;
222 
223  /*
224  * The tuple might not belong to the current batch (where
225  * "current batch" includes the skew buckets if any).
226  */
227  if (batchno != hashtable->curbatch &&
 /*
  * NOTE(review): listing line 228 (the second half of this condition)
  * is missing; per the comment above it presumably excludes tuples
  * that landed in a skew bucket.
  */
229  {
230  /*
231  * Need to postpone this outer tuple to a later batch.
232  * Save it in the corresponding outer-batch file.
233  */
234  Assert(batchno > hashtable->curbatch);
 /*
  * NOTE(review): listing line 235 is missing; presumably the start of
  * the ExecHashJoinSaveTuple call whose remaining arguments follow.
  */
236  hashvalue,
237  &hashtable->outerBatchFile[batchno]);
238  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
239  continue;
240  }
241 
242  /* OK, let's scan the bucket for matches */
 /* NOTE(review): listing line 243 is missing; presumably the state change to HJ_SCAN_BUCKET. */
244 
245  /* FALL THRU */
246 
247  case HJ_SCAN_BUCKET:
248 
249  /*
250  * We check for interrupts here because this corresponds to
251  * where we'd fetch a row from a child plan node in other join
252  * types.
253  */
 /* NOTE(review): listing line 254 (the interrupt check itself) is missing. */
255 
256  /*
257  * Scan the selected hash bucket for matches to current outer
258  */
259  if (!ExecScanHashBucket(node, econtext))
260  {
261  /* out of matches; check for possible outer-join fill */
 /* NOTE(review): listing line 262 is missing; presumably the state change to HJ_FILL_OUTER_TUPLE. */
263  continue;
264  }
265 
266  /*
267  * We've got a match, but still need to test non-hashed quals.
268  * ExecScanHashBucket already set up all the state needed to
269  * call ExecQual.
270  *
271  * If we pass the qual, then save state for next call and have
272  * ExecProject form the projection, store it in the tuple
273  * table, and return the slot.
274  *
275  * Only the joinquals determine tuple match status, but all
276  * quals must pass to actually return the tuple.
277  */
278  if (joinqual == NULL || ExecQual(joinqual, econtext))
279  {
280  node->hj_MatchedOuter = true;
 /* NOTE(review): listing line 281 is missing. */
282 
283  /* In an antijoin, we never return a matched tuple */
284  if (node->js.jointype == JOIN_ANTI)
285  {
 /* NOTE(review): listing line 286 is missing; presumably a state change before looping. */
287  continue;
288  }
289 
290  /*
291  * If we only need to join to the first matching inner
292  * tuple, then consider returning this one, but after that
293  * continue with next outer tuple.
294  */
295  if (node->js.single_match)
 /*
  * NOTE(review): listing line 296 (the statement controlled by the
  * "if" above) is missing.  As printed, the "if" would wrongly
  * capture the otherqual test below.
  */
297 
 /* joinqual passed; the remaining quals decide whether the row is emitted */
298  if (otherqual == NULL || ExecQual(otherqual, econtext))
299  return ExecProject(node->js.ps.ps_ProjInfo);
300  else
301  InstrCountFiltered2(node, 1);
302  }
303  else
304  InstrCountFiltered1(node, 1);
305  break;
306 
307  case HJ_FILL_OUTER_TUPLE:
308 
309  /*
310  * The current outer tuple has run out of matches, so check
311  * whether to emit a dummy outer-join tuple. Whether we emit
312  * one or not, the next state is NEED_NEW_OUTER.
313  */
 /* NOTE(review): listing line 314 is missing; presumably the state change described above. */
315 
316  if (!node->hj_MatchedOuter &&
317  HJ_FILL_OUTER(node))
318  {
319  /*
320  * Generate a fake join tuple with nulls for the inner
321  * tuple, and return it if it passes the non-join quals.
322  */
323  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
324 
325  if (otherqual == NULL || ExecQual(otherqual, econtext))
326  return ExecProject(node->js.ps.ps_ProjInfo);
327  else
328  InstrCountFiltered2(node, 1);
329  }
330  break;
331 
 /*
  * NOTE(review): listing line 332 is missing; presumably the
  * "case HJ_FILL_INNER_TUPLES:" label, judging by the comment below.
  */
333 
334  /*
335  * We have finished a batch, but we are doing right/full join,
336  * so any unmatched inner tuples in the hashtable have to be
337  * emitted before we continue to the next batch.
338  */
339  if (!ExecScanHashTableForUnmatched(node, econtext))
340  {
341  /* no more unmatched tuples */
 /* NOTE(review): listing line 342 is missing; presumably the state change to HJ_NEED_NEW_BATCH. */
343  continue;
344  }
345 
346  /*
347  * Generate a fake join tuple with nulls for the outer tuple,
348  * and return it if it passes the non-join quals.
349  */
350  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
351 
352  if (otherqual == NULL || ExecQual(otherqual, econtext))
353  return ExecProject(node->js.ps.ps_ProjInfo);
354  else
355  InstrCountFiltered2(node, 1);
356  break;
357 
358  case HJ_NEED_NEW_BATCH:
359 
360  /*
361  * Try to advance to next batch. Done if there are no more.
362  */
363  if (!ExecHashJoinNewBatch(node))
364  return NULL; /* end of join */
 /* NOTE(review): listing line 365 is missing; presumably the state change for the new batch. */
366  break;
367 
368  default:
369  elog(ERROR, "unrecognized hashjoin state: %d",
370  (int) node->hj_JoinState);
371  }
372  }
373 }
374 
375 /* ----------------------------------------------------------------
376  * ExecInitHashJoin
377  *
378  * Init routine for HashJoin node.
 *
 * Builds and returns the HashJoinState for the given HashJoin plan
 * node: creates the expression context, initializes the quals and both
 * child plan nodes, sets up the tuple slots (including the null-filled
 * slots needed for outer joins), splits the hash clauses into outer and
 * inner key expressions plus operator OIDs, and leaves the state machine
 * in HJ_BUILD_HASHTABLE.
 *
 * node:	the HashJoin plan node
 * estate:	executor state that the new node is attached to
 * eflags:	executor flags; must not include EXEC_FLAG_BACKWARD or
 *			EXEC_FLAG_MARK (asserted below).  Passed unchanged to both
 *			children.
 *
 * NOTE(review): listing lines 381, 455, 460, 465, 468, 495-496 and 506
 * are missing from this extract; each gap is flagged inline.
379  * ----------------------------------------------------------------
380  */
/* NOTE(review): listing line 381 (the return type) is missing; the function returns hjstate, a HashJoinState *. */
382 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
383 {
384  HashJoinState *hjstate;
385  Plan *outerNode;
386  Hash *hashNode;
387  List *lclauses;
388  List *rclauses;
389  List *hoperators;
390  ListCell *l;
391 
392  /* check for unsupported flags */
393  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
394 
395  /*
396  * create state structure
397  */
398  hjstate = makeNode(HashJoinState);
399  hjstate->js.ps.plan = (Plan *) node;
400  hjstate->js.ps.state = estate;
401 
402  /*
403  * Miscellaneous initialization
404  *
405  * create expression context for node
406  */
407  ExecAssignExprContext(estate, &hjstate->js.ps);
408 
409  /*
410  * initialize child expressions
411  */
412  hjstate->js.ps.qual =
413  ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
414  hjstate->js.jointype = node->join.jointype;
415  hjstate->js.joinqual =
416  ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
417  hjstate->hashclauses =
418  ExecInitQual(node->hashclauses, (PlanState *) hjstate);
419 
420  /*
421  * initialize child nodes
422  *
423  * Note: we could suppress the REWIND flag for the inner input, which
424  * would amount to betting that the hash will be a single batch. Not
425  * clear if this would be a win or not.
426  */
427  outerNode = outerPlan(node);
428  hashNode = (Hash *) innerPlan(node);
429 
430  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
431  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
432 
433  /*
434  * tuple table initialization
435  */
436  ExecInitResultTupleSlot(estate, &hjstate->js.ps);
437  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
438 
439  /*
440  * detect whether we need only consider the first matching inner tuple
441  */
442  hjstate->js.single_match = (node->join.inner_unique ||
443  node->join.jointype == JOIN_SEMI);
444 
445  /* set up null tuples for outer joins, if needed */
 /*
  * Which slots get created here is what the HJ_FILL_OUTER and
  * HJ_FILL_INNER tests key on later.
  *
  * NOTE(review): the second argument of every ExecInitNullTupleSlot
  * call below (listing lines 455, 460, 465, 468) is missing from this
  * extract.
  */
446  switch (node->join.jointype)
447  {
448  case JOIN_INNER:
449  case JOIN_SEMI:
450  break;
451  case JOIN_LEFT:
452  case JOIN_ANTI:
453  hjstate->hj_NullInnerTupleSlot =
454  ExecInitNullTupleSlot(estate,
456  break;
457  case JOIN_RIGHT:
458  hjstate->hj_NullOuterTupleSlot =
459  ExecInitNullTupleSlot(estate,
461  break;
462  case JOIN_FULL:
463  hjstate->hj_NullOuterTupleSlot =
464  ExecInitNullTupleSlot(estate,
466  hjstate->hj_NullInnerTupleSlot =
467  ExecInitNullTupleSlot(estate,
469  break;
470  default:
471  elog(ERROR, "unrecognized join type: %d",
472  (int) node->join.jointype);
473  }
474 
475  /*
476  * now for some voodoo. our temporary tuple slot is actually the result
477  * tuple slot of the Hash node (which is our inner plan). we can do this
478  * because Hash nodes don't return tuples via ExecProcNode() -- instead
479  * the hash join node uses ExecScanHashBucket() to get at the contents of
480  * the hash table. -cim 6/9/91
481  */
482  {
483  HashState *hashstate = (HashState *) innerPlanState(hjstate);
484  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
485 
486  hjstate->hj_HashTupleSlot = slot;
487  }
488 
489  /*
490  * initialize tuple type and projection info
491  */
492  ExecAssignResultTypeFromTL(&hjstate->js.ps);
493  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
494 
 /* NOTE(review): listing lines 495-496 are missing from this extract. */
497 
498  /*
499  * initialize hash-specific info
500  */
501  hjstate->hj_HashTable = NULL;
502  hjstate->hj_FirstOuterTupleSlot = NULL;
503 
504  hjstate->hj_CurHashValue = 0;
505  hjstate->hj_CurBucketNo = 0;
 /*
  * NOTE(review): listing line 506 is missing; presumably the initial
  * value of hj_CurSkewBucketNo -- confirm.
  */
507  hjstate->hj_CurTuple = NULL;
508 
509  /*
510  * Deconstruct the hash clauses into outer and inner argument values, so
511  * that we can evaluate those subexpressions separately. Also make a list
512  * of the hash operator OIDs, in preparation for looking up the hash
513  * functions to use.
514  */
515  lclauses = NIL;
516  rclauses = NIL;
517  hoperators = NIL;
518  foreach(l, node->hashclauses)
519  {
520  OpExpr *hclause = lfirst_node(OpExpr, l);
521 
 /* first operand becomes an outer hash key, second an inner hash key */
522  lclauses = lappend(lclauses, ExecInitExpr(linitial(hclause->args),
523  (PlanState *) hjstate));
524  rclauses = lappend(rclauses, ExecInitExpr(lsecond(hclause->args),
525  (PlanState *) hjstate));
526  hoperators = lappend_oid(hoperators, hclause->opno);
527  }
528  hjstate->hj_OuterHashKeys = lclauses;
529  hjstate->hj_InnerHashKeys = rclauses;
530  hjstate->hj_HashOperators = hoperators;
531  /* child Hash node needs to evaluate inner hash keys, too */
532  ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
533 
 /* start the state machine at the build step, with no match info yet */
534  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
535  hjstate->hj_MatchedOuter = false;
536  hjstate->hj_OuterNotEmpty = false;
537 
538  return hjstate;
539 }
540 
541 /* ----------------------------------------------------------------
542  * ExecEndHashJoin
543  *
544  * clean up routine for HashJoin node
 *
 * Going by the section comments below, the steps are: free the hash
 * table (if one was built), free the expression context, clear the
 * tuple slots, and shut down the child plan nodes.
 *
 * NOTE(review): most of the statements are missing from this extract
 * (listing lines 548, 555, 567-569 and 574-575).  Only the hash-table
 * pointer reset and the ExecFreeExprContext call are visible; the rest
 * is described by the comments alone.
545  * ----------------------------------------------------------------
546  */
547 void
/* NOTE(review): listing line 548 (the function name and parameter list) is missing; the body uses a parameter named node. */
549 {
550  /*
551  * Free hash table
552  */
553  if (node->hj_HashTable)
554  {
 /* NOTE(review): listing line 555 is missing; presumably the call that destroys the hash table. */
556  node->hj_HashTable = NULL;
557  }
558 
559  /*
560  * Free the exprcontext
561  */
562  ExecFreeExprContext(&node->js.ps);
563 
564  /*
565  * clean out the tuple table
566  */
 /* NOTE(review): listing lines 567-569 (the slot-clearing statements) are missing. */
570 
571  /*
572  * clean up subtrees
573  */
 /* NOTE(review): listing lines 574-575 (the child-node shutdown statements) are missing. */
576 }
577 
578 /*
579  * ExecHashJoinOuterGetTuple
580  *
581  * get the next outer tuple for hashjoin: either by
582  * executing the outer plan node in the first pass, or from
583  * the temp files for the hashjoin batches.
584  *
585  * Returns a null slot if no more outer tuples (within the current batch).
586  *
587  * On success, the tuple's hash value is stored at *hashvalue --- this is
588  * either originally computed, or re-read from the temp file.
 *
 * hjstate:	 the hash join node's state; supplies the hash table, the
 *			 outer hash keys and the slot used for re-read tuples
 * hashvalue: output, written only when a tuple is returned
 *
 * The outer plan node (referred to as outerNode in the body) is executed
 * only while the current batch is batch 0.
 *
 * NOTE(review): listing line 591, the line carrying the function name
 * and the first parameter, is missing from this extract.
589  */
590 static TupleTableSlot *
592  HashJoinState *hjstate,
593  uint32 *hashvalue)
594 {
595  HashJoinTable hashtable = hjstate->hj_HashTable;
596  int curbatch = hashtable->curbatch;
597  TupleTableSlot *slot;
598 
599  if (curbatch == 0) /* if it is the first pass */
600  {
601  /*
602  * Check to see if first outer tuple was already fetched by
603  * ExecHashJoin() and not used yet.
604  */
605  slot = hjstate->hj_FirstOuterTupleSlot;
606  if (!TupIsNull(slot))
607  hjstate->hj_FirstOuterTupleSlot = NULL;
608  else
609  slot = ExecProcNode(outerNode);
610 
 /* skip tuples for which no hash value can be produced */
611  while (!TupIsNull(slot))
612  {
613  /*
614  * We have to compute the tuple's hash value.
615  */
616  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
617 
618  econtext->ecxt_outertuple = slot;
619  if (ExecHashGetHashValue(hashtable, econtext,
620  hjstate->hj_OuterHashKeys,
621  true, /* outer tuple */
622  HJ_FILL_OUTER(hjstate),
623  hashvalue))
624  {
625  /* remember outer relation is not empty for possible rescan */
626  hjstate->hj_OuterNotEmpty = true;
627 
628  return slot;
629  }
630 
631  /*
632  * That tuple couldn't match because of a NULL, so discard it and
633  * continue with the next one.
634  */
635  slot = ExecProcNode(outerNode);
636  }
637  }
638  else if (curbatch < hashtable->nbatch)
639  {
 /* later pass: outer tuples for this batch come from its temp file */
640  BufFile *file = hashtable->outerBatchFile[curbatch];
641 
642  /*
643  * In outer-join cases, we could get here even though the batch file
644  * is empty.
645  */
646  if (file == NULL)
647  return NULL;
648 
649  slot = ExecHashJoinGetSavedTuple(hjstate,
650  file,
651  hashvalue,
652  hjstate->hj_OuterTupleSlot);
653  if (!TupIsNull(slot))
654  return slot;
655  }
656 
657  /* End of this batch */
658  return NULL;
659 }
660 
661 /*
662  * ExecHashJoinNewBatch
663  * switch to a new hashjoin batch
664  *
665  * Returns true if successful, false if there are no more batches.
 *
 * Steps, in order: release what the finished batch no longer needs (its
 * outer temp file, or the skew-table bookkeeping after batch 0); advance
 * past batches that can be skipped, closing their temp files; reset the
 * hash table and reload it from the new batch's inner file, which is
 * then closed; finally rewind the new batch's outer file for reading.
 *
 * A failed rewind of either temp file is reported through
 * ereport(ERROR).
 *
 * NOTE(review): listing lines 668 (the function name and parameter
 * list), 766 and 796 (the first argument line of each ereport call) are
 * missing from this extract.
666  */
667 static bool
669 {
670  HashJoinTable hashtable = hjstate->hj_HashTable;
671  int nbatch;
672  int curbatch;
673  BufFile *innerFile;
674  TupleTableSlot *slot;
675  uint32 hashvalue;
676 
677  nbatch = hashtable->nbatch;
678  curbatch = hashtable->curbatch;
679 
680  if (curbatch > 0)
681  {
682  /*
683  * We no longer need the previous outer batch file; close it right
684  * away to free disk space.
685  */
686  if (hashtable->outerBatchFile[curbatch])
687  BufFileClose(hashtable->outerBatchFile[curbatch]);
688  hashtable->outerBatchFile[curbatch] = NULL;
689  }
690  else /* we just finished the first batch */
691  {
692  /*
693  * Reset some of the skew optimization state variables, since we no
694  * longer need to consider skew tuples after the first batch. The
695  * memory context reset we are about to do will release the skew
696  * hashtable itself.
697  */
698  hashtable->skewEnabled = false;
699  hashtable->skewBucket = NULL;
700  hashtable->skewBucketNums = NULL;
701  hashtable->nSkewBuckets = 0;
702  hashtable->spaceUsedSkew = 0;
703  }
704 
705  /*
706  * We can always skip over any batches that are completely empty on both
707  * sides. We can sometimes skip over batches that are empty on only one
708  * side, but there are exceptions:
709  *
710  * 1. In a left/full outer join, we have to process outer batches even if
711  * the inner batch is empty. Similarly, in a right/full outer join, we
712  * have to process inner batches even if the outer batch is empty.
713  *
714  * 2. If we have increased nbatch since the initial estimate, we have to
715  * scan inner batches since they might contain tuples that need to be
716  * reassigned to later inner batches.
717  *
718  * 3. Similarly, if we have increased nbatch since starting the outer
719  * scan, we have to rescan outer batches in case they contain tuples that
720  * need to be reassigned.
721  */
722  curbatch++;
 /*
  * The loop is entered only for a batch with at least one side's file
  * absent; a batch with both files present is always processed.
  */
723  while (curbatch < nbatch &&
724  (hashtable->outerBatchFile[curbatch] == NULL ||
725  hashtable->innerBatchFile[curbatch] == NULL))
726  {
727  if (hashtable->outerBatchFile[curbatch] &&
728  HJ_FILL_OUTER(hjstate))
729  break; /* must process due to rule 1 */
730  if (hashtable->innerBatchFile[curbatch] &&
731  HJ_FILL_INNER(hjstate))
732  break; /* must process due to rule 1 */
733  if (hashtable->innerBatchFile[curbatch] &&
734  nbatch != hashtable->nbatch_original)
735  break; /* must process due to rule 2 */
736  if (hashtable->outerBatchFile[curbatch] &&
737  nbatch != hashtable->nbatch_outstart)
738  break; /* must process due to rule 3 */
739  /* We can ignore this batch. */
740  /* Release associated temp files right away. */
741  if (hashtable->innerBatchFile[curbatch])
742  BufFileClose(hashtable->innerBatchFile[curbatch]);
743  hashtable->innerBatchFile[curbatch] = NULL;
744  if (hashtable->outerBatchFile[curbatch])
745  BufFileClose(hashtable->outerBatchFile[curbatch]);
746  hashtable->outerBatchFile[curbatch] = NULL;
747  curbatch++;
748  }
749 
750  if (curbatch >= nbatch)
751  return false; /* no more batches */
752 
753  hashtable->curbatch = curbatch;
754 
755  /*
756  * Reload the hash table with the new inner batch (which could be empty)
757  */
758  ExecHashTableReset(hashtable);
759 
760  innerFile = hashtable->innerBatchFile[curbatch];
761 
762  if (innerFile != NULL)
763  {
764  if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
765  ereport(ERROR,
 /* NOTE(review): listing line 766 (the first ereport argument) is missing. */
767  errmsg("could not rewind hash-join temporary file: %m")));
768 
 /* ExecHashJoinGetSavedTuple returns NULL at end of file, ending the loop */
769  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
770  innerFile,
771  &hashvalue,
772  hjstate->hj_HashTupleSlot)))
773  {
774  /*
775  * NOTE: some tuples may be sent to future batches. Also, it is
776  * possible for hashtable->nbatch to be increased here!
777  */
778  ExecHashTableInsert(hashtable, slot, hashvalue);
779  }
780 
781  /*
782  * after we build the hash table, the inner batch file is no longer
783  * needed
784  */
785  BufFileClose(innerFile);
786  hashtable->innerBatchFile[curbatch] = NULL;
787  }
788 
789  /*
790  * Rewind outer batch file (if present), so that we can start reading it.
791  */
792  if (hashtable->outerBatchFile[curbatch] != NULL)
793  {
794  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
795  ereport(ERROR,
 /* NOTE(review): listing line 796 (the first ereport argument) is missing. */
797  errmsg("could not rewind hash-join temporary file: %m")));
798  }
799 
800  return true;
801 }
802 
803 /*
804  * ExecHashJoinSaveTuple
805  * save a tuple to a batch file.
806  *
807  * The data recorded in the file for each tuple is its hash value,
808  * then the tuple in MinimalTuple format.
809  *
810  * Note: it is important always to call this in the regular executor
811  * context, not in a shorter-lived context; else the temp file buffers
812  * will get messed up.
 *
 * fileptr: address of the batch-file pointer.  If *fileptr is NULL, a
 *			new temp file is created and stored back through it, so the
 *			caller's array entry is filled in on first use.
 *
 * The body also uses the names tuple (written for tuple->t_len bytes)
 * and hashvalue (written first, as a uint32); their declarations are not
 * visible here.  A short write of either piece is reported through
 * ereport(ERROR).
 *
 * NOTE(review): listing lines 815 (the function name and leading
 * parameters), 831 and 837 (the first argument line of each ereport
 * call) are missing from this extract.
813  */
814 void
816  BufFile **fileptr)
817 {
818  BufFile *file = *fileptr;
819  size_t written;
820 
821  if (file == NULL)
822  {
823  /* First write to this batch file, so open it. */
824  file = BufFileCreateTemp(false);
825  *fileptr = file;
826  }
827 
 /* record layout: uint32 hash value, then the tuple bytes */
828  written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
829  if (written != sizeof(uint32))
830  ereport(ERROR,
 /* NOTE(review): listing line 831 (the first ereport argument) is missing. */
832  errmsg("could not write to hash-join temporary file: %m")));
833 
834  written = BufFileWrite(file, (void *) tuple, tuple->t_len);
835  if (written != tuple->t_len)
836  ereport(ERROR,
 /* NOTE(review): listing line 837 (the first ereport argument) is missing. */
838  errmsg("could not write to hash-join temporary file: %m")));
839 }
840 
841 /*
842  * ExecHashJoinGetSavedTuple
843  * read the next tuple from a batch file. Return NULL if no more.
844  *
845  * On success, *hashvalue is set to the tuple's hash value, and the tuple
846  * itself is stored in the given slot.
 *
 * file:	  batch file to read from, at its current position
 * hashvalue: output, written only on success
 * tupleSlot: slot that receives the tuple; cleared at end of file
 *
 * Each record is a uint32 hash value followed by a MinimalTuple whose
 * first uint32 is its total length, so a two-word header read yields
 * both.  The tuple is palloc'd here and passed to ExecStoreMinimalTuple
 * with a final argument of true.  Any read that is short, other than a
 * zero-length read of the header, is reported through ereport(ERROR).
 *
 * NOTE(review): listing lines 849 (the function name and first
 * parameter), 863 (the interrupt check described in the body), 878 and
 * 888 (the first argument line of each ereport call) are missing from
 * this extract.
847  */
848 static TupleTableSlot *
850  BufFile *file,
851  uint32 *hashvalue,
852  TupleTableSlot *tupleSlot)
853 {
854  uint32 header[2];
855  size_t nread;
856  MinimalTuple tuple;
857 
858  /*
859  * We check for interrupts here because this is typically taken as an
860  * alternative code path to an ExecProcNode() call, which would include
861  * such a check.
862  */
 /* NOTE(review): listing line 863 (the interrupt check itself) is missing. */
864 
865  /*
866  * Since both the hash value and the MinimalTuple length word are uint32,
867  * we can read them both in one BufFileRead() call without any type
868  * cheating.
869  */
870  nread = BufFileRead(file, (void *) header, sizeof(header));
871  if (nread == 0) /* end of file */
872  {
873  ExecClearTuple(tupleSlot);
874  return NULL;
875  }
876  if (nread != sizeof(header))
877  ereport(ERROR,
 /* NOTE(review): listing line 878 (the first ereport argument) is missing. */
879  errmsg("could not read from hash-join temporary file: %m")));
880  *hashvalue = header[0];
 /* header[1] is the full tuple length, including the length word already read */
881  tuple = (MinimalTuple) palloc(header[1]);
882  tuple->t_len = header[1];
 /* read the remainder of the tuple, just past its length word */
883  nread = BufFileRead(file,
884  (void *) ((char *) tuple + sizeof(uint32)),
885  header[1] - sizeof(uint32));
886  if (nread != header[1] - sizeof(uint32))
887  ereport(ERROR,
 /* NOTE(review): listing line 888 (the first ereport argument) is missing. */
889  errmsg("could not read from hash-join temporary file: %m")));
890  return ExecStoreMinimalTuple(tuple, tupleSlot, true);
891 }
892 
893 
/*
 * Rescan routine for the hash join node.
 *
 * Prepares the node for another scan.  If a hash table exists, is
 * single-batch, and the inner child has no changed parameters, the table
 * is kept and only per-scan state is reset.  Otherwise the table pointer
 * is cleared so the table is rebuilt, and the inner child is rescanned
 * here unless its changed parameters will trigger that anyway.  The
 * per-tuple state is always reset, and the outer child is rescanned
 * under the same changed-parameters rule.
 *
 * NOTE(review): listing line 895 (the function name and parameter list)
 * is missing from this extract; the body refers to its argument as node.
 * Listing lines 916, 930, 935, 937, 951 and 955 are missing as well and
 * are flagged inline.
 */
894 void
896 {
897  /*
898  * In a multi-batch join, we currently have to do rescans the hard way,
899  * primarily because batch temp files may have already been released. But
900  * if it's a single-batch join, and there is no parameter change for the
901  * inner subnode, then we can just re-use the existing hash table without
902  * rebuilding it.
903  */
904  if (node->hj_HashTable != NULL)
905  {
906  if (node->hj_HashTable->nbatch == 1 &&
907  node->js.ps.righttree->chgParam == NULL)
908  {
909  /*
910  * Okay to reuse the hash table; needn't rescan inner, either.
911  *
912  * However, if it's a right/full join, we'd better reset the
913  * inner-tuple match flags contained in the table.
914  */
915  if (HJ_FILL_INNER(node))
 /*
  * NOTE(review): listing line 916 (the statement controlled by the
  * "if" above) is missing.  As printed, the "if" would wrongly
  * capture the hj_OuterNotEmpty assignment below.
  */
917 
918  /*
919  * Also, we need to reset our state about the emptiness of the
920  * outer relation, so that the new scan of the outer will update
921  * it correctly if it turns out to be empty this time. (There's no
922  * harm in clearing it now because ExecHashJoin won't need the
923  * info. In the other cases, where the hash table doesn't exist
924  * or we are destroying it, we leave this state alone because
925  * ExecHashJoin will need it the first time through.)
926  */
927  node->hj_OuterNotEmpty = false;
928 
929  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
 /* NOTE(review): listing line 930 is missing; presumably the state assignment that the comment above describes. */
931  }
932  else
933  {
934  /* must destroy and rebuild hash table */
 /* NOTE(review): listing line 935 is missing; presumably the call that destroys the hash table. */
936  node->hj_HashTable = NULL;
 /* NOTE(review): listing line 937 is missing. */
938 
939  /*
940  * if chgParam of subnode is not null then plan will be re-scanned
941  * by first ExecProcNode.
942  */
943  if (node->js.ps.righttree->chgParam == NULL)
944  ExecReScan(node->js.ps.righttree);
945  }
946  }
947 
948  /* Always reset intra-tuple state */
949  node->hj_CurHashValue = 0;
950  node->hj_CurBucketNo = 0;
 /* NOTE(review): listing line 951 is missing; presumably the reset of hj_CurSkewBucketNo -- confirm. */
952  node->hj_CurTuple = NULL;
953 
954  node->hj_MatchedOuter = false;
 /* NOTE(review): listing line 955 is missing. */
956 
957  /*
958  * if chgParam of subnode is not null then plan will be re-scanned by
959  * first ExecProcNode.
960  */
961  if (node->js.ps.lefttree->chgParam == NULL)
962  ExecReScan(node->js.ps.lefttree);
963 }
JoinType jointype
Definition: execnodes.h:1549
#define NIL
Definition: pg_list.h:69
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:35
List * qual
Definition: plannodes.h:145
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:101
TupleTableSlot * ExecProcNode(PlanState *node)
Definition: execProcnode.c:398
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:32
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate)
Definition: execTuples.c:852
HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
Definition: nodeHash.c:242
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:384
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1664
ExprState * joinqual
Definition: execnodes.h:1552
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:844
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:485
PlanState ps
Definition: execnodes.h:1548
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:654
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1141
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
Definition: execTuples.c:652
List * hashclauses
Definition: plannodes.h:728
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:1117
ExprContext * ps_ExprContext
Definition: execnodes.h:843
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:1209
bool single_match
Definition: execnodes.h:1550
HashJoinTable hashtable
Definition: execnodes.h:1924
void ExecReScan(PlanState *node)
Definition: execAmi.c:75
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:439
bool hj_MatchedOuter
Definition: execnodes.h:1667
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:591
EState * state
Definition: execnodes.h:815
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1661
struct PlanState * righttree
Definition: execnodes.h:829
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:346
List * hj_OuterHashKeys
Definition: execnodes.h:1653
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1665
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:516
#define lsecond(l)
Definition: pg_list.h:116
Join join
Definition: plannodes.h:727
void BufFileClose(BufFile *file)
Definition: buffile.c:203
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:1437
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:160
void ExecAssignResultTypeFromTL(PlanState *planstate)
Definition: execUtils.c:440
struct PlanState * lefttree
Definition: execnodes.h:828
void ExecEndHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:548
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:40
int * skewBucketNums
Definition: hashjoin.h:146
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:829
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1027
JoinType jointype
Definition: plannodes.h:669
uint32 hj_CurHashValue
Definition: execnodes.h:1657
int hj_CurSkewBucketNo
Definition: execnodes.h:1659
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:842
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType)
Definition: execTuples.c:866
#define linitial(l)
Definition: pg_list.h:111
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1663
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:167
void ExecInitResultTupleSlot(EState *estate, PlanState *planstate)
Definition: execTuples.c:832
#define EXEC_FLAG_BACKWARD
Definition: executor.h:60
BufFile ** outerBatchFile
Definition: hashjoin.h:167
#define lfirst_node(type, lc)
Definition: pg_list.h:109
#define outerPlanState(node)
Definition: execnodes.h:855
#define innerPlan(node)
Definition: plannodes.h:173
Cost startup_cost
Definition: plannodes.h:125
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:487
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1660
MinimalTupleData * MinimalTuple
Definition: htup.h:27
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1059
int errcode_for_file_access(void)
Definition: elog.c:598
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:198
#define TupIsNull(slot)
Definition: tuptable.h:138
unsigned int uint32
Definition: c.h:268
PlanState ps
Definition: execnodes.h:1923
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:858
#define ereport(elevel, rest)
Definition: elog.h:122
List * hj_HashOperators
Definition: execnodes.h:1655
Bitmapset * chgParam
Definition: execnodes.h:837
#define outerPlan(node)
Definition: plannodes.h:174
List * lappend(List *list, void *datum)
Definition: list.c:128
int hj_CurBucketNo
Definition: execnodes.h:1658
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:34
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:38
HashSkewBucket ** skewBucket
Definition: hashjoin.h:143
HashJoinState * ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
Definition: nodeHashjoin.c:382
void ExecSetSlotDescriptor(TupleTableSlot *slot, TupleDesc tupdesc)
Definition: execTuples.c:247
Plan * plan
Definition: execnodes.h:813
double totalTuples
Definition: hashjoin.h:156
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:72
#define makeNode(_type_)
Definition: nodes.h:557
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:199
bool hj_OuterNotEmpty
Definition: execnodes.h:1668
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define EXEC_FLAG_MARK
Definition: executor.h:61
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:863
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:418
BufFile ** innerBatchFile
Definition: hashjoin.h:166
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
Definition: nodeHashjoin.c:849
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:207
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:469
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:31
ExprState * qual
Definition: execnodes.h:827
TupleTableSlot * ExecHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:62
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1662
List * hj_InnerHashKeys
Definition: execnodes.h:1654
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:30
void * palloc(Size size)
Definition: mcxt.c:849
HashJoinTable hj_HashTable
Definition: execnodes.h:1656
int errmsg(const char *fmt,...)
Definition: elog.c:797
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:601
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:365
Cost total_cost
Definition: plannodes.h:126
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:522
size_t BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:412
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:1238
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:923
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:113
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
Oid opno
Definition: primnodes.h:495
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:33
#define elog
Definition: elog.h:219
bool inner_unique
Definition: plannodes.h:670
List * args
Definition: primnodes.h:501
#define innerPlanState(node)
Definition: execnodes.h:854
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:140
Definition: pg_list.h:45
JoinState js
Definition: execnodes.h:1651
List * joinqual
Definition: plannodes.h:671
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
Definition: nodeHashjoin.c:668
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
Definition: nodeHashjoin.c:815
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:559
ExprState * hashclauses
Definition: execnodes.h:1652
static TupleTableSlot * ExecProject(ProjectionInfo *projInfo)
Definition: executor.h:309
#define ResetExprContext(econtext)
Definition: executor.h:450
Plan plan
Definition: plannodes.h:668
void ExecReScanHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:895