PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * ExecBitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
#include "utils/tqual.h"
54 
55 
57 static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
58 static inline void BitmapDoneInitializingSharedState(
59  ParallelBitmapHeapState *pstate);
60 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
61  TBMIterateResult *tbmres);
62 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
63 static inline void BitmapPrefetch(BitmapHeapScanState *node,
64  HeapScanDesc scan);
66  ParallelBitmapHeapState *pstate);
67 
68 
69 /* ----------------------------------------------------------------
70  * BitmapHeapNext
71  *
72  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
73  * ----------------------------------------------------------------
74  */
75 static TupleTableSlot *
77 {
78  ExprContext *econtext;
79  HeapScanDesc scan;
80  TIDBitmap *tbm;
81  TBMIterator *tbmiterator = NULL;
82  TBMSharedIterator *shared_tbmiterator = NULL;
83  TBMIterateResult *tbmres;
84  OffsetNumber targoffset;
85  TupleTableSlot *slot;
86  ParallelBitmapHeapState *pstate = node->pstate;
87  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
88 
89  /*
90  * extract necessary information from index scan node
91  */
92  econtext = node->ss.ps.ps_ExprContext;
93  slot = node->ss.ss_ScanTupleSlot;
94  scan = node->ss.ss_currentScanDesc;
95  tbm = node->tbm;
96  if (pstate == NULL)
97  tbmiterator = node->tbmiterator;
98  else
99  shared_tbmiterator = node->shared_tbmiterator;
100  tbmres = node->tbmres;
101 
102  /*
103  * If we haven't yet performed the underlying index scan, do it, and begin
104  * the iteration over the bitmap.
105  *
106  * For prefetching, we use *two* iterators, one for the pages we are
107  * actually scanning and another that runs ahead of the first for
108  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
109  * the prefetch iterator is. Also, node->prefetch_target tracks the
110  * desired prefetch distance, which starts small and increases up to the
111  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
112  * a scan that stops after a few tuples because of a LIMIT.
113  */
114  if (!node->initialized)
115  {
116  if (!pstate)
117  {
118  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
119 
120  if (!tbm || !IsA(tbm, TIDBitmap))
121  elog(ERROR, "unrecognized result from subplan");
122 
123  node->tbm = tbm;
124  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
125  node->tbmres = tbmres = NULL;
126 
127 #ifdef USE_PREFETCH
128  if (node->prefetch_maximum > 0)
129  {
131  node->prefetch_pages = 0;
132  node->prefetch_target = -1;
133  }
134 #endif /* USE_PREFETCH */
135  }
136  else
137  {
138  /*
139  * The leader will immediately come out of the function, but
140  * others will be blocked until leader populates the TBM and wakes
141  * them up.
142  */
144  {
145  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
146  if (!tbm || !IsA(tbm, TIDBitmap))
147  elog(ERROR, "unrecognized result from subplan");
148 
149  node->tbm = tbm;
150 
151  /*
152  * Prepare to iterate over the TBM. This will return the
153  * dsa_pointer of the iterator state which will be used by
154  * multiple processes to iterate jointly.
155  */
157 #ifdef USE_PREFETCH
158  if (node->prefetch_maximum > 0)
159  {
160  pstate->prefetch_iterator =
162 
163  /*
164  * We don't need the mutex here as we haven't yet woke up
165  * others.
166  */
167  pstate->prefetch_pages = 0;
168  pstate->prefetch_target = -1;
169  }
170 #endif
171 
172  /* We have initialized the shared state so wake up others. */
174  }
175 
176  /* Allocate a private iterator and attach the shared state to it */
177  node->shared_tbmiterator = shared_tbmiterator =
179  node->tbmres = tbmres = NULL;
180 
181 #ifdef USE_PREFETCH
182  if (node->prefetch_maximum > 0)
183  {
186  }
187 #endif /* USE_PREFETCH */
188  }
189  node->initialized = true;
190  }
191 
192  for (;;)
193  {
194  Page dp;
195  ItemId lp;
196 
198 
199  /*
200  * Get next page of results if needed
201  */
202  if (tbmres == NULL)
203  {
204  if (!pstate)
205  node->tbmres = tbmres = tbm_iterate(tbmiterator);
206  else
207  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
208  if (tbmres == NULL)
209  {
210  /* no more entries in the bitmap */
211  break;
212  }
213 
214  BitmapAdjustPrefetchIterator(node, tbmres);
215 
216  /*
217  * Ignore any claimed entries past what we think is the end of the
218  * relation. (This is probably not necessary given that we got at
219  * least AccessShareLock on the table before performing any of the
220  * indexscans, but let's be safe.)
221  */
222  if (tbmres->blockno >= scan->rs_nblocks)
223  {
224  node->tbmres = tbmres = NULL;
225  continue;
226  }
227 
228  /*
229  * We can skip fetching the heap page if we don't need any fields
230  * from the heap, and the bitmap entries don't need rechecking,
231  * and all tuples on the page are visible to our transaction.
232  */
233  node->skip_fetch = (node->can_skip_fetch &&
234  !tbmres->recheck &&
236  tbmres->blockno,
237  &node->vmbuffer));
238 
239  if (node->skip_fetch)
240  {
241  /*
242  * The number of tuples on this page is put into
243  * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
244  */
245  scan->rs_ntuples = tbmres->ntuples;
246  }
247  else
248  {
249  /*
250  * Fetch the current heap page and identify candidate tuples.
251  */
252  bitgetpage(scan, tbmres);
253  }
254 
255  if (tbmres->ntuples >= 0)
256  node->exact_pages++;
257  else
258  node->lossy_pages++;
259 
260  /*
261  * Set rs_cindex to first slot to examine
262  */
263  scan->rs_cindex = 0;
264 
265  /* Adjust the prefetch target */
267  }
268  else
269  {
270  /*
271  * Continuing in previously obtained page; advance rs_cindex
272  */
273  scan->rs_cindex++;
274 
275 #ifdef USE_PREFETCH
276 
277  /*
278  * Try to prefetch at least a few pages even before we get to the
279  * second page if we don't stop reading after the first tuple.
280  */
281  if (!pstate)
282  {
283  if (node->prefetch_target < node->prefetch_maximum)
284  node->prefetch_target++;
285  }
286  else if (pstate->prefetch_target < node->prefetch_maximum)
287  {
288  /* take spinlock while updating shared state */
289  SpinLockAcquire(&pstate->mutex);
290  if (pstate->prefetch_target < node->prefetch_maximum)
291  pstate->prefetch_target++;
292  SpinLockRelease(&pstate->mutex);
293  }
294 #endif /* USE_PREFETCH */
295  }
296 
297  /*
298  * Out of range? If so, nothing more to look at on this page
299  */
300  if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
301  {
302  node->tbmres = tbmres = NULL;
303  continue;
304  }
305 
306  /*
307  * We issue prefetch requests *after* fetching the current page to try
308  * to avoid having prefetching interfere with the main I/O. Also, this
309  * should happen only when we have determined there is still something
310  * to do on the current page, else we may uselessly prefetch the same
311  * page we are just about to request for real.
312  */
313  BitmapPrefetch(node, scan);
314 
315  if (node->skip_fetch)
316  {
317  /*
318  * If we don't have to fetch the tuple, just return nulls.
319  */
320  ExecStoreAllNullTuple(slot);
321  }
322  else
323  {
324  /*
325  * Okay to fetch the tuple.
326  */
327  targoffset = scan->rs_vistuples[scan->rs_cindex];
328  dp = (Page) BufferGetPage(scan->rs_cbuf);
329  lp = PageGetItemId(dp, targoffset);
330  Assert(ItemIdIsNormal(lp));
331 
332  scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
333  scan->rs_ctup.t_len = ItemIdGetLength(lp);
334  scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
335  ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
336 
338 
339  /*
340  * Set up the result slot to point to this tuple. Note that the
341  * slot acquires a pin on the buffer.
342  */
343  ExecStoreTuple(&scan->rs_ctup,
344  slot,
345  scan->rs_cbuf,
346  false);
347 
348  /*
349  * If we are using lossy info, we have to recheck the qual
350  * conditions at every tuple.
351  */
352  if (tbmres->recheck)
353  {
354  econtext->ecxt_scantuple = slot;
355  ResetExprContext(econtext);
356 
357  if (!ExecQual(node->bitmapqualorig, econtext))
358  {
359  /* Fails recheck, so drop it and loop back for another */
360  InstrCountFiltered2(node, 1);
361  ExecClearTuple(slot);
362  continue;
363  }
364  }
365  }
366 
367  /* OK to return this tuple */
368  return slot;
369  }
370 
371  /*
372  * if we get here it means we are at the end of the scan..
373  */
374  return ExecClearTuple(slot);
375 }
376 
377 /*
378  * bitgetpage - subroutine for BitmapHeapNext()
379  *
380  * This routine reads and pins the specified page of the relation, then
381  * builds an array indicating which tuples on the page are both potentially
382  * interesting according to the bitmap, and visible according to the snapshot.
383  */
384 static void
386 {
387  BlockNumber page = tbmres->blockno;
388  Buffer buffer;
389  Snapshot snapshot;
390  int ntup;
391 
392  /*
393  * Acquire pin on the target heap page, trading in any pin we held before.
394  */
395  Assert(page < scan->rs_nblocks);
396 
397  scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
398  scan->rs_rd,
399  page);
400  buffer = scan->rs_cbuf;
401  snapshot = scan->rs_snapshot;
402 
403  ntup = 0;
404 
405  /*
406  * Prune and repair fragmentation for the whole page, if possible.
407  */
408  heap_page_prune_opt(scan->rs_rd, buffer);
409 
410  /*
411  * We must hold share lock on the buffer content while examining tuple
412  * visibility. Afterwards, however, the tuples we have found to be
413  * visible are guaranteed good as long as we hold the buffer pin.
414  */
415  LockBuffer(buffer, BUFFER_LOCK_SHARE);
416 
417  /*
418  * We need two separate strategies for lossy and non-lossy cases.
419  */
420  if (tbmres->ntuples >= 0)
421  {
422  /*
423  * Bitmap is non-lossy, so we just look through the offsets listed in
424  * tbmres; but we have to follow any HOT chain starting at each such
425  * offset.
426  */
427  int curslot;
428 
429  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
430  {
431  OffsetNumber offnum = tbmres->offsets[curslot];
432  ItemPointerData tid;
433  HeapTupleData heapTuple;
434 
435  ItemPointerSet(&tid, page, offnum);
436  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
437  &heapTuple, NULL, true))
438  scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
439  }
440  }
441  else
442  {
443  /*
444  * Bitmap is lossy, so we must examine each item pointer on the page.
445  * But we can ignore HOT chains, since we'll check each tuple anyway.
446  */
447  Page dp = (Page) BufferGetPage(buffer);
449  OffsetNumber offnum;
450 
451  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
452  {
453  ItemId lp;
454  HeapTupleData loctup;
455  bool valid;
456 
457  lp = PageGetItemId(dp, offnum);
458  if (!ItemIdIsNormal(lp))
459  continue;
460  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
461  loctup.t_len = ItemIdGetLength(lp);
462  loctup.t_tableOid = scan->rs_rd->rd_id;
463  ItemPointerSet(&loctup.t_self, page, offnum);
464  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
465  if (valid)
466  {
467  scan->rs_vistuples[ntup++] = offnum;
468  PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
469  }
470  CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
471  buffer, snapshot);
472  }
473  }
474 
476 
477  Assert(ntup <= MaxHeapTuplesPerPage);
478  scan->rs_ntuples = ntup;
479 }
480 
481 /*
482  * BitmapDoneInitializingSharedState - Shared state is initialized
483  *
484  * By this time the leader has already populated the TBM and initialized the
485  * shared state so wake up other processes.
486  */
487 static inline void
489 {
490  SpinLockAcquire(&pstate->mutex);
491  pstate->state = BM_FINISHED;
492  SpinLockRelease(&pstate->mutex);
493  ConditionVariableBroadcast(&pstate->cv);
494 }
495 
496 /*
497  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
498  */
499 static inline void
501  TBMIterateResult *tbmres)
502 {
503 #ifdef USE_PREFETCH
504  ParallelBitmapHeapState *pstate = node->pstate;
505 
506  if (pstate == NULL)
507  {
508  TBMIterator *prefetch_iterator = node->prefetch_iterator;
509 
510  if (node->prefetch_pages > 0)
511  {
512  /* The main iterator has closed the distance by one page */
513  node->prefetch_pages--;
514  }
515  else if (prefetch_iterator)
516  {
517  /* Do not let the prefetch iterator get behind the main one */
518  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
519 
520  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
521  elog(ERROR, "prefetch and main iterators are out of sync");
522  }
523  return;
524  }
525 
526  if (node->prefetch_maximum > 0)
527  {
528  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
529 
530  SpinLockAcquire(&pstate->mutex);
531  if (pstate->prefetch_pages > 0)
532  {
533  pstate->prefetch_pages--;
534  SpinLockRelease(&pstate->mutex);
535  }
536  else
537  {
538  /* Release the mutex before iterating */
539  SpinLockRelease(&pstate->mutex);
540 
541  /*
542  * In case of shared mode, we can not ensure that the current
543  * blockno of the main iterator and that of the prefetch iterator
544  * are same. It's possible that whatever blockno we are
545  * prefetching will be processed by another process. Therefore,
546  * we don't validate the blockno here as we do in non-parallel
547  * case.
548  */
549  if (prefetch_iterator)
550  tbm_shared_iterate(prefetch_iterator);
551  }
552  }
553 #endif /* USE_PREFETCH */
554 }
555 
556 /*
557  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
558  *
559  * Increase prefetch target if it's not yet at the max. Note that
560  * we will increase it to zero after fetching the very first
561  * page/tuple, then to one after the second tuple is fetched, then
562  * it doubles as later pages are fetched.
563  */
564 static inline void
566 {
567 #ifdef USE_PREFETCH
568  ParallelBitmapHeapState *pstate = node->pstate;
569 
570  if (pstate == NULL)
571  {
572  if (node->prefetch_target >= node->prefetch_maximum)
573  /* don't increase any further */ ;
574  else if (node->prefetch_target >= node->prefetch_maximum / 2)
575  node->prefetch_target = node->prefetch_maximum;
576  else if (node->prefetch_target > 0)
577  node->prefetch_target *= 2;
578  else
579  node->prefetch_target++;
580  return;
581  }
582 
583  /* Do an unlocked check first to save spinlock acquisitions. */
584  if (pstate->prefetch_target < node->prefetch_maximum)
585  {
586  SpinLockAcquire(&pstate->mutex);
587  if (pstate->prefetch_target >= node->prefetch_maximum)
588  /* don't increase any further */ ;
589  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
590  pstate->prefetch_target = node->prefetch_maximum;
591  else if (pstate->prefetch_target > 0)
592  pstate->prefetch_target *= 2;
593  else
594  pstate->prefetch_target++;
595  SpinLockRelease(&pstate->mutex);
596  }
597 #endif /* USE_PREFETCH */
598 }
599 
600 /*
601  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
602  */
603 static inline void
605 {
606 #ifdef USE_PREFETCH
607  ParallelBitmapHeapState *pstate = node->pstate;
608 
609  if (pstate == NULL)
610  {
611  TBMIterator *prefetch_iterator = node->prefetch_iterator;
612 
613  if (prefetch_iterator)
614  {
615  while (node->prefetch_pages < node->prefetch_target)
616  {
617  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
618  bool skip_fetch;
619 
620  if (tbmpre == NULL)
621  {
622  /* No more pages to prefetch */
623  tbm_end_iterate(prefetch_iterator);
624  node->prefetch_iterator = NULL;
625  break;
626  }
627  node->prefetch_pages++;
628 
629  /*
630  * If we expect not to have to actually read this heap page,
631  * skip this prefetch call, but continue to run the prefetch
632  * logic normally. (Would it be better not to increment
633  * prefetch_pages?)
634  *
635  * This depends on the assumption that the index AM will
636  * report the same recheck flag for this future heap page as
637  * it did for the current heap page; which is not a certainty
638  * but is true in many cases.
639  */
640  skip_fetch = (node->can_skip_fetch &&
641  (node->tbmres ? !node->tbmres->recheck : false) &&
643  tbmpre->blockno,
644  &node->pvmbuffer));
645 
646  if (!skip_fetch)
647  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
648  }
649  }
650 
651  return;
652  }
653 
654  if (pstate->prefetch_pages < pstate->prefetch_target)
655  {
656  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
657 
658  if (prefetch_iterator)
659  {
660  while (1)
661  {
662  TBMIterateResult *tbmpre;
663  bool do_prefetch = false;
664  bool skip_fetch;
665 
666  /*
667  * Recheck under the mutex. If some other process has already
668  * done enough prefetching then we need not to do anything.
669  */
670  SpinLockAcquire(&pstate->mutex);
671  if (pstate->prefetch_pages < pstate->prefetch_target)
672  {
673  pstate->prefetch_pages++;
674  do_prefetch = true;
675  }
676  SpinLockRelease(&pstate->mutex);
677 
678  if (!do_prefetch)
679  return;
680 
681  tbmpre = tbm_shared_iterate(prefetch_iterator);
682  if (tbmpre == NULL)
683  {
684  /* No more pages to prefetch */
685  tbm_end_shared_iterate(prefetch_iterator);
686  node->shared_prefetch_iterator = NULL;
687  break;
688  }
689 
690  /* As above, skip prefetch if we expect not to need page */
691  skip_fetch = (node->can_skip_fetch &&
692  (node->tbmres ? !node->tbmres->recheck : false) &&
694  tbmpre->blockno,
695  &node->pvmbuffer));
696 
697  if (!skip_fetch)
698  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
699  }
700  }
701  }
702 #endif /* USE_PREFETCH */
703 }
704 
705 /*
706  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
707  */
708 static bool
710 {
711  ExprContext *econtext;
712 
713  /*
714  * extract necessary information from index scan node
715  */
716  econtext = node->ss.ps.ps_ExprContext;
717 
718  /* Does the tuple meet the original qual conditions? */
719  econtext->ecxt_scantuple = slot;
720 
721  ResetExprContext(econtext);
722 
723  return ExecQual(node->bitmapqualorig, econtext);
724 }
725 
726 /* ----------------------------------------------------------------
727  * ExecBitmapHeapScan(node)
728  * ----------------------------------------------------------------
729  */
730 static TupleTableSlot *
732 {
734 
735  return ExecScan(&node->ss,
738 }
739 
740 /* ----------------------------------------------------------------
741  * ExecReScanBitmapHeapScan(node)
742  * ----------------------------------------------------------------
743  */
744 void
746 {
748 
749  /* rescan to release any page pin */
750  heap_rescan(node->ss.ss_currentScanDesc, NULL);
751 
752  /* release bitmaps and buffers if any */
753  if (node->tbmiterator)
755  if (node->prefetch_iterator)
757  if (node->shared_tbmiterator)
759  if (node->shared_prefetch_iterator)
761  if (node->tbm)
762  tbm_free(node->tbm);
763  if (node->vmbuffer != InvalidBuffer)
764  ReleaseBuffer(node->vmbuffer);
765  if (node->pvmbuffer != InvalidBuffer)
766  ReleaseBuffer(node->pvmbuffer);
767  node->tbm = NULL;
768  node->tbmiterator = NULL;
769  node->tbmres = NULL;
770  node->prefetch_iterator = NULL;
771  node->initialized = false;
772  node->shared_tbmiterator = NULL;
773  node->shared_prefetch_iterator = NULL;
774  node->vmbuffer = InvalidBuffer;
775  node->pvmbuffer = InvalidBuffer;
776 
777  ExecScanReScan(&node->ss);
778 
779  /*
780  * if chgParam of subnode is not null then plan will be re-scanned by
781  * first ExecProcNode.
782  */
783  if (outerPlan->chgParam == NULL)
784  ExecReScan(outerPlan);
785 }
786 
787 /* ----------------------------------------------------------------
788  * ExecEndBitmapHeapScan
789  * ----------------------------------------------------------------
790  */
791 void
793 {
794  Relation relation;
795  HeapScanDesc scanDesc;
796 
797  /*
798  * extract information from the node
799  */
800  relation = node->ss.ss_currentRelation;
801  scanDesc = node->ss.ss_currentScanDesc;
802 
803  /*
804  * Free the exprcontext
805  */
806  ExecFreeExprContext(&node->ss.ps);
807 
808  /*
809  * clear out tuple table slots
810  */
813 
814  /*
815  * close down subplans
816  */
818 
819  /*
820  * release bitmaps and buffers if any
821  */
822  if (node->tbmiterator)
824  if (node->prefetch_iterator)
826  if (node->tbm)
827  tbm_free(node->tbm);
828  if (node->shared_tbmiterator)
830  if (node->shared_prefetch_iterator)
832  if (node->vmbuffer != InvalidBuffer)
833  ReleaseBuffer(node->vmbuffer);
834  if (node->pvmbuffer != InvalidBuffer)
835  ReleaseBuffer(node->pvmbuffer);
836 
837  /*
838  * close heap scan
839  */
840  heap_endscan(scanDesc);
841 
842  /*
843  * close the heap relation.
844  */
845  ExecCloseScanRelation(relation);
846 }
847 
848 /* ----------------------------------------------------------------
849  * ExecInitBitmapHeapScan
850  *
851  * Initializes the scan's state information.
852  * ----------------------------------------------------------------
853  */
855 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
856 {
857  BitmapHeapScanState *scanstate;
858  Relation currentRelation;
859  int io_concurrency;
860 
861  /* check for unsupported flags */
862  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
863 
864  /*
865  * Assert caller didn't ask for an unsafe snapshot --- see comments at
866  * head of file.
867  */
869 
870  /*
871  * create state structure
872  */
873  scanstate = makeNode(BitmapHeapScanState);
874  scanstate->ss.ps.plan = (Plan *) node;
875  scanstate->ss.ps.state = estate;
876  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
877 
878  scanstate->tbm = NULL;
879  scanstate->tbmiterator = NULL;
880  scanstate->tbmres = NULL;
881  scanstate->skip_fetch = false;
882  scanstate->vmbuffer = InvalidBuffer;
883  scanstate->pvmbuffer = InvalidBuffer;
884  scanstate->exact_pages = 0;
885  scanstate->lossy_pages = 0;
886  scanstate->prefetch_iterator = NULL;
887  scanstate->prefetch_pages = 0;
888  scanstate->prefetch_target = 0;
889  /* may be updated below */
891  scanstate->pscan_len = 0;
892  scanstate->initialized = false;
893  scanstate->shared_tbmiterator = NULL;
894  scanstate->shared_prefetch_iterator = NULL;
895  scanstate->pstate = NULL;
896 
897  /*
898  * We can potentially skip fetching heap pages if we do not need any
899  * columns of the table, either for checking non-indexable quals or for
900  * returning data. This test is a bit simplistic, as it checks the
901  * stronger condition that there's no qual or return tlist at all. But in
902  * most cases it's probably not worth working harder than that.
903  */
904  scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
905  node->scan.plan.targetlist == NIL);
906 
907  /*
908  * Miscellaneous initialization
909  *
910  * create expression context for node
911  */
912  ExecAssignExprContext(estate, &scanstate->ss.ps);
913 
914  /*
915  * initialize child expressions
916  */
917  scanstate->ss.ps.qual =
918  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
919  scanstate->bitmapqualorig =
920  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
921 
922  /*
923  * tuple table initialization
924  */
925  ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
926  ExecInitScanTupleSlot(estate, &scanstate->ss);
927 
928  /*
929  * open the base relation and acquire appropriate lock on it.
930  */
931  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
932 
933  /*
934  * Determine the maximum for prefetch_target. If the tablespace has a
935  * specific IO concurrency set, use that to compute the corresponding
936  * maximum value; otherwise, we already initialized to the value computed
937  * by the GUC machinery.
938  */
939  io_concurrency =
940  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
941  if (io_concurrency != effective_io_concurrency)
942  {
943  double maximum;
944 
945  if (ComputeIoConcurrency(io_concurrency, &maximum))
946  scanstate->prefetch_maximum = rint(maximum);
947  }
948 
949  scanstate->ss.ss_currentRelation = currentRelation;
950 
951  /*
952  * Even though we aren't going to do a conventional seqscan, it is useful
953  * to create a HeapScanDesc --- most of the fields in it are usable.
954  */
955  scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
956  estate->es_snapshot,
957  0,
958  NULL);
959 
960  /*
961  * get the scan type from the relation descriptor.
962  */
963  ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
964 
965  /*
966  * Initialize result tuple type and projection info.
967  */
968  ExecAssignResultTypeFromTL(&scanstate->ss.ps);
969  ExecAssignScanProjectionInfo(&scanstate->ss);
970 
971  /*
972  * initialize child nodes
973  *
974  * We do this last because the child nodes will open indexscans on our
975  * relation's indexes, and we want to be sure we have acquired a lock on
976  * the relation first.
977  */
978  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
979 
980  /*
981  * all done.
982  */
983  return scanstate;
984 }
985 
986 /*----------------
987  * BitmapShouldInitializeSharedState
988  *
989  * The first process to come here and see the state to the BM_INITIAL
990  * will become the leader for the parallel bitmap scan and will be
991  * responsible for populating the TIDBitmap. The other processes will
992  * be blocked by the condition variable until the leader wakes them up.
993  * ---------------
994  */
995 static bool
997 {
999 
1000  while (1)
1001  {
1002  SpinLockAcquire(&pstate->mutex);
1003  state = pstate->state;
1004  if (pstate->state == BM_INITIAL)
1005  pstate->state = BM_INPROGRESS;
1006  SpinLockRelease(&pstate->mutex);
1007 
1008  /* Exit if bitmap is done, or if we're the leader. */
1009  if (state != BM_INPROGRESS)
1010  break;
1011 
1012  /* Wait for the leader to wake us up. */
1014  }
1015 
1017 
1018  return (state == BM_INITIAL);
1019 }
1020 
1021 /* ----------------------------------------------------------------
1022  * ExecBitmapHeapEstimate
1023  *
1024  * Compute the amount of space we'll need in the parallel
1025  * query DSM, and inform pcxt->estimator about our needs.
1026  * ----------------------------------------------------------------
1027  */
1028 void
1030  ParallelContext *pcxt)
1031 {
1032  EState *estate = node->ss.ps.state;
1033 
1035  phs_snapshot_data),
1037 
1039  shm_toc_estimate_keys(&pcxt->estimator, 1);
1040 }
1041 
1042 /* ----------------------------------------------------------------
1043  * ExecBitmapHeapInitializeDSM
1044  *
1045  * Set up a parallel bitmap heap scan descriptor.
1046  * ----------------------------------------------------------------
1047  */
1048 void
1050  ParallelContext *pcxt)
1051 {
1052  ParallelBitmapHeapState *pstate;
1053  EState *estate = node->ss.ps.state;
1054 
1055  pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
1056 
1057  pstate->tbmiterator = 0;
1058  pstate->prefetch_iterator = 0;
1059 
1060  /* Initialize the mutex */
1061  SpinLockInit(&pstate->mutex);
1062  pstate->prefetch_pages = 0;
1063  pstate->prefetch_target = 0;
1064  pstate->state = BM_INITIAL;
1065 
1066  ConditionVariableInit(&pstate->cv);
1068 
1069  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
1070  node->pstate = pstate;
1071 }
1072 
1073 /* ----------------------------------------------------------------
1074  * ExecBitmapHeapReInitializeDSM
1075  *
1076  * Reset shared state before beginning a fresh scan.
1077  * ----------------------------------------------------------------
1078  */
1079 void
1081  ParallelContext *pcxt)
1082 {
1083  ParallelBitmapHeapState *pstate = node->pstate;
1084  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
1085 
1086  pstate->state = BM_INITIAL;
1087 
1088  if (DsaPointerIsValid(pstate->tbmiterator))
1089  tbm_free_shared_area(dsa, pstate->tbmiterator);
1090 
1091  if (DsaPointerIsValid(pstate->prefetch_iterator))
1093 
1094  pstate->tbmiterator = InvalidDsaPointer;
1096 }
1097 
1098 /* ----------------------------------------------------------------
1099  * ExecBitmapHeapInitializeWorker
1100  *
1101  * Copy relevant information from TOC into planstate.
1102  * ----------------------------------------------------------------
1103  */
1104 void
1106  ParallelWorkerContext *pwcxt)
1107 {
1108  ParallelBitmapHeapState *pstate;
1109  Snapshot snapshot;
1110 
1111  pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
1112  node->pstate = pstate;
1113 
1114  snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
1115  heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
1116 }
static void BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
#define NIL
Definition: pg_list.h:69
TupleTableSlot * ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree)
Definition: execTuples.c:320
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
List * qual
Definition: plannodes.h:145
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
int target_prefetch_pages
Definition: bufmgr.c:129
struct dsa_area * es_query_dsa
Definition: execnodes.h:513
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1145
Plan plan
Definition: plannodes.h:328
#define IsA(nodeptr, _type_)
Definition: nodes.h:561
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
ExprState * bitmapqualorig
Definition: execnodes.h:1354
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate)
Definition: execTuples.c:842
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2129
void heap_endscan(HeapScanDesc scan)
Definition: heapam.c:1565
Index scanrelid
Definition: plannodes.h:329
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
#define InvalidDsaPointer
Definition: dsa.h:78
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:765
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:512
void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
Definition: heapam.c:1774
#define RelationGetDescr(relation)
Definition: rel.h:437
#define castNode(_type_, nodeptr)
Definition: nodes.h:579
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:523
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:123
ExprContext * ps_ExprContext
Definition: execnodes.h:883
shm_toc_estimator estimator
Definition: parallel.h:41
#define SpinLockInit(lock)
Definition: spin.h:60
TIDBitmap * tbm
Definition: execnodes.h:1355
int ConditionVariableBroadcast(ConditionVariable *cv)
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:439
#define MaxHeapTuplesPerPage
Definition: htup_details.h:580
int plan_node_id
Definition: plannodes.h:143
#define InvalidBuffer
Definition: buf.h:25
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
Snapshot es_snapshot
Definition: execnodes.h:429
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1106
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
Relation ss_currentRelation
Definition: execnodes.h:1104
EState * state
Definition: execnodes.h:851
Form_pg_class rd_rel
Definition: rel.h:114
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:356
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
SharedBitmapState
Definition: execnodes.h:1295
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:523
int effective_io_concurrency
Definition: bufmgr.c:112
void CheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: predicate.c:3945
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:160
void ExecAssignResultTypeFromTL(PlanState *planstate)
Definition: execUtils.c:447
#define HeapTupleSatisfiesVisibility(tuple, snapshot, buffer)
Definition: tqual.h:45
HeapTupleData rs_ctup
Definition: relscan.h:69
bool ComputeIoConcurrency(int io_concurrency, double *target)
Definition: bufmgr.c:467
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:67
BlockNumber blockno
Definition: tidbitmap.h:42
PlanState ps
Definition: execnodes.h:1103
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:2011
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableInit(ConditionVariable *cv)
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:882
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
void ConditionVariableCancelSleep(void)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
#define ERROR
Definition: elog.h:43
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:237
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1372
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2070
void ExecInitResultTupleSlot(EState *estate, PlanState *planstate)
Definition: execTuples.c:832
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:610
ItemPointerData t_self
Definition: htup.h:65
#define pgstat_count_heap_fetch(rel)
Definition: pgstat.h:1270
#define EXEC_FLAG_BACKWARD
Definition: executor.h:60
#define outerPlanState(node)
Definition: execnodes.h:895
uint32 t_len
Definition: htup.h:64
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:321
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
#define FirstOffsetNumber
Definition: off.h:27
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
Snapshot rs_snapshot
Definition: relscan.h:49
void heap_rescan(HeapScanDesc scan, ScanKey key)
Definition: heapam.c:1521
Oid t_tableOid
Definition: htup.h:66
dsa_pointer tbmiterator
Definition: execnodes.h:1317
double rint(double x)
Definition: rint.c:22
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
List * bitmapqualorig
Definition: plannodes.h:463
TBMIterateResult * tbmres
Definition: execnodes.h:1357
Oid rd_id
Definition: rel.h:116
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:340
Bitmapset * chgParam
Definition: execnodes.h:877
#define outerPlan(node)
Definition: plannodes.h:174
HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: heapam.c:1425
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:401
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define SpinLockRelease(lock)
Definition: spin.h:64
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2046
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:855
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
BlockNumber rs_nblocks
Definition: relscan.h:60
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
Plan * plan
Definition: execnodes.h:849
void PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:529
Relation rs_rd
Definition: relscan.h:48
dsa_pointer prefetch_iterator
Definition: execnodes.h:1318
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:970
Buffer rs_cbuf
Definition: relscan.h:71
#define makeNode(_type_)
Definition: nodes.h:558
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define Assert(condition)
Definition: c.h:670
#define IsMVCCSnapshot(snapshot)
Definition: tqual.h:31
#define EXEC_FLAG_MARK
Definition: executor.h:61
Definition: regguts.h:298
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: relscan.h:78
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:903
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:425
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:688
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1051
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
void ExecCloseScanRelation(Relation scanrel)
Definition: execUtils.c:668
SharedBitmapState state
Definition: execnodes.h:1322
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1513
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:197
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:95
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1370
TBMIterator * tbmiterator
Definition: execnodes.h:1356
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
List * targetlist
Definition: plannodes.h:144
ExprState * qual
Definition: execnodes.h:867
void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot)
Definition: predicate.c:2543
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1324
#define DsaPointerIsValid(x)
Definition: dsa.h:81
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
HeapScanDesc ss_currentScanDesc
Definition: execnodes.h:1105
Definition: dsa.c:354
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
ConditionVariable cv
Definition: execnodes.h:1323
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:468
TBMIterator * prefetch_iterator
Definition: execnodes.h:1364
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecScanReScan(ScanState *node)
Definition: execScan.c:329
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
void ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc)
Definition: execUtils.c:547
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
#define elog
Definition: elog.h:219
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:74
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:400
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1464
int Buffer
Definition: buf.h:23
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1371
#define offsetof(type, field)
Definition: c.h:593
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define ResetExprContext(econtext)
Definition: executor.h:461
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:105
shm_toc * toc
Definition: parallel.h:44
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1157