PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *    Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *    ExecBitmapHeapScan       scans a relation using bitmap info
 *    ExecBitmapHeapNext       workhorse for above
 *    ExecInitBitmapHeapScan   creates and initializes state info.
 *    ExecReScanBitmapHeapScan prepares to rescan the plan.
 *    ExecEndBitmapHeapScan    releases all storage.
 */
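/*
 * A bitmap heap scan always sits on top of a bitmap-building subplan:
 * typically a single BitmapIndexScan, or a BitmapAnd/BitmapOr tree that
 * combines several of them.  BitmapHeapNext() pulls the finished TIDBitmap
 * from that subplan via MultiExecProcNode() and then visits the matching
 * heap pages in block-number order.
 */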
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/transam.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/spccache.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"


static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
static inline void BitmapDoneInitializingSharedState(
                                  ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                                  TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
                                  HeapScanDesc scan);
static bool BitmapShouldInitializeSharedState(
                                  ParallelBitmapHeapState *pstate);


/* ----------------------------------------------------------------
 *    BitmapHeapNext
 *
 *    Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
    ExprContext *econtext;
    HeapScanDesc scan;
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator = NULL;
    TBMSharedIterator *shared_tbmiterator = NULL;
    TBMIterateResult *tbmres;
    OffsetNumber targoffset;
    TupleTableSlot *slot;
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

    /*
     * extract necessary information from index scan node
     */
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;
    scan = node->ss.ss_currentScanDesc;
    tbm = node->tbm;
    if (pstate == NULL)
        tbmiterator = node->tbmiterator;
    else
        shared_tbmiterator = node->shared_tbmiterator;
    tbmres = node->tbmres;

    /*
     * If we haven't yet performed the underlying index scan, do it, and begin
     * the iteration over the bitmap.
     *
     * For prefetching, we use *two* iterators, one for the pages we are
     * actually scanning and another that runs ahead of the first for
     * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
     * the prefetch iterator is.  Also, node->prefetch_target tracks the
     * desired prefetch distance, which starts small and increases up to
     * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
     * a scan that stops after a few tuples because of a LIMIT.
     */
    if (!node->initialized)
    {
        if (!pstate)
        {
            tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

            if (!tbm || !IsA(tbm, TIDBitmap))
                elog(ERROR, "unrecognized result from subplan");

            node->tbm = tbm;
            node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
            node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->prefetch_iterator = tbm_begin_iterate(tbm);
                node->prefetch_pages = 0;
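                /*
                 * prefetch_target starts at -1, meaning prefetching has not
                 * begun yet; it is ramped up (see BitmapAdjustPrefetchTarget)
                 * as pages and tuples are fetched.
                 */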
                node->prefetch_target = -1;
            }
#endif                          /* USE_PREFETCH */
        }
        else
        {
            /*
             * The leader will immediately come out of the function, but
             * others will be blocked until leader populates the TBM and wakes
             * them up.
             */
            if (BitmapShouldInitializeSharedState(pstate))
            {
                tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
                if (!tbm || !IsA(tbm, TIDBitmap))
                    elog(ERROR, "unrecognized result from subplan");

                node->tbm = tbm;

                /*
                 * Prepare to iterate over the TBM. This will return the
                 * dsa_pointer of the iterator state which will be used by
                 * multiple processes to iterate jointly.
                 */
                pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
                if (node->prefetch_maximum > 0)
                {
                    pstate->prefetch_iterator =
                        tbm_prepare_shared_iterate(tbm);

                    /*
                     * We don't need the mutex here as we haven't yet woken up
                     * others.
                     */
                    pstate->prefetch_pages = 0;
                    pstate->prefetch_target = -1;
                }
#endif

                /* We have initialized the shared state so wake up others. */
                BitmapDoneInitializingSharedState(pstate);
            }

            /* Allocate a private iterator and attach the shared state to it */
            node->shared_tbmiterator = shared_tbmiterator =
                tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
            node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->shared_prefetch_iterator =
                    tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
            }
#endif                          /* USE_PREFETCH */
        }
        node->initialized = true;
    }

    for (;;)
    {
        Page        dp;
        ItemId      lp;

        /*
         * Get next page of results if needed
         */
        if (tbmres == NULL)
        {
            if (!pstate)
                node->tbmres = tbmres = tbm_iterate(tbmiterator);
            else
                node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
            if (tbmres == NULL)
            {
                /* no more entries in the bitmap */
                break;
            }

            BitmapAdjustPrefetchIterator(node, tbmres);

            /*
             * Ignore any claimed entries past what we think is the end of the
             * relation.  (This is probably not necessary given that we got at
             * least AccessShareLock on the table before performing any of the
             * indexscans, but let's be safe.)
             */
            if (tbmres->blockno >= scan->rs_nblocks)
            {
                node->tbmres = tbmres = NULL;
                continue;
            }

            /*
             * Fetch the current heap page and identify candidate tuples.
             */
            bitgetpage(scan, tbmres);

            if (tbmres->ntuples >= 0)
                node->exact_pages++;
            else
                node->lossy_pages++;

            /*
             * Set rs_cindex to first slot to examine
             */
            scan->rs_cindex = 0;

            /* Adjust the prefetch target */
            BitmapAdjustPrefetchTarget(node);
        }
        else
        {
            /*
             * Continuing in previously obtained page; advance rs_cindex
             */
            scan->rs_cindex++;

#ifdef USE_PREFETCH

            /*
             * Try to prefetch at least a few pages even before we get to the
             * second page if we don't stop reading after the first tuple.
             */
            if (!pstate)
            {
                if (node->prefetch_target < node->prefetch_maximum)
                    node->prefetch_target++;
            }
            else if (pstate->prefetch_target < node->prefetch_maximum)
            {
                /* take spinlock while updating shared state */
                SpinLockAcquire(&pstate->mutex);
                if (pstate->prefetch_target < node->prefetch_maximum)
                    pstate->prefetch_target++;
                SpinLockRelease(&pstate->mutex);
            }
#endif                          /* USE_PREFETCH */
        }

        /*
         * Out of range?  If so, nothing more to look at on this page
         */
        if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
        {
            node->tbmres = tbmres = NULL;
            continue;
        }

        /*
         * We issue prefetch requests *after* fetching the current page to try
         * to avoid having prefetching interfere with the main I/O. Also, this
         * should happen only when we have determined there is still something
         * to do on the current page, else we may uselessly prefetch the same
         * page we are just about to request for real.
         */
        BitmapPrefetch(node, scan);

        /*
         * Okay to fetch the tuple
         */
        targoffset = scan->rs_vistuples[scan->rs_cindex];
        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lp = PageGetItemId(dp, targoffset);
        Assert(ItemIdIsNormal(lp));

        scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
        scan->rs_ctup.t_len = ItemIdGetLength(lp);
        scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
        ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

        pgstat_count_heap_fetch(scan->rs_rd);

        /*
         * Set up the result slot to point to this tuple.  Note that the slot
         * acquires a pin on the buffer.
         */
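        /*
         * shouldFree is false because rs_ctup.t_data points directly into the
         * shared buffer page, not into palloc'd memory that the slot could
         * release.
         */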
        ExecStoreTuple(&scan->rs_ctup,
                       slot,
                       scan->rs_cbuf,
                       false);

        /*
         * If we are using lossy info, we have to recheck the qual conditions
         * at every tuple.
         */
        if (tbmres->recheck)
        {
            econtext->ecxt_scantuple = slot;
            ResetExprContext(econtext);

            if (!ExecQual(node->bitmapqualorig, econtext))
            {
                /* Fails recheck, so drop it and loop back for another */
                InstrCountFiltered2(node, 1);
                ExecClearTuple(slot);
                continue;
            }
        }

        /* OK to return this tuple */
        return slot;
    }

    /*
     * if we get here it means we are at the end of the scan..
     */
    return ExecClearTuple(slot);
}

/*
 * bitgetpage - subroutine for BitmapHeapNext()
 *
 * This routine reads and pins the specified page of the relation, then
 * builds an array indicating which tuples on the page are both potentially
 * interesting according to the bitmap, and visible according to the snapshot.
 */
static void
bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
{
    BlockNumber page = tbmres->blockno;
    Buffer      buffer;
    Snapshot    snapshot;
    int         ntup;

    /*
     * Acquire pin on the target heap page, trading in any pin we held before.
     */
    Assert(page < scan->rs_nblocks);

    scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
                                         scan->rs_rd,
                                         page);
    buffer = scan->rs_cbuf;
    snapshot = scan->rs_snapshot;

    ntup = 0;

    /*
     * Prune and repair fragmentation for the whole page, if possible.
     */
    heap_page_prune_opt(scan->rs_rd, buffer);

    /*
     * We must hold share lock on the buffer content while examining tuple
     * visibility.  Afterwards, however, the tuples we have found to be
     * visible are guaranteed good as long as we hold the buffer pin.
     */
    LockBuffer(buffer, BUFFER_LOCK_SHARE);

    /*
     * We need two separate strategies for lossy and non-lossy cases.
     */
    if (tbmres->ntuples >= 0)
    {
        /*
         * Bitmap is non-lossy, so we just look through the offsets listed in
         * tbmres; but we have to follow any HOT chain starting at each such
         * offset.
         */
        int         curslot;

        for (curslot = 0; curslot < tbmres->ntuples; curslot++)
        {
            OffsetNumber offnum = tbmres->offsets[curslot];
            ItemPointerData tid;
            HeapTupleData heapTuple;

            ItemPointerSet(&tid, page, offnum);
            if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
                                       &heapTuple, NULL, true))
                scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
        }
    }
    else
    {
        /*
         * Bitmap is lossy, so we must examine each item pointer on the page.
         * But we can ignore HOT chains, since we'll check each tuple anyway.
         */
        Page        dp = (Page) BufferGetPage(buffer);
        OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
        OffsetNumber offnum;

        for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
        {
            ItemId      lp;
            HeapTupleData loctup;
            bool        valid;

            lp = PageGetItemId(dp, offnum);
            if (!ItemIdIsNormal(lp))
                continue;
            loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
            loctup.t_len = ItemIdGetLength(lp);
            loctup.t_tableOid = scan->rs_rd->rd_id;
            ItemPointerSet(&loctup.t_self, page, offnum);
            valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
            if (valid)
            {
                scan->rs_vistuples[ntup++] = offnum;
                PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
            }
            CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
                                            buffer, snapshot);
        }
    }

    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

    Assert(ntup <= MaxHeapTuplesPerPage);
    scan->rs_ntuples = ntup;
}

/*
 * BitmapDoneInitializingSharedState - Shared state is initialized
 *
 * By this time the leader has already populated the TBM and initialized the
 * shared state so wake up other processes.
 */
static inline void
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
    SpinLockAcquire(&pstate->mutex);
    pstate->state = BM_FINISHED;
    SpinLockRelease(&pstate->mutex);
    ConditionVariableBroadcast(&pstate->cv);
}

/*
 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 */
static inline void
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                             TBMIterateResult *tbmres)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        TBMIterator *prefetch_iterator = node->prefetch_iterator;

        if (node->prefetch_pages > 0)
        {
            /* The main iterator has closed the distance by one page */
            node->prefetch_pages--;
        }
        else if (prefetch_iterator)
        {
            /* Do not let the prefetch iterator get behind the main one */
            TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

            if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
                elog(ERROR, "prefetch and main iterators are out of sync");
        }
        return;
    }

    if (node->prefetch_maximum > 0)
    {
        TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

        SpinLockAcquire(&pstate->mutex);
        if (pstate->prefetch_pages > 0)
        {
            pstate->prefetch_pages--;
            SpinLockRelease(&pstate->mutex);
        }
        else
        {
            /* Release the mutex before iterating */
            SpinLockRelease(&pstate->mutex);

            /*
             * In shared mode, we cannot ensure that the current blockno of
             * the main iterator and that of the prefetch iterator are the
             * same.  It's possible that whatever blockno we are prefetching
             * will be processed by another process.  Therefore, we don't
             * validate the blockno here as we do in the non-parallel case.
             */
            if (prefetch_iterator)
                tbm_shared_iterate(prefetch_iterator);
        }
    }
#endif                          /* USE_PREFETCH */
}

/*
 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
 *
 * Increase prefetch target if it's not yet at the max.  Note that
 * we will increase it to zero after fetching the very first
 * page/tuple, then to one after the second tuple is fetched, then
 * it doubles as later pages are fetched.
 */
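/*
 * For example, with prefetch_maximum = 8, successive calls take the target
 * from its initial value of -1 through 0, 1, 2, 4 and then 8, where it stays.
 */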
static inline void
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        if (node->prefetch_target >= node->prefetch_maximum)
             /* don't increase any further */ ;
        else if (node->prefetch_target >= node->prefetch_maximum / 2)
            node->prefetch_target = node->prefetch_maximum;
        else if (node->prefetch_target > 0)
            node->prefetch_target *= 2;
        else
            node->prefetch_target++;
        return;
    }

    /* Do an unlocked check first to save spinlock acquisitions. */
    if (pstate->prefetch_target < node->prefetch_maximum)
    {
        SpinLockAcquire(&pstate->mutex);
        if (pstate->prefetch_target >= node->prefetch_maximum)
             /* don't increase any further */ ;
        else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
            pstate->prefetch_target = node->prefetch_maximum;
        else if (pstate->prefetch_target > 0)
            pstate->prefetch_target *= 2;
        else
            pstate->prefetch_target++;
        SpinLockRelease(&pstate->mutex);
    }
#endif                          /* USE_PREFETCH */
}

/*
 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        TBMIterator *prefetch_iterator = node->prefetch_iterator;

        if (prefetch_iterator)
        {
            while (node->prefetch_pages < node->prefetch_target)
            {
                TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

                if (tbmpre == NULL)
                {
                    /* No more pages to prefetch */
                    tbm_end_iterate(prefetch_iterator);
                    node->prefetch_iterator = NULL;
                    break;
                }
                node->prefetch_pages++;
                PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
            }
        }

        return;
    }

    if (pstate->prefetch_pages < pstate->prefetch_target)
    {
        TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

        if (prefetch_iterator)
        {
            while (1)
            {
                TBMIterateResult *tbmpre;
                bool        do_prefetch = false;

                /*
                 * Recheck under the mutex.  If some other process has already
                 * done enough prefetching then we don't need to do anything.
                 */
                SpinLockAcquire(&pstate->mutex);
                if (pstate->prefetch_pages < pstate->prefetch_target)
                {
                    pstate->prefetch_pages++;
                    do_prefetch = true;
                }
                SpinLockRelease(&pstate->mutex);

                if (!do_prefetch)
                    return;

                tbmpre = tbm_shared_iterate(prefetch_iterator);
                if (tbmpre == NULL)
                {
                    /* No more pages to prefetch */
                    tbm_end_shared_iterate(prefetch_iterator);
                    node->shared_prefetch_iterator = NULL;
                    break;
                }

                PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
            }
        }
    }
#endif                          /* USE_PREFETCH */
}

/*
 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 */
static bool
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
{
    ExprContext *econtext;

    /*
     * extract necessary information from index scan node
     */
    econtext = node->ss.ps.ps_ExprContext;

    /* Does the tuple meet the original qual conditions? */
    econtext->ecxt_scantuple = slot;

    ResetExprContext(econtext);

    return ExecQual(node->bitmapqualorig, econtext);
}

/* ----------------------------------------------------------------
 *    ExecBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecBitmapHeapScan(BitmapHeapScanState *node)
{
    return ExecScan(&node->ss,
                    (ExecScanAccessMtd) BitmapHeapNext,
                    (ExecScanRecheckMtd) BitmapHeapRecheck);
}

/* ----------------------------------------------------------------
 *    ExecReScanBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
    PlanState  *outerPlan = outerPlanState(node);

    /* rescan to release any page pin */
    heap_rescan(node->ss.ss_currentScanDesc, NULL);

    if (node->tbmiterator)
        tbm_end_iterate(node->tbmiterator);
    if (node->prefetch_iterator)
        tbm_end_iterate(node->prefetch_iterator);
    if (node->shared_tbmiterator)
        tbm_end_shared_iterate(node->shared_tbmiterator);
    if (node->shared_prefetch_iterator)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);
    if (node->tbm)
        tbm_free(node->tbm);
    node->tbm = NULL;
    node->tbmiterator = NULL;
    node->tbmres = NULL;
    node->prefetch_iterator = NULL;
    node->initialized = false;
    node->shared_tbmiterator = NULL;
    node->shared_prefetch_iterator = NULL;

    /* Reset parallel bitmap state, if present */
    if (node->pstate)
    {
        dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

        node->pstate->state = BM_INITIAL;

        if (DsaPointerIsValid(node->pstate->tbmiterator))
            tbm_free_shared_area(dsa, node->pstate->tbmiterator);

        if (DsaPointerIsValid(node->pstate->prefetch_iterator))
            tbm_free_shared_area(dsa, node->pstate->prefetch_iterator);

        node->pstate->tbmiterator = InvalidDsaPointer;
        node->pstate->prefetch_iterator = InvalidDsaPointer;
    }

    ExecScanReScan(&node->ss);

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (outerPlan->chgParam == NULL)
        ExecReScan(outerPlan);
}

/* ----------------------------------------------------------------
 *    ExecEndBitmapHeapScan
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
    Relation    relation;
    HeapScanDesc scanDesc;

    /*
     * extract information from the node
     */
    relation = node->ss.ss_currentRelation;
    scanDesc = node->ss.ss_currentScanDesc;

    /*
     * Free the exprcontext
     */
    ExecFreeExprContext(&node->ss.ps);

    /*
     * clear out tuple table slots
     */
    ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
    ExecClearTuple(node->ss.ss_ScanTupleSlot);

    /*
     * close down subplans
     */
    ExecEndNode(outerPlanState(node));

    /*
     * release bitmap if any
     */
    if (node->tbmiterator)
        tbm_end_iterate(node->tbmiterator);
    if (node->prefetch_iterator)
        tbm_end_iterate(node->prefetch_iterator);
    if (node->tbm)
        tbm_free(node->tbm);
    if (node->shared_tbmiterator)
        tbm_end_shared_iterate(node->shared_tbmiterator);
    if (node->shared_prefetch_iterator)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);

    /*
     * close heap scan
     */
    heap_endscan(scanDesc);

    /*
     * close the heap relation.
     */
    ExecCloseScanRelation(relation);
}

/* ----------------------------------------------------------------
 *    ExecInitBitmapHeapScan
 *
 *    Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;
    int         io_concurrency;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;

    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->exact_pages = 0;
    scanstate->lossy_pages = 0;
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;
    /* may be updated below */
    scanstate->prefetch_maximum = target_prefetch_pages;
    scanstate->pscan_len = 0;
    scanstate->initialized = false;
    scanstate->shared_tbmiterator = NULL;
    scanstate->pstate = NULL;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.qual =
        ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
    scanstate->bitmapqualorig =
        ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

    /*
     * tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
    ExecInitScanTupleSlot(estate, &scanstate->ss);

    /*
     * open the base relation and acquire appropriate lock on it.
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

    /*
     * Determine the maximum for prefetch_target.  If the tablespace has a
     * specific IO concurrency set, use that to compute the corresponding
     * maximum value; otherwise, we already initialized to the value computed
     * by the GUC machinery.
     */
    io_concurrency =
        get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
    if (io_concurrency != effective_io_concurrency)
    {
        double      maximum;

        if (ComputeIoConcurrency(io_concurrency, &maximum))
            scanstate->prefetch_maximum = rint(maximum);
    }

    scanstate->ss.ss_currentRelation = currentRelation;

    /*
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
    scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
                                                         estate->es_snapshot,
                                                         0,
                                                         NULL);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

    /*
     * Initialize result tuple type and projection info.
     */
    ExecAssignResultTypeFromTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child nodes
     *
     * We do this last because the child nodes will open indexscans on our
     * relation's indexes, and we want to be sure we have acquired a lock on
     * the relation first.
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * all done.
     */
    return scanstate;
}

/*----------------
 *    BitmapShouldInitializeSharedState
 *
 *    The first process to come here and see the state as BM_INITIAL
 *    will become the leader for the parallel bitmap scan and will be
 *    responsible for populating the TIDBitmap.  The other processes will
 *    be blocked by the condition variable until the leader wakes them up.
 * ---------------
 */
static bool
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
    SharedBitmapState state;

    while (1)
    {
        SpinLockAcquire(&pstate->mutex);
        state = pstate->state;
        if (pstate->state == BM_INITIAL)
            pstate->state = BM_INPROGRESS;
        SpinLockRelease(&pstate->mutex);

        /* Exit if bitmap is done, or if we're the leader. */
        if (state != BM_INPROGRESS)
            break;

        /* Wait for the leader to wake us up. */
        ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
    }

    ConditionVariableCancelSleep();

    return (state == BM_INITIAL);
}

/* ----------------------------------------------------------------
 *    ExecBitmapHeapEstimate
 *
 *    estimates the space required to serialize bitmap scan node.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
                       ParallelContext *pcxt)
{
    EState     *estate = node->ss.ps.state;

    node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
                                        phs_snapshot_data),
                               EstimateSnapshotSpace(estate->es_snapshot));

    shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *    ExecBitmapHeapInitializeDSM
 *
 *    Set up a parallel bitmap heap scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
                            ParallelContext *pcxt)
{
    ParallelBitmapHeapState *pstate;
    EState     *estate = node->ss.ps.state;

    pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);

    pstate->tbmiterator = 0;
    pstate->prefetch_iterator = 0;

    /* Initialize the mutex */
    SpinLockInit(&pstate->mutex);
    pstate->prefetch_pages = 0;
    pstate->prefetch_target = 0;
    pstate->state = BM_INITIAL;

    ConditionVariableInit(&pstate->cv);
    SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);

    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
    node->pstate = pstate;
}

/* ----------------------------------------------------------------
 *    ExecBitmapHeapInitializeWorker
 *
 *    Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
{
    ParallelBitmapHeapState *pstate;
    Snapshot    snapshot;

    pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
    node->pstate = pstate;

    snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
    heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
}