PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * BitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
36 #include "postgres.h"
37 
38 #include <math.h>
39 
40 #include "access/relscan.h"
41 #include "access/transam.h"
42 #include "executor/execdebug.h"
44 #include "miscadmin.h"
45 #include "pgstat.h"
46 #include "storage/bufmgr.h"
47 #include "storage/predicate.h"
48 #include "utils/memutils.h"
49 #include "utils/rel.h"
50 #include "utils/spccache.h"
51 #include "utils/snapmgr.h"
52 #include "utils/tqual.h"
53 
54 
56 static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
57 static inline void BitmapDoneInitializingSharedState(
58  ParallelBitmapHeapState *pstate);
59 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
60  TBMIterateResult *tbmres);
61 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
62 static inline void BitmapPrefetch(BitmapHeapScanState *node,
63  HeapScanDesc scan);
65  ParallelBitmapHeapState *pstate);
66 
67 
68 /* ----------------------------------------------------------------
69  * BitmapHeapNext
70  *
71  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
72  * ----------------------------------------------------------------
73  */
74 static TupleTableSlot *
76 {
77  ExprContext *econtext;
78  HeapScanDesc scan;
79  TIDBitmap *tbm;
80  TBMIterator *tbmiterator = NULL;
81  TBMSharedIterator *shared_tbmiterator = NULL;
82  TBMIterateResult *tbmres;
83  OffsetNumber targoffset;
84  TupleTableSlot *slot;
85  ParallelBitmapHeapState *pstate = node->pstate;
86  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
87 
88  /*
89  * extract necessary information from the bitmap heap scan node
90  */
91  econtext = node->ss.ps.ps_ExprContext;
92  slot = node->ss.ss_ScanTupleSlot;
93  scan = node->ss.ss_currentScanDesc;
94  tbm = node->tbm;
95  if (pstate == NULL)
96  tbmiterator = node->tbmiterator;
97  else
98  shared_tbmiterator = node->shared_tbmiterator;
99  tbmres = node->tbmres;
100 
101  /*
102  * If we haven't yet performed the underlying index scan, do it, and begin
103  * the iteration over the bitmap.
104  *
105  * For prefetching, we use *two* iterators, one for the pages we are
106  * actually scanning and another that runs ahead of the first for
107  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
108  * the prefetch iterator is. Also, node->prefetch_target tracks the
109  * desired prefetch distance, which starts small and increases up to the
110  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
111  * a scan that stops after a few tuples because of a LIMIT.
112  */
113  if (!node->initialized)
114  {
115  if (!pstate)
116  {
117  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
118 
119  if (!tbm || !IsA(tbm, TIDBitmap))
120  elog(ERROR, "unrecognized result from subplan");
121 
122  node->tbm = tbm;
123  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
124  node->tbmres = tbmres = NULL;
125 
126 #ifdef USE_PREFETCH
127  if (node->prefetch_maximum > 0)
128  {
130  node->prefetch_pages = 0;
131  node->prefetch_target = -1;
132  }
133 #endif /* USE_PREFETCH */
134  }
135  else
136  {
137  /*
138  * The leader will immediately come out of the function, but
139  * others will be blocked until the leader populates the TBM and
140  * wakes them up.
141  */
143  {
144  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
145  if (!tbm || !IsA(tbm, TIDBitmap))
146  elog(ERROR, "unrecognized result from subplan");
147 
148  node->tbm = tbm;
149 
150  /*
151  * Prepare to iterate over the TBM. This will return the
152  * dsa_pointer of the iterator state which will be used by
153  * multiple processes to iterate jointly.
154  */
156 #ifdef USE_PREFETCH
157  if (node->prefetch_maximum > 0)
158  {
159  pstate->prefetch_iterator =
161 
162  /*
163  * We don't need the mutex here as we haven't yet woken up
164  * others.
165  */
166  pstate->prefetch_pages = 0;
167  pstate->prefetch_target = -1;
168  }
169 #endif
170 
171  /* We have initialized the shared state so wake up others. */
173  }
174 
175  /* Allocate a private iterator and attach the shared state to it */
176  node->shared_tbmiterator = shared_tbmiterator =
178  node->tbmres = tbmres = NULL;
179 
180 #ifdef USE_PREFETCH
181  if (node->prefetch_maximum > 0)
182  {
185  }
186 #endif /* USE_PREFETCH */
187  }
188  node->initialized = true;
189  }
190 
191  for (;;)
192  {
193  Page dp;
194  ItemId lp;
195 
197 
198  /*
199  * Get next page of results if needed
200  */
201  if (tbmres == NULL)
202  {
203  if (!pstate)
204  node->tbmres = tbmres = tbm_iterate(tbmiterator);
205  else
206  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
207  if (tbmres == NULL)
208  {
209  /* no more entries in the bitmap */
210  break;
211  }
212 
213  BitmapAdjustPrefetchIterator(node, tbmres);
214 
215  /*
216  * Ignore any claimed entries past what we think is the end of the
217  * relation. (This is probably not necessary given that we got at
218  * least AccessShareLock on the table before performing any of the
219  * indexscans, but let's be safe.)
220  */
221  if (tbmres->blockno >= scan->rs_nblocks)
222  {
223  node->tbmres = tbmres = NULL;
224  continue;
225  }
226 
227  /*
228  * Fetch the current heap page and identify candidate tuples.
229  */
230  bitgetpage(scan, tbmres);
231 
232  if (tbmres->ntuples >= 0)
233  node->exact_pages++;
234  else
235  node->lossy_pages++;
236 
237  /*
238  * Set rs_cindex to first slot to examine
239  */
240  scan->rs_cindex = 0;
241 
242  /* Adjust the prefetch target */
244  }
245  else
246  {
247  /*
248  * Continuing in previously obtained page; advance rs_cindex
249  */
250  scan->rs_cindex++;
251 
252 #ifdef USE_PREFETCH
253 
254  /*
255  * Try to prefetch at least a few pages even before we get to the
256  * second page if we don't stop reading after the first tuple.
257  */
258  if (!pstate)
259  {
260  if (node->prefetch_target < node->prefetch_maximum)
261  node->prefetch_target++;
262  }
263  else if (pstate->prefetch_target < node->prefetch_maximum)
264  {
265  /* take spinlock while updating shared state */
266  SpinLockAcquire(&pstate->mutex);
267  if (pstate->prefetch_target < node->prefetch_maximum)
268  pstate->prefetch_target++;
269  SpinLockRelease(&pstate->mutex);
270  }
271 #endif /* USE_PREFETCH */
272  }
273 
274  /*
275  * Out of range? If so, nothing more to look at on this page
276  */
277  if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
278  {
279  node->tbmres = tbmres = NULL;
280  continue;
281  }
282 
283  /*
284  * We issue prefetch requests *after* fetching the current page to try
285  * to avoid having prefetching interfere with the main I/O. Also, this
286  * should happen only when we have determined there is still something
287  * to do on the current page, else we may uselessly prefetch the same
288  * page we are just about to request for real.
289  */
290  BitmapPrefetch(node, scan);
291 
292  /*
293  * Okay to fetch the tuple
294  */
295  targoffset = scan->rs_vistuples[scan->rs_cindex];
296  dp = (Page) BufferGetPage(scan->rs_cbuf);
297  lp = PageGetItemId(dp, targoffset);
298  Assert(ItemIdIsNormal(lp));
299 
300  scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
301  scan->rs_ctup.t_len = ItemIdGetLength(lp);
302  scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
303  ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
304 
306 
307  /*
308  * Set up the result slot to point to this tuple. Note that the slot
309  * acquires a pin on the buffer.
310  */
311  ExecStoreTuple(&scan->rs_ctup,
312  slot,
313  scan->rs_cbuf,
314  false);
315 
316  /*
317  * If we are using lossy info, we have to recheck the qual conditions
318  * at every tuple.
319  */
320  if (tbmres->recheck)
321  {
322  econtext->ecxt_scantuple = slot;
323  ResetExprContext(econtext);
324 
325  if (!ExecQual(node->bitmapqualorig, econtext))
326  {
327  /* Fails recheck, so drop it and loop back for another */
328  InstrCountFiltered2(node, 1);
329  ExecClearTuple(slot);
330  continue;
331  }
332  }
333 
334  /* OK to return this tuple */
335  return slot;
336  }
337 
338  /*
339  * If we get here, it means we are at the end of the scan.
340  */
341  return ExecClearTuple(slot);
342 }
343 
344 /*
345  * bitgetpage - subroutine for BitmapHeapNext()
346  *
347  * This routine reads and pins the specified page of the relation, then
348  * builds an array indicating which tuples on the page are both potentially
349  * interesting according to the bitmap, and visible according to the snapshot.
350  */
351 static void
353 {
354  BlockNumber page = tbmres->blockno;
355  Buffer buffer;
356  Snapshot snapshot;
357  int ntup;
358 
359  /*
360  * Acquire pin on the target heap page, trading in any pin we held before.
361  */
362  Assert(page < scan->rs_nblocks);
363 
364  scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
365  scan->rs_rd,
366  page);
367  buffer = scan->rs_cbuf;
368  snapshot = scan->rs_snapshot;
369 
370  ntup = 0;
371 
372  /*
373  * Prune and repair fragmentation for the whole page, if possible.
374  */
375  heap_page_prune_opt(scan->rs_rd, buffer);
376 
377  /*
378  * We must hold share lock on the buffer content while examining tuple
379  * visibility. Afterwards, however, the tuples we have found to be
380  * visible are guaranteed good as long as we hold the buffer pin.
381  */
382  LockBuffer(buffer, BUFFER_LOCK_SHARE);
383 
384  /*
385  * We need two separate strategies for lossy and non-lossy cases.
386  */
387  if (tbmres->ntuples >= 0)
388  {
389  /*
390  * Bitmap is non-lossy, so we just look through the offsets listed in
391  * tbmres; but we have to follow any HOT chain starting at each such
392  * offset.
393  */
394  int curslot;
395 
396  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
397  {
398  OffsetNumber offnum = tbmres->offsets[curslot];
399  ItemPointerData tid;
400  HeapTupleData heapTuple;
401 
402  ItemPointerSet(&tid, page, offnum);
403  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
404  &heapTuple, NULL, true))
405  scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
406  }
407  }
408  else
409  {
410  /*
411  * Bitmap is lossy, so we must examine each item pointer on the page.
412  * But we can ignore HOT chains, since we'll check each tuple anyway.
413  */
414  Page dp = (Page) BufferGetPage(buffer);
416  OffsetNumber offnum;
417 
418  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
419  {
420  ItemId lp;
421  HeapTupleData loctup;
422  bool valid;
423 
424  lp = PageGetItemId(dp, offnum);
425  if (!ItemIdIsNormal(lp))
426  continue;
427  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
428  loctup.t_len = ItemIdGetLength(lp);
429  loctup.t_tableOid = scan->rs_rd->rd_id;
430  ItemPointerSet(&loctup.t_self, page, offnum);
431  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
432  if (valid)
433  {
434  scan->rs_vistuples[ntup++] = offnum;
435  PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
436  }
437  CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
438  buffer, snapshot);
439  }
440  }
441 
443 
444  Assert(ntup <= MaxHeapTuplesPerPage);
445  scan->rs_ntuples = ntup;
446 }
447 
448 /*
449  * BitmapDoneInitializingSharedState - Shared state is initialized
450  *
451  * By this time the leader has already populated the TBM and initialized the
452  * shared state so wake up other processes.
453  */
454 static inline void
456 {
457  SpinLockAcquire(&pstate->mutex);
458  pstate->state = BM_FINISHED;
459  SpinLockRelease(&pstate->mutex);
460  ConditionVariableBroadcast(&pstate->cv);
461 }
462 
463 /*
464  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
465  */
466 static inline void
468  TBMIterateResult *tbmres)
469 {
470 #ifdef USE_PREFETCH
471  ParallelBitmapHeapState *pstate = node->pstate;
472 
473  if (pstate == NULL)
474  {
475  TBMIterator *prefetch_iterator = node->prefetch_iterator;
476 
477  if (node->prefetch_pages > 0)
478  {
479  /* The main iterator has closed the distance by one page */
480  node->prefetch_pages--;
481  }
482  else if (prefetch_iterator)
483  {
484  /* Do not let the prefetch iterator get behind the main one */
485  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
486 
487  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
488  elog(ERROR, "prefetch and main iterators are out of sync");
489  }
490  return;
491  }
492 
493  if (node->prefetch_maximum > 0)
494  {
495  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
496 
497  SpinLockAcquire(&pstate->mutex);
498  if (pstate->prefetch_pages > 0)
499  {
500  pstate->prefetch_pages--;
501  SpinLockRelease(&pstate->mutex);
502  }
503  else
504  {
505  /* Release the mutex before iterating */
506  SpinLockRelease(&pstate->mutex);
507 
508  /*
509  * In case of shared mode, we cannot ensure that the current
510  * blockno of the main iterator and that of the prefetch iterator
511  * are the same. It's possible that whatever blockno we are
512  * prefetching will be processed by another process. Therefore,
513  * we don't validate the blockno here as we do in the non-parallel
514  * case.
515  */
516  if (prefetch_iterator)
517  tbm_shared_iterate(prefetch_iterator);
518  }
519  }
520 #endif /* USE_PREFETCH */
521 }
522 
523 /*
524  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
525  *
526  * Increase prefetch target if it's not yet at the max. Note that
527  * we will increase it to zero after fetching the very first
528  * page/tuple, then to one after the second tuple is fetched, then
529  * it doubles as later pages are fetched.
530  */
531 static inline void
533 {
534 #ifdef USE_PREFETCH
535  ParallelBitmapHeapState *pstate = node->pstate;
536 
537  if (pstate == NULL)
538  {
539  if (node->prefetch_target >= node->prefetch_maximum)
540  /* don't increase any further */ ;
541  else if (node->prefetch_target >= node->prefetch_maximum / 2)
542  node->prefetch_target = node->prefetch_maximum;
543  else if (node->prefetch_target > 0)
544  node->prefetch_target *= 2;
545  else
546  node->prefetch_target++;
547  return;
548  }
549 
550  /* Do an unlocked check first to save spinlock acquisitions. */
551  if (pstate->prefetch_target < node->prefetch_maximum)
552  {
553  SpinLockAcquire(&pstate->mutex);
554  if (pstate->prefetch_target >= node->prefetch_maximum)
555  /* don't increase any further */ ;
556  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
557  pstate->prefetch_target = node->prefetch_maximum;
558  else if (pstate->prefetch_target > 0)
559  pstate->prefetch_target *= 2;
560  else
561  pstate->prefetch_target++;
562  SpinLockRelease(&pstate->mutex);
563  }
564 #endif /* USE_PREFETCH */
565 }
566 
567 /*
568  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
569  */
570 static inline void
572 {
573 #ifdef USE_PREFETCH
574  ParallelBitmapHeapState *pstate = node->pstate;
575 
576  if (pstate == NULL)
577  {
578  TBMIterator *prefetch_iterator = node->prefetch_iterator;
579 
580  if (prefetch_iterator)
581  {
582  while (node->prefetch_pages < node->prefetch_target)
583  {
584  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
585 
586  if (tbmpre == NULL)
587  {
588  /* No more pages to prefetch */
589  tbm_end_iterate(prefetch_iterator);
590  node->prefetch_iterator = NULL;
591  break;
592  }
593  node->prefetch_pages++;
594  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
595  }
596  }
597 
598  return;
599  }
600 
601  if (pstate->prefetch_pages < pstate->prefetch_target)
602  {
603  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
604 
605  if (prefetch_iterator)
606  {
607  while (1)
608  {
609  TBMIterateResult *tbmpre;
610  bool do_prefetch = false;
611 
612  /*
613  * Recheck under the mutex. If some other process has already
614  * done enough prefetching then we need not do anything.
615  */
616  SpinLockAcquire(&pstate->mutex);
617  if (pstate->prefetch_pages < pstate->prefetch_target)
618  {
619  pstate->prefetch_pages++;
620  do_prefetch = true;
621  }
622  SpinLockRelease(&pstate->mutex);
623 
624  if (!do_prefetch)
625  return;
626 
627  tbmpre = tbm_shared_iterate(prefetch_iterator);
628  if (tbmpre == NULL)
629  {
630  /* No more pages to prefetch */
631  tbm_end_shared_iterate(prefetch_iterator);
632  node->shared_prefetch_iterator = NULL;
633  break;
634  }
635 
636  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
637  }
638  }
639  }
640 #endif /* USE_PREFETCH */
641 }
642 
643 /*
644  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
645  */
646 static bool
648 {
649  ExprContext *econtext;
650 
651  /*
652  * extract necessary information from the bitmap heap scan node
653  */
654  econtext = node->ss.ps.ps_ExprContext;
655 
656  /* Does the tuple meet the original qual conditions? */
657  econtext->ecxt_scantuple = slot;
658 
659  ResetExprContext(econtext);
660 
661  return ExecQual(node->bitmapqualorig, econtext);
662 }
663 
664 /* ----------------------------------------------------------------
665  * ExecBitmapHeapScan(node)
666  * ----------------------------------------------------------------
667  */
668 static TupleTableSlot *
670 {
672 
673  return ExecScan(&node->ss,
676 }
677 
678 /* ----------------------------------------------------------------
679  * ExecReScanBitmapHeapScan(node)
680  * ----------------------------------------------------------------
681  */
682 void
684 {
686 
687  /* rescan to release any page pin */
688  heap_rescan(node->ss.ss_currentScanDesc, NULL);
689 
690  if (node->tbmiterator)
692  if (node->prefetch_iterator)
694  if (node->shared_tbmiterator)
696  if (node->shared_prefetch_iterator)
698  if (node->tbm)
699  tbm_free(node->tbm);
700  node->tbm = NULL;
701  node->tbmiterator = NULL;
702  node->tbmres = NULL;
703  node->prefetch_iterator = NULL;
704  node->initialized = false;
705  node->shared_tbmiterator = NULL;
706  node->shared_prefetch_iterator = NULL;
707 
708  ExecScanReScan(&node->ss);
709 
710  /*
711  * if chgParam of subnode is not null then plan will be re-scanned by
712  * first ExecProcNode.
713  */
714  if (outerPlan->chgParam == NULL)
715  ExecReScan(outerPlan);
716 }
717 
718 /* ----------------------------------------------------------------
719  * ExecEndBitmapHeapScan
720  * ----------------------------------------------------------------
721  */
722 void
724 {
725  Relation relation;
726  HeapScanDesc scanDesc;
727 
728  /*
729  * extract information from the node
730  */
731  relation = node->ss.ss_currentRelation;
732  scanDesc = node->ss.ss_currentScanDesc;
733 
734  /*
735  * Free the exprcontext
736  */
737  ExecFreeExprContext(&node->ss.ps);
738 
739  /*
740  * clear out tuple table slots
741  */
744 
745  /*
746  * close down subplans
747  */
749 
750  /*
751  * release bitmap if any
752  */
753  if (node->tbmiterator)
755  if (node->prefetch_iterator)
757  if (node->tbm)
758  tbm_free(node->tbm);
759  if (node->shared_tbmiterator)
761  if (node->shared_prefetch_iterator)
763 
764  /*
765  * close heap scan
766  */
767  heap_endscan(scanDesc);
768 
769  /*
770  * close the heap relation.
771  */
772  ExecCloseScanRelation(relation);
773 }
774 
775 /* ----------------------------------------------------------------
776  * ExecInitBitmapHeapScan
777  *
778  * Initializes the scan's state information.
779  * ----------------------------------------------------------------
780  */
782 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
783 {
784  BitmapHeapScanState *scanstate;
785  Relation currentRelation;
786  int io_concurrency;
787 
788  /* check for unsupported flags */
789  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
790 
791  /*
792  * Assert caller didn't ask for an unsafe snapshot --- see comments at
793  * head of file.
794  */
796 
797  /*
798  * create state structure
799  */
800  scanstate = makeNode(BitmapHeapScanState);
801  scanstate->ss.ps.plan = (Plan *) node;
802  scanstate->ss.ps.state = estate;
803  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
804 
805  scanstate->tbm = NULL;
806  scanstate->tbmiterator = NULL;
807  scanstate->tbmres = NULL;
808  scanstate->exact_pages = 0;
809  scanstate->lossy_pages = 0;
810  scanstate->prefetch_iterator = NULL;
811  scanstate->prefetch_pages = 0;
812  scanstate->prefetch_target = 0;
813  /* may be updated below */
815  scanstate->pscan_len = 0;
816  scanstate->initialized = false;
817  scanstate->shared_tbmiterator = NULL;
818  scanstate->pstate = NULL;
819 
820  /*
821  * Miscellaneous initialization
822  *
823  * create expression context for node
824  */
825  ExecAssignExprContext(estate, &scanstate->ss.ps);
826 
827  /*
828  * initialize child expressions
829  */
830  scanstate->ss.ps.qual =
831  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
832  scanstate->bitmapqualorig =
833  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
834 
835  /*
836  * tuple table initialization
837  */
838  ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
839  ExecInitScanTupleSlot(estate, &scanstate->ss);
840 
841  /*
842  * open the base relation and acquire appropriate lock on it.
843  */
844  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
845 
846  /*
847  * Determine the maximum for prefetch_target. If the tablespace has a
848  * specific IO concurrency set, use that to compute the corresponding
849  * maximum value; otherwise, we already initialized to the value computed
850  * by the GUC machinery.
851  */
852  io_concurrency =
853  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
854  if (io_concurrency != effective_io_concurrency)
855  {
856  double maximum;
857 
858  if (ComputeIoConcurrency(io_concurrency, &maximum))
859  scanstate->prefetch_maximum = rint(maximum);
860  }
861 
862  scanstate->ss.ss_currentRelation = currentRelation;
863 
864  /*
865  * Even though we aren't going to do a conventional seqscan, it is useful
866  * to create a HeapScanDesc --- most of the fields in it are usable.
867  */
868  scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
869  estate->es_snapshot,
870  0,
871  NULL);
872 
873  /*
874  * get the scan type from the relation descriptor.
875  */
876  ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
877 
878  /*
879  * Initialize result tuple type and projection info.
880  */
881  ExecAssignResultTypeFromTL(&scanstate->ss.ps);
882  ExecAssignScanProjectionInfo(&scanstate->ss);
883 
884  /*
885  * initialize child nodes
886  *
887  * We do this last because the child nodes will open indexscans on our
888  * relation's indexes, and we want to be sure we have acquired a lock on
889  * the relation first.
890  */
891  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
892 
893  /*
894  * all done.
895  */
896  return scanstate;
897 }
898 
899 /*----------------
900  * BitmapShouldInitializeSharedState
901  *
902  * The first process to come here and see the state as BM_INITIAL
903  * will become the leader for the parallel bitmap scan and will be
904  * responsible for populating the TIDBitmap. The other processes will
905  * be blocked by the condition variable until the leader wakes them up.
906  * ---------------
907  */
908 static bool
910 {
912 
913  while (1)
914  {
915  SpinLockAcquire(&pstate->mutex);
916  state = pstate->state;
917  if (pstate->state == BM_INITIAL)
918  pstate->state = BM_INPROGRESS;
919  SpinLockRelease(&pstate->mutex);
920 
921  /* Exit if bitmap is done, or if we're the leader. */
922  if (state != BM_INPROGRESS)
923  break;
924 
925  /* Wait for the leader to wake us up. */
927  }
928 
930 
931  return (state == BM_INITIAL);
932 }
933 
934 /* ----------------------------------------------------------------
935  * ExecBitmapHeapEstimate
936  *
937  * estimates the space required to serialize bitmap scan node.
938  * ----------------------------------------------------------------
939  */
940 void
942  ParallelContext *pcxt)
943 {
944  EState *estate = node->ss.ps.state;
945 
947  phs_snapshot_data),
949 
951  shm_toc_estimate_keys(&pcxt->estimator, 1);
952 }
953 
954 /* ----------------------------------------------------------------
955  * ExecBitmapHeapInitializeDSM
956  *
957  * Set up a parallel bitmap heap scan descriptor.
958  * ----------------------------------------------------------------
959  */
960 void
962  ParallelContext *pcxt)
963 {
964  ParallelBitmapHeapState *pstate;
965  EState *estate = node->ss.ps.state;
966 
967  pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
968 
969  pstate->tbmiterator = 0;
970  pstate->prefetch_iterator = 0;
971 
972  /* Initialize the mutex */
973  SpinLockInit(&pstate->mutex);
974  pstate->prefetch_pages = 0;
975  pstate->prefetch_target = 0;
976  pstate->state = BM_INITIAL;
977 
978  ConditionVariableInit(&pstate->cv);
980 
981  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
982  node->pstate = pstate;
983 }
984 
985 /* ----------------------------------------------------------------
986  * ExecBitmapHeapReInitializeDSM
987  *
988  * Reset shared state before beginning a fresh scan.
989  * ----------------------------------------------------------------
990  */
991 void
993  ParallelContext *pcxt)
994 {
995  ParallelBitmapHeapState *pstate = node->pstate;
996  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
997 
998  pstate->state = BM_INITIAL;
999 
1000  if (DsaPointerIsValid(pstate->tbmiterator))
1001  tbm_free_shared_area(dsa, pstate->tbmiterator);
1002 
1003  if (DsaPointerIsValid(pstate->prefetch_iterator))
1005 
1006  pstate->tbmiterator = InvalidDsaPointer;
1008 }
1009 
1010 /* ----------------------------------------------------------------
1011  * ExecBitmapHeapInitializeWorker
1012  *
1013  * Copy relevant information from TOC into planstate.
1014  * ----------------------------------------------------------------
1015  */
1016 void
1018 {
1019  ParallelBitmapHeapState *pstate;
1020  Snapshot snapshot;
1021 
1022  pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
1023  node->pstate = pstate;
1024 
1025  snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
1026  heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
1027 }
static void BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
TupleTableSlot * ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree)
Definition: execTuples.c:320
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
List * qual
Definition: plannodes.h:145
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
int target_prefetch_pages
Definition: bufmgr.c:129
struct dsa_area * es_query_dsa
Definition: execnodes.h:511
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1156
Plan plan
Definition: plannodes.h:328
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
ExprState * bitmapqualorig
Definition: execnodes.h:1347
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate)
Definition: execTuples.c:842
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2121
void heap_endscan(HeapScanDesc scan)
Definition: heapam.c:1565
Index scanrelid
Definition: plannodes.h:329
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
#define InvalidDsaPointer
Definition: dsa.h:78
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:776
void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
Definition: heapam.c:1774
#define RelationGetDescr(relation)
Definition: rel.h:428
#define castNode(_type_, nodeptr)
Definition: nodes.h:578
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:523
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:123
ExprContext * ps_ExprContext
Definition: execnodes.h:881
shm_toc_estimator estimator
Definition: parallel.h:41
#define SpinLockInit(lock)
Definition: spin.h:60
TIDBitmap * tbm
Definition: execnodes.h:1348
int ConditionVariableBroadcast(ConditionVariable *cv)
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:439
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
int plan_node_id
Definition: plannodes.h:143
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
Snapshot es_snapshot
Definition: execnodes.h:429
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1103
uint32 BlockNumber
Definition: block.h:31
Relation ss_currentRelation
Definition: execnodes.h:1101
EState * state
Definition: execnodes.h:849
Form_pg_class rd_rel
Definition: rel.h:114
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:366
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
SharedBitmapState
Definition: execnodes.h:1292
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:521
int effective_io_concurrency
Definition: bufmgr.c:112
void CheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: predicate.c:3945
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:160
void ExecAssignResultTypeFromTL(PlanState *planstate)
Definition: execUtils.c:445
#define HeapTupleSatisfiesVisibility(tuple, snapshot, buffer)
Definition: tqual.h:45
HeapTupleData rs_ctup
Definition: relscan.h:69
bool ComputeIoConcurrency(int io_concurrency, double *target)
Definition: bufmgr.c:467
uint16 OffsetNumber
Definition: off.h:24
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:409
HeapTupleHeader t_data
Definition: htup.h:67
BlockNumber blockno
Definition: tidbitmap.h:42
PlanState ps
Definition: execnodes.h:1100
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:2011
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:410
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableInit(ConditionVariable *cv)
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:880
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
void ConditionVariableCancelSleep(void)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
#define ERROR
Definition: elog.h:43
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:237
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1361
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2062
void ExecInitResultTupleSlot(EState *estate, PlanState *planstate)
Definition: execTuples.c:832
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:608
ItemPointerData t_self
Definition: htup.h:65
#define pgstat_count_heap_fetch(rel)
Definition: pgstat.h:1270
#define EXEC_FLAG_BACKWARD
Definition: executor.h:60
#define outerPlanState(node)
Definition: execnodes.h:893
uint32 t_len
Definition: htup.h:64
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:332
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
#define FirstOffsetNumber
Definition: off.h:27
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
Snapshot rs_snapshot
Definition: relscan.h:49
void heap_rescan(HeapScanDesc scan, ScanKey key)
Definition: heapam.c:1521
Oid t_tableOid
Definition: htup.h:66
dsa_pointer tbmiterator
Definition: execnodes.h:1314
double rint(double x)
Definition: rint.c:22
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
List * bitmapqualorig
Definition: plannodes.h:463
TBMIterateResult * tbmres
Definition: execnodes.h:1350
Oid rd_id
Definition: rel.h:116
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:351
Bitmapset * chgParam
Definition: execnodes.h:875
#define outerPlan(node)
Definition: plannodes.h:174
HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: heapam.c:1425
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define SpinLockRelease(lock)
Definition: spin.h:64
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2038
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:853
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
BlockNumber rs_nblocks
Definition: relscan.h:60
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
Plan * plan
Definition: execnodes.h:847
void PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:529
Relation rs_rd
Definition: relscan.h:48
dsa_pointer prefetch_iterator
Definition: execnodes.h:1315
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:981
Buffer rs_cbuf
Definition: relscan.h:71
#define makeNode(_type_)
Definition: nodes.h:557
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define Assert(condition)
Definition: c.h:664
#define IsMVCCSnapshot(snapshot)
Definition: tqual.h:31
#define EXEC_FLAG_MARK
Definition: executor.h:61
Definition: regguts.h:298
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: relscan.h:78
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:901
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:423
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:699
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1062
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
void ExecCloseScanRelation(Relation scanrel)
Definition: execUtils.c:666
SharedBitmapState state
Definition: execnodes.h:1319
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1513
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:197
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:95
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1359
TBMIterator * tbmiterator
Definition: execnodes.h:1349
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
ExprState * qual
Definition: execnodes.h:865
void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot)
Definition: predicate.c:2543
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1321
#define DsaPointerIsValid(x)
Definition: dsa.h:81
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
HeapScanDesc ss_currentScanDesc
Definition: execnodes.h:1102
Definition: dsa.c:354
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
ConditionVariable cv
Definition: execnodes.h:1320
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:468
TBMIterator * prefetch_iterator
Definition: execnodes.h:1353
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecScanReScan(ScanState *node)
Definition: execScan.c:329
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
void ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc)
Definition: execUtils.c:545
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
#define elog
Definition: elog.h:219
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:74
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1475
int Buffer
Definition: buf.h:23
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1360
#define offsetof(type, field)
Definition: c.h:549
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define ResetExprContext(econtext)
Definition: executor.h:470
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:105
shm_toc * toc
Definition: parallel.h:44
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1168