PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * ExecBitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
54 
57 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
58  TBMIterateResult *tbmres);
59 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
60 static inline void BitmapPrefetch(BitmapHeapScanState *node,
61  TableScanDesc scan);
63 
64 
65 /* ----------------------------------------------------------------
66  * BitmapHeapNext
67  *
68  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
69  * ----------------------------------------------------------------
70  */
71 static TupleTableSlot *
73 {
74  ExprContext *econtext;
75  TableScanDesc scan;
76  TIDBitmap *tbm;
77  TBMIterator *tbmiterator = NULL;
78  TBMSharedIterator *shared_tbmiterator = NULL;
79  TBMIterateResult *tbmres;
80  TupleTableSlot *slot;
81  ParallelBitmapHeapState *pstate = node->pstate;
82  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
83 
84  /*
85  * extract necessary information from index scan node
86  */
87  econtext = node->ss.ps.ps_ExprContext;
88  slot = node->ss.ss_ScanTupleSlot;
89  scan = node->ss.ss_currentScanDesc;
90  tbm = node->tbm;
91  if (pstate == NULL)
92  tbmiterator = node->tbmiterator;
93  else
94  shared_tbmiterator = node->shared_tbmiterator;
95  tbmres = node->tbmres;
96 
97  /*
98  * If we haven't yet performed the underlying index scan, do it, and begin
99  * the iteration over the bitmap.
100  *
101  * For prefetching, we use *two* iterators, one for the pages we are
102  * actually scanning and another that runs ahead of the first for
103  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
104  * the prefetch iterator is. Also, node->prefetch_target tracks the
105  * desired prefetch distance, which starts small and increases up to the
106  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
107  * a scan that stops after a few tuples because of a LIMIT.
108  */
109  if (!node->initialized)
110  {
111  if (!pstate)
112  {
113  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
114 
115  if (!tbm || !IsA(tbm, TIDBitmap))
116  elog(ERROR, "unrecognized result from subplan");
117 
118  node->tbm = tbm;
119  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
120  node->tbmres = tbmres = NULL;
121 
122 #ifdef USE_PREFETCH
123  if (node->prefetch_maximum > 0)
124  {
126  node->prefetch_pages = 0;
127  node->prefetch_target = -1;
128  }
129 #endif /* USE_PREFETCH */
130  }
131  else
132  {
133  /*
134  * The leader will immediately come out of the function, but
135  * others will be blocked until leader populates the TBM and wakes
136  * them up.
137  */
139  {
140  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
141  if (!tbm || !IsA(tbm, TIDBitmap))
142  elog(ERROR, "unrecognized result from subplan");
143 
144  node->tbm = tbm;
145 
146  /*
147  * Prepare to iterate over the TBM. This will return the
148  * dsa_pointer of the iterator state which will be used by
149  * multiple processes to iterate jointly.
150  */
152 #ifdef USE_PREFETCH
153  if (node->prefetch_maximum > 0)
154  {
155  pstate->prefetch_iterator =
157 
158  /*
159  * We don't need the mutex here as we haven't yet woke up
160  * others.
161  */
162  pstate->prefetch_pages = 0;
163  pstate->prefetch_target = -1;
164  }
165 #endif
166 
167  /* We have initialized the shared state so wake up others. */
169  }
170 
171  /* Allocate a private iterator and attach the shared state to it */
172  node->shared_tbmiterator = shared_tbmiterator =
174  node->tbmres = tbmres = NULL;
175 
176 #ifdef USE_PREFETCH
177  if (node->prefetch_maximum > 0)
178  {
181  }
182 #endif /* USE_PREFETCH */
183  }
184  node->initialized = true;
185  }
186 
187  for (;;)
188  {
189  bool skip_fetch;
190 
192 
193  /*
194  * Get next page of results if needed
195  */
196  if (tbmres == NULL)
197  {
198  if (!pstate)
199  node->tbmres = tbmres = tbm_iterate(tbmiterator);
200  else
201  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
202  if (tbmres == NULL)
203  {
204  /* no more entries in the bitmap */
205  break;
206  }
207 
208  BitmapAdjustPrefetchIterator(node, tbmres);
209 
210  /*
211  * We can skip fetching the heap page if we don't need any fields
212  * from the heap, and the bitmap entries don't need rechecking,
213  * and all tuples on the page are visible to our transaction.
214  *
215  * XXX: It's a layering violation that we do these checks above
216  * tableam, they should probably moved below it at some point.
217  */
218  skip_fetch = (node->can_skip_fetch &&
219  !tbmres->recheck &&
221  tbmres->blockno,
222  &node->vmbuffer));
223 
224  if (skip_fetch)
225  {
226  /* can't be lossy in the skip_fetch case */
227  Assert(tbmres->ntuples >= 0);
228 
229  /*
230  * The number of tuples on this page is put into
231  * node->return_empty_tuples.
232  */
233  node->return_empty_tuples = tbmres->ntuples;
234  }
235  else if (!table_scan_bitmap_next_block(scan, tbmres))
236  {
237  /* AM doesn't think this block is valid, skip */
238  continue;
239  }
240 
241  if (tbmres->ntuples >= 0)
242  node->exact_pages++;
243  else
244  node->lossy_pages++;
245 
246  /* Adjust the prefetch target */
248  }
249  else
250  {
251  /*
252  * Continuing in previously obtained page.
253  */
254 
255 #ifdef USE_PREFETCH
256 
257  /*
258  * Try to prefetch at least a few pages even before we get to the
259  * second page if we don't stop reading after the first tuple.
260  */
261  if (!pstate)
262  {
263  if (node->prefetch_target < node->prefetch_maximum)
264  node->prefetch_target++;
265  }
266  else if (pstate->prefetch_target < node->prefetch_maximum)
267  {
268  /* take spinlock while updating shared state */
269  SpinLockAcquire(&pstate->mutex);
270  if (pstate->prefetch_target < node->prefetch_maximum)
271  pstate->prefetch_target++;
272  SpinLockRelease(&pstate->mutex);
273  }
274 #endif /* USE_PREFETCH */
275  }
276 
277  /*
278  * We issue prefetch requests *after* fetching the current page to try
279  * to avoid having prefetching interfere with the main I/O. Also, this
280  * should happen only when we have determined there is still something
281  * to do on the current page, else we may uselessly prefetch the same
282  * page we are just about to request for real.
283  *
284  * XXX: It's a layering violation that we do these checks above
285  * tableam, they should probably moved below it at some point.
286  */
287  BitmapPrefetch(node, scan);
288 
289  if (node->return_empty_tuples > 0)
290  {
291  /*
292  * If we don't have to fetch the tuple, just return nulls.
293  */
294  ExecStoreAllNullTuple(slot);
295 
296  if (--node->return_empty_tuples == 0)
297  {
298  /* no more tuples to return in the next round */
299  node->tbmres = tbmres = NULL;
300  }
301  }
302  else
303  {
304  /*
305  * Attempt to fetch tuple from AM.
306  */
307  if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
308  {
309  /* nothing more to look at on this page */
310  node->tbmres = tbmres = NULL;
311  continue;
312  }
313 
314  /*
315  * If we are using lossy info, we have to recheck the qual
316  * conditions at every tuple.
317  */
318  if (tbmres->recheck)
319  {
320  econtext->ecxt_scantuple = slot;
321  if (!ExecQualAndReset(node->bitmapqualorig, econtext))
322  {
323  /* Fails recheck, so drop it and loop back for another */
324  InstrCountFiltered2(node, 1);
325  ExecClearTuple(slot);
326  continue;
327  }
328  }
329  }
330 
331  /* OK to return this tuple */
332  return slot;
333  }
334 
335  /*
336  * if we get here it means we are at the end of the scan..
337  */
338  return ExecClearTuple(slot);
339 }
340 
341 /*
342  * BitmapDoneInitializingSharedState - Shared state is initialized
343  *
344  * By this time the leader has already populated the TBM and initialized the
345  * shared state so wake up other processes.
346  */
347 static inline void
349 {
350  SpinLockAcquire(&pstate->mutex);
351  pstate->state = BM_FINISHED;
352  SpinLockRelease(&pstate->mutex);
353  ConditionVariableBroadcast(&pstate->cv);
354 }
355 
356 /*
357  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
358  */
359 static inline void
361  TBMIterateResult *tbmres)
362 {
363 #ifdef USE_PREFETCH
364  ParallelBitmapHeapState *pstate = node->pstate;
365 
366  if (pstate == NULL)
367  {
368  TBMIterator *prefetch_iterator = node->prefetch_iterator;
369 
370  if (node->prefetch_pages > 0)
371  {
372  /* The main iterator has closed the distance by one page */
373  node->prefetch_pages--;
374  }
375  else if (prefetch_iterator)
376  {
377  /* Do not let the prefetch iterator get behind the main one */
378  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
379 
380  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
381  elog(ERROR, "prefetch and main iterators are out of sync");
382  }
383  return;
384  }
385 
386  if (node->prefetch_maximum > 0)
387  {
388  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
389 
390  SpinLockAcquire(&pstate->mutex);
391  if (pstate->prefetch_pages > 0)
392  {
393  pstate->prefetch_pages--;
394  SpinLockRelease(&pstate->mutex);
395  }
396  else
397  {
398  /* Release the mutex before iterating */
399  SpinLockRelease(&pstate->mutex);
400 
401  /*
402  * In case of shared mode, we can not ensure that the current
403  * blockno of the main iterator and that of the prefetch iterator
404  * are same. It's possible that whatever blockno we are
405  * prefetching will be processed by another process. Therefore,
406  * we don't validate the blockno here as we do in non-parallel
407  * case.
408  */
409  if (prefetch_iterator)
410  tbm_shared_iterate(prefetch_iterator);
411  }
412  }
413 #endif /* USE_PREFETCH */
414 }
415 
416 /*
417  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
418  *
419  * Increase prefetch target if it's not yet at the max. Note that
420  * we will increase it to zero after fetching the very first
421  * page/tuple, then to one after the second tuple is fetched, then
422  * it doubles as later pages are fetched.
423  */
424 static inline void
426 {
427 #ifdef USE_PREFETCH
428  ParallelBitmapHeapState *pstate = node->pstate;
429 
430  if (pstate == NULL)
431  {
432  if (node->prefetch_target >= node->prefetch_maximum)
433  /* don't increase any further */ ;
434  else if (node->prefetch_target >= node->prefetch_maximum / 2)
435  node->prefetch_target = node->prefetch_maximum;
436  else if (node->prefetch_target > 0)
437  node->prefetch_target *= 2;
438  else
439  node->prefetch_target++;
440  return;
441  }
442 
443  /* Do an unlocked check first to save spinlock acquisitions. */
444  if (pstate->prefetch_target < node->prefetch_maximum)
445  {
446  SpinLockAcquire(&pstate->mutex);
447  if (pstate->prefetch_target >= node->prefetch_maximum)
448  /* don't increase any further */ ;
449  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
450  pstate->prefetch_target = node->prefetch_maximum;
451  else if (pstate->prefetch_target > 0)
452  pstate->prefetch_target *= 2;
453  else
454  pstate->prefetch_target++;
455  SpinLockRelease(&pstate->mutex);
456  }
457 #endif /* USE_PREFETCH */
458 }
459 
460 /*
461  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
462  */
463 static inline void
465 {
466 #ifdef USE_PREFETCH
467  ParallelBitmapHeapState *pstate = node->pstate;
468 
469  if (pstate == NULL)
470  {
471  TBMIterator *prefetch_iterator = node->prefetch_iterator;
472 
473  if (prefetch_iterator)
474  {
475  while (node->prefetch_pages < node->prefetch_target)
476  {
477  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
478  bool skip_fetch;
479 
480  if (tbmpre == NULL)
481  {
482  /* No more pages to prefetch */
483  tbm_end_iterate(prefetch_iterator);
484  node->prefetch_iterator = NULL;
485  break;
486  }
487  node->prefetch_pages++;
488 
489  /*
490  * If we expect not to have to actually read this heap page,
491  * skip this prefetch call, but continue to run the prefetch
492  * logic normally. (Would it be better not to increment
493  * prefetch_pages?)
494  *
495  * This depends on the assumption that the index AM will
496  * report the same recheck flag for this future heap page as
497  * it did for the current heap page; which is not a certainty
498  * but is true in many cases.
499  */
500  skip_fetch = (node->can_skip_fetch &&
501  (node->tbmres ? !node->tbmres->recheck : false) &&
503  tbmpre->blockno,
504  &node->pvmbuffer));
505 
506  if (!skip_fetch)
507  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
508  }
509  }
510 
511  return;
512  }
513 
514  if (pstate->prefetch_pages < pstate->prefetch_target)
515  {
516  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
517 
518  if (prefetch_iterator)
519  {
520  while (1)
521  {
522  TBMIterateResult *tbmpre;
523  bool do_prefetch = false;
524  bool skip_fetch;
525 
526  /*
527  * Recheck under the mutex. If some other process has already
528  * done enough prefetching then we need not to do anything.
529  */
530  SpinLockAcquire(&pstate->mutex);
531  if (pstate->prefetch_pages < pstate->prefetch_target)
532  {
533  pstate->prefetch_pages++;
534  do_prefetch = true;
535  }
536  SpinLockRelease(&pstate->mutex);
537 
538  if (!do_prefetch)
539  return;
540 
541  tbmpre = tbm_shared_iterate(prefetch_iterator);
542  if (tbmpre == NULL)
543  {
544  /* No more pages to prefetch */
545  tbm_end_shared_iterate(prefetch_iterator);
546  node->shared_prefetch_iterator = NULL;
547  break;
548  }
549 
550  /* As above, skip prefetch if we expect not to need page */
551  skip_fetch = (node->can_skip_fetch &&
552  (node->tbmres ? !node->tbmres->recheck : false) &&
554  tbmpre->blockno,
555  &node->pvmbuffer));
556 
557  if (!skip_fetch)
558  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
559  }
560  }
561  }
562 #endif /* USE_PREFETCH */
563 }
564 
565 /*
566  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
567  */
568 static bool
570 {
571  ExprContext *econtext;
572 
573  /*
574  * extract necessary information from index scan node
575  */
576  econtext = node->ss.ps.ps_ExprContext;
577 
578  /* Does the tuple meet the original qual conditions? */
579  econtext->ecxt_scantuple = slot;
580  return ExecQualAndReset(node->bitmapqualorig, econtext);
581 }
582 
583 /* ----------------------------------------------------------------
584  * ExecBitmapHeapScan(node)
585  * ----------------------------------------------------------------
586  */
587 static TupleTableSlot *
589 {
591 
592  return ExecScan(&node->ss,
595 }
596 
597 /* ----------------------------------------------------------------
598  * ExecReScanBitmapHeapScan(node)
599  * ----------------------------------------------------------------
600  */
601 void
603 {
605 
606  /* rescan to release any page pin */
607  table_rescan(node->ss.ss_currentScanDesc, NULL);
608 
609  /* release bitmaps and buffers if any */
610  if (node->tbmiterator)
612  if (node->prefetch_iterator)
614  if (node->shared_tbmiterator)
616  if (node->shared_prefetch_iterator)
618  if (node->tbm)
619  tbm_free(node->tbm);
620  if (node->vmbuffer != InvalidBuffer)
621  ReleaseBuffer(node->vmbuffer);
622  if (node->pvmbuffer != InvalidBuffer)
623  ReleaseBuffer(node->pvmbuffer);
624  node->tbm = NULL;
625  node->tbmiterator = NULL;
626  node->tbmres = NULL;
627  node->prefetch_iterator = NULL;
628  node->initialized = false;
629  node->shared_tbmiterator = NULL;
630  node->shared_prefetch_iterator = NULL;
631  node->vmbuffer = InvalidBuffer;
632  node->pvmbuffer = InvalidBuffer;
633 
634  ExecScanReScan(&node->ss);
635 
636  /*
637  * if chgParam of subnode is not null then plan will be re-scanned by
638  * first ExecProcNode.
639  */
640  if (outerPlan->chgParam == NULL)
641  ExecReScan(outerPlan);
642 }
643 
644 /* ----------------------------------------------------------------
645  * ExecEndBitmapHeapScan
646  * ----------------------------------------------------------------
647  */
648 void
650 {
651  TableScanDesc scanDesc;
652 
653  /*
654  * extract information from the node
655  */
656  scanDesc = node->ss.ss_currentScanDesc;
657 
658  /*
659  * Free the exprcontext
660  */
661  ExecFreeExprContext(&node->ss.ps);
662 
663  /*
664  * clear out tuple table slots
665  */
666  if (node->ss.ps.ps_ResultTupleSlot)
669 
670  /*
671  * close down subplans
672  */
674 
675  /*
676  * release bitmaps and buffers if any
677  */
678  if (node->tbmiterator)
680  if (node->prefetch_iterator)
682  if (node->tbm)
683  tbm_free(node->tbm);
684  if (node->shared_tbmiterator)
686  if (node->shared_prefetch_iterator)
688  if (node->vmbuffer != InvalidBuffer)
689  ReleaseBuffer(node->vmbuffer);
690  if (node->pvmbuffer != InvalidBuffer)
691  ReleaseBuffer(node->pvmbuffer);
692 
693  /*
694  * close heap scan
695  */
696  table_endscan(scanDesc);
697 }
698 
699 /* ----------------------------------------------------------------
700  * ExecInitBitmapHeapScan
701  *
702  * Initializes the scan's state information.
703  * ----------------------------------------------------------------
704  */
706 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
707 {
708  BitmapHeapScanState *scanstate;
709  Relation currentRelation;
710  int io_concurrency;
711 
712  /* check for unsupported flags */
713  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
714 
715  /*
716  * Assert caller didn't ask for an unsafe snapshot --- see comments at
717  * head of file.
718  */
720 
721  /*
722  * create state structure
723  */
724  scanstate = makeNode(BitmapHeapScanState);
725  scanstate->ss.ps.plan = (Plan *) node;
726  scanstate->ss.ps.state = estate;
727  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
728 
729  scanstate->tbm = NULL;
730  scanstate->tbmiterator = NULL;
731  scanstate->tbmres = NULL;
732  scanstate->return_empty_tuples = 0;
733  scanstate->vmbuffer = InvalidBuffer;
734  scanstate->pvmbuffer = InvalidBuffer;
735  scanstate->exact_pages = 0;
736  scanstate->lossy_pages = 0;
737  scanstate->prefetch_iterator = NULL;
738  scanstate->prefetch_pages = 0;
739  scanstate->prefetch_target = 0;
740  /* may be updated below */
742  scanstate->pscan_len = 0;
743  scanstate->initialized = false;
744  scanstate->shared_tbmiterator = NULL;
745  scanstate->shared_prefetch_iterator = NULL;
746  scanstate->pstate = NULL;
747 
748  /*
749  * We can potentially skip fetching heap pages if we do not need any
750  * columns of the table, either for checking non-indexable quals or for
751  * returning data. This test is a bit simplistic, as it checks the
752  * stronger condition that there's no qual or return tlist at all. But in
753  * most cases it's probably not worth working harder than that.
754  */
755  scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
756  node->scan.plan.targetlist == NIL);
757 
758  /*
759  * Miscellaneous initialization
760  *
761  * create expression context for node
762  */
763  ExecAssignExprContext(estate, &scanstate->ss.ps);
764 
765  /*
766  * open the scan relation
767  */
768  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
769 
770  /*
771  * initialize child nodes
772  */
773  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
774 
775  /*
776  * get the scan type from the relation descriptor.
777  */
778  ExecInitScanTupleSlot(estate, &scanstate->ss,
779  RelationGetDescr(currentRelation),
780  table_slot_callbacks(currentRelation));
781 
782  /*
783  * Initialize result type and projection.
784  */
785  ExecInitResultTypeTL(&scanstate->ss.ps);
786  ExecAssignScanProjectionInfo(&scanstate->ss);
787 
788  /*
789  * initialize child expressions
790  */
791  scanstate->ss.ps.qual =
792  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
793  scanstate->bitmapqualorig =
794  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
795 
796  /*
797  * Determine the maximum for prefetch_target. If the tablespace has a
798  * specific IO concurrency set, use that to compute the corresponding
799  * maximum value; otherwise, we already initialized to the value computed
800  * by the GUC machinery.
801  */
802  io_concurrency =
803  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
804  if (io_concurrency != effective_io_concurrency)
805  {
806  double maximum;
807 
808  if (ComputeIoConcurrency(io_concurrency, &maximum))
809  scanstate->prefetch_maximum = rint(maximum);
810  }
811 
812  scanstate->ss.ss_currentRelation = currentRelation;
813 
814  scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
815  estate->es_snapshot,
816  0,
817  NULL);
818 
819  /*
820  * all done.
821  */
822  return scanstate;
823 }
824 
825 /*----------------
826  * BitmapShouldInitializeSharedState
827  *
828  * The first process to come here and see the state to the BM_INITIAL
829  * will become the leader for the parallel bitmap scan and will be
830  * responsible for populating the TIDBitmap. The other processes will
831  * be blocked by the condition variable until the leader wakes them up.
832  * ---------------
833  */
834 static bool
836 {
838 
839  while (1)
840  {
841  SpinLockAcquire(&pstate->mutex);
842  state = pstate->state;
843  if (pstate->state == BM_INITIAL)
844  pstate->state = BM_INPROGRESS;
845  SpinLockRelease(&pstate->mutex);
846 
847  /* Exit if bitmap is done, or if we're the leader. */
848  if (state != BM_INPROGRESS)
849  break;
850 
851  /* Wait for the leader to wake us up. */
853  }
854 
856 
857  return (state == BM_INITIAL);
858 }
859 
860 /* ----------------------------------------------------------------
861  * ExecBitmapHeapEstimate
862  *
863  * Compute the amount of space we'll need in the parallel
864  * query DSM, and inform pcxt->estimator about our needs.
865  * ----------------------------------------------------------------
866  */
867 void
869  ParallelContext *pcxt)
870 {
871  EState *estate = node->ss.ps.state;
872 
874  phs_snapshot_data),
876 
878  shm_toc_estimate_keys(&pcxt->estimator, 1);
879 }
880 
881 /* ----------------------------------------------------------------
882  * ExecBitmapHeapInitializeDSM
883  *
884  * Set up a parallel bitmap heap scan descriptor.
885  * ----------------------------------------------------------------
886  */
887 void
889  ParallelContext *pcxt)
890 {
891  ParallelBitmapHeapState *pstate;
892  EState *estate = node->ss.ps.state;
893  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
894 
895  /* If there's no DSA, there are no workers; initialize nothing. */
896  if (dsa == NULL)
897  return;
898 
899  pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
900 
901  pstate->tbmiterator = 0;
902  pstate->prefetch_iterator = 0;
903 
904  /* Initialize the mutex */
905  SpinLockInit(&pstate->mutex);
906  pstate->prefetch_pages = 0;
907  pstate->prefetch_target = 0;
908  pstate->state = BM_INITIAL;
909 
910  ConditionVariableInit(&pstate->cv);
912 
913  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
914  node->pstate = pstate;
915 }
916 
917 /* ----------------------------------------------------------------
918  * ExecBitmapHeapReInitializeDSM
919  *
920  * Reset shared state before beginning a fresh scan.
921  * ----------------------------------------------------------------
922  */
923 void
925  ParallelContext *pcxt)
926 {
927  ParallelBitmapHeapState *pstate = node->pstate;
928  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
929 
930  /* If there's no DSA, there are no workers; do nothing. */
931  if (dsa == NULL)
932  return;
933 
934  pstate->state = BM_INITIAL;
935 
936  if (DsaPointerIsValid(pstate->tbmiterator))
937  tbm_free_shared_area(dsa, pstate->tbmiterator);
938 
941 
942  pstate->tbmiterator = InvalidDsaPointer;
944 }
945 
946 /* ----------------------------------------------------------------
947  * ExecBitmapHeapInitializeWorker
948  *
949  * Copy relevant information from TOC into planstate.
950  * ----------------------------------------------------------------
951  */
952 void
954  ParallelWorkerContext *pwcxt)
955 {
956  ParallelBitmapHeapState *pstate;
957  Snapshot snapshot;
958 
959  Assert(node->ss.ps.state->es_query_dsa != NULL);
960 
961  pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
962  node->pstate = pstate;
963 
964  snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
966 }
void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
Definition: tableam.c:110
#define NIL
Definition: pg_list.h:65
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1657
List * qual
Definition: plannodes.h:141
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
int target_prefetch_pages
Definition: bufmgr.c:130
struct dsa_area * es_query_dsa
Definition: execnodes.h:583
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1145
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
Plan plan
Definition: plannodes.h:340
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
ExprState * bitmapqualorig
Definition: execnodes.h:1584
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2161
Index scanrelid
Definition: plannodes.h:341
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
#define InvalidDsaPointer
Definition: dsa.h:78
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:765
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1546
#define RelationGetDescr(relation)
Definition: rel.h:448
#define castNode(_type_, nodeptr)
Definition: nodes.h:594
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:537
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:158
ExprContext * ps_ExprContext
Definition: execnodes.h:978
shm_toc_estimator estimator
Definition: parallel.h:41
#define SpinLockInit(lock)
Definition: spin.h:60
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:44
TIDBitmap * tbm
Definition: execnodes.h:1585
void ExecReScan(PlanState *node)
Definition: execAmi.c:75
int plan_node_id
Definition: plannodes.h:139
#define InvalidBuffer
Definition: buf.h:25
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1332
void ConditionVariableBroadcast(ConditionVariable *cv)
Snapshot es_snapshot
Definition: execnodes.h:502
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1333
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3365
Relation ss_currentRelation
Definition: execnodes.h:1331
EState * state
Definition: execnodes.h:941
Form_pg_class rd_rel
Definition: rel.h:83
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:840
SharedBitmapState
Definition: execnodes.h:1525
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:614
int effective_io_concurrency
Definition: bufmgr.c:113
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:207
bool ComputeIoConcurrency(int io_concurrency, double *target)
Definition: bufmgr.c:469
BlockNumber blockno
Definition: tidbitmap.h:42
PlanState ps
Definition: execnodes.h:1330
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableInit(ConditionVariable *cv)
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:977
void ConditionVariableCancelSleep(void)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
#define ERROR
Definition: elog.h:43
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:272
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1602
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2102
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:693
#define EXEC_FLAG_BACKWARD
Definition: executor.h:58
#define outerPlanState(node)
Definition: execnodes.h:1033
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1781
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:321
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1725
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
dsa_pointer tbmiterator
Definition: execnodes.h:1547
double rint(double x)
Definition: rint.c:21
List * bitmapqualorig
Definition: plannodes.h:475
TBMIterateResult * tbmres
Definition: execnodes.h:1587
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:340
Bitmapset * chgParam
Definition: execnodes.h:971
#define outerPlan(node)
Definition: plannodes.h:170
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:426
#define SpinLockRelease(lock)
Definition: spin.h:64
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:392
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2078
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:945
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:97
Plan * plan
Definition: execnodes.h:939
void PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:531
dsa_pointer prefetch_iterator
Definition: execnodes.h:1548
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:970
#define makeNode(_type_)
Definition: nodes.h:573
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define Assert(condition)
Definition: c.h:739
#define EXEC_FLAG_MARK
Definition: executor.h:59
Definition: regguts.h:298
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1046
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:444
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:688
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1051
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
SharedBitmapState state
Definition: execnodes.h:1552
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:223
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1600
TBMIterator * tbmiterator
Definition: execnodes.h:1586
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
List * targetlist
Definition: plannodes.h:140
ExprState * qual
Definition: execnodes.h:960
Relation rs_rd
Definition: relscan.h:34
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1554
#define DsaPointerIsValid(x)
Definition: dsa.h:81
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:831
Definition: dsa.c:354
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
ConditionVariable cv
Definition: execnodes.h:1553
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:482
TBMIterator * prefetch_iterator
Definition: execnodes.h:1594
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1641
#define elog(elevel,...)
Definition: elog.h:228
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecScanReScan(ScanState *node)
Definition: execScan.c:299
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:781
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:425
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:138
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1464
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1601
#define offsetof(type, field)
Definition: c.h:662
shm_toc * toc
Definition: parallel.h:44
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1157