PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * ExecBitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
54 
55 
58 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
59  TBMIterateResult *tbmres);
60 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
61 static inline void BitmapPrefetch(BitmapHeapScanState *node,
62  TableScanDesc scan);
64 
65 
66 /* ----------------------------------------------------------------
67  * BitmapHeapNext
68  *
69  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
70  * ----------------------------------------------------------------
71  */
72 static TupleTableSlot *
74 {
75  ExprContext *econtext;
76  TableScanDesc scan;
77  TIDBitmap *tbm;
78  TBMIterator *tbmiterator = NULL;
79  TBMSharedIterator *shared_tbmiterator = NULL;
80  TBMIterateResult *tbmres;
81  TupleTableSlot *slot;
82  ParallelBitmapHeapState *pstate = node->pstate;
83  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
84 
85  /*
86  * extract necessary information from index scan node
87  */
88  econtext = node->ss.ps.ps_ExprContext;
89  slot = node->ss.ss_ScanTupleSlot;
90  scan = node->ss.ss_currentScanDesc;
91  tbm = node->tbm;
92  if (pstate == NULL)
93  tbmiterator = node->tbmiterator;
94  else
95  shared_tbmiterator = node->shared_tbmiterator;
96  tbmres = node->tbmres;
97 
98  /*
99  * If we haven't yet performed the underlying index scan, do it, and begin
100  * the iteration over the bitmap.
101  *
102  * For prefetching, we use *two* iterators, one for the pages we are
103  * actually scanning and another that runs ahead of the first for
104  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
105  * the prefetch iterator is. Also, node->prefetch_target tracks the
106  * desired prefetch distance, which starts small and increases up to the
107  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
108  * a scan that stops after a few tuples because of a LIMIT.
109  */
110  if (!node->initialized)
111  {
112  if (!pstate)
113  {
114  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
115 
116  if (!tbm || !IsA(tbm, TIDBitmap))
117  elog(ERROR, "unrecognized result from subplan");
118 
119  node->tbm = tbm;
120  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
121  node->tbmres = tbmres = NULL;
122 
123 #ifdef USE_PREFETCH
124  if (node->prefetch_maximum > 0)
125  {
127  node->prefetch_pages = 0;
128  node->prefetch_target = -1;
129  }
130 #endif /* USE_PREFETCH */
131  }
132  else
133  {
134  /*
135  * The leader will immediately come out of the function, but
136  * others will be blocked until leader populates the TBM and wakes
137  * them up.
138  */
140  {
141  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
142  if (!tbm || !IsA(tbm, TIDBitmap))
143  elog(ERROR, "unrecognized result from subplan");
144 
145  node->tbm = tbm;
146 
147  /*
148  * Prepare to iterate over the TBM. This will return the
149  * dsa_pointer of the iterator state which will be used by
150  * multiple processes to iterate jointly.
151  */
153 #ifdef USE_PREFETCH
154  if (node->prefetch_maximum > 0)
155  {
156  pstate->prefetch_iterator =
158 
159  /*
160  * We don't need the mutex here as we haven't yet woke up
161  * others.
162  */
163  pstate->prefetch_pages = 0;
164  pstate->prefetch_target = -1;
165  }
166 #endif
167 
168  /* We have initialized the shared state so wake up others. */
170  }
171 
172  /* Allocate a private iterator and attach the shared state to it */
173  node->shared_tbmiterator = shared_tbmiterator =
175  node->tbmres = tbmres = NULL;
176 
177 #ifdef USE_PREFETCH
178  if (node->prefetch_maximum > 0)
179  {
182  }
183 #endif /* USE_PREFETCH */
184  }
185  node->initialized = true;
186  }
187 
188  for (;;)
189  {
190  bool skip_fetch;
191 
193 
194  /*
195  * Get next page of results if needed
196  */
197  if (tbmres == NULL)
198  {
199  if (!pstate)
200  node->tbmres = tbmres = tbm_iterate(tbmiterator);
201  else
202  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
203  if (tbmres == NULL)
204  {
205  /* no more entries in the bitmap */
206  break;
207  }
208 
209  BitmapAdjustPrefetchIterator(node, tbmres);
210 
211  /*
212  * We can skip fetching the heap page if we don't need any fields
213  * from the heap, and the bitmap entries don't need rechecking,
214  * and all tuples on the page are visible to our transaction.
215  *
216  * XXX: It's a layering violation that we do these checks above
217  * tableam, they should probably moved below it at some point.
218  */
219  skip_fetch = (node->can_skip_fetch &&
220  !tbmres->recheck &&
222  tbmres->blockno,
223  &node->vmbuffer));
224 
225  if (skip_fetch)
226  {
227  /* can't be lossy in the skip_fetch case */
228  Assert(tbmres->ntuples >= 0);
229 
230  /*
231  * The number of tuples on this page is put into
232  * node->return_empty_tuples.
233  */
234  node->return_empty_tuples = tbmres->ntuples;
235  }
236  else if (!table_scan_bitmap_next_block(scan, tbmres))
237  {
238  /* AM doesn't think this block is valid, skip */
239  continue;
240  }
241 
242  if (tbmres->ntuples >= 0)
243  node->exact_pages++;
244  else
245  node->lossy_pages++;
246 
247  /* Adjust the prefetch target */
249  }
250  else
251  {
252  /*
253  * Continuing in previously obtained page.
254  */
255 
256 #ifdef USE_PREFETCH
257 
258  /*
259  * Try to prefetch at least a few pages even before we get to the
260  * second page if we don't stop reading after the first tuple.
261  */
262  if (!pstate)
263  {
264  if (node->prefetch_target < node->prefetch_maximum)
265  node->prefetch_target++;
266  }
267  else if (pstate->prefetch_target < node->prefetch_maximum)
268  {
269  /* take spinlock while updating shared state */
270  SpinLockAcquire(&pstate->mutex);
271  if (pstate->prefetch_target < node->prefetch_maximum)
272  pstate->prefetch_target++;
273  SpinLockRelease(&pstate->mutex);
274  }
275 #endif /* USE_PREFETCH */
276  }
277 
278  /*
279  * We issue prefetch requests *after* fetching the current page to try
280  * to avoid having prefetching interfere with the main I/O. Also, this
281  * should happen only when we have determined there is still something
282  * to do on the current page, else we may uselessly prefetch the same
283  * page we are just about to request for real.
284  *
285  * XXX: It's a layering violation that we do these checks above
286  * tableam, they should probably moved below it at some point.
287  */
288  BitmapPrefetch(node, scan);
289 
290  if (node->return_empty_tuples > 0)
291  {
292  /*
293  * If we don't have to fetch the tuple, just return nulls.
294  */
295  ExecStoreAllNullTuple(slot);
296 
297  if (--node->return_empty_tuples == 0)
298  {
299  /* no more tuples to return in the next round */
300  node->tbmres = tbmres = NULL;
301  }
302  }
303  else
304  {
305  /*
306  * Attempt to fetch tuple from AM.
307  */
308  if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
309  {
310  /* nothing more to look at on this page */
311  node->tbmres = tbmres = NULL;
312  continue;
313  }
314 
315  /*
316  * If we are using lossy info, we have to recheck the qual
317  * conditions at every tuple.
318  */
319  if (tbmres->recheck)
320  {
321  econtext->ecxt_scantuple = slot;
322  if (!ExecQualAndReset(node->bitmapqualorig, econtext))
323  {
324  /* Fails recheck, so drop it and loop back for another */
325  InstrCountFiltered2(node, 1);
326  ExecClearTuple(slot);
327  continue;
328  }
329  }
330  }
331 
332  /* OK to return this tuple */
333  return slot;
334  }
335 
336  /*
337  * if we get here it means we are at the end of the scan..
338  */
339  return ExecClearTuple(slot);
340 }
341 
342 /*
343  * BitmapDoneInitializingSharedState - Shared state is initialized
344  *
345  * By this time the leader has already populated the TBM and initialized the
346  * shared state so wake up other processes.
347  */
348 static inline void
350 {
351  SpinLockAcquire(&pstate->mutex);
352  pstate->state = BM_FINISHED;
353  SpinLockRelease(&pstate->mutex);
354  ConditionVariableBroadcast(&pstate->cv);
355 }
356 
357 /*
358  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
359  */
360 static inline void
362  TBMIterateResult *tbmres)
363 {
364 #ifdef USE_PREFETCH
365  ParallelBitmapHeapState *pstate = node->pstate;
366 
367  if (pstate == NULL)
368  {
369  TBMIterator *prefetch_iterator = node->prefetch_iterator;
370 
371  if (node->prefetch_pages > 0)
372  {
373  /* The main iterator has closed the distance by one page */
374  node->prefetch_pages--;
375  }
376  else if (prefetch_iterator)
377  {
378  /* Do not let the prefetch iterator get behind the main one */
379  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
380 
381  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
382  elog(ERROR, "prefetch and main iterators are out of sync");
383  }
384  return;
385  }
386 
387  if (node->prefetch_maximum > 0)
388  {
389  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
390 
391  SpinLockAcquire(&pstate->mutex);
392  if (pstate->prefetch_pages > 0)
393  {
394  pstate->prefetch_pages--;
395  SpinLockRelease(&pstate->mutex);
396  }
397  else
398  {
399  /* Release the mutex before iterating */
400  SpinLockRelease(&pstate->mutex);
401 
402  /*
403  * In case of shared mode, we can not ensure that the current
404  * blockno of the main iterator and that of the prefetch iterator
405  * are same. It's possible that whatever blockno we are
406  * prefetching will be processed by another process. Therefore,
407  * we don't validate the blockno here as we do in non-parallel
408  * case.
409  */
410  if (prefetch_iterator)
411  tbm_shared_iterate(prefetch_iterator);
412  }
413  }
414 #endif /* USE_PREFETCH */
415 }
416 
417 /*
418  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
419  *
420  * Increase prefetch target if it's not yet at the max. Note that
421  * we will increase it to zero after fetching the very first
422  * page/tuple, then to one after the second tuple is fetched, then
423  * it doubles as later pages are fetched.
424  */
425 static inline void
427 {
428 #ifdef USE_PREFETCH
429  ParallelBitmapHeapState *pstate = node->pstate;
430 
431  if (pstate == NULL)
432  {
433  if (node->prefetch_target >= node->prefetch_maximum)
434  /* don't increase any further */ ;
435  else if (node->prefetch_target >= node->prefetch_maximum / 2)
436  node->prefetch_target = node->prefetch_maximum;
437  else if (node->prefetch_target > 0)
438  node->prefetch_target *= 2;
439  else
440  node->prefetch_target++;
441  return;
442  }
443 
444  /* Do an unlocked check first to save spinlock acquisitions. */
445  if (pstate->prefetch_target < node->prefetch_maximum)
446  {
447  SpinLockAcquire(&pstate->mutex);
448  if (pstate->prefetch_target >= node->prefetch_maximum)
449  /* don't increase any further */ ;
450  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
451  pstate->prefetch_target = node->prefetch_maximum;
452  else if (pstate->prefetch_target > 0)
453  pstate->prefetch_target *= 2;
454  else
455  pstate->prefetch_target++;
456  SpinLockRelease(&pstate->mutex);
457  }
458 #endif /* USE_PREFETCH */
459 }
460 
461 /*
462  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
463  */
464 static inline void
466 {
467 #ifdef USE_PREFETCH
468  ParallelBitmapHeapState *pstate = node->pstate;
469 
470  if (pstate == NULL)
471  {
472  TBMIterator *prefetch_iterator = node->prefetch_iterator;
473 
474  if (prefetch_iterator)
475  {
476  while (node->prefetch_pages < node->prefetch_target)
477  {
478  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
479  bool skip_fetch;
480 
481  if (tbmpre == NULL)
482  {
483  /* No more pages to prefetch */
484  tbm_end_iterate(prefetch_iterator);
485  node->prefetch_iterator = NULL;
486  break;
487  }
488  node->prefetch_pages++;
489 
490  /*
491  * If we expect not to have to actually read this heap page,
492  * skip this prefetch call, but continue to run the prefetch
493  * logic normally. (Would it be better not to increment
494  * prefetch_pages?)
495  *
496  * This depends on the assumption that the index AM will
497  * report the same recheck flag for this future heap page as
498  * it did for the current heap page; which is not a certainty
499  * but is true in many cases.
500  */
501  skip_fetch = (node->can_skip_fetch &&
502  (node->tbmres ? !node->tbmres->recheck : false) &&
504  tbmpre->blockno,
505  &node->pvmbuffer));
506 
507  if (!skip_fetch)
508  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
509  }
510  }
511 
512  return;
513  }
514 
515  if (pstate->prefetch_pages < pstate->prefetch_target)
516  {
517  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
518 
519  if (prefetch_iterator)
520  {
521  while (1)
522  {
523  TBMIterateResult *tbmpre;
524  bool do_prefetch = false;
525  bool skip_fetch;
526 
527  /*
528  * Recheck under the mutex. If some other process has already
529  * done enough prefetching then we need not to do anything.
530  */
531  SpinLockAcquire(&pstate->mutex);
532  if (pstate->prefetch_pages < pstate->prefetch_target)
533  {
534  pstate->prefetch_pages++;
535  do_prefetch = true;
536  }
537  SpinLockRelease(&pstate->mutex);
538 
539  if (!do_prefetch)
540  return;
541 
542  tbmpre = tbm_shared_iterate(prefetch_iterator);
543  if (tbmpre == NULL)
544  {
545  /* No more pages to prefetch */
546  tbm_end_shared_iterate(prefetch_iterator);
547  node->shared_prefetch_iterator = NULL;
548  break;
549  }
550 
551  /* As above, skip prefetch if we expect not to need page */
552  skip_fetch = (node->can_skip_fetch &&
553  (node->tbmres ? !node->tbmres->recheck : false) &&
555  tbmpre->blockno,
556  &node->pvmbuffer));
557 
558  if (!skip_fetch)
559  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
560  }
561  }
562  }
563 #endif /* USE_PREFETCH */
564 }
565 
566 /*
567  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
568  */
569 static bool
571 {
572  ExprContext *econtext;
573 
574  /*
575  * extract necessary information from index scan node
576  */
577  econtext = node->ss.ps.ps_ExprContext;
578 
579  /* Does the tuple meet the original qual conditions? */
580  econtext->ecxt_scantuple = slot;
581  return ExecQualAndReset(node->bitmapqualorig, econtext);
582 }
583 
584 /* ----------------------------------------------------------------
585  * ExecBitmapHeapScan(node)
586  * ----------------------------------------------------------------
587  */
588 static TupleTableSlot *
590 {
592 
593  return ExecScan(&node->ss,
596 }
597 
598 /* ----------------------------------------------------------------
599  * ExecReScanBitmapHeapScan(node)
600  * ----------------------------------------------------------------
601  */
602 void
604 {
606 
607  /* rescan to release any page pin */
608  table_rescan(node->ss.ss_currentScanDesc, NULL);
609 
610  /* release bitmaps and buffers if any */
611  if (node->tbmiterator)
613  if (node->prefetch_iterator)
615  if (node->shared_tbmiterator)
617  if (node->shared_prefetch_iterator)
619  if (node->tbm)
620  tbm_free(node->tbm);
621  if (node->vmbuffer != InvalidBuffer)
622  ReleaseBuffer(node->vmbuffer);
623  if (node->pvmbuffer != InvalidBuffer)
624  ReleaseBuffer(node->pvmbuffer);
625  node->tbm = NULL;
626  node->tbmiterator = NULL;
627  node->tbmres = NULL;
628  node->prefetch_iterator = NULL;
629  node->initialized = false;
630  node->shared_tbmiterator = NULL;
631  node->shared_prefetch_iterator = NULL;
632  node->vmbuffer = InvalidBuffer;
633  node->pvmbuffer = InvalidBuffer;
634 
635  ExecScanReScan(&node->ss);
636 
637  /*
638  * if chgParam of subnode is not null then plan will be re-scanned by
639  * first ExecProcNode.
640  */
641  if (outerPlan->chgParam == NULL)
642  ExecReScan(outerPlan);
643 }
644 
645 /* ----------------------------------------------------------------
646  * ExecEndBitmapHeapScan
647  * ----------------------------------------------------------------
648  */
649 void
651 {
652  TableScanDesc scanDesc;
653 
654  /*
655  * extract information from the node
656  */
657  scanDesc = node->ss.ss_currentScanDesc;
658 
659  /*
660  * Free the exprcontext
661  */
662  ExecFreeExprContext(&node->ss.ps);
663 
664  /*
665  * clear out tuple table slots
666  */
667  if (node->ss.ps.ps_ResultTupleSlot)
670 
671  /*
672  * close down subplans
673  */
675 
676  /*
677  * release bitmaps and buffers if any
678  */
679  if (node->tbmiterator)
681  if (node->prefetch_iterator)
683  if (node->tbm)
684  tbm_free(node->tbm);
685  if (node->shared_tbmiterator)
687  if (node->shared_prefetch_iterator)
689  if (node->vmbuffer != InvalidBuffer)
690  ReleaseBuffer(node->vmbuffer);
691  if (node->pvmbuffer != InvalidBuffer)
692  ReleaseBuffer(node->pvmbuffer);
693 
694  /*
695  * close heap scan
696  */
697  table_endscan(scanDesc);
698 }
699 
700 /* ----------------------------------------------------------------
701  * ExecInitBitmapHeapScan
702  *
703  * Initializes the scan's state information.
704  * ----------------------------------------------------------------
705  */
707 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
708 {
709  BitmapHeapScanState *scanstate;
710  Relation currentRelation;
711  int io_concurrency;
712 
713  /* check for unsupported flags */
714  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
715 
716  /*
717  * Assert caller didn't ask for an unsafe snapshot --- see comments at
718  * head of file.
719  */
721 
722  /*
723  * create state structure
724  */
725  scanstate = makeNode(BitmapHeapScanState);
726  scanstate->ss.ps.plan = (Plan *) node;
727  scanstate->ss.ps.state = estate;
728  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
729 
730  scanstate->tbm = NULL;
731  scanstate->tbmiterator = NULL;
732  scanstate->tbmres = NULL;
733  scanstate->return_empty_tuples = 0;
734  scanstate->vmbuffer = InvalidBuffer;
735  scanstate->pvmbuffer = InvalidBuffer;
736  scanstate->exact_pages = 0;
737  scanstate->lossy_pages = 0;
738  scanstate->prefetch_iterator = NULL;
739  scanstate->prefetch_pages = 0;
740  scanstate->prefetch_target = 0;
741  /* may be updated below */
743  scanstate->pscan_len = 0;
744  scanstate->initialized = false;
745  scanstate->shared_tbmiterator = NULL;
746  scanstate->shared_prefetch_iterator = NULL;
747  scanstate->pstate = NULL;
748 
749  /*
750  * We can potentially skip fetching heap pages if we do not need any
751  * columns of the table, either for checking non-indexable quals or for
752  * returning data. This test is a bit simplistic, as it checks the
753  * stronger condition that there's no qual or return tlist at all. But in
754  * most cases it's probably not worth working harder than that.
755  */
756  scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
757  node->scan.plan.targetlist == NIL);
758 
759  /*
760  * Miscellaneous initialization
761  *
762  * create expression context for node
763  */
764  ExecAssignExprContext(estate, &scanstate->ss.ps);
765 
766  /*
767  * open the scan relation
768  */
769  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
770 
771  /*
772  * initialize child nodes
773  */
774  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
775 
776  /*
777  * get the scan type from the relation descriptor.
778  */
779  ExecInitScanTupleSlot(estate, &scanstate->ss,
780  RelationGetDescr(currentRelation),
781  table_slot_callbacks(currentRelation));
782 
783  /*
784  * Initialize result type and projection.
785  */
786  ExecInitResultTypeTL(&scanstate->ss.ps);
787  ExecAssignScanProjectionInfo(&scanstate->ss);
788 
789  /*
790  * initialize child expressions
791  */
792  scanstate->ss.ps.qual =
793  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
794  scanstate->bitmapqualorig =
795  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
796 
797  /*
798  * Determine the maximum for prefetch_target. If the tablespace has a
799  * specific IO concurrency set, use that to compute the corresponding
800  * maximum value; otherwise, we already initialized to the value computed
801  * by the GUC machinery.
802  */
803  io_concurrency =
804  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
805  if (io_concurrency != effective_io_concurrency)
806  {
807  double maximum;
808 
809  if (ComputeIoConcurrency(io_concurrency, &maximum))
810  scanstate->prefetch_maximum = rint(maximum);
811  }
812 
813  scanstate->ss.ss_currentRelation = currentRelation;
814 
815  scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
816  estate->es_snapshot,
817  0,
818  NULL);
819 
820  /*
821  * all done.
822  */
823  return scanstate;
824 }
825 
826 /*----------------
827  * BitmapShouldInitializeSharedState
828  *
829  * The first process to come here and see the state to the BM_INITIAL
830  * will become the leader for the parallel bitmap scan and will be
831  * responsible for populating the TIDBitmap. The other processes will
832  * be blocked by the condition variable until the leader wakes them up.
833  * ---------------
834  */
835 static bool
837 {
839 
840  while (1)
841  {
842  SpinLockAcquire(&pstate->mutex);
843  state = pstate->state;
844  if (pstate->state == BM_INITIAL)
845  pstate->state = BM_INPROGRESS;
846  SpinLockRelease(&pstate->mutex);
847 
848  /* Exit if bitmap is done, or if we're the leader. */
849  if (state != BM_INPROGRESS)
850  break;
851 
852  /* Wait for the leader to wake us up. */
854  }
855 
857 
858  return (state == BM_INITIAL);
859 }
860 
861 /* ----------------------------------------------------------------
862  * ExecBitmapHeapEstimate
863  *
864  * Compute the amount of space we'll need in the parallel
865  * query DSM, and inform pcxt->estimator about our needs.
866  * ----------------------------------------------------------------
867  */
868 void
870  ParallelContext *pcxt)
871 {
872  EState *estate = node->ss.ps.state;
873 
875  phs_snapshot_data),
877 
879  shm_toc_estimate_keys(&pcxt->estimator, 1);
880 }
881 
882 /* ----------------------------------------------------------------
883  * ExecBitmapHeapInitializeDSM
884  *
885  * Set up a parallel bitmap heap scan descriptor.
886  * ----------------------------------------------------------------
887  */
888 void
890  ParallelContext *pcxt)
891 {
892  ParallelBitmapHeapState *pstate;
893  EState *estate = node->ss.ps.state;
894  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
895 
896  /* If there's no DSA, there are no workers; initialize nothing. */
897  if (dsa == NULL)
898  return;
899 
900  pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
901 
902  pstate->tbmiterator = 0;
903  pstate->prefetch_iterator = 0;
904 
905  /* Initialize the mutex */
906  SpinLockInit(&pstate->mutex);
907  pstate->prefetch_pages = 0;
908  pstate->prefetch_target = 0;
909  pstate->state = BM_INITIAL;
910 
911  ConditionVariableInit(&pstate->cv);
913 
914  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
915  node->pstate = pstate;
916 }
917 
918 /* ----------------------------------------------------------------
919  * ExecBitmapHeapReInitializeDSM
920  *
921  * Reset shared state before beginning a fresh scan.
922  * ----------------------------------------------------------------
923  */
924 void
926  ParallelContext *pcxt)
927 {
928  ParallelBitmapHeapState *pstate = node->pstate;
929  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
930 
931  /* If there's no DSA, there are no workers; do nothing. */
932  if (dsa == NULL)
933  return;
934 
935  pstate->state = BM_INITIAL;
936 
937  if (DsaPointerIsValid(pstate->tbmiterator))
938  tbm_free_shared_area(dsa, pstate->tbmiterator);
939 
942 
943  pstate->tbmiterator = InvalidDsaPointer;
945 }
946 
947 /* ----------------------------------------------------------------
948  * ExecBitmapHeapInitializeWorker
949  *
950  * Copy relevant information from TOC into planstate.
951  * ----------------------------------------------------------------
952  */
953 void
955  ParallelWorkerContext *pwcxt)
956 {
957  ParallelBitmapHeapState *pstate;
958  Snapshot snapshot;
959 
960  Assert(node->ss.ps.state->es_query_dsa != NULL);
961 
962  pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
963  node->pstate = pstate;
964 
965  snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
967 }
void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
Definition: tableam.c:110
#define NIL
Definition: pg_list.h:65
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1657
List * qual
Definition: plannodes.h:141
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
int target_prefetch_pages
Definition: bufmgr.c:130
struct dsa_area * es_query_dsa
Definition: execnodes.h:589
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1145
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
Plan plan
Definition: plannodes.h:340
#define IsA(nodeptr, _type_)
Definition: nodes.h:575
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
ExprState * bitmapqualorig
Definition: execnodes.h:1534
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:2161
Index scanrelid
Definition: plannodes.h:341
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
#define InvalidDsaPointer
Definition: dsa.h:78
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:765
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:426
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1541
#define RelationGetDescr(relation)
Definition: rel.h:442
#define castNode(_type_, nodeptr)
Definition: nodes.h:593
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:538
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:119
ExprContext * ps_ExprContext
Definition: execnodes.h:984
shm_toc_estimator estimator
Definition: parallel.h:41
#define SpinLockInit(lock)
Definition: spin.h:60
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:44
TIDBitmap * tbm
Definition: execnodes.h:1535
void ExecReScan(PlanState *node)
Definition: execAmi.c:77
int plan_node_id
Definition: plannodes.h:139
#define InvalidBuffer
Definition: buf.h:25
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1282
void ConditionVariableBroadcast(ConditionVariable *cv)
Snapshot es_snapshot
Definition: execnodes.h:503
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1283
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3353
Relation ss_currentRelation
Definition: execnodes.h:1281
EState * state
Definition: execnodes.h:947
Form_pg_class rd_rel
Definition: rel.h:83
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:840
SharedBitmapState
Definition: execnodes.h:1475
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:616
int effective_io_concurrency
Definition: bufmgr.c:113
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:207
bool ComputeIoConcurrency(int io_concurrency, double *target)
Definition: bufmgr.c:469
BlockNumber blockno
Definition: tidbitmap.h:42
PlanState ps
Definition: execnodes.h:1280
#define SpinLockAcquire(lock)
Definition: spin.h:62
void ConditionVariableInit(ConditionVariable *cv)
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:983
void ConditionVariableCancelSleep(void)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
#define ERROR
Definition: elog.h:43
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:233
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1552
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:2102
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:695
#define EXEC_FLAG_BACKWARD
Definition: executor.h:58
#define outerPlanState(node)
Definition: execnodes.h:1039
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1776
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:321
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1720
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
dsa_pointer tbmiterator
Definition: execnodes.h:1497
double rint(double x)
Definition: rint.c:21
List * bitmapqualorig
Definition: plannodes.h:475
TBMIterateResult * tbmres
Definition: execnodes.h:1537
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:340
Bitmapset * chgParam
Definition: execnodes.h:977
#define outerPlan(node)
Definition: plannodes.h:170
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:426
#define SpinLockRelease(lock)
Definition: spin.h:64
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:392
Size EstimateSnapshotSpace(Snapshot snap)
Definition: snapmgr.c:2078
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:951
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
Size add_size(Size s1, Size s2)
Definition: shmem.c:475
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:98
Plan * plan
Definition: execnodes.h:945
void PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:531
dsa_pointer prefetch_iterator
Definition: execnodes.h:1498
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:970
#define makeNode(_type_)
Definition: nodes.h:572
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define Assert(condition)
Definition: c.h:732
#define EXEC_FLAG_MARK
Definition: executor.h:59
Definition: regguts.h:298
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1052
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:446
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:688
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1051
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
SharedBitmapState state
Definition: execnodes.h:1502
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:224
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1550
TBMIterator * tbmiterator
Definition: execnodes.h:1536
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
List * targetlist
Definition: plannodes.h:140
ExprState * qual
Definition: execnodes.h:966
Relation rs_rd
Definition: relscan.h:34
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1504
#define DsaPointerIsValid(x)
Definition: dsa.h:81
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:831
Definition: dsa.c:354
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
ConditionVariable cv
Definition: execnodes.h:1503
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:483
TBMIterator * prefetch_iterator
Definition: execnodes.h:1544
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1641
#define elog(elevel,...)
Definition: elog.h:226
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecScanReScan(ScanState *node)
Definition: execScan.c:260
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:781
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:425
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:139
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1464
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1551
#define offsetof(type, field)
Definition: c.h:655
shm_toc * toc
Definition: parallel.h:44
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1157