PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * ExecBitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
36 #include "postgres.h"
37 
38 #include <math.h>
39 
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
54 
57 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
58  TBMIterateResult *tbmres);
59 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
60 static inline void BitmapPrefetch(BitmapHeapScanState *node,
61  TableScanDesc scan);
63 
64 
65 /* ----------------------------------------------------------------
66  * BitmapHeapNext
67  *
68  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
69  * ----------------------------------------------------------------
70  */
71 static TupleTableSlot *
73 {
74  ExprContext *econtext;
75  TableScanDesc scan;
76  TIDBitmap *tbm;
77  TBMIterator *tbmiterator = NULL;
78  TBMSharedIterator *shared_tbmiterator = NULL;
79  TBMIterateResult *tbmres;
80  TupleTableSlot *slot;
81  ParallelBitmapHeapState *pstate = node->pstate;
82  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
83 
84  /*
85  * extract necessary information from index scan node
86  */
87  econtext = node->ss.ps.ps_ExprContext;
88  slot = node->ss.ss_ScanTupleSlot;
89  scan = node->ss.ss_currentScanDesc;
90  tbm = node->tbm;
91  if (pstate == NULL)
92  tbmiterator = node->tbmiterator;
93  else
94  shared_tbmiterator = node->shared_tbmiterator;
95  tbmres = node->tbmres;
96 
97  /*
98  * If we haven't yet performed the underlying index scan, do it, and begin
99  * the iteration over the bitmap.
100  *
101  * For prefetching, we use *two* iterators, one for the pages we are
102  * actually scanning and another that runs ahead of the first for
103  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
104  * the prefetch iterator is. Also, node->prefetch_target tracks the
105  * desired prefetch distance, which starts small and increases up to the
106  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
107  * a scan that stops after a few tuples because of a LIMIT.
108  */
109  if (!node->initialized)
110  {
111  if (!pstate)
112  {
113  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
114 
115  if (!tbm || !IsA(tbm, TIDBitmap))
116  elog(ERROR, "unrecognized result from subplan");
117 
118  node->tbm = tbm;
119  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
120  node->tbmres = tbmres = NULL;
121 
122 #ifdef USE_PREFETCH
123  if (node->prefetch_maximum > 0)
124  {
126  node->prefetch_pages = 0;
127  node->prefetch_target = -1;
128  }
129 #endif /* USE_PREFETCH */
130  }
131  else
132  {
133  /*
134  * The leader will immediately come out of the function, but
135  * others will be blocked until leader populates the TBM and wakes
136  * them up.
137  */
139  {
140  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
141  if (!tbm || !IsA(tbm, TIDBitmap))
142  elog(ERROR, "unrecognized result from subplan");
143 
144  node->tbm = tbm;
145 
146  /*
147  * Prepare to iterate over the TBM. This will return the
148  * dsa_pointer of the iterator state which will be used by
149  * multiple processes to iterate jointly.
150  */
152 #ifdef USE_PREFETCH
153  if (node->prefetch_maximum > 0)
154  {
155  pstate->prefetch_iterator =
157 
158  /*
159  * We don't need the mutex here as we haven't yet woke up
160  * others.
161  */
162  pstate->prefetch_pages = 0;
163  pstate->prefetch_target = -1;
164  }
165 #endif
166 
167  /* We have initialized the shared state so wake up others. */
169  }
170 
171  /* Allocate a private iterator and attach the shared state to it */
172  node->shared_tbmiterator = shared_tbmiterator =
174  node->tbmres = tbmres = NULL;
175 
176 #ifdef USE_PREFETCH
177  if (node->prefetch_maximum > 0)
178  {
181  }
182 #endif /* USE_PREFETCH */
183  }
184  node->initialized = true;
185  }
186 
187  for (;;)
188  {
189  bool skip_fetch;
190 
192 
193  /*
194  * Get next page of results if needed
195  */
196  if (tbmres == NULL)
197  {
198  if (!pstate)
199  node->tbmres = tbmres = tbm_iterate(tbmiterator);
200  else
201  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
202  if (tbmres == NULL)
203  {
204  /* no more entries in the bitmap */
205  break;
206  }
207 
208  BitmapAdjustPrefetchIterator(node, tbmres);
209 
210  /*
211  * We can skip fetching the heap page if we don't need any fields
212  * from the heap, and the bitmap entries don't need rechecking,
213  * and all tuples on the page are visible to our transaction.
214  *
215  * XXX: It's a layering violation that we do these checks above
216  * tableam, they should probably moved below it at some point.
217  */
218  skip_fetch = (node->can_skip_fetch &&
219  !tbmres->recheck &&
221  tbmres->blockno,
222  &node->vmbuffer));
223 
224  if (skip_fetch)
225  {
226  /* can't be lossy in the skip_fetch case */
227  Assert(tbmres->ntuples >= 0);
228 
229  /*
230  * The number of tuples on this page is put into
231  * node->return_empty_tuples.
232  */
233  node->return_empty_tuples = tbmres->ntuples;
234  }
235  else if (!table_scan_bitmap_next_block(scan, tbmres))
236  {
237  /* AM doesn't think this block is valid, skip */
238  continue;
239  }
240 
241  if (tbmres->ntuples >= 0)
242  node->exact_pages++;
243  else
244  node->lossy_pages++;
245 
246  /* Adjust the prefetch target */
248  }
249  else
250  {
251  /*
252  * Continuing in previously obtained page.
253  */
254 
255 #ifdef USE_PREFETCH
256 
257  /*
258  * Try to prefetch at least a few pages even before we get to the
259  * second page if we don't stop reading after the first tuple.
260  */
261  if (!pstate)
262  {
263  if (node->prefetch_target < node->prefetch_maximum)
264  node->prefetch_target++;
265  }
266  else if (pstate->prefetch_target < node->prefetch_maximum)
267  {
268  /* take spinlock while updating shared state */
269  SpinLockAcquire(&pstate->mutex);
270  if (pstate->prefetch_target < node->prefetch_maximum)
271  pstate->prefetch_target++;
272  SpinLockRelease(&pstate->mutex);
273  }
274 #endif /* USE_PREFETCH */
275  }
276 
277  /*
278  * We issue prefetch requests *after* fetching the current page to try
279  * to avoid having prefetching interfere with the main I/O. Also, this
280  * should happen only when we have determined there is still something
281  * to do on the current page, else we may uselessly prefetch the same
282  * page we are just about to request for real.
283  *
284  * XXX: It's a layering violation that we do these checks above
285  * tableam, they should probably moved below it at some point.
286  */
287  BitmapPrefetch(node, scan);
288 
289  if (node->return_empty_tuples > 0)
290  {
291  /*
292  * If we don't have to fetch the tuple, just return nulls.
293  */
294  ExecStoreAllNullTuple(slot);
295 
296  if (--node->return_empty_tuples == 0)
297  {
298  /* no more tuples to return in the next round */
299  node->tbmres = tbmres = NULL;
300  }
301  }
302  else
303  {
304  /*
305  * Attempt to fetch tuple from AM.
306  */
307  if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
308  {
309  /* nothing more to look at on this page */
310  node->tbmres = tbmres = NULL;
311  continue;
312  }
313 
314  /*
315  * If we are using lossy info, we have to recheck the qual
316  * conditions at every tuple.
317  */
318  if (tbmres->recheck)
319  {
320  econtext->ecxt_scantuple = slot;
321  if (!ExecQualAndReset(node->bitmapqualorig, econtext))
322  {
323  /* Fails recheck, so drop it and loop back for another */
324  InstrCountFiltered2(node, 1);
325  ExecClearTuple(slot);
326  continue;
327  }
328  }
329  }
330 
331  /* OK to return this tuple */
332  return slot;
333  }
334 
335  /*
336  * if we get here it means we are at the end of the scan..
337  */
338  return ExecClearTuple(slot);
339 }
340 
341 /*
342  * BitmapDoneInitializingSharedState - Shared state is initialized
343  *
344  * By this time the leader has already populated the TBM and initialized the
345  * shared state so wake up other processes.
346  */
347 static inline void
349 {
350  SpinLockAcquire(&pstate->mutex);
351  pstate->state = BM_FINISHED;
352  SpinLockRelease(&pstate->mutex);
353  ConditionVariableBroadcast(&pstate->cv);
354 }
355 
356 /*
357  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
358  */
359 static inline void
361  TBMIterateResult *tbmres)
362 {
363 #ifdef USE_PREFETCH
364  ParallelBitmapHeapState *pstate = node->pstate;
365 
366  if (pstate == NULL)
367  {
368  TBMIterator *prefetch_iterator = node->prefetch_iterator;
369 
370  if (node->prefetch_pages > 0)
371  {
372  /* The main iterator has closed the distance by one page */
373  node->prefetch_pages--;
374  }
375  else if (prefetch_iterator)
376  {
377  /* Do not let the prefetch iterator get behind the main one */
378  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
379 
380  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
381  elog(ERROR, "prefetch and main iterators are out of sync");
382  }
383  return;
384  }
385 
386  if (node->prefetch_maximum > 0)
387  {
388  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
389 
390  SpinLockAcquire(&pstate->mutex);
391  if (pstate->prefetch_pages > 0)
392  {
393  pstate->prefetch_pages--;
394  SpinLockRelease(&pstate->mutex);
395  }
396  else
397  {
398  /* Release the mutex before iterating */
399  SpinLockRelease(&pstate->mutex);
400 
401  /*
402  * In case of shared mode, we can not ensure that the current
403  * blockno of the main iterator and that of the prefetch iterator
404  * are same. It's possible that whatever blockno we are
405  * prefetching will be processed by another process. Therefore,
406  * we don't validate the blockno here as we do in non-parallel
407  * case.
408  */
409  if (prefetch_iterator)
410  tbm_shared_iterate(prefetch_iterator);
411  }
412  }
413 #endif /* USE_PREFETCH */
414 }
415 
416 /*
417  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
418  *
419  * Increase prefetch target if it's not yet at the max. Note that
420  * we will increase it to zero after fetching the very first
421  * page/tuple, then to one after the second tuple is fetched, then
422  * it doubles as later pages are fetched.
423  */
424 static inline void
426 {
427 #ifdef USE_PREFETCH
428  ParallelBitmapHeapState *pstate = node->pstate;
429 
430  if (pstate == NULL)
431  {
432  if (node->prefetch_target >= node->prefetch_maximum)
433  /* don't increase any further */ ;
434  else if (node->prefetch_target >= node->prefetch_maximum / 2)
435  node->prefetch_target = node->prefetch_maximum;
436  else if (node->prefetch_target > 0)
437  node->prefetch_target *= 2;
438  else
439  node->prefetch_target++;
440  return;
441  }
442 
443  /* Do an unlocked check first to save spinlock acquisitions. */
444  if (pstate->prefetch_target < node->prefetch_maximum)
445  {
446  SpinLockAcquire(&pstate->mutex);
447  if (pstate->prefetch_target >= node->prefetch_maximum)
448  /* don't increase any further */ ;
449  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
450  pstate->prefetch_target = node->prefetch_maximum;
451  else if (pstate->prefetch_target > 0)
452  pstate->prefetch_target *= 2;
453  else
454  pstate->prefetch_target++;
455  SpinLockRelease(&pstate->mutex);
456  }
457 #endif /* USE_PREFETCH */
458 }
459 
460 /*
461  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
462  */
463 static inline void
465 {
466 #ifdef USE_PREFETCH
467  ParallelBitmapHeapState *pstate = node->pstate;
468 
469  if (pstate == NULL)
470  {
471  TBMIterator *prefetch_iterator = node->prefetch_iterator;
472 
473  if (prefetch_iterator)
474  {
475  while (node->prefetch_pages < node->prefetch_target)
476  {
477  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
478  bool skip_fetch;
479 
480  if (tbmpre == NULL)
481  {
482  /* No more pages to prefetch */
483  tbm_end_iterate(prefetch_iterator);
484  node->prefetch_iterator = NULL;
485  break;
486  }
487  node->prefetch_pages++;
488 
489  /*
490  * If we expect not to have to actually read this heap page,
491  * skip this prefetch call, but continue to run the prefetch
492  * logic normally. (Would it be better not to increment
493  * prefetch_pages?)
494  *
495  * This depends on the assumption that the index AM will
496  * report the same recheck flag for this future heap page as
497  * it did for the current heap page; which is not a certainty
498  * but is true in many cases.
499  */
500  skip_fetch = (node->can_skip_fetch &&
501  (node->tbmres ? !node->tbmres->recheck : false) &&
503  tbmpre->blockno,
504  &node->pvmbuffer));
505 
506  if (!skip_fetch)
507  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
508  }
509  }
510 
511  return;
512  }
513 
514  if (pstate->prefetch_pages < pstate->prefetch_target)
515  {
516  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
517 
518  if (prefetch_iterator)
519  {
520  while (1)
521  {
522  TBMIterateResult *tbmpre;
523  bool do_prefetch = false;
524  bool skip_fetch;
525 
526  /*
527  * Recheck under the mutex. If some other process has already
528  * done enough prefetching then we need not to do anything.
529  */
530  SpinLockAcquire(&pstate->mutex);
531  if (pstate->prefetch_pages < pstate->prefetch_target)
532  {
533  pstate->prefetch_pages++;
534  do_prefetch = true;
535  }
536  SpinLockRelease(&pstate->mutex);
537 
538  if (!do_prefetch)
539  return;
540 
541  tbmpre = tbm_shared_iterate(prefetch_iterator);
542  if (tbmpre == NULL)
543  {
544  /* No more pages to prefetch */
545  tbm_end_shared_iterate(prefetch_iterator);
546  node->shared_prefetch_iterator = NULL;
547  break;
548  }
549 
550  /* As above, skip prefetch if we expect not to need page */
551  skip_fetch = (node->can_skip_fetch &&
552  (node->tbmres ? !node->tbmres->recheck : false) &&
554  tbmpre->blockno,
555  &node->pvmbuffer));
556 
557  if (!skip_fetch)
558  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
559  }
560  }
561  }
562 #endif /* USE_PREFETCH */
563 }
564 
565 /*
566  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
567  */
568 static bool
570 {
571  ExprContext *econtext;
572 
573  /*
574  * extract necessary information from index scan node
575  */
576  econtext = node->ss.ps.ps_ExprContext;
577 
578  /* Does the tuple meet the original qual conditions? */
579  econtext->ecxt_scantuple = slot;
580  return ExecQualAndReset(node->bitmapqualorig, econtext);
581 }
582 
583 /* ----------------------------------------------------------------
584  * ExecBitmapHeapScan(node)
585  * ----------------------------------------------------------------
586  */
587 static TupleTableSlot *
589 {
591 
592  return ExecScan(&node->ss,
595 }
596 
597 /* ----------------------------------------------------------------
598  * ExecReScanBitmapHeapScan(node)
599  * ----------------------------------------------------------------
600  */
601 void
603 {
605 
606  /* rescan to release any page pin */
607  table_rescan(node->ss.ss_currentScanDesc, NULL);
608 
609  /* release bitmaps and buffers if any */
610  if (node->tbmiterator)
612  if (node->prefetch_iterator)
614  if (node->shared_tbmiterator)
616  if (node->shared_prefetch_iterator)
618  if (node->tbm)
619  tbm_free(node->tbm);
620  if (node->vmbuffer != InvalidBuffer)
621  ReleaseBuffer(node->vmbuffer);
622  if (node->pvmbuffer != InvalidBuffer)
623  ReleaseBuffer(node->pvmbuffer);
624  node->tbm = NULL;
625  node->tbmiterator = NULL;
626  node->tbmres = NULL;
627  node->prefetch_iterator = NULL;
628  node->initialized = false;
629  node->shared_tbmiterator = NULL;
630  node->shared_prefetch_iterator = NULL;
631  node->vmbuffer = InvalidBuffer;
632  node->pvmbuffer = InvalidBuffer;
633 
634  ExecScanReScan(&node->ss);
635 
636  /*
637  * if chgParam of subnode is not null then plan will be re-scanned by
638  * first ExecProcNode.
639  */
640  if (outerPlan->chgParam == NULL)
642 }
643 
644 /* ----------------------------------------------------------------
645  * ExecEndBitmapHeapScan
646  * ----------------------------------------------------------------
647  */
648 void
650 {
651  TableScanDesc scanDesc;
652 
653  /*
654  * extract information from the node
655  */
656  scanDesc = node->ss.ss_currentScanDesc;
657 
658  /*
659  * close down subplans
660  */
662 
663  /*
664  * release bitmaps and buffers if any
665  */
666  if (node->tbmiterator)
668  if (node->prefetch_iterator)
670  if (node->tbm)
671  tbm_free(node->tbm);
672  if (node->shared_tbmiterator)
674  if (node->shared_prefetch_iterator)
676  if (node->vmbuffer != InvalidBuffer)
677  ReleaseBuffer(node->vmbuffer);
678  if (node->pvmbuffer != InvalidBuffer)
679  ReleaseBuffer(node->pvmbuffer);
680 
681  /*
682  * close heap scan
683  */
684  table_endscan(scanDesc);
685 }
686 
687 /* ----------------------------------------------------------------
688  * ExecInitBitmapHeapScan
689  *
690  * Initializes the scan's state information.
691  * ----------------------------------------------------------------
692  */
694 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
695 {
696  BitmapHeapScanState *scanstate;
697  Relation currentRelation;
698 
699  /* check for unsupported flags */
700  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
701 
702  /*
703  * Assert caller didn't ask for an unsafe snapshot --- see comments at
704  * head of file.
705  */
707 
708  /*
709  * create state structure
710  */
711  scanstate = makeNode(BitmapHeapScanState);
712  scanstate->ss.ps.plan = (Plan *) node;
713  scanstate->ss.ps.state = estate;
714  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
715 
716  scanstate->tbm = NULL;
717  scanstate->tbmiterator = NULL;
718  scanstate->tbmres = NULL;
719  scanstate->return_empty_tuples = 0;
720  scanstate->vmbuffer = InvalidBuffer;
721  scanstate->pvmbuffer = InvalidBuffer;
722  scanstate->exact_pages = 0;
723  scanstate->lossy_pages = 0;
724  scanstate->prefetch_iterator = NULL;
725  scanstate->prefetch_pages = 0;
726  scanstate->prefetch_target = 0;
727  scanstate->pscan_len = 0;
728  scanstate->initialized = false;
729  scanstate->shared_tbmiterator = NULL;
730  scanstate->shared_prefetch_iterator = NULL;
731  scanstate->pstate = NULL;
732 
733  /*
734  * We can potentially skip fetching heap pages if we do not need any
735  * columns of the table, either for checking non-indexable quals or for
736  * returning data. This test is a bit simplistic, as it checks the
737  * stronger condition that there's no qual or return tlist at all. But in
738  * most cases it's probably not worth working harder than that.
739  */
740  scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
741  node->scan.plan.targetlist == NIL);
742 
743  /*
744  * Miscellaneous initialization
745  *
746  * create expression context for node
747  */
748  ExecAssignExprContext(estate, &scanstate->ss.ps);
749 
750  /*
751  * open the scan relation
752  */
753  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
754 
755  /*
756  * initialize child nodes
757  */
758  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
759 
760  /*
761  * get the scan type from the relation descriptor.
762  */
763  ExecInitScanTupleSlot(estate, &scanstate->ss,
764  RelationGetDescr(currentRelation),
765  table_slot_callbacks(currentRelation));
766 
767  /*
768  * Initialize result type and projection.
769  */
770  ExecInitResultTypeTL(&scanstate->ss.ps);
771  ExecAssignScanProjectionInfo(&scanstate->ss);
772 
773  /*
774  * initialize child expressions
775  */
776  scanstate->ss.ps.qual =
777  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
778  scanstate->bitmapqualorig =
779  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
780 
781  /*
782  * Maximum number of prefetches for the tablespace if configured,
783  * otherwise the current value of the effective_io_concurrency GUC.
784  */
785  scanstate->prefetch_maximum =
786  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
787 
788  scanstate->ss.ss_currentRelation = currentRelation;
789 
790  scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
791  estate->es_snapshot,
792  0,
793  NULL);
794 
795  /*
796  * all done.
797  */
798  return scanstate;
799 }
800 
801 /*----------------
802  * BitmapShouldInitializeSharedState
803  *
804  * The first process to come here and see the state to the BM_INITIAL
805  * will become the leader for the parallel bitmap scan and will be
806  * responsible for populating the TIDBitmap. The other processes will
807  * be blocked by the condition variable until the leader wakes them up.
808  * ---------------
809  */
810 static bool
812 {
814 
815  while (1)
816  {
817  SpinLockAcquire(&pstate->mutex);
818  state = pstate->state;
819  if (pstate->state == BM_INITIAL)
820  pstate->state = BM_INPROGRESS;
821  SpinLockRelease(&pstate->mutex);
822 
823  /* Exit if bitmap is done, or if we're the leader. */
824  if (state != BM_INPROGRESS)
825  break;
826 
827  /* Wait for the leader to wake us up. */
828  ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
829  }
830 
832 
833  return (state == BM_INITIAL);
834 }
835 
836 /* ----------------------------------------------------------------
837  * ExecBitmapHeapEstimate
838  *
839  * Compute the amount of space we'll need in the parallel
840  * query DSM, and inform pcxt->estimator about our needs.
841  * ----------------------------------------------------------------
842  */
843 void
845  ParallelContext *pcxt)
846 {
847  EState *estate = node->ss.ps.state;
848 
849  node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
850  phs_snapshot_data),
852 
854  shm_toc_estimate_keys(&pcxt->estimator, 1);
855 }
856 
857 /* ----------------------------------------------------------------
858  * ExecBitmapHeapInitializeDSM
859  *
860  * Set up a parallel bitmap heap scan descriptor.
861  * ----------------------------------------------------------------
862  */
863 void
865  ParallelContext *pcxt)
866 {
867  ParallelBitmapHeapState *pstate;
868  EState *estate = node->ss.ps.state;
869  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
870 
871  /* If there's no DSA, there are no workers; initialize nothing. */
872  if (dsa == NULL)
873  return;
874 
875  pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
876 
877  pstate->tbmiterator = 0;
878  pstate->prefetch_iterator = 0;
879 
880  /* Initialize the mutex */
881  SpinLockInit(&pstate->mutex);
882  pstate->prefetch_pages = 0;
883  pstate->prefetch_target = 0;
884  pstate->state = BM_INITIAL;
885 
886  ConditionVariableInit(&pstate->cv);
888 
889  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
890  node->pstate = pstate;
891 }
892 
893 /* ----------------------------------------------------------------
894  * ExecBitmapHeapReInitializeDSM
895  *
896  * Reset shared state before beginning a fresh scan.
897  * ----------------------------------------------------------------
898  */
899 void
901  ParallelContext *pcxt)
902 {
903  ParallelBitmapHeapState *pstate = node->pstate;
904  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
905 
906  /* If there's no DSA, there are no workers; do nothing. */
907  if (dsa == NULL)
908  return;
909 
910  pstate->state = BM_INITIAL;
911 
912  if (DsaPointerIsValid(pstate->tbmiterator))
913  tbm_free_shared_area(dsa, pstate->tbmiterator);
914 
917 
918  pstate->tbmiterator = InvalidDsaPointer;
920 }
921 
922 /* ----------------------------------------------------------------
923  * ExecBitmapHeapInitializeWorker
924  *
925  * Copy relevant information from TOC into planstate.
926  * ----------------------------------------------------------------
927  */
928 void
930  ParallelWorkerContext *pwcxt)
931 {
932  ParallelBitmapHeapState *pstate;
933  Snapshot snapshot;
934 
935  Assert(node->ss.ps.state->es_query_dsa != NULL);
936 
937  pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
938  node->pstate = pstate;
939 
940  snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
942 }
#define InvalidBuffer
Definition: buf.h:25
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:628
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4573
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define InvalidDsaPointer
Definition: dsa.h:78
#define DsaPointerIsValid(x)
Definition: dsa.h:81
#define ERROR
Definition: elog.h:39
void ExecReScan(PlanState *node)
Definition: execAmi.c:78
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:214
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:502
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:157
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:271
void ExecScanReScan(ScanState *node)
Definition: execScan.c:298
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1577
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1812
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1756
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:488
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:702
#define outerPlanState(node)
Definition: execnodes.h:1132
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1145
SharedBitmapState
Definition: execnodes.h:1669
@ BM_INITIAL
Definition: execnodes.h:1670
@ BM_FINISHED
Definition: execnodes.h:1672
@ BM_INPROGRESS
Definition: execnodes.h:1671
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:472
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:473
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:439
#define EXEC_FLAG_MARK
Definition: executor.h:69
Assert(fmt[strlen(fmt) - 1] !='\n')
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition: nodes.h:179
#define makeNode(_type_)
Definition: nodes.h:176
#define castNode(_type_, nodeptr)
Definition: nodes.h:197
#define NIL
Definition: pg_list.h:68
#define outerPlan(node)
Definition: plannodes.h:182
#define RelationGetDescr(relation)
Definition: rel.h:530
@ MAIN_FORKNUM
Definition: relpath.h:50
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:502
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition: snapmgr.c:1722
Snapshot RestoreSnapshot(char *start_address)
Definition: snapmgr.c:1781
Size EstimateSnapshotSpace(Snapshot snapshot)
Definition: snapmgr.c:1698
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1745
ExprState * bitmapqualorig
Definition: execnodes.h:1727
TBMIterateResult * tbmres
Definition: execnodes.h:1730
TBMIterator * tbmiterator
Definition: execnodes.h:1729
TIDBitmap * tbm
Definition: execnodes.h:1728
TBMIterator * prefetch_iterator
Definition: execnodes.h:1737
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1744
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1743
List * bitmapqualorig
Definition: plannodes.h:539
struct dsa_area * es_query_dsa
Definition: execnodes.h:695
Snapshot es_snapshot
Definition: execnodes.h:615
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:248
SharedBitmapState state
Definition: execnodes.h:1695
dsa_pointer tbmiterator
Definition: execnodes.h:1690
ConditionVariable cv
Definition: execnodes.h:1696
dsa_pointer prefetch_iterator
Definition: execnodes.h:1691
char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1697
shm_toc_estimator estimator
Definition: parallel.h:42
shm_toc * toc
Definition: parallel.h:45
ExprState * qual
Definition: execnodes.h:1057
Plan * plan
Definition: execnodes.h:1036
EState * state
Definition: execnodes.h:1038
ExprContext * ps_ExprContext
Definition: execnodes.h:1075
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1042
int plan_node_id
Definition: plannodes.h:151
Form_pg_class rd_rel
Definition: rel.h:111
Relation ss_currentRelation
Definition: execnodes.h:1474
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1476
PlanState ps
Definition: execnodes.h:1473
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1475
Index scanrelid
Definition: plannodes.h:387
BlockNumber blockno
Definition: tidbitmap.h:42
Relation rs_rd
Definition: relscan.h:34
Definition: dsa.c:368
Definition: regguts.h:323
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
Definition: tableam.c:124
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1009
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:946
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1982
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:1018
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1958
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:321
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:688
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1145
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1157
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1464
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:765
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:340
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1051
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:970
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:432
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24