PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * ExecBitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
#include "executor/executor.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
51 
54 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
55  TBMIterateResult *tbmres);
56 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
57 static inline void BitmapPrefetch(BitmapHeapScanState *node,
58  TableScanDesc scan);
60 
61 
62 /* ----------------------------------------------------------------
63  * BitmapHeapNext
64  *
65  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
66  * ----------------------------------------------------------------
67  */
68 static TupleTableSlot *
70 {
71  ExprContext *econtext;
72  TableScanDesc scan;
73  TIDBitmap *tbm;
74  TBMIterator *tbmiterator = NULL;
75  TBMSharedIterator *shared_tbmiterator = NULL;
76  TBMIterateResult *tbmres;
77  TupleTableSlot *slot;
78  ParallelBitmapHeapState *pstate = node->pstate;
79  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
80 
81  /*
82  * extract necessary information from index scan node
83  */
84  econtext = node->ss.ps.ps_ExprContext;
85  slot = node->ss.ss_ScanTupleSlot;
86  scan = node->ss.ss_currentScanDesc;
87  tbm = node->tbm;
88  if (pstate == NULL)
89  tbmiterator = node->tbmiterator;
90  else
91  shared_tbmiterator = node->shared_tbmiterator;
92  tbmres = node->tbmres;
93 
94  /*
95  * If we haven't yet performed the underlying index scan, do it, and begin
96  * the iteration over the bitmap.
97  *
98  * For prefetching, we use *two* iterators, one for the pages we are
99  * actually scanning and another that runs ahead of the first for
100  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
101  * the prefetch iterator is. Also, node->prefetch_target tracks the
102  * desired prefetch distance, which starts small and increases up to the
103  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
104  * a scan that stops after a few tuples because of a LIMIT.
105  */
106  if (!node->initialized)
107  {
108  if (!pstate)
109  {
110  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
111 
112  if (!tbm || !IsA(tbm, TIDBitmap))
113  elog(ERROR, "unrecognized result from subplan");
114 
115  node->tbm = tbm;
116  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
117  node->tbmres = tbmres = NULL;
118 
119 #ifdef USE_PREFETCH
120  if (node->prefetch_maximum > 0)
121  {
123  node->prefetch_pages = 0;
124  node->prefetch_target = -1;
125  }
126 #endif /* USE_PREFETCH */
127  }
128  else
129  {
130  /*
131  * The leader will immediately come out of the function, but
132  * others will be blocked until leader populates the TBM and wakes
133  * them up.
134  */
136  {
137  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
138  if (!tbm || !IsA(tbm, TIDBitmap))
139  elog(ERROR, "unrecognized result from subplan");
140 
141  node->tbm = tbm;
142 
143  /*
144  * Prepare to iterate over the TBM. This will return the
145  * dsa_pointer of the iterator state which will be used by
146  * multiple processes to iterate jointly.
147  */
149 #ifdef USE_PREFETCH
150  if (node->prefetch_maximum > 0)
151  {
152  pstate->prefetch_iterator =
154 
155  /*
156  * We don't need the mutex here as we haven't yet woke up
157  * others.
158  */
159  pstate->prefetch_pages = 0;
160  pstate->prefetch_target = -1;
161  }
162 #endif
163 
164  /* We have initialized the shared state so wake up others. */
166  }
167 
168  /* Allocate a private iterator and attach the shared state to it */
169  node->shared_tbmiterator = shared_tbmiterator =
171  node->tbmres = tbmres = NULL;
172 
173 #ifdef USE_PREFETCH
174  if (node->prefetch_maximum > 0)
175  {
178  }
179 #endif /* USE_PREFETCH */
180  }
181  node->initialized = true;
182  }
183 
184  for (;;)
185  {
186  bool skip_fetch;
187 
189 
190  /*
191  * Get next page of results if needed
192  */
193  if (tbmres == NULL)
194  {
195  if (!pstate)
196  node->tbmres = tbmres = tbm_iterate(tbmiterator);
197  else
198  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
199  if (tbmres == NULL)
200  {
201  /* no more entries in the bitmap */
202  break;
203  }
204 
205  BitmapAdjustPrefetchIterator(node, tbmres);
206 
207  if (tbmres->ntuples >= 0)
208  node->exact_pages++;
209  else
210  node->lossy_pages++;
211 
212  /*
213  * We can skip fetching the heap page if we don't need any fields
214  * from the heap, and the bitmap entries don't need rechecking,
215  * and all tuples on the page are visible to our transaction.
216  *
217  * XXX: It's a layering violation that we do these checks above
218  * tableam, they should probably moved below it at some point.
219  */
220  skip_fetch = (node->can_skip_fetch &&
221  !tbmres->recheck &&
223  tbmres->blockno,
224  &node->vmbuffer));
225 
226  if (skip_fetch)
227  {
228  /* can't be lossy in the skip_fetch case */
229  Assert(tbmres->ntuples >= 0);
230 
231  /*
232  * The number of tuples on this page is put into
233  * node->return_empty_tuples.
234  */
235  node->return_empty_tuples = tbmres->ntuples;
236  }
237  else if (!table_scan_bitmap_next_block(scan, tbmres))
238  {
239  /* AM doesn't think this block is valid, skip */
240  continue;
241  }
242 
243  /* Adjust the prefetch target */
245  }
246  else
247  {
248  /*
249  * Continuing in previously obtained page.
250  */
251 
252 #ifdef USE_PREFETCH
253 
254  /*
255  * Try to prefetch at least a few pages even before we get to the
256  * second page if we don't stop reading after the first tuple.
257  */
258  if (!pstate)
259  {
260  if (node->prefetch_target < node->prefetch_maximum)
261  node->prefetch_target++;
262  }
263  else if (pstate->prefetch_target < node->prefetch_maximum)
264  {
265  /* take spinlock while updating shared state */
266  SpinLockAcquire(&pstate->mutex);
267  if (pstate->prefetch_target < node->prefetch_maximum)
268  pstate->prefetch_target++;
269  SpinLockRelease(&pstate->mutex);
270  }
271 #endif /* USE_PREFETCH */
272  }
273 
274  /*
275  * We issue prefetch requests *after* fetching the current page to try
276  * to avoid having prefetching interfere with the main I/O. Also, this
277  * should happen only when we have determined there is still something
278  * to do on the current page, else we may uselessly prefetch the same
279  * page we are just about to request for real.
280  *
281  * XXX: It's a layering violation that we do these checks above
282  * tableam, they should probably moved below it at some point.
283  */
284  BitmapPrefetch(node, scan);
285 
286  if (node->return_empty_tuples > 0)
287  {
288  /*
289  * If we don't have to fetch the tuple, just return nulls.
290  */
291  ExecStoreAllNullTuple(slot);
292 
293  if (--node->return_empty_tuples == 0)
294  {
295  /* no more tuples to return in the next round */
296  node->tbmres = tbmres = NULL;
297  }
298  }
299  else
300  {
301  /*
302  * Attempt to fetch tuple from AM.
303  */
304  if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
305  {
306  /* nothing more to look at on this page */
307  node->tbmres = tbmres = NULL;
308  continue;
309  }
310 
311  /*
312  * If we are using lossy info, we have to recheck the qual
313  * conditions at every tuple.
314  */
315  if (tbmres->recheck)
316  {
317  econtext->ecxt_scantuple = slot;
318  if (!ExecQualAndReset(node->bitmapqualorig, econtext))
319  {
320  /* Fails recheck, so drop it and loop back for another */
321  InstrCountFiltered2(node, 1);
322  ExecClearTuple(slot);
323  continue;
324  }
325  }
326  }
327 
328  /* OK to return this tuple */
329  return slot;
330  }
331 
332  /*
333  * if we get here it means we are at the end of the scan..
334  */
335  return ExecClearTuple(slot);
336 }
337 
338 /*
339  * BitmapDoneInitializingSharedState - Shared state is initialized
340  *
341  * By this time the leader has already populated the TBM and initialized the
342  * shared state so wake up other processes.
343  */
344 static inline void
346 {
347  SpinLockAcquire(&pstate->mutex);
348  pstate->state = BM_FINISHED;
349  SpinLockRelease(&pstate->mutex);
350  ConditionVariableBroadcast(&pstate->cv);
351 }
352 
353 /*
354  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
355  */
356 static inline void
358  TBMIterateResult *tbmres)
359 {
360 #ifdef USE_PREFETCH
361  ParallelBitmapHeapState *pstate = node->pstate;
362 
363  if (pstate == NULL)
364  {
365  TBMIterator *prefetch_iterator = node->prefetch_iterator;
366 
367  if (node->prefetch_pages > 0)
368  {
369  /* The main iterator has closed the distance by one page */
370  node->prefetch_pages--;
371  }
372  else if (prefetch_iterator)
373  {
374  /* Do not let the prefetch iterator get behind the main one */
375  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
376 
377  if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
378  elog(ERROR, "prefetch and main iterators are out of sync");
379  }
380  return;
381  }
382 
383  if (node->prefetch_maximum > 0)
384  {
385  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
386 
387  SpinLockAcquire(&pstate->mutex);
388  if (pstate->prefetch_pages > 0)
389  {
390  pstate->prefetch_pages--;
391  SpinLockRelease(&pstate->mutex);
392  }
393  else
394  {
395  /* Release the mutex before iterating */
396  SpinLockRelease(&pstate->mutex);
397 
398  /*
399  * In case of shared mode, we can not ensure that the current
400  * blockno of the main iterator and that of the prefetch iterator
401  * are same. It's possible that whatever blockno we are
402  * prefetching will be processed by another process. Therefore,
403  * we don't validate the blockno here as we do in non-parallel
404  * case.
405  */
406  if (prefetch_iterator)
407  tbm_shared_iterate(prefetch_iterator);
408  }
409  }
410 #endif /* USE_PREFETCH */
411 }
412 
413 /*
414  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
415  *
416  * Increase prefetch target if it's not yet at the max. Note that
417  * we will increase it to zero after fetching the very first
418  * page/tuple, then to one after the second tuple is fetched, then
419  * it doubles as later pages are fetched.
420  */
421 static inline void
423 {
424 #ifdef USE_PREFETCH
425  ParallelBitmapHeapState *pstate = node->pstate;
426 
427  if (pstate == NULL)
428  {
429  if (node->prefetch_target >= node->prefetch_maximum)
430  /* don't increase any further */ ;
431  else if (node->prefetch_target >= node->prefetch_maximum / 2)
432  node->prefetch_target = node->prefetch_maximum;
433  else if (node->prefetch_target > 0)
434  node->prefetch_target *= 2;
435  else
436  node->prefetch_target++;
437  return;
438  }
439 
440  /* Do an unlocked check first to save spinlock acquisitions. */
441  if (pstate->prefetch_target < node->prefetch_maximum)
442  {
443  SpinLockAcquire(&pstate->mutex);
444  if (pstate->prefetch_target >= node->prefetch_maximum)
445  /* don't increase any further */ ;
446  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
447  pstate->prefetch_target = node->prefetch_maximum;
448  else if (pstate->prefetch_target > 0)
449  pstate->prefetch_target *= 2;
450  else
451  pstate->prefetch_target++;
452  SpinLockRelease(&pstate->mutex);
453  }
454 #endif /* USE_PREFETCH */
455 }
456 
457 /*
458  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
459  */
460 static inline void
462 {
463 #ifdef USE_PREFETCH
464  ParallelBitmapHeapState *pstate = node->pstate;
465 
466  if (pstate == NULL)
467  {
468  TBMIterator *prefetch_iterator = node->prefetch_iterator;
469 
470  if (prefetch_iterator)
471  {
472  while (node->prefetch_pages < node->prefetch_target)
473  {
474  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
475  bool skip_fetch;
476 
477  if (tbmpre == NULL)
478  {
479  /* No more pages to prefetch */
480  tbm_end_iterate(prefetch_iterator);
481  node->prefetch_iterator = NULL;
482  break;
483  }
484  node->prefetch_pages++;
485 
486  /*
487  * If we expect not to have to actually read this heap page,
488  * skip this prefetch call, but continue to run the prefetch
489  * logic normally. (Would it be better not to increment
490  * prefetch_pages?)
491  *
492  * This depends on the assumption that the index AM will
493  * report the same recheck flag for this future heap page as
494  * it did for the current heap page; which is not a certainty
495  * but is true in many cases.
496  */
497  skip_fetch = (node->can_skip_fetch &&
498  (node->tbmres ? !node->tbmres->recheck : false) &&
500  tbmpre->blockno,
501  &node->pvmbuffer));
502 
503  if (!skip_fetch)
504  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
505  }
506  }
507 
508  return;
509  }
510 
511  if (pstate->prefetch_pages < pstate->prefetch_target)
512  {
513  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
514 
515  if (prefetch_iterator)
516  {
517  while (1)
518  {
519  TBMIterateResult *tbmpre;
520  bool do_prefetch = false;
521  bool skip_fetch;
522 
523  /*
524  * Recheck under the mutex. If some other process has already
525  * done enough prefetching then we need not to do anything.
526  */
527  SpinLockAcquire(&pstate->mutex);
528  if (pstate->prefetch_pages < pstate->prefetch_target)
529  {
530  pstate->prefetch_pages++;
531  do_prefetch = true;
532  }
533  SpinLockRelease(&pstate->mutex);
534 
535  if (!do_prefetch)
536  return;
537 
538  tbmpre = tbm_shared_iterate(prefetch_iterator);
539  if (tbmpre == NULL)
540  {
541  /* No more pages to prefetch */
542  tbm_end_shared_iterate(prefetch_iterator);
543  node->shared_prefetch_iterator = NULL;
544  break;
545  }
546 
547  /* As above, skip prefetch if we expect not to need page */
548  skip_fetch = (node->can_skip_fetch &&
549  (node->tbmres ? !node->tbmres->recheck : false) &&
551  tbmpre->blockno,
552  &node->pvmbuffer));
553 
554  if (!skip_fetch)
555  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
556  }
557  }
558  }
559 #endif /* USE_PREFETCH */
560 }
561 
562 /*
563  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
564  */
565 static bool
567 {
568  ExprContext *econtext;
569 
570  /*
571  * extract necessary information from index scan node
572  */
573  econtext = node->ss.ps.ps_ExprContext;
574 
575  /* Does the tuple meet the original qual conditions? */
576  econtext->ecxt_scantuple = slot;
577  return ExecQualAndReset(node->bitmapqualorig, econtext);
578 }
579 
580 /* ----------------------------------------------------------------
581  * ExecBitmapHeapScan(node)
582  * ----------------------------------------------------------------
583  */
584 static TupleTableSlot *
586 {
588 
589  return ExecScan(&node->ss,
592 }
593 
594 /* ----------------------------------------------------------------
595  * ExecReScanBitmapHeapScan(node)
596  * ----------------------------------------------------------------
597  */
598 void
600 {
602 
603  /* rescan to release any page pin */
604  table_rescan(node->ss.ss_currentScanDesc, NULL);
605 
606  /* release bitmaps and buffers if any */
607  if (node->tbmiterator)
609  if (node->prefetch_iterator)
611  if (node->shared_tbmiterator)
613  if (node->shared_prefetch_iterator)
615  if (node->tbm)
616  tbm_free(node->tbm);
617  if (node->vmbuffer != InvalidBuffer)
618  ReleaseBuffer(node->vmbuffer);
619  if (node->pvmbuffer != InvalidBuffer)
620  ReleaseBuffer(node->pvmbuffer);
621  node->tbm = NULL;
622  node->tbmiterator = NULL;
623  node->tbmres = NULL;
624  node->prefetch_iterator = NULL;
625  node->initialized = false;
626  node->shared_tbmiterator = NULL;
627  node->shared_prefetch_iterator = NULL;
628  node->vmbuffer = InvalidBuffer;
629  node->pvmbuffer = InvalidBuffer;
630 
631  ExecScanReScan(&node->ss);
632 
633  /*
634  * if chgParam of subnode is not null then plan will be re-scanned by
635  * first ExecProcNode.
636  */
637  if (outerPlan->chgParam == NULL)
639 }
640 
641 /* ----------------------------------------------------------------
642  * ExecEndBitmapHeapScan
643  * ----------------------------------------------------------------
644  */
645 void
647 {
648  TableScanDesc scanDesc;
649 
650  /*
651  * extract information from the node
652  */
653  scanDesc = node->ss.ss_currentScanDesc;
654 
655  /*
656  * close down subplans
657  */
659 
660  /*
661  * release bitmaps and buffers if any
662  */
663  if (node->tbmiterator)
665  if (node->prefetch_iterator)
667  if (node->tbm)
668  tbm_free(node->tbm);
669  if (node->shared_tbmiterator)
671  if (node->shared_prefetch_iterator)
673  if (node->vmbuffer != InvalidBuffer)
674  ReleaseBuffer(node->vmbuffer);
675  if (node->pvmbuffer != InvalidBuffer)
676  ReleaseBuffer(node->pvmbuffer);
677 
678  /*
679  * close heap scan
680  */
681  table_endscan(scanDesc);
682 }
683 
684 /* ----------------------------------------------------------------
685  * ExecInitBitmapHeapScan
686  *
687  * Initializes the scan's state information.
688  * ----------------------------------------------------------------
689  */
691 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
692 {
693  BitmapHeapScanState *scanstate;
694  Relation currentRelation;
695 
696  /* check for unsupported flags */
697  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
698 
699  /*
700  * Assert caller didn't ask for an unsafe snapshot --- see comments at
701  * head of file.
702  */
704 
705  /*
706  * create state structure
707  */
708  scanstate = makeNode(BitmapHeapScanState);
709  scanstate->ss.ps.plan = (Plan *) node;
710  scanstate->ss.ps.state = estate;
711  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
712 
713  scanstate->tbm = NULL;
714  scanstate->tbmiterator = NULL;
715  scanstate->tbmres = NULL;
716  scanstate->return_empty_tuples = 0;
717  scanstate->vmbuffer = InvalidBuffer;
718  scanstate->pvmbuffer = InvalidBuffer;
719  scanstate->exact_pages = 0;
720  scanstate->lossy_pages = 0;
721  scanstate->prefetch_iterator = NULL;
722  scanstate->prefetch_pages = 0;
723  scanstate->prefetch_target = 0;
724  scanstate->initialized = false;
725  scanstate->shared_tbmiterator = NULL;
726  scanstate->shared_prefetch_iterator = NULL;
727  scanstate->pstate = NULL;
728 
729  /*
730  * We can potentially skip fetching heap pages if we do not need any
731  * columns of the table, either for checking non-indexable quals or for
732  * returning data. This test is a bit simplistic, as it checks the
733  * stronger condition that there's no qual or return tlist at all. But in
734  * most cases it's probably not worth working harder than that.
735  */
736  scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
737  node->scan.plan.targetlist == NIL);
738 
739  /*
740  * Miscellaneous initialization
741  *
742  * create expression context for node
743  */
744  ExecAssignExprContext(estate, &scanstate->ss.ps);
745 
746  /*
747  * open the scan relation
748  */
749  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
750 
751  /*
752  * initialize child nodes
753  */
754  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
755 
756  /*
757  * get the scan type from the relation descriptor.
758  */
759  ExecInitScanTupleSlot(estate, &scanstate->ss,
760  RelationGetDescr(currentRelation),
761  table_slot_callbacks(currentRelation));
762 
763  /*
764  * Initialize result type and projection.
765  */
766  ExecInitResultTypeTL(&scanstate->ss.ps);
767  ExecAssignScanProjectionInfo(&scanstate->ss);
768 
769  /*
770  * initialize child expressions
771  */
772  scanstate->ss.ps.qual =
773  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
774  scanstate->bitmapqualorig =
775  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
776 
777  /*
778  * Maximum number of prefetches for the tablespace if configured,
779  * otherwise the current value of the effective_io_concurrency GUC.
780  */
781  scanstate->prefetch_maximum =
782  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
783 
784  scanstate->ss.ss_currentRelation = currentRelation;
785 
786  scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
787  estate->es_snapshot,
788  0,
789  NULL);
790 
791  /*
792  * all done.
793  */
794  return scanstate;
795 }
796 
797 /*----------------
798  * BitmapShouldInitializeSharedState
799  *
800  * The first process to come here and see the state to the BM_INITIAL
801  * will become the leader for the parallel bitmap scan and will be
802  * responsible for populating the TIDBitmap. The other processes will
803  * be blocked by the condition variable until the leader wakes them up.
804  * ---------------
805  */
806 static bool
808 {
810 
811  while (1)
812  {
813  SpinLockAcquire(&pstate->mutex);
814  state = pstate->state;
815  if (pstate->state == BM_INITIAL)
816  pstate->state = BM_INPROGRESS;
817  SpinLockRelease(&pstate->mutex);
818 
819  /* Exit if bitmap is done, or if we're the leader. */
820  if (state != BM_INPROGRESS)
821  break;
822 
823  /* Wait for the leader to wake us up. */
824  ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
825  }
826 
828 
829  return (state == BM_INITIAL);
830 }
831 
832 /* ----------------------------------------------------------------
833  * ExecBitmapHeapEstimate
834  *
835  * Compute the amount of space we'll need in the parallel
836  * query DSM, and inform pcxt->estimator about our needs.
837  * ----------------------------------------------------------------
838  */
839 void
841  ParallelContext *pcxt)
842 {
844  shm_toc_estimate_keys(&pcxt->estimator, 1);
845 }
846 
847 /* ----------------------------------------------------------------
848  * ExecBitmapHeapInitializeDSM
849  *
850  * Set up a parallel bitmap heap scan descriptor.
851  * ----------------------------------------------------------------
852  */
853 void
855  ParallelContext *pcxt)
856 {
857  ParallelBitmapHeapState *pstate;
858  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
859 
860  /* If there's no DSA, there are no workers; initialize nothing. */
861  if (dsa == NULL)
862  return;
863 
864  pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
865 
866  pstate->tbmiterator = 0;
867  pstate->prefetch_iterator = 0;
868 
869  /* Initialize the mutex */
870  SpinLockInit(&pstate->mutex);
871  pstate->prefetch_pages = 0;
872  pstate->prefetch_target = 0;
873  pstate->state = BM_INITIAL;
874 
875  ConditionVariableInit(&pstate->cv);
876 
877  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
878  node->pstate = pstate;
879 }
880 
881 /* ----------------------------------------------------------------
882  * ExecBitmapHeapReInitializeDSM
883  *
884  * Reset shared state before beginning a fresh scan.
885  * ----------------------------------------------------------------
886  */
887 void
889  ParallelContext *pcxt)
890 {
891  ParallelBitmapHeapState *pstate = node->pstate;
892  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
893 
894  /* If there's no DSA, there are no workers; do nothing. */
895  if (dsa == NULL)
896  return;
897 
898  pstate->state = BM_INITIAL;
899 
900  if (DsaPointerIsValid(pstate->tbmiterator))
901  tbm_free_shared_area(dsa, pstate->tbmiterator);
902 
905 
906  pstate->tbmiterator = InvalidDsaPointer;
908 }
909 
910 /* ----------------------------------------------------------------
911  * ExecBitmapHeapInitializeWorker
912  *
913  * Copy relevant information from TOC into planstate.
914  * ----------------------------------------------------------------
915  */
916 void
918  ParallelWorkerContext *pwcxt)
919 {
920  ParallelBitmapHeapState *pstate;
921 
922  Assert(node->ss.ps.state->es_query_dsa != NULL);
923 
924  pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
925  node->pstate = pstate;
926 }
#define InvalidBuffer
Definition: buf.h:25
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:627
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4560
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define InvalidDsaPointer
Definition: dsa.h:78
#define DsaPointerIsValid(x)
Definition: dsa.h:81
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:213
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:502
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:156
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:270
void ExecScanReScan(ScanState *node)
Definition: execScan.c:297
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1575
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1810
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1754
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:483
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:697
#define outerPlanState(node)
Definition: execnodes.h:1139
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1152
SharedBitmapState
Definition: execnodes.h:1679
@ BM_INITIAL
Definition: execnodes.h:1680
@ BM_FINISHED
Definition: execnodes.h:1682
@ BM_INPROGRESS
Definition: execnodes.h:1681
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:473
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:474
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:440
#define EXEC_FLAG_MARK
Definition: executor.h:69
Assert(fmt[strlen(fmt) - 1] !='\n')
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
#define NIL
Definition: pg_list.h:68
#define outerPlan(node)
Definition: plannodes.h:182
#define RelationGetDescr(relation)
Definition: rel.h:531
@ MAIN_FORKNUM
Definition: relpath.h:50
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1751
ExprState * bitmapqualorig
Definition: execnodes.h:1734
TBMIterateResult * tbmres
Definition: execnodes.h:1737
TBMIterator * tbmiterator
Definition: execnodes.h:1736
TIDBitmap * tbm
Definition: execnodes.h:1735
TBMIterator * prefetch_iterator
Definition: execnodes.h:1744
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1750
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1749
List * bitmapqualorig
Definition: plannodes.h:539
struct dsa_area * es_query_dsa
Definition: execnodes.h:702
Snapshot es_snapshot
Definition: execnodes.h:622
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:255
SharedBitmapState state
Definition: execnodes.h:1704
dsa_pointer tbmiterator
Definition: execnodes.h:1699
ConditionVariable cv
Definition: execnodes.h:1705
dsa_pointer prefetch_iterator
Definition: execnodes.h:1700
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
ExprState * qual
Definition: execnodes.h:1064
Plan * plan
Definition: execnodes.h:1043
EState * state
Definition: execnodes.h:1045
ExprContext * ps_ExprContext
Definition: execnodes.h:1082
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1049
int plan_node_id
Definition: plannodes.h:151
Form_pg_class rd_rel
Definition: rel.h:111
Relation ss_currentRelation
Definition: execnodes.h:1484
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1486
PlanState ps
Definition: execnodes.h:1483
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1485
Index scanrelid
Definition: plannodes.h:387
BlockNumber blockno
Definition: tidbitmap.h:42
Relation rs_rd
Definition: relscan.h:34
Definition: dsa.c:366
Definition: regguts.h:323
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1009
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:946
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1977
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:1018
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1953
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:322
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:689
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1146
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1158
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1461
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:766
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:341
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1052
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:971
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24