PostgreSQL Source Code  git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  * Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8  * special snapshots). The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned,
15  * but with anything else we might return a tuple that doesn't meet the
16  * required index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  * src/backend/executor/nodeBitmapHeapscan.c
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  * ExecBitmapHeapScan scans a relation using bitmap info
31  * BitmapHeapNext workhorse for above
32  * ExecInitBitmapHeapScan creates and initializes state info.
33  * ExecReScanBitmapHeapScan prepares to rescan the plan.
34  * ExecEndBitmapHeapScan releases all storage.
35  */
36 #include "postgres.h"
37 
38 #include <math.h>
39 
40 #include "access/relscan.h"
41 #include "access/tableam.h"
42 #include "access/visibilitymap.h"
43 #include "executor/executor.h"
45 #include "miscadmin.h"
46 #include "pgstat.h"
47 #include "storage/bufmgr.h"
48 #include "utils/rel.h"
49 #include "utils/snapmgr.h"
50 #include "utils/spccache.h"
51 
/*
 * Forward declarations for the file-local prefetch helpers; each one is
 * documented at its definition further down in this file.
 */
54 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
55  BlockNumber blockno);
56 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
57 static inline void BitmapPrefetch(BitmapHeapScanState *node,
58  TableScanDesc scan);
60 
61 
62 /* ----------------------------------------------------------------
63  * BitmapHeapNext
64  *
65  * Retrieve next tuple from the BitmapHeapScan node's currentRelation
 *
 * On the first call (node->initialized is false) the bitmap and its
 * iterator(s) are set up and, if none exists yet, the table scan
 * descriptor is created. After that each call walks the bitmap page by
 * page and returns the scan tuple slot holding the next tuple that passes
 * the recheck (when one is required). Once the bitmap is exhausted the
 * cleared slot is returned.
66  * ----------------------------------------------------------------
67  */
68 static TupleTableSlot *
70 {
71  ExprContext *econtext;
72  TableScanDesc scan;
73  TIDBitmap *tbm;
74  TBMIterator *tbmiterator = NULL;
75  TBMSharedIterator *shared_tbmiterator = NULL;
76  TBMIterateResult *tbmres;
77  TupleTableSlot *slot;
78  ParallelBitmapHeapState *pstate = node->pstate;
79  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
80 
81  /*
82  * extract necessary information from the bitmap heap scan node
 *
 * Only one of the two iterators is used: the private one when there is
 * no parallel state, the shared one otherwise.
83  */
84  econtext = node->ss.ps.ps_ExprContext;
85  slot = node->ss.ss_ScanTupleSlot;
86  scan = node->ss.ss_currentScanDesc;
87  tbm = node->tbm;
88  if (pstate == NULL)
89  tbmiterator = node->tbmiterator;
90  else
91  shared_tbmiterator = node->shared_tbmiterator;
92  tbmres = node->tbmres;
93 
94  /*
95  * If we haven't yet performed the underlying index scan, do it, and begin
96  * the iteration over the bitmap.
97  *
98  * For prefetching, we use *two* iterators, one for the pages we are
99  * actually scanning and another that runs ahead of the first for
100  * prefetching. node->prefetch_pages tracks exactly how many pages ahead
101  * the prefetch iterator is. Also, node->prefetch_target tracks the
102  * desired prefetch distance, which starts small and increases up to the
103  * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
104  * a scan that stops after a few tuples because of a LIMIT.
105  */
106  if (!node->initialized)
107  {
108  if (!pstate)
109  {
110  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
111 
112  if (!tbm || !IsA(tbm, TIDBitmap))
113  elog(ERROR, "unrecognized result from subplan");
114 
115  node->tbm = tbm;
116  node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
117  node->tbmres = tbmres = NULL;
118 
119 #ifdef USE_PREFETCH
120  if (node->prefetch_maximum > 0)
121  {
123  node->prefetch_pages = 0;
 /* start at -1 so that the first target adjustment brings it to zero */
124  node->prefetch_target = -1;
125  }
126 #endif /* USE_PREFETCH */
127  }
128  else
129  {
130  /*
131  * The leader will immediately come out of the function, but
132  * others will be blocked until leader populates the TBM and wakes
133  * them up.
134  */
136  {
137  tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
138  if (!tbm || !IsA(tbm, TIDBitmap))
139  elog(ERROR, "unrecognized result from subplan");
140 
141  node->tbm = tbm;
142 
143  /*
144  * Prepare to iterate over the TBM. This will return the
145  * dsa_pointer of the iterator state which will be used by
146  * multiple processes to iterate jointly.
147  */
149 #ifdef USE_PREFETCH
150  if (node->prefetch_maximum > 0)
151  {
152  pstate->prefetch_iterator =
154 
155  /*
156  * We don't need the mutex here as we haven't yet woken up
157  * others.
158  */
159  pstate->prefetch_pages = 0;
160  pstate->prefetch_target = -1;
161  }
162 #endif
163 
164  /* We have initialized the shared state so wake up others. */
166  }
167 
168  /* Allocate a private iterator and attach the shared state to it */
169  node->shared_tbmiterator = shared_tbmiterator =
171  node->tbmres = tbmres = NULL;
172 
173 #ifdef USE_PREFETCH
174  if (node->prefetch_maximum > 0)
175  {
178  }
179 #endif /* USE_PREFETCH */
180  }
181 
182  /*
183  * If this is the first scan of the underlying table, create the table
184  * scan descriptor and begin the scan.
185  */
186  if (!scan)
187  {
188  bool need_tuples = false;
189 
190  /*
191  * We can potentially skip fetching heap pages if we do not need
192  * any columns of the table, either for checking non-indexable
193  * quals or for returning data. This test is a bit simplistic, as
194  * it checks the stronger condition that there's no qual or return
195  * tlist at all. But in most cases it's probably not worth working
196  * harder than that.
197  */
198  need_tuples = (node->ss.ps.plan->qual != NIL ||
199  node->ss.ps.plan->targetlist != NIL);
200 
202  node->ss.ps.state->es_snapshot,
203  0,
204  NULL,
205  need_tuples);
206 
207  node->ss.ss_currentScanDesc = scan;
208  }
209 
210  node->initialized = true;
211  }
212 
 /*
  * Main loop: each iteration either moves on to the next bitmap page
  * (tbmres == NULL) or keeps pulling tuples from the current one.
  */
213  for (;;)
214  {
215  bool valid_block;
216 
218 
219  /*
220  * Get next page of results if needed
221  */
222  if (tbmres == NULL)
223  {
224  if (!pstate)
225  node->tbmres = tbmres = tbm_iterate(tbmiterator);
226  else
227  node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
228  if (tbmres == NULL)
229  {
230  /* no more entries in the bitmap */
231  break;
232  }
233 
234  BitmapAdjustPrefetchIterator(node, tbmres->blockno);
235 
236  valid_block = table_scan_bitmap_next_block(scan, tbmres);
237 
 /*
  * Count the page for the statistics; a negative ntuples is counted
  * as a lossy page. This happens even if the block is skipped below.
  */
238  if (tbmres->ntuples >= 0)
239  node->stats.exact_pages++;
240  else
241  node->stats.lossy_pages++;
242 
243  if (!valid_block)
244  {
245  /* AM doesn't think this block is valid, skip */
246  continue;
247  }
248 
249  /* Adjust the prefetch target */
251  }
252  else
253  {
254  /*
255  * Continuing in previously obtained page.
256  */
257 
258 #ifdef USE_PREFETCH
259 
260  /*
261  * Try to prefetch at least a few pages even before we get to the
262  * second page if we don't stop reading after the first tuple.
263  */
264  if (!pstate)
265  {
266  if (node->prefetch_target < node->prefetch_maximum)
267  node->prefetch_target++;
268  }
269  else if (pstate->prefetch_target < node->prefetch_maximum)
270  {
271  /* take spinlock while updating shared state */
272  SpinLockAcquire(&pstate->mutex);
 /* recheck under the lock; the test above was made without it */
273  if (pstate->prefetch_target < node->prefetch_maximum)
274  pstate->prefetch_target++;
275  SpinLockRelease(&pstate->mutex);
276  }
277 #endif /* USE_PREFETCH */
278  }
279 
280  /*
281  * We issue prefetch requests *after* fetching the current page to try
282  * to avoid having prefetching interfere with the main I/O. Also, this
283  * should happen only when we have determined there is still something
284  * to do on the current page, else we may uselessly prefetch the same
285  * page we are just about to request for real.
286  */
287  BitmapPrefetch(node, scan);
288 
289  /*
290  * Attempt to fetch tuple from AM.
291  */
292  if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
293  {
294  /* nothing more to look at on this page */
295  node->tbmres = tbmres = NULL;
296  continue;
297  }
298 
299  /*
300  * If we are using lossy info, we have to recheck the qual conditions
301  * at every tuple.
302  */
303  if (tbmres->recheck)
304  {
305  econtext->ecxt_scantuple = slot;
306  if (!ExecQualAndReset(node->bitmapqualorig, econtext))
307  {
308  /* Fails recheck, so drop it and loop back for another */
309  InstrCountFiltered2(node, 1);
310  ExecClearTuple(slot);
311  continue;
312  }
313  }
314 
315  /* OK to return this tuple */
316  return slot;
317  }
318 
319  /*
320  * If we get here it means we are at the end of the scan.
321  */
322  return ExecClearTuple(slot);
323 }
324 
325 /*
326  * BitmapDoneInitializingSharedState - Shared state is initialized
327  *
328  * By this time the leader has already populated the TBM and initialized the
329  * shared state so wake up other processes.
 *
 * The state is switched to BM_FINISHED while holding pstate->mutex; the
 * broadcast on pstate->cv is issued only after the mutex is released.
330  */
331 static inline void
333 {
334  SpinLockAcquire(&pstate->mutex);
335  pstate->state = BM_FINISHED;
336  SpinLockRelease(&pstate->mutex);
337  ConditionVariableBroadcast(&pstate->cv);
338 }
339 
340 /*
341  * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 *
 * Called with the block number the main iterator has just moved to. If
 * the prefetch iterator is ahead (prefetch_pages > 0) the recorded
 * distance simply shrinks by one page; otherwise the prefetch iterator is
 * advanced by one page so that it does not fall behind the main one.
 *
 * The non-parallel path works on the node's private counters and raises
 * an error when the two iterators disagree about the block. The parallel
 * path works on the shared counters under pstate->mutex and does not
 * compare block numbers.
 *
 * Compiles to an empty function when USE_PREFETCH is not defined.
342  */
343 static inline void
345  BlockNumber blockno)
346 {
347 #ifdef USE_PREFETCH
348  ParallelBitmapHeapState *pstate = node->pstate;
349 
350  if (pstate == NULL)
351  {
352  TBMIterator *prefetch_iterator = node->prefetch_iterator;
353 
354  if (node->prefetch_pages > 0)
355  {
356  /* The main iterator has closed the distance by one page */
357  node->prefetch_pages--;
358  }
359  else if (prefetch_iterator)
360  {
361  /* Do not let the prefetch iterator get behind the main one */
362  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
363 
364  if (tbmpre == NULL || tbmpre->blockno != blockno)
365  elog(ERROR, "prefetch and main iterators are out of sync");
366  }
367  return;
368  }
369 
 /* Parallel case: only relevant when prefetching is enabled at all */
370  if (node->prefetch_maximum > 0)
371  {
372  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
373 
374  SpinLockAcquire(&pstate->mutex);
375  if (pstate->prefetch_pages > 0)
376  {
377  pstate->prefetch_pages--;
378  SpinLockRelease(&pstate->mutex);
379  }
380  else
381  {
382  /* Release the mutex before iterating */
383  SpinLockRelease(&pstate->mutex);
384 
385  /*
386  * In case of shared mode, we can not ensure that the current
387  * blockno of the main iterator and that of the prefetch iterator
388  * are same. It's possible that whatever blockno we are
389  * prefetching will be processed by another process. Therefore,
390  * we don't validate the blockno here as we do in non-parallel
391  * case.
392  */
393  if (prefetch_iterator)
394  tbm_shared_iterate(prefetch_iterator);
395  }
396  }
397 #endif /* USE_PREFETCH */
398 }
399 
400 /*
401  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
402  *
403  * Increase prefetch target if it's not yet at the max. Note that
404  * we will increase it to zero after fetching the very first
405  * page/tuple, then to one after the second tuple is fetched, then
406  * it doubles as later pages are fetched.
 *
 * Once the target has reached at least half of prefetch_maximum it is
 * set straight to prefetch_maximum instead of being doubled. The
 * non-parallel path updates node->prefetch_target; the parallel path
 * applies the same rules to pstate->prefetch_target under pstate->mutex.
 *
 * Compiles to an empty function when USE_PREFETCH is not defined.
407  */
408 static inline void
410 {
411 #ifdef USE_PREFETCH
412  ParallelBitmapHeapState *pstate = node->pstate;
413 
414  if (pstate == NULL)
415  {
416  if (node->prefetch_target >= node->prefetch_maximum)
417  /* don't increase any further */ ;
418  else if (node->prefetch_target >= node->prefetch_maximum / 2)
419  node->prefetch_target = node->prefetch_maximum;
420  else if (node->prefetch_target > 0)
421  node->prefetch_target *= 2;
422  else
423  node->prefetch_target++;
424  return;
425  }
426 
427  /* Do an unlocked check first to save spinlock acquisitions. */
428  if (pstate->prefetch_target < node->prefetch_maximum)
429  {
430  SpinLockAcquire(&pstate->mutex);
 /* re-evaluate under the lock, the value may have changed meanwhile */
431  if (pstate->prefetch_target >= node->prefetch_maximum)
432  /* don't increase any further */ ;
433  else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
434  pstate->prefetch_target = node->prefetch_maximum;
435  else if (pstate->prefetch_target > 0)
436  pstate->prefetch_target *= 2;
437  else
438  pstate->prefetch_target++;
439  SpinLockRelease(&pstate->mutex);
440  }
441 #endif /* USE_PREFETCH */
442 }
443 
444 /*
445  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 *
 * Advances the prefetch iterator and issues PrefetchBuffer() requests on
 * the main fork of scan->rs_rd until prefetch_pages has caught up with
 * prefetch_target. When the prefetch iterator runs out of pages it is
 * ended and the node's pointer to it is reset to NULL.
 *
 * The non-parallel path uses the node's private iterator and counters.
 * The parallel path claims each page by incrementing the shared
 * prefetch_pages under pstate->mutex before it advances the shared
 * prefetch iterator.
 *
 * Compiles to an empty function when USE_PREFETCH is not defined.
446  */
447 static inline void
449 {
450 #ifdef USE_PREFETCH
451  ParallelBitmapHeapState *pstate = node->pstate;
452 
453  if (pstate == NULL)
454  {
455  TBMIterator *prefetch_iterator = node->prefetch_iterator;
456 
457  if (prefetch_iterator)
458  {
459  while (node->prefetch_pages < node->prefetch_target)
460  {
461  TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
462  bool skip_fetch;
463 
464  if (tbmpre == NULL)
465  {
466  /* No more pages to prefetch */
467  tbm_end_iterate(prefetch_iterator);
468  node->prefetch_iterator = NULL;
469  break;
470  }
471  node->prefetch_pages++;
472 
473  /*
474  * If we expect not to have to actually read this heap page,
475  * skip this prefetch call, but continue to run the prefetch
476  * logic normally. (Would it be better not to increment
477  * prefetch_pages?)
478  */
479  skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
480  !tbmpre->recheck &&
482  tbmpre->blockno,
483  &node->pvmbuffer));
484 
485  if (!skip_fetch)
486  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
487  }
488  }
489 
490  return;
491  }
492 
 /* Parallel case: this first comparison is made without the mutex */
493  if (pstate->prefetch_pages < pstate->prefetch_target)
494  {
495  TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
496 
497  if (prefetch_iterator)
498  {
499  while (1)
500  {
501  TBMIterateResult *tbmpre;
502  bool do_prefetch = false;
503  bool skip_fetch;
504 
505  /*
506  * Recheck under the mutex. If some other process has already
507  * done enough prefetching then we need not do anything.
508  */
509  SpinLockAcquire(&pstate->mutex);
510  if (pstate->prefetch_pages < pstate->prefetch_target)
511  {
512  pstate->prefetch_pages++;
513  do_prefetch = true;
514  }
515  SpinLockRelease(&pstate->mutex);
516 
517  if (!do_prefetch)
518  return;
519 
520  tbmpre = tbm_shared_iterate(prefetch_iterator);
521  if (tbmpre == NULL)
522  {
523  /* No more pages to prefetch */
524  tbm_end_shared_iterate(prefetch_iterator);
525  node->shared_prefetch_iterator = NULL;
526  break;
527  }
528 
529  /* As above, skip prefetch if we expect not to need page */
530  skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
531  !tbmpre->recheck &&
533  tbmpre->blockno,
534  &node->pvmbuffer));
535 
536  if (!skip_fetch)
537  PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
538  }
539  }
540  }
541 #endif /* USE_PREFETCH */
542 }
543 
544 /*
545  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * Evaluates node->bitmapqualorig against the tuple in 'slot' and returns
 * the outcome of that evaluation.
546  */
547 static bool
549 {
550  ExprContext *econtext;
551 
552  /*
553  * extract necessary information from the bitmap heap scan node
554  */
555  econtext = node->ss.ps.ps_ExprContext;
556 
557  /* Does the tuple meet the original qual conditions? */
558  econtext->ecxt_scantuple = slot;
559  return ExecQualAndReset(node->bitmapqualorig, econtext);
560 }
561 
562 /* ----------------------------------------------------------------
563  * ExecBitmapHeapScan(node)
 *
 * Tuple-fetch entry point of this node type; ExecInitBitmapHeapScan
 * installs it as the node's ExecProcNode callback. It hands node->ss
 * to ExecScan and returns whatever slot ExecScan produces.
564  * ----------------------------------------------------------------
565  */
566 static TupleTableSlot *
568 {
570 
571  return ExecScan(&node->ss,
574 }
575 
576 /* ----------------------------------------------------------------
577  * ExecReScanBitmapHeapScan(node)
 *
 * Prepares the node for a fresh scan: the table scan (if any) is
 * rescanned, the bitmap and the pvmbuffer pin are released, and the
 * iterator and bitmap pointers are reset. Clearing node->initialized
 * makes the next fetch go through the first-call setup again.
578  * ----------------------------------------------------------------
579  */
580 void
582 {
584 
585  /* rescan to release any page pin */
586  if (node->ss.ss_currentScanDesc)
587  table_rescan(node->ss.ss_currentScanDesc, NULL);
588 
589  /* release bitmaps and buffers if any */
590  if (node->tbmiterator)
592  if (node->prefetch_iterator)
594  if (node->shared_tbmiterator)
596  if (node->shared_prefetch_iterator)
598  if (node->tbm)
599  tbm_free(node->tbm);
600  if (node->pvmbuffer != InvalidBuffer)
601  ReleaseBuffer(node->pvmbuffer);
 /* forget everything released above so that nothing is freed twice */
602  node->tbm = NULL;
603  node->tbmiterator = NULL;
604  node->tbmres = NULL;
605  node->prefetch_iterator = NULL;
606  node->initialized = false;
607  node->shared_tbmiterator = NULL;
608  node->shared_prefetch_iterator = NULL;
609  node->pvmbuffer = InvalidBuffer;
610 
611  ExecScanReScan(&node->ss);
612 
613  /*
614  * if chgParam of subnode is not null then plan will be re-scanned by
615  * first ExecProcNode.
616  */
617  if (outerPlan->chgParam == NULL)
619 }
620 
621 /* ----------------------------------------------------------------
622  * ExecEndBitmapHeapScan
 *
 * Shuts the node down: a parallel worker first adds its page counters
 * to the shared instrumentation, then the bitmap, the pvmbuffer pin and
 * the table scan descriptor are released.
623  * ----------------------------------------------------------------
624  */
625 void
627 {
628  TableScanDesc scanDesc;
629 
630  /*
631  * When ending a parallel worker, copy the statistics gathered by the
632  * worker back into shared memory so that it can be picked up by the main
633  * process to report in EXPLAIN ANALYZE.
634  */
635  if (node->sinstrument != NULL && IsParallelWorker())
636  {
638 
 /*
  * NOTE(review): ExecBitmapHeapInitializeDSM zeroes exactly num_workers
  * entries of the shared array, so a worker number equal to num_workers
  * would lie one past its end. '<' looks like the intended bound --
  * confirm against how ParallelWorkerNumber is assigned.
  */
639  Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
641 
642  /*
643  * Here we accumulate the stats rather than performing memcpy on
644  * node->stats into si. When a Gather/GatherMerge node finishes it
645  * will perform planner shutdown on the workers. On rescan it will
646  * spin up new workers which will have a new BitmapHeapScanState and
647  * zeroed stats.
648  */
649  si->exact_pages += node->stats.exact_pages;
650  si->lossy_pages += node->stats.lossy_pages;
651  }
652 
653  /*
654  * extract information from the node
655  */
656  scanDesc = node->ss.ss_currentScanDesc;
657 
658  /*
659  * close down subplans
660  */
662 
663  /*
664  * release bitmaps and buffers if any
665  */
666  if (node->tbmiterator)
668  if (node->prefetch_iterator)
670  if (node->tbm)
671  tbm_free(node->tbm);
672  if (node->shared_tbmiterator)
674  if (node->shared_prefetch_iterator)
676  if (node->pvmbuffer != InvalidBuffer)
677  ReleaseBuffer(node->pvmbuffer);
678 
679  /*
680  * close heap scan
681  */
682  if (scanDesc)
683  table_endscan(scanDesc);
684 
685 }
686 
687 /* ----------------------------------------------------------------
688  * ExecInitBitmapHeapScan
689  *
690  * Initializes the scan's state information.
 *
 * node:   the BitmapHeapScan plan node to build executor state for
 * estate: executor state the new node is attached to
 * eflags: executor flags; EXEC_FLAG_BACKWARD and EXEC_FLAG_MARK are
 *         asserted to be absent
 *
 * Returns the newly created BitmapHeapScanState. The bitmap itself is
 * not built here; 'initialized' is left false for the first fetch.
691  * ----------------------------------------------------------------
692  */
694 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
695 {
696  BitmapHeapScanState *scanstate;
697  Relation currentRelation;
698 
699  /* check for unsupported flags */
700  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
701 
702  /*
703  * Assert caller didn't ask for an unsafe snapshot --- see comments at
704  * head of file.
705  */
707 
708  /*
709  * create state structure
710  */
711  scanstate = makeNode(BitmapHeapScanState);
712  scanstate->ss.ps.plan = (Plan *) node;
713  scanstate->ss.ps.state = estate;
714  scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
715 
716  scanstate->tbm = NULL;
717  scanstate->tbmiterator = NULL;
718  scanstate->tbmres = NULL;
719  scanstate->pvmbuffer = InvalidBuffer;
720 
721  /* Zero the statistics counters */
722  memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
723 
724  scanstate->prefetch_iterator = NULL;
725  scanstate->prefetch_pages = 0;
726  scanstate->prefetch_target = 0;
727  scanstate->initialized = false;
728  scanstate->shared_tbmiterator = NULL;
729  scanstate->shared_prefetch_iterator = NULL;
730  scanstate->pstate = NULL;
731 
732  /*
733  * Miscellaneous initialization
734  *
735  * create expression context for node
736  */
737  ExecAssignExprContext(estate, &scanstate->ss.ps);
738 
739  /*
740  * open the scan relation
741  */
742  currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
743 
744  /*
745  * initialize child nodes
746  */
747  outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
748 
749  /*
750  * get the scan type from the relation descriptor.
751  */
752  ExecInitScanTupleSlot(estate, &scanstate->ss,
753  RelationGetDescr(currentRelation),
754  table_slot_callbacks(currentRelation));
755 
756  /*
757  * Initialize result type and projection.
758  */
759  ExecInitResultTypeTL(&scanstate->ss.ps);
760  ExecAssignScanProjectionInfo(&scanstate->ss);
761 
762  /*
763  * initialize child expressions
 *
 * bitmapqualorig is kept separately from the ordinary qual; it is the
 * expression evaluated when a tuple has to be rechecked.
764  */
765  scanstate->ss.ps.qual =
766  ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
767  scanstate->bitmapqualorig =
768  ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
769 
770  /*
771  * Maximum number of prefetches for the tablespace if configured,
772  * otherwise the current value of the effective_io_concurrency GUC.
773  */
774  scanstate->prefetch_maximum =
775  get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
776 
777  scanstate->ss.ss_currentRelation = currentRelation;
778 
779  /*
780  * all done.
781  */
782  return scanstate;
783 }
784 
785 /*----------------
786  * BitmapShouldInitializeSharedState
787  *
788  * The first process to come here and see the state set to BM_INITIAL
789  * will become the leader for the parallel bitmap scan and will be
790  * responsible for populating the TIDBitmap. The other processes will
791  * be blocked by the condition variable until the leader wakes them up.
 *
 * Returns true only to the process that observed BM_INITIAL (and thereby
 * moved the state to BM_INPROGRESS); every other caller gets false once
 * the state it reads is no longer BM_INPROGRESS.
792  * ---------------
793  */
794 static bool
796 {
798 
799  while (1)
800  {
 /* read the state and, if still initial, claim leadership atomically */
801  SpinLockAcquire(&pstate->mutex);
802  state = pstate->state;
803  if (pstate->state == BM_INITIAL)
804  pstate->state = BM_INPROGRESS;
805  SpinLockRelease(&pstate->mutex);
806 
807  /* Exit if bitmap is done, or if we're the leader. */
808  if (state != BM_INPROGRESS)
809  break;
810 
811  /* Wait for the leader to wake us up. */
812  ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
813  }
814 
816 
817  return (state == BM_INITIAL);
818 }
819 
820 /* ----------------------------------------------------------------
821  * ExecBitmapHeapEstimate
822  *
823  * Compute the amount of space we'll need in the parallel
824  * query DSM, and inform pcxt->estimator about our needs.
 *
 * One TOC key is requested. When the node is instrumented and workers
 * are planned, the size additionally covers the shared instrumentation
 * area.
825  * ----------------------------------------------------------------
826  */
827 void
829  ParallelContext *pcxt)
830 {
831  Size size;
832 
834 
835  /* account for instrumentation, if required */
836  if (node->ss.ps.instrument && pcxt->nworkers > 0)
837  {
838  size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
840  }
841 
843  shm_toc_estimate_keys(&pcxt->estimator, 1);
844 }
845 
846 /* ----------------------------------------------------------------
847  * ExecBitmapHeapInitializeDSM
848  *
849  * Set up a parallel bitmap heap scan descriptor.
 *
 * A single chunk is allocated from pcxt->toc. It starts with the
 * ParallelBitmapHeapState; when the node is instrumented and workers
 * are planned, the SharedBitmapHeapInstrumentation follows at the next
 * MAXALIGN boundary. The chunk is registered in the TOC under the
 * plan node id, and node->pstate / node->sinstrument are pointed at it.
850  * ----------------------------------------------------------------
851  */
852 void
854  ParallelContext *pcxt)
855 {
856  ParallelBitmapHeapState *pstate;
857  SharedBitmapHeapInstrumentation *sinstrument = NULL;
858  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
859  char *ptr;
860  Size size;
861 
862  /* If there's no DSA, there are no workers; initialize nothing. */
863  if (dsa == NULL)
864  return;
865 
867  if (node->ss.ps.instrument && pcxt->nworkers > 0)
868  {
869  size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
871  }
872 
873  ptr = shm_toc_allocate(pcxt->toc, size);
874  pstate = (ParallelBitmapHeapState *) ptr;
 /* the instrumentation area, if any, starts right after the state */
875  ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
876  if (node->ss.ps.instrument && pcxt->nworkers > 0)
877  sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
878 
879  pstate->tbmiterator = 0;
880  pstate->prefetch_iterator = 0;
881 
882  /* Initialize the mutex */
883  SpinLockInit(&pstate->mutex);
884  pstate->prefetch_pages = 0;
885  pstate->prefetch_target = 0;
886  pstate->state = BM_INITIAL;
887 
888  ConditionVariableInit(&pstate->cv);
889 
890  if (sinstrument)
891  {
892  sinstrument->num_workers = pcxt->nworkers;
893 
894  /* ensure any unfilled slots will contain zeroes */
895  memset(sinstrument->sinstrument, 0,
896  pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
897  }
898 
899  shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
900  node->pstate = pstate;
901  node->sinstrument = sinstrument;
902 }
903 
904 /* ----------------------------------------------------------------
905  * ExecBitmapHeapReInitializeDSM
906  *
907  * Reset shared state before beginning a fresh scan.
 *
 * The shared state goes back to BM_INITIAL, the shared iterator area is
 * freed if its pointer is valid, and the pointer is invalidated.
908  * ----------------------------------------------------------------
909  */
910 void
912  ParallelContext *pcxt)
913 {
914  ParallelBitmapHeapState *pstate = node->pstate;
915  dsa_area *dsa = node->ss.ps.state->es_query_dsa;
916 
917  /* If there's no DSA, there are no workers; do nothing. */
918  if (dsa == NULL)
919  return;
920 
 /*
  * NOTE(review): written without taking pstate->mutex; presumably no
  * worker is attached to the shared state at this point -- confirm.
  */
921  pstate->state = BM_INITIAL;
922 
923  if (DsaPointerIsValid(pstate->tbmiterator))
924  tbm_free_shared_area(dsa, pstate->tbmiterator);
925 
928 
929  pstate->tbmiterator = InvalidDsaPointer;
931 }
932 
933 /* ----------------------------------------------------------------
934  * ExecBitmapHeapInitializeWorker
935  *
936  * Copy relevant information from TOC into planstate.
 *
 * The chunk is looked up under this node's plan_node_id and
 * node->pstate is pointed at its start. The pointer is then advanced
 * by MAXALIGN(sizeof(ParallelBitmapHeapState)), the same offset that
 * ExecBitmapHeapInitializeDSM uses for the instrumentation area.
937  * ----------------------------------------------------------------
938  */
939 void
941  ParallelWorkerContext *pwcxt)
942 {
943  char *ptr;
944 
945  Assert(node->ss.ps.state->es_query_dsa != NULL);
946 
947  ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
948 
949  node->pstate = (ParallelBitmapHeapState *) ptr;
950  ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
951 
952  if (node->ss.ps.instrument)
954 }
955 
956 /* ----------------------------------------------------------------
957  * ExecBitmapHeapRetrieveInstrumentation
958  *
959  * Transfer bitmap heap scan statistics from DSM to private memory.
 *
 * Does nothing when node->sinstrument is NULL. Otherwise the header
 * plus num_workers per-worker entries are copied into a palloc'd
 * block, and node->sinstrument is redirected to that private copy.
960  * ----------------------------------------------------------------
961  */
962 void
964 {
965  SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
966  Size size;
967 
968  if (sinstrument == NULL)
969  return;
970 
 /* fixed header plus one entry per worker */
971  size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
972  + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
973 
974  node->sinstrument = palloc(size);
975  memcpy(node->sinstrument, sinstrument, size);
976 }
int ParallelWorkerNumber
Definition: parallel.c:112
uint32 BlockNumber
Definition: block.h:31
#define InvalidBuffer
Definition: buf.h:25
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:666
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4936
#define MAXALIGN(LEN)
Definition: c.h:811
#define Assert(condition)
Definition: c.h:858
size_t Size
Definition: c.h:605
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define InvalidDsaPointer
Definition: dsa.h:78
#define DsaPointerIsValid(x)
Definition: dsa.h:106
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:220
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:557
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:502
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:156
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:270
void ExecScanReScan(ScanState *node)
Definition: execScan.c:297
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1898
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1842
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:483
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:697
#define outerPlanState(node)
Definition: execnodes.h:1212
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1225
SharedBitmapState
Definition: execnodes.h:1776
@ BM_INITIAL
Definition: execnodes.h:1777
@ BM_FINISHED
Definition: execnodes.h:1779
@ BM_INPROGRESS
Definition: execnodes.h:1778
struct BitmapHeapScanInstrumentation BitmapHeapScanInstrumentation
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:473
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:474
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:440
#define EXEC_FLAG_MARK
Definition: executor.h:69
#define IsParallelWorker()
Definition: parallel.h:60
void * palloc(Size size)
Definition: mcxt.c:1317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, BlockNumber blockno)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
#define NIL
Definition: pg_list.h:68
#define outerPlan(node)
Definition: plannodes.h:182
#define RelationGetDescr(relation)
Definition: rel.h:531
@ MAIN_FORKNUM
Definition: relpath.h:50
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
#define SpinLockInit(lock)
Definition: spin.h:60
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1855
ExprState * bitmapqualorig
Definition: execnodes.h:1842
BitmapHeapScanInstrumentation stats
Definition: execnodes.h:1847
TBMIterateResult * tbmres
Definition: execnodes.h:1845
TBMIterator * tbmiterator
Definition: execnodes.h:1844
SharedBitmapHeapInstrumentation * sinstrument
Definition: execnodes.h:1856
TIDBitmap * tbm
Definition: execnodes.h:1843
TBMIterator * prefetch_iterator
Definition: execnodes.h:1848
TBMSharedIterator * shared_prefetch_iterator
Definition: execnodes.h:1854
TBMSharedIterator * shared_tbmiterator
Definition: execnodes.h:1853
List * bitmapqualorig
Definition: plannodes.h:541
struct dsa_area * es_query_dsa
Definition: execnodes.h:704
Snapshot es_snapshot
Definition: execnodes.h:624
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:255
SharedBitmapState state
Definition: execnodes.h:1801
dsa_pointer tbmiterator
Definition: execnodes.h:1796
ConditionVariable cv
Definition: execnodes.h:1802
dsa_pointer prefetch_iterator
Definition: execnodes.h:1797
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
Instrumentation * instrument
Definition: execnodes.h:1126
ExprState * qual
Definition: execnodes.h:1137
Plan * plan
Definition: execnodes.h:1116
EState * state
Definition: execnodes.h:1118
ExprContext * ps_ExprContext
Definition: execnodes.h:1155
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1122
List * qual
Definition: plannodes.h:153
int plan_node_id
Definition: plannodes.h:151
List * targetlist
Definition: plannodes.h:152
Form_pg_class rd_rel
Definition: rel.h:111
Relation ss_currentRelation
Definition: execnodes.h:1564
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1566
PlanState ps
Definition: execnodes.h:1563
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1565
Index scanrelid
Definition: plannodes.h:389
BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1816
BlockNumber blockno
Definition: tidbitmap.h:42
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
Definition: dsa.c:348
Definition: regguts.h:323
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
@ SO_NEED_TUPLES
Definition: tableam.h:72
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1019
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1985
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:1028
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1961
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool need_tuple)
Definition: tableam.h:953
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:322
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:689
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1146
void tbm_end_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1158
TBMSharedIterator * tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:1461
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:766
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:341
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1052
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:971
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24