PostgreSQL Source Code git master
nodeBitmapHeapscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
5 *
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
17 *
18 *
19 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
21 *
22 *
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
25 *
26 *-------------------------------------------------------------------------
27 */
28/*
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * BitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
35 */
36#include "postgres.h"
37
38#include <math.h>
39
40#include "access/relscan.h"
41#include "access/tableam.h"
43#include "executor/executor.h"
45#include "miscadmin.h"
46#include "pgstat.h"
47#include "storage/bufmgr.h"
48#include "utils/rel.h"
49#include "utils/spccache.h"
50
55static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
56static inline void BitmapPrefetch(BitmapHeapScanState *node,
57 TableScanDesc scan);
59
60
61/*
62 * Do the underlying index scan, build the bitmap, set up the parallel state
63 * needed for parallel workers to iterate through the bitmap, and set up the
64 * underlying table scan descriptor.
65 *
66 * For prefetching, we use *two* iterators, one for the pages we are actually
67 * scanning and another that runs ahead of the first for prefetching.
68 * node->prefetch_pages tracks exactly how many pages ahead the prefetch
69 * iterator is. Also, node->prefetch_target tracks the desired prefetch
70 * distance, which starts small and increases up to the
71 * node->prefetch_maximum. This is to avoid doing a lot of prefetching in a
72 * scan that stops after a few tuples because of a LIMIT.
73 */
74static void
76{
77 TBMIterator tbmiterator = {0};
78 ParallelBitmapHeapState *pstate = node->pstate;
79 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
80
81 if (!pstate)
82 {
84
85 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
86 elog(ERROR, "unrecognized result from subplan");
87 }
88 else if (BitmapShouldInitializeSharedState(pstate))
89 {
90 /*
91 * The leader will immediately come out of the function, but others
92 * will be blocked until leader populates the TBM and wakes them up.
93 */
95 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
96 elog(ERROR, "unrecognized result from subplan");
97
98 /*
99 * Prepare to iterate over the TBM. This will return the dsa_pointer
100 * of the iterator state which will be used by multiple processes to
101 * iterate jointly.
102 */
104
105#ifdef USE_PREFETCH
106 if (node->prefetch_maximum > 0)
107 {
108 pstate->prefetch_iterator =
110 }
111#endif /* USE_PREFETCH */
112
113 /* We have initialized the shared state so wake up others. */
115 }
116
117 tbmiterator = tbm_begin_iterate(node->tbm, dsa,
118 pstate ?
119 pstate->tbmiterator :
121
122#ifdef USE_PREFETCH
123 if (node->prefetch_maximum > 0)
124 node->prefetch_iterator =
125 tbm_begin_iterate(node->tbm, dsa,
126 pstate ?
127 pstate->prefetch_iterator :
129#endif /* USE_PREFETCH */
130
131 /*
132 * If this is the first scan of the underlying table, create the table
133 * scan descriptor and begin the scan.
134 */
135 if (!node->ss.ss_currentScanDesc)
136 {
137 bool need_tuples = false;
138
139 /*
140 * We can potentially skip fetching heap pages if we do not need any
141 * columns of the table, either for checking non-indexable quals or
142 * for returning data. This test is a bit simplistic, as it checks
143 * the stronger condition that there's no qual or return tlist at all.
144 * But in most cases it's probably not worth working harder than that.
145 */
146 need_tuples = (node->ss.ps.plan->qual != NIL ||
147 node->ss.ps.plan->targetlist != NIL);
148
149 node->ss.ss_currentScanDesc =
151 node->ss.ps.state->es_snapshot,
152 0,
153 NULL,
154 need_tuples);
155 }
156
157 node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
158 node->initialized = true;
159}
160
161
162/* ----------------------------------------------------------------
163 * BitmapHeapNext
164 *
165 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
 *
 * Returns the scan tuple slot holding the next tuple that survives the
 * recheck of node->bitmapqualorig (applied only while node->recheck is
 * set), or the cleared slot once table_scan_bitmap_next_block() reports
 * that there are no further blocks to scan.
166 * ----------------------------------------------------------------
167 */
168static TupleTableSlot *
170{
171 ExprContext *econtext;
172 TableScanDesc scan;
173 TupleTableSlot *slot;
174
175#ifdef USE_PREFETCH
176 ParallelBitmapHeapState *pstate = node->pstate;
177#endif
178
179 /*
180 * extract necessary information from the bitmap heap scan node
181 */
182 econtext = node->ss.ps.ps_ExprContext;
183 slot = node->ss.ss_ScanTupleSlot;
184 scan = node->ss.ss_currentScanDesc;
185
186 /*
187 * If we haven't yet performed the underlying index scan, do it, and begin
188 * the iteration over the bitmap.
189 */
190 if (!node->initialized)
191 {
 /* re-read the scan descriptor, then go straight to fetching a block */
193 scan = node->ss.ss_currentScanDesc;
194 goto new_page;
195 }
196
197 for (;;)
198 {
199 while (table_scan_bitmap_next_tuple(scan, slot))
200 {
201 /*
202 * Continuing in previously obtained page.
203 */
204
206
207#ifdef USE_PREFETCH
208
209 /*
210 * Try to prefetch at least a few pages even before we get to the
211 * second page if we don't stop reading after the first tuple.
212 */
213 if (!pstate)
214 {
215 if (node->prefetch_target < node->prefetch_maximum)
216 node->prefetch_target++;
217 }
 /* unlocked pre-check; the same test is repeated under the spinlock */
218 else if (pstate->prefetch_target < node->prefetch_maximum)
219 {
220 /* take spinlock while updating shared state */
221 SpinLockAcquire(&pstate->mutex);
222 if (pstate->prefetch_target < node->prefetch_maximum)
223 pstate->prefetch_target++;
224 SpinLockRelease(&pstate->mutex);
225 }
226#endif /* USE_PREFETCH */
227
228 /*
229 * We issue prefetch requests *after* fetching the current page to
230 * try to avoid having prefetching interfere with the main I/O.
231 * Also, this should happen only when we have determined there is
232 * still something to do on the current page, else we may
233 * uselessly prefetch the same page we are just about to request
234 * for real.
235 */
236 BitmapPrefetch(node, scan);
237
238 /*
239 * If we are using lossy info, we have to recheck the qual
240 * conditions at every tuple.
241 */
242 if (node->recheck)
243 {
244 econtext->ecxt_scantuple = slot;
245 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
246 {
247 /* Fails recheck, so drop it and loop back for another */
248 InstrCountFiltered2(node, 1);
249 ExecClearTuple(slot);
250 continue;
251 }
252 }
253
254 /* OK to return this tuple */
255 return slot;
256 }
257
258new_page:
259
261
262 /*
263 * Returns false if the bitmap is exhausted and there are no further
264 * blocks we need to scan.
265 */
266 if (!table_scan_bitmap_next_block(scan, &node->blockno,
267 &node->recheck,
268 &node->stats.lossy_pages,
269 &node->stats.exact_pages))
270 break;
271
272 /*
273 * If serial, we can error out if the prefetch block doesn't stay
274 * ahead of the current block.
 *
 * NOTE(review): prefetch_blockno and blockno are both declared as
 * BlockNumber, while the message below formats them with "%d".  If
 * BlockNumber is an unsigned type, "%u" is presumably the matching
 * specifier -- confirm against the BlockNumber typedef before changing
 * the message.
275 */
276 if (node->pstate == NULL &&
278 node->prefetch_blockno < node->blockno)
279 elog(ERROR,
280 "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
281 node->prefetch_blockno, node->blockno);
282
283 /* Adjust the prefetch target */
285 }
286
287 /*
288 * If we get here, there are no further blocks to scan: return the
 * cleared slot to signal the end of the scan.
289 */
290 return ExecClearTuple(slot);
291}
292
293/*
294 * BitmapDoneInitializingSharedState - Shared state is initialized
295 *
296 * By this time the leader has already populated the TBM and initialized the
297 * shared state so wake up other processes.
298 */
299static inline void
301{
302 SpinLockAcquire(&pstate->mutex);
303 pstate->state = BM_FINISHED;
304 SpinLockRelease(&pstate->mutex);
306}
307
308/*
309 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
310 *
311 * We keep track of how far the prefetch iterator is ahead of the main
312 * iterator in prefetch_pages. For each block the main iterator returns, we
313 * decrement prefetch_pages.
314 */
315static inline void
317{
318#ifdef USE_PREFETCH
319 ParallelBitmapHeapState *pstate = node->pstate;
320 TBMIterateResult *tbmpre;
321
322 if (pstate == NULL)
323 {
324 TBMIterator *prefetch_iterator = &node->prefetch_iterator;
325
326 if (node->prefetch_pages > 0)
327 {
328 /* The main iterator has closed the distance by one page */
329 node->prefetch_pages--;
330 }
331 else if (!tbm_exhausted(prefetch_iterator))
332 {
333 tbmpre = tbm_iterate(prefetch_iterator);
334 node->prefetch_blockno = tbmpre ? tbmpre->blockno :
336 }
337 return;
338 }
339
340 /*
341 * XXX: There is a known issue with keeping the prefetch and current block
342 * iterators in sync for parallel bitmap table scans. This can lead to
343 * prefetching blocks that have already been read. See the discussion
344 * here:
345 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
346 * Note that moving the call site of BitmapAdjustPrefetchIterator()
347 * exacerbates the effects of this bug.
348 */
349 if (node->prefetch_maximum > 0)
350 {
351 TBMIterator *prefetch_iterator = &node->prefetch_iterator;
352
353 SpinLockAcquire(&pstate->mutex);
354 if (pstate->prefetch_pages > 0)
355 {
356 pstate->prefetch_pages--;
357 SpinLockRelease(&pstate->mutex);
358 }
359 else
360 {
361 /* Release the mutex before iterating */
362 SpinLockRelease(&pstate->mutex);
363
364 /*
365 * In case of shared mode, we can not ensure that the current
366 * blockno of the main iterator and that of the prefetch iterator
367 * are same. It's possible that whatever blockno we are
368 * prefetching will be processed by another process. Therefore,
369 * we don't validate the blockno here as we do in non-parallel
370 * case.
371 */
372 if (!tbm_exhausted(prefetch_iterator))
373 {
374 tbmpre = tbm_iterate(prefetch_iterator);
375 node->prefetch_blockno = tbmpre ? tbmpre->blockno :
377 }
378 }
379 }
380#endif /* USE_PREFETCH */
381}
382
383/*
384 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
385 *
386 * Increase prefetch target if it's not yet at the max. Note that
387 * we will increase it to zero after fetching the very first
388 * page/tuple, then to one after the second tuple is fetched, then
389 * it doubles as later pages are fetched.
390 */
391static inline void
393{
394#ifdef USE_PREFETCH
395 ParallelBitmapHeapState *pstate = node->pstate;
396
397 if (pstate == NULL)
398 {
399 if (node->prefetch_target >= node->prefetch_maximum)
400 /* don't increase any further */ ;
401 else if (node->prefetch_target >= node->prefetch_maximum / 2)
402 node->prefetch_target = node->prefetch_maximum;
403 else if (node->prefetch_target > 0)
404 node->prefetch_target *= 2;
405 else
406 node->prefetch_target++;
407 return;
408 }
409
410 /* Do an unlocked check first to save spinlock acquisitions. */
411 if (pstate->prefetch_target < node->prefetch_maximum)
412 {
413 SpinLockAcquire(&pstate->mutex);
414 if (pstate->prefetch_target >= node->prefetch_maximum)
415 /* don't increase any further */ ;
416 else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
417 pstate->prefetch_target = node->prefetch_maximum;
418 else if (pstate->prefetch_target > 0)
419 pstate->prefetch_target *= 2;
420 else
421 pstate->prefetch_target++;
422 SpinLockRelease(&pstate->mutex);
423 }
424#endif /* USE_PREFETCH */
425}
426
427/*
428 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
429 */
430static inline void
432{
433#ifdef USE_PREFETCH
434 ParallelBitmapHeapState *pstate = node->pstate;
435
436 if (pstate == NULL)
437 {
438 TBMIterator *prefetch_iterator = &node->prefetch_iterator;
439
440 if (!tbm_exhausted(prefetch_iterator))
441 {
442 while (node->prefetch_pages < node->prefetch_target)
443 {
444 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
445 bool skip_fetch;
446
447 if (tbmpre == NULL)
448 {
449 /* No more pages to prefetch */
450 tbm_end_iterate(prefetch_iterator);
451 break;
452 }
453 node->prefetch_pages++;
454 node->prefetch_blockno = tbmpre->blockno;
455
456 /*
457 * If we expect not to have to actually read this heap page,
458 * skip this prefetch call, but continue to run the prefetch
459 * logic normally. (Would it be better not to increment
460 * prefetch_pages?)
461 */
462 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
463 !tbmpre->recheck &&
465 tbmpre->blockno,
466 &node->pvmbuffer));
467
468 if (!skip_fetch)
469 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
470 }
471 }
472
473 return;
474 }
475
476 if (pstate->prefetch_pages < pstate->prefetch_target)
477 {
478 TBMIterator *prefetch_iterator = &node->prefetch_iterator;
479
480 if (!tbm_exhausted(prefetch_iterator))
481 {
482 while (1)
483 {
484 TBMIterateResult *tbmpre;
485 bool do_prefetch = false;
486 bool skip_fetch;
487
488 /*
489 * Recheck under the mutex. If some other process has already
490 * done enough prefetching then we need not to do anything.
491 */
492 SpinLockAcquire(&pstate->mutex);
493 if (pstate->prefetch_pages < pstate->prefetch_target)
494 {
495 pstate->prefetch_pages++;
496 do_prefetch = true;
497 }
498 SpinLockRelease(&pstate->mutex);
499
500 if (!do_prefetch)
501 return;
502
503 tbmpre = tbm_iterate(prefetch_iterator);
504 if (tbmpre == NULL)
505 {
506 /* No more pages to prefetch */
507 tbm_end_iterate(prefetch_iterator);
508 break;
509 }
510
511 node->prefetch_blockno = tbmpre->blockno;
512
513 /* As above, skip prefetch if we expect not to need page */
514 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
515 !tbmpre->recheck &&
517 tbmpre->blockno,
518 &node->pvmbuffer));
519
520 if (!skip_fetch)
521 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
522 }
523 }
524 }
525#endif /* USE_PREFETCH */
526}
527
528/*
529 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * Installs 'slot' as the scan tuple of the node's expression context and
 * returns the result of evaluating node->bitmapqualorig against it.
530 */
531static bool
533{
534 ExprContext *econtext;
535
536 /*
537 * extract necessary information from the bitmap heap scan node
538 */
539 econtext = node->ss.ps.ps_ExprContext;
540
541 /* Does the tuple meet the original qual conditions? */
542 econtext->ecxt_scantuple = slot;
543 return ExecQualAndReset(node->bitmapqualorig, econtext);
544}
545
546/* ----------------------------------------------------------------
547 * ExecBitmapHeapScan(node)
548 * ----------------------------------------------------------------
549 */
550static TupleTableSlot *
552{
554
555 return ExecScan(&node->ss,
558}
559
560/* ----------------------------------------------------------------
561 * ExecReScanBitmapHeapScan(node)
562 * ----------------------------------------------------------------
563 */
564void
566{
568
570
571 if (scan)
572 {
573 /*
574 * End iteration on iterators saved in scan descriptor if they have
575 * not already been cleaned up.
576 */
577 if (!tbm_exhausted(&scan->st.rs_tbmiterator))
579
580 /* rescan to release any page pin */
582 }
583
584 /* If we did not already clean up the prefetch iterator, do so now. */
585 if (!tbm_exhausted(&node->prefetch_iterator))
587
588 /* release bitmaps and buffers if any */
589 if (node->tbm)
590 tbm_free(node->tbm);
591 if (node->pvmbuffer != InvalidBuffer)
593 node->tbm = NULL;
594 node->initialized = false;
595 node->pvmbuffer = InvalidBuffer;
596 node->recheck = true;
597 /* Only used for serial BHS */
600 node->prefetch_pages = 0;
601 node->prefetch_target = -1;
602
603 ExecScanReScan(&node->ss);
604
605 /*
606 * if chgParam of subnode is not null then plan will be re-scanned by
607 * first ExecProcNode.
608 */
609 if (outerPlan->chgParam == NULL)
611}
612
613/* ----------------------------------------------------------------
614 * ExecEndBitmapHeapScan
 *
 * Shuts the node down: in a parallel worker, adds this worker's
 * exact/lossy page counters to the shared instrumentation; then ends
 * the table scan if one was started and frees the TID bitmap if one
 * exists.
615 * ----------------------------------------------------------------
616 */
617void
619{
620 TableScanDesc scanDesc;
621
622 /*
623 * When ending a parallel worker, copy the statistics gathered by the
624 * worker back into shared memory so that it can be picked up by the main
625 * process to report in EXPLAIN ANALYZE.
 *
 * NOTE(review): the shared sinstrument[] array is sized for num_workers
 * entries (see the size computation in
 * ExecBitmapHeapRetrieveInstrumentation).  If ParallelWorkerNumber is
 * used as a zero-based index into that array, the Assert below should
 * presumably test '<' rather than '<=' -- confirm.
626 */
627 if (node->sinstrument != NULL && IsParallelWorker())
628 {
630
631 Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
633
634 /*
635 * Here we accumulate the stats rather than performing memcpy on
636 * node->stats into si. When a Gather/GatherMerge node finishes it
637 * will perform planner shutdown on the workers. On rescan it will
638 * spin up new workers which will have a new BitmapHeapScanState and
639 * zeroed stats.
640 */
641 si->exact_pages += node->stats.exact_pages;
642 si->lossy_pages += node->stats.lossy_pages;
643 }
644
645 /*
646 * extract information from the node
647 */
648 scanDesc = node->ss.ss_currentScanDesc;
649
650 /*
651 * close down subplans
652 */
654
 /* scanDesc is NULL when no table scan descriptor was ever created */
655 if (scanDesc)
656 {
657 /*
658 * End iteration on iterators saved in scan descriptor if they have
659 * not already been cleaned up.
660 */
661 if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
663
664 /*
665 * close table scan
666 */
667 table_endscan(scanDesc);
668 }
669
670 /* If we did not already clean up the prefetch iterator, do so now. */
671 if (!tbm_exhausted(&node->prefetch_iterator))
673
674 /*
675 * release bitmaps and buffers if any
676 */
677 if (node->tbm)
678 tbm_free(node->tbm);
679 if (node->pvmbuffer != InvalidBuffer)
681}
682
683/* ----------------------------------------------------------------
684 * ExecInitBitmapHeapScan
685 *
686 * Initializes the scan's state information.
687 * ----------------------------------------------------------------
688 */
691{
692 BitmapHeapScanState *scanstate;
693 Relation currentRelation;
694
695 /* check for unsupported flags */
697
698 /*
699 * Assert caller didn't ask for an unsafe snapshot --- see comments at
700 * head of file.
701 */
703
704 /*
705 * create state structure
706 */
707 scanstate = makeNode(BitmapHeapScanState);
708 scanstate->ss.ps.plan = (Plan *) node;
709 scanstate->ss.ps.state = estate;
711
712 scanstate->tbm = NULL;
713 scanstate->pvmbuffer = InvalidBuffer;
714
715 /* Zero the statistics counters */
716 memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
717
718 scanstate->prefetch_pages = 0;
719 scanstate->prefetch_target = -1;
720 scanstate->initialized = false;
721 scanstate->pstate = NULL;
722 scanstate->recheck = true;
723 scanstate->blockno = InvalidBlockNumber;
725
726 /*
727 * Miscellaneous initialization
728 *
729 * create expression context for node
730 */
731 ExecAssignExprContext(estate, &scanstate->ss.ps);
732
733 /*
734 * open the scan relation
735 */
736 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
737
738 /*
739 * initialize child nodes
740 */
741 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
742
743 /*
744 * get the scan type from the relation descriptor.
745 */
746 ExecInitScanTupleSlot(estate, &scanstate->ss,
747 RelationGetDescr(currentRelation),
748 table_slot_callbacks(currentRelation));
749
750 /*
751 * Initialize result type and projection.
752 */
753 ExecInitResultTypeTL(&scanstate->ss.ps);
754 ExecAssignScanProjectionInfo(&scanstate->ss);
755
756 /*
757 * initialize child expressions
758 */
759 scanstate->ss.ps.qual =
760 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
761 scanstate->bitmapqualorig =
762 ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
763
764 /*
765 * Maximum number of prefetches for the tablespace if configured,
766 * otherwise the current value of the effective_io_concurrency GUC.
767 */
768 scanstate->prefetch_maximum =
769 get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
770
771 scanstate->ss.ss_currentRelation = currentRelation;
772
773 /*
774 * all done.
775 */
776 return scanstate;
777}
778
779/*----------------
780 * BitmapShouldInitializeSharedState
781 *
782 * The first process to come here and see the state set to BM_INITIAL
783 * will become the leader for the parallel bitmap scan and will be
784 * responsible for populating the TIDBitmap. The other processes will
785 * be blocked by the condition variable until the leader wakes them up.
786 * ---------------
787 */
788static bool
790{
792
793 while (1)
794 {
795 SpinLockAcquire(&pstate->mutex);
796 state = pstate->state;
797 if (pstate->state == BM_INITIAL)
798 pstate->state = BM_INPROGRESS;
799 SpinLockRelease(&pstate->mutex);
800
801 /* Exit if bitmap is done, or if we're the leader. */
802 if (state != BM_INPROGRESS)
803 break;
804
805 /* Wait for the leader to wake us up. */
806 ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
807 }
808
810
811 return (state == BM_INITIAL);
812}
813
814/* ----------------------------------------------------------------
815 * ExecBitmapHeapEstimate
816 *
817 * Compute the amount of space we'll need in the parallel
818 * query DSM, and inform pcxt->estimator about our needs.
819 * ----------------------------------------------------------------
820 */
821void
823 ParallelContext *pcxt)
824{
825 Size size;
826
828
829 /* account for instrumentation, if required */
830 if (node->ss.ps.instrument && pcxt->nworkers > 0)
831 {
832 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
834 }
835
838}
839
840/* ----------------------------------------------------------------
841 * ExecBitmapHeapInitializeDSM
842 *
843 * Set up a parallel bitmap heap scan descriptor.
844 * ----------------------------------------------------------------
845 */
846void
848 ParallelContext *pcxt)
849{
851 SharedBitmapHeapInstrumentation *sinstrument = NULL;
852 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
853 char *ptr;
854 Size size;
855
856 /* If there's no DSA, there are no workers; initialize nothing. */
857 if (dsa == NULL)
858 return;
859
861 if (node->ss.ps.instrument && pcxt->nworkers > 0)
862 {
863 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
865 }
866
867 ptr = shm_toc_allocate(pcxt->toc, size);
868 pstate = (ParallelBitmapHeapState *) ptr;
869 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
870 if (node->ss.ps.instrument && pcxt->nworkers > 0)
871 sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
872
873 pstate->tbmiterator = 0;
874 pstate->prefetch_iterator = 0;
875
876 /* Initialize the mutex */
877 SpinLockInit(&pstate->mutex);
878 pstate->prefetch_pages = 0;
879 pstate->prefetch_target = -1;
880 pstate->state = BM_INITIAL;
881
882 ConditionVariableInit(&pstate->cv);
883
884 if (sinstrument)
885 {
886 sinstrument->num_workers = pcxt->nworkers;
887
888 /* ensure any unfilled slots will contain zeroes */
889 memset(sinstrument->sinstrument, 0,
891 }
892
893 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
894 node->pstate = pstate;
895 node->sinstrument = sinstrument;
896}
897
898/* ----------------------------------------------------------------
899 * ExecBitmapHeapReInitializeDSM
900 *
901 * Reset shared state before beginning a fresh scan.
902 * ----------------------------------------------------------------
903 */
904void
906 ParallelContext *pcxt)
907{
908 ParallelBitmapHeapState *pstate = node->pstate;
909 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
910
911 /* If there's no DSA, there are no workers; do nothing. */
912 if (dsa == NULL)
913 return;
914
915 pstate->state = BM_INITIAL;
916 pstate->prefetch_pages = 0;
917 pstate->prefetch_target = -1;
918
919 if (DsaPointerIsValid(pstate->tbmiterator))
920 tbm_free_shared_area(dsa, pstate->tbmiterator);
921
924
927}
928
929/* ----------------------------------------------------------------
930 * ExecBitmapHeapInitializeWorker
931 *
932 * Copy relevant information from TOC into planstate.
933 * ----------------------------------------------------------------
934 */
935void
938{
939 char *ptr;
940
941 Assert(node->ss.ps.state->es_query_dsa != NULL);
942
943 ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
944
945 node->pstate = (ParallelBitmapHeapState *) ptr;
946 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
947
948 if (node->ss.ps.instrument)
950}
951
952/* ----------------------------------------------------------------
953 * ExecBitmapHeapRetrieveInstrumentation
954 *
955 * Transfer bitmap heap scan statistics from DSM to private memory.
 *
 * Does nothing when node->sinstrument is NULL.  Otherwise the
 * structure currently referenced by node->sinstrument is copied into
 * palloc'd memory and node->sinstrument is repointed at the copy.
956 * ----------------------------------------------------------------
957 */
958void
960{
961 SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
962 Size size;
963
 /* no shared instrumentation was set up for this node */
964 if (sinstrument == NULL)
965 return;
966
 /* fixed header plus one BitmapHeapScanInstrumentation per worker */
967 size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
968 + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
969
 /* copy the whole structure, then point the node at the private copy */
970 node->sinstrument = palloc(size);
971 memcpy(node->sinstrument, sinstrument, size);
972}
int ParallelWorkerNumber
Definition: parallel.c:114
#define InvalidBlockNumber
Definition: block.h:33
#define InvalidBuffer
Definition: buf.h:25
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:639
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4866
#define MAXALIGN(LEN)
Definition: c.h:768
#define Assert(condition)
Definition: c.h:815
size_t Size
Definition: c.h:562
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define InvalidDsaPointer
Definition: dsa.h:78
#define DsaPointerIsValid(x)
Definition: dsa.h:106
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
void ExecReScan(PlanState *node)
Definition: execAmi.c:76
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:229
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:507
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:562
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:142
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:47
void ExecAssignScanProjectionInfo(ScanState *node)
Definition: execScan.c:81
void ExecScanReScan(ScanState *node)
Definition: execScan.c:108
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1998
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1942
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:486
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:743
#define outerPlanState(node)
Definition: execnodes.h:1246
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1259
SharedBitmapState
Definition: execnodes.h:1816
@ BM_INITIAL
Definition: execnodes.h:1817
@ BM_FINISHED
Definition: execnodes.h:1819
@ BM_INPROGRESS
Definition: execnodes.h:1818
struct BitmapHeapScanInstrumentation BitmapHeapScanInstrumentation
#define EXEC_FLAG_BACKWARD
Definition: executor.h:68
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:487
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:453
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:486
#define EXEC_FLAG_MARK
Definition: executor.h:69
#define IsParallelWorker()
Definition: parallel.h:60
void * palloc(Size size)
Definition: mcxt.c:1317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
static void BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
static void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapTableScanSetup(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define makeNode(_type_)
Definition: nodes.h:155
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
#define NIL
Definition: pg_list.h:68
#define outerPlan(node)
Definition: plannodes.h:231
#define RelationGetDescr(relation)
Definition: rel.h:538
@ MAIN_FORKNUM
Definition: relpath.h:58
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505
static pg_noinline void Size size
Definition: slab.c:607
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55
int get_tablespace_io_concurrency(Oid spcid)
Definition: spccache.c:215
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
ParallelBitmapHeapState * pstate
Definition: execnodes.h:1890
ExprState * bitmapqualorig
Definition: execnodes.h:1881
BitmapHeapScanInstrumentation stats
Definition: execnodes.h:1884
BlockNumber prefetch_blockno
Definition: execnodes.h:1894
SharedBitmapHeapInstrumentation * sinstrument
Definition: execnodes.h:1891
TIDBitmap * tbm
Definition: execnodes.h:1882
BlockNumber blockno
Definition: execnodes.h:1893
TBMIterator prefetch_iterator
Definition: execnodes.h:1885
List * bitmapqualorig
Definition: plannodes.h:643
struct dsa_area * es_query_dsa
Definition: execnodes.h:742
Snapshot es_snapshot
Definition: execnodes.h:650
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:267
SharedBitmapState state
Definition: execnodes.h:1841
dsa_pointer tbmiterator
Definition: execnodes.h:1836
ConditionVariable cv
Definition: execnodes.h:1842
dsa_pointer prefetch_iterator
Definition: execnodes.h:1837
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
Instrumentation * instrument
Definition: execnodes.h:1160
ExprState * qual
Definition: execnodes.h:1171
Plan * plan
Definition: execnodes.h:1150
EState * state
Definition: execnodes.h:1152
ExprContext * ps_ExprContext
Definition: execnodes.h:1189
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1156
List * qual
Definition: plannodes.h:201
int plan_node_id
Definition: plannodes.h:197
List * targetlist
Definition: plannodes.h:199
Form_pg_class rd_rel
Definition: rel.h:111
Relation ss_currentRelation
Definition: execnodes.h:1604
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1606
PlanState ps
Definition: execnodes.h:1603
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1605
Index scanrelid
Definition: plannodes.h:473
BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: execnodes.h:1856
BlockNumber blockno
Definition: tidbitmap.h:56
TBMIterator rs_tbmiterator
Definition: relscan.h:47
Relation rs_rd
Definition: relscan.h:36
union TableScanDescData::@49 st
uint32 rs_flags
Definition: relscan.h:64
Definition: dsa.c:348
Definition: regguts.h:323
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:58
@ SO_NEED_TUPLES
Definition: tableam.h:71
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1025
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:1034
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot)
Definition: tableam.h:2006
static bool table_scan_bitmap_next_block(TableScanDesc scan, BlockNumber *blockno, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Definition: tableam.h:1977
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool need_tuple)
Definition: tableam.h:958
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:322
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1595
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1614
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:767
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition: tidbitmap.c:341
TBMIterator tbm_begin_iterate(TIDBitmap *tbm, dsa_area *dsa, dsa_pointer dsp)
Definition: tidbitmap.c:1572
static bool tbm_exhausted(TBMIterator *iterator)
Definition: tidbitmap.h:96
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24