PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nodeBitmapHeapscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
5 *
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
17 *
18 *
19 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
21 *
22 *
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
25 *
26 *-------------------------------------------------------------------------
27 */
28/*
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
35 */
36#include "postgres.h"
37
38#include "access/relscan.h"
39#include "access/tableam.h"
41#include "executor/executor.h"
43#include "miscadmin.h"
44#include "pgstat.h"
45#include "storage/bufmgr.h"
46#include "utils/rel.h"
47#include "utils/spccache.h"
48
53
54
55/*
56 * Do the underlying index scan, build the bitmap, set up the parallel state
57 * needed for parallel workers to iterate through the bitmap, and set up the
58 * underlying table scan descriptor.
59 */
60static void
62{
63 TBMIterator tbmiterator = {0};
64 ParallelBitmapHeapState *pstate = node->pstate;
65 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
66
67 if (!pstate)
68 {
70
71 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
72 elog(ERROR, "unrecognized result from subplan");
73 }
74 else if (BitmapShouldInitializeSharedState(pstate))
75 {
76 /*
77 * The leader will immediately come out of the function, but others
78 * will be blocked until leader populates the TBM and wakes them up.
79 */
81 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
82 elog(ERROR, "unrecognized result from subplan");
83
84 /*
85 * Prepare to iterate over the TBM. This will return the dsa_pointer
86 * of the iterator state which will be used by multiple processes to
87 * iterate jointly.
88 */
90
91 /* We have initialized the shared state so wake up others. */
93 }
94
95 tbmiterator = tbm_begin_iterate(node->tbm, dsa,
96 pstate ?
97 pstate->tbmiterator :
99
100 /*
101 * If this is the first scan of the underlying table, create the table
102 * scan descriptor and begin the scan.
103 */
104 if (!node->ss.ss_currentScanDesc)
105 {
106 node->ss.ss_currentScanDesc =
108 node->ss.ps.state->es_snapshot,
109 0,
110 NULL);
111 }
112
113 node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
114 node->initialized = true;
115}
116
117/* ----------------------------------------------------------------
118 * BitmapHeapNext
119 *
120 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
121 * ----------------------------------------------------------------
122 */
123static TupleTableSlot *
125{
126 ExprContext *econtext = node->ss.ps.ps_ExprContext;
127 TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
128
129 /*
130 * If we haven't yet performed the underlying index scan, do it, and begin
131 * the iteration over the bitmap.
132 */
133 if (!node->initialized)
135
137 slot, &node->recheck,
138 &node->stats.lossy_pages,
139 &node->stats.exact_pages))
140 {
141 /*
142 * Continuing in previously obtained page.
143 */
145
146 /*
147 * If we are using lossy info, we have to recheck the qual conditions
148 * at every tuple.
149 */
150 if (node->recheck)
151 {
152 econtext->ecxt_scantuple = slot;
153 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
154 {
155 /* Fails recheck, so drop it and loop back for another */
156 InstrCountFiltered2(node, 1);
157 ExecClearTuple(slot);
158 continue;
159 }
160 }
161
162 /* OK to return this tuple */
163 return slot;
164 }
165
166 /*
167 * if we get here it means we are at the end of the scan..
168 */
169 return ExecClearTuple(slot);
170}
171
172/*
173 * BitmapDoneInitializingSharedState - Shared state is initialized
174 *
175 * By this time the leader has already populated the TBM and initialized the
176 * shared state so wake up other processes.
177 */
178static inline void
186
187/*
188 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
189 */
190static bool
192{
193 ExprContext *econtext;
194
195 /*
196 * extract necessary information from index scan node
197 */
198 econtext = node->ss.ps.ps_ExprContext;
199
200 /* Does the tuple meet the original qual conditions? */
201 econtext->ecxt_scantuple = slot;
202 return ExecQualAndReset(node->bitmapqualorig, econtext);
203}
204
205/* ----------------------------------------------------------------
206 * ExecBitmapHeapScan(node)
207 * ----------------------------------------------------------------
208 */
209static TupleTableSlot *
218
219/* ----------------------------------------------------------------
220 * ExecReScanBitmapHeapScan(node)
221 * ----------------------------------------------------------------
222 */
223void
225{
227
229
230 if (scan)
231 {
232 /*
233 * End iteration on iterators saved in scan descriptor if they have
234 * not already been cleaned up.
235 */
236 if (!tbm_exhausted(&scan->st.rs_tbmiterator))
238
239 /* rescan to release any page pin */
241 }
242
243 /* release bitmaps and buffers if any */
244 if (node->tbm)
245 tbm_free(node->tbm);
246 node->tbm = NULL;
247 node->initialized = false;
248 node->recheck = true;
249
250 ExecScanReScan(&node->ss);
251
252 /*
253 * if chgParam of subnode is not null then plan will be re-scanned by
254 * first ExecProcNode.
255 */
256 if (outerPlan->chgParam == NULL)
258}
259
260/* ----------------------------------------------------------------
261 * ExecEndBitmapHeapScan
262 * ----------------------------------------------------------------
263 */
264void
266{
268
269 /*
270 * When ending a parallel worker, copy the statistics gathered by the
271 * worker back into shared memory so that it can be picked up by the main
272 * process to report in EXPLAIN ANALYZE.
273 */
274 if (node->sinstrument != NULL && IsParallelWorker())
275 {
277
278 Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
280
281 /*
282 * Here we accumulate the stats rather than performing memcpy on
283 * node->stats into si. When a Gather/GatherMerge node finishes it
284 * will perform planner shutdown on the workers. On rescan it will
285 * spin up new workers which will have a new BitmapHeapScanState and
286 * zeroed stats.
287 */
289 si->lossy_pages += node->stats.lossy_pages;
290 }
291
292 /*
293 * extract information from the node
294 */
296
297 /*
298 * close down subplans
299 */
301
302 if (scanDesc)
303 {
304 /*
305 * End iteration on iterators saved in scan descriptor if they have
306 * not already been cleaned up.
307 */
308 if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
309 tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
310
311 /*
312 * close table scan
313 */
315 }
316
317 /*
318 * release bitmaps and buffers if any
319 */
320 if (node->tbm)
321 tbm_free(node->tbm);
322}
323
324/* ----------------------------------------------------------------
325 * ExecInitBitmapHeapScan
326 *
327 * Initializes the scan's state information.
328 * ----------------------------------------------------------------
329 */
332{
335
336 /* check for unsupported flags */
338
339 /*
340 * Assert caller didn't ask for an unsafe snapshot --- see comments at
341 * head of file.
342 */
344
345 /*
346 * create state structure
347 */
349 scanstate->ss.ps.plan = (Plan *) node;
350 scanstate->ss.ps.state = estate;
351 scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
352
353 scanstate->tbm = NULL;
354
355 /* Zero the statistics counters */
357
358 scanstate->initialized = false;
359 scanstate->pstate = NULL;
360 scanstate->recheck = true;
361
362 /*
363 * Miscellaneous initialization
364 *
365 * create expression context for node
366 */
367 ExecAssignExprContext(estate, &scanstate->ss.ps);
368
369 /*
370 * open the scan relation
371 */
372 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
373
374 /*
375 * initialize child nodes
376 */
377 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
378
379 /*
380 * get the scan type from the relation descriptor.
381 */
382 ExecInitScanTupleSlot(estate, &scanstate->ss,
385
386 /*
387 * Initialize result type and projection.
388 */
391
392 /*
393 * initialize child expressions
394 */
395 scanstate->ss.ps.qual =
396 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
397 scanstate->bitmapqualorig =
399
400 scanstate->ss.ss_currentRelation = currentRelation;
401
402 /*
403 * all done.
404 */
405 return scanstate;
406}
407
408/*----------------
409 * BitmapShouldInitializeSharedState
410 *
411 * The first process to come here and see the state to the BM_INITIAL
412 * will become the leader for the parallel bitmap scan and will be
413 * responsible for populating the TIDBitmap. The other processes will
414 * be blocked by the condition variable until the leader wakes them up.
415 * ---------------
416 */
417static bool
419{
421
422 while (1)
423 {
424 SpinLockAcquire(&pstate->mutex);
425 state = pstate->state;
426 if (pstate->state == BM_INITIAL)
427 pstate->state = BM_INPROGRESS;
428 SpinLockRelease(&pstate->mutex);
429
430 /* Exit if bitmap is done, or if we're the leader. */
431 if (state != BM_INPROGRESS)
432 break;
433
434 /* Wait for the leader to wake us up. */
436 }
437
439
440 return (state == BM_INITIAL);
441}
442
443/* ----------------------------------------------------------------
444 * ExecBitmapHeapEstimate
445 *
446 * Compute the amount of space we'll need in the parallel
447 * query DSM, and inform pcxt->estimator about our needs.
448 * ----------------------------------------------------------------
449 */
450void
452 ParallelContext *pcxt)
453{
454 Size size;
455
456 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
457
458 /* account for instrumentation, if required */
459 if (node->ss.ps.instrument && pcxt->nworkers > 0)
460 {
461 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
462 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
463 }
464
465 shm_toc_estimate_chunk(&pcxt->estimator, size);
467}
468
469/* ----------------------------------------------------------------
470 * ExecBitmapHeapInitializeDSM
471 *
472 * Set up a parallel bitmap heap scan descriptor.
473 * ----------------------------------------------------------------
474 */
475void
477 ParallelContext *pcxt)
478{
481 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
482 char *ptr;
483 Size size;
484
485 /* If there's no DSA, there are no workers; initialize nothing. */
486 if (dsa == NULL)
487 return;
488
489 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
490 if (node->ss.ps.instrument && pcxt->nworkers > 0)
491 {
492 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
493 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
494 }
495
496 ptr = shm_toc_allocate(pcxt->toc, size);
497 pstate = (ParallelBitmapHeapState *) ptr;
498 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
499 if (node->ss.ps.instrument && pcxt->nworkers > 0)
500 sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
501
502 pstate->tbmiterator = 0;
503
504 /* Initialize the mutex */
505 SpinLockInit(&pstate->mutex);
506 pstate->state = BM_INITIAL;
507
508 ConditionVariableInit(&pstate->cv);
509
510 if (sinstrument)
511 {
512 sinstrument->num_workers = pcxt->nworkers;
513
514 /* ensure any unfilled slots will contain zeroes */
515 memset(sinstrument->sinstrument, 0,
517 }
518
519 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
520 node->pstate = pstate;
521 node->sinstrument = sinstrument;
522}
523
524/* ----------------------------------------------------------------
525 * ExecBitmapHeapReInitializeDSM
526 *
527 * Reset shared state before beginning a fresh scan.
528 * ----------------------------------------------------------------
529 */
530void
532 ParallelContext *pcxt)
533{
534 ParallelBitmapHeapState *pstate = node->pstate;
535 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
536
537 /* If there's no DSA, there are no workers; do nothing. */
538 if (dsa == NULL)
539 return;
540
541 pstate->state = BM_INITIAL;
542
543 if (DsaPointerIsValid(pstate->tbmiterator))
544 tbm_free_shared_area(dsa, pstate->tbmiterator);
545
547}
548
549/* ----------------------------------------------------------------
550 * ExecBitmapHeapInitializeWorker
551 *
552 * Copy relevant information from TOC into planstate.
553 * ----------------------------------------------------------------
554 */
555void
558{
559 char *ptr;
560
561 Assert(node->ss.ps.state->es_query_dsa != NULL);
562
563 ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
564
565 node->pstate = (ParallelBitmapHeapState *) ptr;
566 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
567
568 if (node->ss.ps.instrument)
570}
571
572/* ----------------------------------------------------------------
573 * ExecBitmapHeapRetrieveInstrumentation
574 *
575 * Transfer bitmap heap scan statistics from DSM to private memory.
576 * ----------------------------------------------------------------
577 */
578void
580{
581 SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
582 Size size;
583
584 if (sinstrument == NULL)
585 return;
586
587 size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
588 + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
589
590 node->sinstrument = palloc(size);
591 memcpy(node->sinstrument, sinstrument, size);
592}
int ParallelWorkerNumber
Definition parallel.c:115
#define MAXALIGN(LEN)
Definition c.h:826
#define Assert(condition)
Definition c.h:873
size_t Size
Definition c.h:619
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define InvalidDsaPointer
Definition dsa.h:78
#define DsaPointerIsValid(x)
Definition dsa.h:106
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
void ExecReScan(PlanState *node)
Definition execAmi.c:77
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:229
Node * MultiExecProcNode(PlanState *node)
void ExecEndNode(PlanState *node)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition execScan.c:47
void ExecAssignScanProjectionInfo(ScanState *node)
Definition execScan.c:81
void ExecScanReScan(ScanState *node)
Definition execScan.c:108
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
void ExecInitResultTypeTL(PlanState *planstate)
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition execUtils.c:485
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition execUtils.c:742
#define outerPlanState(node)
Definition execnodes.h:1263
#define InstrCountFiltered2(node, delta)
Definition execnodes.h:1276
SharedBitmapState
Definition execnodes.h:1835
@ BM_INITIAL
Definition execnodes.h:1836
@ BM_FINISHED
Definition execnodes.h:1838
@ BM_INPROGRESS
Definition execnodes.h:1837
#define EXEC_FLAG_BACKWARD
Definition executor.h:69
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition executor.h:580
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition executor.h:546
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition executor.h:579
#define EXEC_FLAG_MARK
Definition executor.h:70
#define IsParallelWorker()
Definition parallel.h:60
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapTableScanSetup(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define makeNode(_type_)
Definition nodes.h:161
#define castNode(_type_, nodeptr)
Definition nodes.h:182
#define outerPlan(node)
Definition plannodes.h:261
static int fb(int x)
#define RelationGetDescr(relation)
Definition rel.h:540
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:482
Size mul_size(Size s1, Size s2)
Definition shmem.c:497
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:55
#define SpinLockInit(lock)
Definition spin.h:57
#define SpinLockRelease(lock)
Definition spin.h:61
#define SpinLockAcquire(lock)
Definition spin.h:59
ParallelBitmapHeapState * pstate
Definition execnodes.h:1876
ExprState * bitmapqualorig
Definition execnodes.h:1872
BitmapHeapScanInstrumentation stats
Definition execnodes.h:1874
SharedBitmapHeapInstrumentation * sinstrument
Definition execnodes.h:1877
List * bitmapqualorig
Definition plannodes.h:693
struct dsa_area * es_query_dsa
Definition execnodes.h:754
Snapshot es_snapshot
Definition execnodes.h:662
TupleTableSlot * ecxt_scantuple
Definition execnodes.h:275
SharedBitmapState state
Definition execnodes.h:1853
ConditionVariable cv
Definition execnodes.h:1854
shm_toc_estimator estimator
Definition parallel.h:41
shm_toc * toc
Definition parallel.h:44
Instrumentation * instrument
Definition execnodes.h:1177
Plan * plan
Definition execnodes.h:1167
EState * state
Definition execnodes.h:1169
ExprContext * ps_ExprContext
Definition execnodes.h:1206
int plan_node_id
Definition plannodes.h:227
Relation ss_currentRelation
Definition execnodes.h:1624
TupleTableSlot * ss_ScanTupleSlot
Definition execnodes.h:1626
PlanState ps
Definition execnodes.h:1623
struct TableScanDescData * ss_currentScanDesc
Definition execnodes.h:1625
Index scanrelid
Definition plannodes.h:523
BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
TBMIterator rs_tbmiterator
Definition relscan.h:46
union TableScanDescData::@50 st
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition tableam.c:59
static void table_endscan(TableScanDesc scan)
Definition tableam.h:1005
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Definition tableam.h:1956
static void table_rescan(TableScanDesc scan, ScanKeyData *key)
Definition tableam.h:1014
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, ScanKeyData *key)
Definition tableam.h:942
void tbm_free(TIDBitmap *tbm)
Definition tidbitmap.c:312
void tbm_end_iterate(TBMIterator *iterator)
Definition tidbitmap.c:1594
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition tidbitmap.c:752
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition tidbitmap.c:331
TBMIterator tbm_begin_iterate(TIDBitmap *tbm, dsa_area *dsa, dsa_pointer dsp)
Definition tidbitmap.c:1571
static bool tbm_exhausted(TBMIterator *iterator)
Definition tidbitmap.h:118
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457