PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nodeBitmapHeapscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
5 *
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
17 *
18 *
19 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
21 *
22 *
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
25 *
26 *-------------------------------------------------------------------------
27 */
28/*
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
35 */
36#include "postgres.h"
37
38#include "access/relscan.h"
39#include "access/tableam.h"
41#include "executor/executor.h"
42#include "executor/instrument.h"
44#include "miscadmin.h"
45#include "pgstat.h"
46#include "storage/bufmgr.h"
48#include "utils/dsa.h"
49#include "utils/rel.h"
50#include "utils/spccache.h"
51#include "utils/wait_event.h"
52
57
58
59/* ----------------
60 * SharedBitmapState information
61 *
62 * BM_INITIAL TIDBitmap creation is not yet started, so first worker
63 * to see this state will set the state to BM_INPROGRESS
64 * and that process will be responsible for creating
65 * TIDBitmap.
66 * BM_INPROGRESS TIDBitmap creation is in progress; workers need to
67 * sleep until it's finished.
68 * BM_FINISHED TIDBitmap creation is done, so now all workers can
69 * proceed to iterate over TIDBitmap.
70 * ----------------
71 */
78
79/* ----------------
80 * ParallelBitmapHeapState information
81 * tbmiterator iterator for scanning current pages
82 * mutex mutual exclusion for state
83 * state current state of the TIDBitmap
84 * cv conditional wait variable
85 * ----------------
86 */
94
95
96/*
97 * Do the underlying index scan, build the bitmap, set up the parallel state
98 * needed for parallel workers to iterate through the bitmap, and set up the
99 * underlying table scan descriptor.
100 */
101static void
103{
104 TBMIterator tbmiterator = {0};
105 ParallelBitmapHeapState *pstate = node->pstate;
106 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
107
108 if (!pstate)
109 {
111
112 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
113 elog(ERROR, "unrecognized result from subplan");
114 }
115 else if (BitmapShouldInitializeSharedState(pstate))
116 {
117 /*
118 * The leader will immediately come out of the function, but others
119 * will be blocked until leader populates the TBM and wakes them up.
120 */
122 if (!node->tbm || !IsA(node->tbm, TIDBitmap))
123 elog(ERROR, "unrecognized result from subplan");
124
125 /*
126 * Prepare to iterate over the TBM. This will return the dsa_pointer
127 * of the iterator state which will be used by multiple processes to
128 * iterate jointly.
129 */
131
132 /* We have initialized the shared state so wake up others. */
134 }
135
136 tbmiterator = tbm_begin_iterate(node->tbm, dsa,
137 pstate ?
138 pstate->tbmiterator :
140
141 /*
142 * If this is the first scan of the underlying table, create the table
143 * scan descriptor and begin the scan.
144 */
145 if (!node->ss.ss_currentScanDesc)
146 {
147 node->ss.ss_currentScanDesc =
149 node->ss.ps.state->es_snapshot,
150 0,
151 NULL);
152 }
153
154 node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
155 node->initialized = true;
156}
157
158/* ----------------------------------------------------------------
159 * BitmapHeapNext
160 *
161 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
162 * ----------------------------------------------------------------
163 */
164static TupleTableSlot *
166{
167 ExprContext *econtext = node->ss.ps.ps_ExprContext;
168 TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
169
170 /*
171 * If we haven't yet performed the underlying index scan, do it, and begin
172 * the iteration over the bitmap.
173 */
174 if (!node->initialized)
176
178 slot, &node->recheck,
179 &node->stats.lossy_pages,
180 &node->stats.exact_pages))
181 {
182 /*
183 * Continuing in previously obtained page.
184 */
186
187 /*
188 * If we are using lossy info, we have to recheck the qual conditions
189 * at every tuple.
190 */
191 if (node->recheck)
192 {
193 econtext->ecxt_scantuple = slot;
194 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
195 {
196 /* Fails recheck, so drop it and loop back for another */
197 InstrCountFiltered2(node, 1);
198 ExecClearTuple(slot);
199 continue;
200 }
201 }
202
203 /* OK to return this tuple */
204 return slot;
205 }
206
207 /*
208 * if we get here it means we are at the end of the scan..
209 */
210 return ExecClearTuple(slot);
211}
212
213/*
214 * BitmapDoneInitializingSharedState - Shared state is initialized
215 *
216 * By this time the leader has already populated the TBM and initialized the
217 * shared state so wake up other processes.
218 */
219static inline void
227
228/*
229 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
230 */
231static bool
233{
234 ExprContext *econtext;
235
236 /*
237 * extract necessary information from index scan node
238 */
239 econtext = node->ss.ps.ps_ExprContext;
240
241 /* Does the tuple meet the original qual conditions? */
242 econtext->ecxt_scantuple = slot;
243 return ExecQualAndReset(node->bitmapqualorig, econtext);
244}
245
246/* ----------------------------------------------------------------
247 * ExecBitmapHeapScan(node)
248 * ----------------------------------------------------------------
249 */
250static TupleTableSlot *
259
260/* ----------------------------------------------------------------
261 * ExecReScanBitmapHeapScan(node)
262 * ----------------------------------------------------------------
263 */
264void
266{
268
270
271 if (scan)
272 {
273 /*
274 * End iteration on iterators saved in scan descriptor if they have
275 * not already been cleaned up.
276 */
277 if (!tbm_exhausted(&scan->st.rs_tbmiterator))
279
280 /* rescan to release any page pin */
282 }
283
284 /* release bitmaps and buffers if any */
285 if (node->tbm)
286 tbm_free(node->tbm);
287 node->tbm = NULL;
288 node->initialized = false;
289 node->recheck = true;
290
291 ExecScanReScan(&node->ss);
292
293 /*
294 * if chgParam of subnode is not null then plan will be re-scanned by
295 * first ExecProcNode.
296 */
297 if (outerPlan->chgParam == NULL)
299}
300
301/* ----------------------------------------------------------------
302 * ExecEndBitmapHeapScan
303 * ----------------------------------------------------------------
304 */
305void
307{
309
310 /*
311 * When ending a parallel worker, copy the statistics gathered by the
312 * worker back into shared memory so that it can be picked up by the main
313 * process to report in EXPLAIN ANALYZE.
314 */
315 if (node->sinstrument != NULL && IsParallelWorker())
316 {
318
319 Assert(ParallelWorkerNumber < node->sinstrument->num_workers);
321
322 /*
323 * Here we accumulate the stats rather than performing memcpy on
324 * node->stats into si. When a Gather/GatherMerge node finishes it
325 * will perform planner shutdown on the workers. On rescan it will
326 * spin up new workers which will have a new BitmapHeapScanState and
327 * zeroed stats.
328 */
330 si->lossy_pages += node->stats.lossy_pages;
331 }
332
333 /*
334 * extract information from the node
335 */
337
338 /*
339 * close down subplans
340 */
342
343 if (scanDesc)
344 {
345 /*
346 * End iteration on iterators saved in scan descriptor if they have
347 * not already been cleaned up.
348 */
349 if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
350 tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
351
352 /*
353 * close table scan
354 */
356 }
357
358 /*
359 * release bitmaps and buffers if any
360 */
361 if (node->tbm)
362 tbm_free(node->tbm);
363}
364
365/* ----------------------------------------------------------------
366 * ExecInitBitmapHeapScan
367 *
368 * Initializes the scan's state information.
369 * ----------------------------------------------------------------
370 */
373{
376
377 /* check for unsupported flags */
379
380 /*
381 * Assert caller didn't ask for an unsafe snapshot --- see comments at
382 * head of file.
383 */
385
386 /*
387 * create state structure
388 */
390 scanstate->ss.ps.plan = (Plan *) node;
391 scanstate->ss.ps.state = estate;
392 scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
393
394 scanstate->tbm = NULL;
395
396 /* Zero the statistics counters */
398
399 scanstate->initialized = false;
400 scanstate->pstate = NULL;
401 scanstate->recheck = true;
402
403 /*
404 * Miscellaneous initialization
405 *
406 * create expression context for node
407 */
408 ExecAssignExprContext(estate, &scanstate->ss.ps);
409
410 /*
411 * open the scan relation
412 */
413 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
414
415 /*
416 * initialize child nodes
417 */
418 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
419
420 /*
421 * get the scan type from the relation descriptor.
422 */
423 ExecInitScanTupleSlot(estate, &scanstate->ss,
427
428 /*
429 * Initialize result type and projection.
430 */
433
434 /*
435 * initialize child expressions
436 */
437 scanstate->ss.ps.qual =
438 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
439 scanstate->bitmapqualorig =
441
442 scanstate->ss.ss_currentRelation = currentRelation;
443
444 /*
445 * all done.
446 */
447 return scanstate;
448}
449
450/*----------------
451 * BitmapShouldInitializeSharedState
452 *
453 * The first process to come here and see the state to the BM_INITIAL
454 * will become the leader for the parallel bitmap scan and will be
455 * responsible for populating the TIDBitmap. The other processes will
456 * be blocked by the condition variable until the leader wakes them up.
457 * ---------------
458 */
459static bool
461{
463
464 while (1)
465 {
466 SpinLockAcquire(&pstate->mutex);
467 state = pstate->state;
468 if (pstate->state == BM_INITIAL)
469 pstate->state = BM_INPROGRESS;
470 SpinLockRelease(&pstate->mutex);
471
472 /* Exit if bitmap is done, or if we're the leader. */
473 if (state != BM_INPROGRESS)
474 break;
475
476 /* Wait for the leader to wake us up. */
478 }
479
481
482 return (state == BM_INITIAL);
483}
484
485/* ----------------------------------------------------------------
486 * ExecBitmapHeapEstimate
487 *
488 * Compute the amount of space we'll need in the parallel
489 * query DSM, and inform pcxt->estimator about our needs.
490 * ----------------------------------------------------------------
491 */
492void
494 ParallelContext *pcxt)
495{
496 Size size;
497
498 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
499
500 /* account for instrumentation, if required */
501 if (node->ss.ps.instrument && pcxt->nworkers > 0)
502 {
503 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
504 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
505 }
506
507 shm_toc_estimate_chunk(&pcxt->estimator, size);
509}
510
511/* ----------------------------------------------------------------
512 * ExecBitmapHeapInitializeDSM
513 *
514 * Set up a parallel bitmap heap scan descriptor.
515 * ----------------------------------------------------------------
516 */
517void
519 ParallelContext *pcxt)
520{
523 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
524 char *ptr;
525 Size size;
526
527 /* If there's no DSA, there are no workers; initialize nothing. */
528 if (dsa == NULL)
529 return;
530
531 size = MAXALIGN(sizeof(ParallelBitmapHeapState));
532 if (node->ss.ps.instrument && pcxt->nworkers > 0)
533 {
534 size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
535 size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
536 }
537
538 ptr = shm_toc_allocate(pcxt->toc, size);
539 pstate = (ParallelBitmapHeapState *) ptr;
540 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
541 if (node->ss.ps.instrument && pcxt->nworkers > 0)
542 sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
543
544 pstate->tbmiterator = 0;
545
546 /* Initialize the mutex */
547 SpinLockInit(&pstate->mutex);
548 pstate->state = BM_INITIAL;
549
550 ConditionVariableInit(&pstate->cv);
551
552 if (sinstrument)
553 {
554 sinstrument->num_workers = pcxt->nworkers;
555
556 /* ensure any unfilled slots will contain zeroes */
557 memset(sinstrument->sinstrument, 0,
559 }
560
561 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
562 node->pstate = pstate;
563 node->sinstrument = sinstrument;
564}
565
566/* ----------------------------------------------------------------
567 * ExecBitmapHeapReInitializeDSM
568 *
569 * Reset shared state before beginning a fresh scan.
570 * ----------------------------------------------------------------
571 */
572void
574 ParallelContext *pcxt)
575{
576 ParallelBitmapHeapState *pstate = node->pstate;
577 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
578
579 /* If there's no DSA, there are no workers; do nothing. */
580 if (dsa == NULL)
581 return;
582
583 pstate->state = BM_INITIAL;
584
585 if (DsaPointerIsValid(pstate->tbmiterator))
586 tbm_free_shared_area(dsa, pstate->tbmiterator);
587
589}
590
591/* ----------------------------------------------------------------
592 * ExecBitmapHeapInitializeWorker
593 *
594 * Copy relevant information from TOC into planstate.
595 * ----------------------------------------------------------------
596 */
597void
600{
601 char *ptr;
602
603 Assert(node->ss.ps.state->es_query_dsa != NULL);
604
605 ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
606
607 node->pstate = (ParallelBitmapHeapState *) ptr;
608 ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
609
610 if (node->ss.ps.instrument)
612}
613
614/* ----------------------------------------------------------------
615 * ExecBitmapHeapRetrieveInstrumentation
616 *
617 * Transfer bitmap heap scan statistics from DSM to private memory.
618 * ----------------------------------------------------------------
619 */
620void
622{
623 SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
624 Size size;
625
626 if (sinstrument == NULL)
627 return;
628
629 size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
630 + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
631
632 node->sinstrument = palloc(size);
633 memcpy(node->sinstrument, sinstrument, size);
634}
int ParallelWorkerNumber
Definition parallel.c:117
#define MAXALIGN(LEN)
Definition c.h:898
#define Assert(condition)
Definition c.h:945
size_t Size
Definition c.h:691
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
uint64 dsa_pointer
Definition dsa.h:62
#define InvalidDsaPointer
Definition dsa.h:78
#define DsaPointerIsValid(x)
Definition dsa.h:106
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
void ExecReScan(PlanState *node)
Definition execAmi.c:78
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:250
Node * MultiExecProcNode(PlanState *node)
void ExecEndNode(PlanState *node)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition execScan.c:47
void ExecAssignScanProjectionInfo(ScanState *node)
Definition execScan.c:81
void ExecScanReScan(ScanState *node)
Definition execScan.c:108
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops, uint16 flags)
void ExecInitResultTypeTL(PlanState *planstate)
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition execUtils.c:490
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition execUtils.c:747
#define outerPlanState(node)
Definition execnodes.h:1273
#define InstrCountFiltered2(node, delta)
Definition execnodes.h:1286
#define EXEC_FLAG_BACKWARD
Definition executor.h:70
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition executor.h:583
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition executor.h:549
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition executor.h:582
#define EXEC_FLAG_MARK
Definition executor.h:71
#define IsParallelWorker()
Definition parallel.h:62
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt)
void ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt)
void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
SharedBitmapState
@ BM_INITIAL
@ BM_FINISHED
@ BM_INPROGRESS
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt)
static TupleTableSlot * ExecBitmapHeapScan(PlanState *pstate)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node)
static void BitmapTableScanSetup(BitmapHeapScanState *node)
static void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
static bool BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define makeNode(_type_)
Definition nodes.h:161
#define castNode(_type_, nodeptr)
Definition nodes.h:182
#define outerPlan(node)
Definition plannodes.h:265
static int fb(int x)
#define RelationGetDescr(relation)
Definition rel.h:540
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
ParallelBitmapHeapState * pstate
Definition execnodes.h:1855
ExprState * bitmapqualorig
Definition execnodes.h:1851
BitmapHeapScanInstrumentation stats
Definition execnodes.h:1853
SharedBitmapHeapInstrumentation * sinstrument
Definition execnodes.h:1856
List * bitmapqualorig
Definition plannodes.h:710
struct dsa_area * es_query_dsa
Definition execnodes.h:764
Snapshot es_snapshot
Definition execnodes.h:672
TupleTableSlot * ecxt_scantuple
Definition execnodes.h:284
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
Instrumentation * instrument
Definition execnodes.h:1187
Plan * plan
Definition execnodes.h:1177
EState * state
Definition execnodes.h:1179
ExprContext * ps_ExprContext
Definition execnodes.h:1216
int plan_node_id
Definition plannodes.h:231
Relation ss_currentRelation
Definition execnodes.h:1634
TupleTableSlot * ss_ScanTupleSlot
Definition execnodes.h:1636
PlanState ps
Definition execnodes.h:1633
struct TableScanDescData * ss_currentScanDesc
Definition execnodes.h:1635
Index scanrelid
Definition plannodes.h:540
BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
TBMIterator rs_tbmiterator
Definition relscan.h:46
union TableScanDescData::@52 st
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition tableam.c:59
static void table_endscan(TableScanDesc scan)
Definition tableam.h:1004
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Definition tableam.h:1955
static void table_rescan(TableScanDesc scan, ScanKeyData *key)
Definition tableam.h:1013
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, ScanKeyData *key)
Definition tableam.h:941
void tbm_free(TIDBitmap *tbm)
Definition tidbitmap.c:312
void tbm_end_iterate(TBMIterator *iterator)
Definition tidbitmap.c:1594
dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm)
Definition tidbitmap.c:752
void tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
Definition tidbitmap.c:331
TBMIterator tbm_begin_iterate(TIDBitmap *tbm, dsa_area *dsa, dsa_pointer dsp)
Definition tidbitmap.c:1571
static bool tbm_exhausted(TBMIterator *iterator)
Definition tidbitmap.h:118
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
#define TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS
Definition tuptable.h:102