PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nodeTidrangescan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeTidrangescan.c
4 * Routines to support TID range scans of relations
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeTidrangescan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/relscan.h"
18#include "access/sysattr.h"
19#include "access/tableam.h"
20#include "catalog/pg_operator.h"
22#include "executor/executor.h"
23#include "executor/instrument.h"
25#include "nodes/nodeFuncs.h"
26#include "utils/rel.h"
27
28
29/*
30 * It's sufficient to check varattno to identify the CTID variable, as any
31 * Var in the relation scan qual must be for our table. (Even if it's a
32 * parameterized scan referencing some other table's CTID, the other table's
33 * Var would have become a Param by the time it gets here.)
34 */
/*
 * Evaluates to true when 'node' is non-NULL, is a Var node, and its varattno
 * equals SelfItemPointerAttributeNumber.  The argument is expanded more than
 * once, so it should be an expression without side effects.
 */
35#define IsCTIDVar(node) \
36 ((node) != NULL && \
37 IsA((node), Var) && \
38 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber)
39
45
46/* Upper or lower range bound for scan */
47typedef struct TidOpExpr
48{
49 TidExprType exprtype; /* type of op; lower or upper */
50 ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
51 bool inclusive; /* whether op is inclusive */
53
54/*
55 * For the given 'expr', build and return an appropriate TidOpExpr taking into
56 * account the expr's operator and operand order.
57 */
58static TidOpExpr *
60{
61 Node *arg1 = get_leftop((Expr *) expr);
62 Node *arg2 = get_rightop((Expr *) expr);
63 ExprState *exprstate = NULL;
64 bool invert = false;
66
 /*
  * Exactly one operand is expected to be the CTID Var; the other operand is
  * the expression that yields the bound and is the one compiled here.
  */
67 if (IsCTIDVar(arg1))
68 exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
69 else if (IsCTIDVar(arg2))
70 {
 /* CTID is on the right-hand side: remember that the operands are swapped */
71 exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
72 invert = true;
73 }
74 else
75 elog(ERROR, "could not identify CTID variable");
76
78 tidopexpr->inclusive = false; /* for now */
79
80 switch (expr->opno)
81 {
83 tidopexpr->inclusive = true;
85 case TIDLessOperator:
87 break;
89 tidopexpr->inclusive = true;
93 break;
94 default:
95 elog(ERROR, "could not identify CTID operator");
96 }
97
 /* Attach the compiled bound expression chosen above */
98 tidopexpr->exprstate = exprstate;
99
100 return tidopexpr;
101}
102
103/*
104 * Extract the qual subexpressions that yield TIDs to search for,
105 * and compile them into ExprStates if they're ordinary expressions.
106 */
107static void
109{
110 TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
111 List *tidexprs = NIL;
112 ListCell *l;
113
 /* Walk the plan node's tidrangequals; every entry must be an OpExpr */
114 foreach(l, node->tidrangequals)
115 {
116 OpExpr *opexpr = lfirst(l);
118
119 if (!IsA(opexpr, OpExpr))
120 elog(ERROR, "could not identify CTID expression");
121
124 }
125
 /* Store the list in the scan state; TidRangeEval iterates over it */
126 tidrangestate->trss_tidexprs = tidexprs;
127}
128
129/* ----------------------------------------------------------------
130 * TidRangeEval
131 *
132 * Compute and set node's block and offset range to scan by evaluating
133 * node->trss_tidexprs. Returns false if we detect the range cannot
134 * contain any tuples. Returns true if it's possible for the range to
135 * contain tuples. We don't bother validating that trss_mintid is less
136 * than or equal to trss_maxtid, as the scan_set_tidrange() table AM
137 * function will handle that.
138 * ----------------------------------------------------------------
139 */
140static bool
142{
143 ExprContext *econtext = node->ss.ps.ps_ExprContext;
146 ListCell *l;
147
148 /*
149 * Set the upper and lower bounds to the absolute limits of the range of
150 * the ItemPointer type. Below we'll try to narrow this range on either
151 * side by looking at the TidOpExprs.
152 */
155
156 foreach(l, node->trss_tidexprs)
157 {
159 ItemPointer itemptr;
160 bool isNull;
161
162 /* Evaluate this bound. */
163 itemptr = (ItemPointer)
165 econtext,
166 &isNull));
167
168 /* If the bound is NULL, *nothing* matches the qual. */
169 if (isNull)
170 return false;
171
172 if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
173 {
175
 /* Adjust a local copy rather than the evaluated value itself */
176 ItemPointerCopy(itemptr, &lb);
177
178 /*
179 * Normalize non-inclusive ranges to become inclusive. The
180 * resulting ItemPointer here may not be a valid item pointer.
181 */
182 if (!tidopexpr->inclusive)
183 ItemPointerInc(&lb);
184
185 /* Check if we can narrow the range using this qual */
186 if (ItemPointerCompare(&lb, &lowerBound) > 0)
188 }
189
190 else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND)
191 {
193
 /* Adjust a local copy rather than the evaluated value itself */
194 ItemPointerCopy(itemptr, &ub);
195
196 /*
197 * Normalize non-inclusive ranges to become inclusive. The
198 * resulting ItemPointer here may not be a valid item pointer.
199 */
200 if (!tidopexpr->inclusive)
202
203 /* Check if we can narrow the range using this qual */
204 if (ItemPointerCompare(&ub, &upperBound) < 0)
206 }
207 }
208
211
 /* No bound evaluated to NULL, so the range may contain tuples */
212 return true;
213}
214
215/* ----------------------------------------------------------------
216 * TidRangeNext
217 *
218 * Retrieve a tuple from the TidRangeScan node's currentRelation
219 * using the TIDs in the TidRangeScanState information.
220 *
221 * ----------------------------------------------------------------
222 */
223static TupleTableSlot *
225{
226 TableScanDesc scandesc;
227 EState *estate;
228 ScanDirection direction;
229 TupleTableSlot *slot;
230
231 /*
232 * extract necessary information from TID range scan node
233 */
234 scandesc = node->ss.ss_currentScanDesc;
235 estate = node->ss.ps.state;
236 slot = node->ss.ss_ScanTupleSlot;
237 direction = estate->es_direction;
238
239 if (!node->trss_inScan)
240 {
241 /* First time through, compute TID range to scan */
242 if (!TidRangeEval(node))
243 return NULL;
 /*
  * (When the range cannot contain tuples we return above without
  * beginning or rescanning the table scan.)
  */
244
245 if (scandesc == NULL)
246 {
 /* No scan descriptor yet: assemble the flags for a new scan */
247 uint32 flags = SO_NONE;
248
249 if (ScanRelIsReadOnly(&node->ss))
250 flags |= SO_HINT_REL_READ_ONLY;
251
252 if (estate->es_instrument & INSTRUMENT_IO)
253 flags |= SO_SCAN_INSTRUMENT;
254
256 estate->es_snapshot,
257 &node->trss_mintid,
258 &node->trss_maxtid,
259 flags);
 /* Remember the descriptor so later passes take the rescan branch */
260 node->ss.ss_currentScanDesc = scandesc;
261 }
262 else
263 {
264 /* rescan with the updated TID range */
265 table_rescan_tidrange(scandesc, &node->trss_mintid,
266 &node->trss_maxtid);
267 }
268
269 node->trss_inScan = true;
270 }
271
272 /* Fetch the next tuple. */
273 if (!table_scan_getnextslot_tidrange(scandesc, direction, slot))
274 {
 /*
  * No more tuples: clear trss_inScan so that a later call recomputes the
  * TID range, and hand back the slot in its cleared state.
  */
275 node->trss_inScan = false;
276 ExecClearTuple(slot);
277 }
278
279 return slot;
280}
281
282/*
283 * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual
284 */
285static bool
287{
 /*
  * Recompute the bounds; TidRangeEval returns false when the range cannot
  * contain any tuples, in which case the tuple fails the recheck.
  */
288 if (!TidRangeEval(node))
289 return false;
290
292
293 /* Recheck the ctid is still within range */
 /* Both bounds are inclusive: a tid equal to either bound passes */
294 if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
295 ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0)
296 return false;
297
298 return true;
299}
300
301/* ----------------------------------------------------------------
302 * ExecTidRangeScan(node)
303 *
304 * Scans the relation using tids and returns the next qualifying tuple.
305 * We call the ExecScan() routine and pass it the appropriate
306 * access method functions.
307 *
308 * Conditions:
309 * -- the "cursor" maintained by the AMI is positioned at the tuple
310 * returned previously.
311 *
312 * Initial States:
313 * -- the relation indicated is opened for TID range scanning.
314 * ----------------------------------------------------------------
315 */
316static TupleTableSlot *
325
326/* ----------------------------------------------------------------
327 * ExecReScanTidRangeScan(node)
328 * ----------------------------------------------------------------
329 */
330void
332{
333 /* mark scan as not in progress, so TidRangeNext recomputes the TID range */
334 node->trss_inScan = false;
335
336 /*
337 * We must wait until TidRangeNext before calling table_rescan_tidrange.
338 */
339 ExecScanReScan(&node->ss);
340}
341
342/* ----------------------------------------------------------------
343 * ExecEndTidRangeScan
344 *
345 * Ends the table scan, if there is one. In a parallel worker that has
 * shared instrumentation, the scan's IO stats are accumulated first.
346 * Returns nothing.
347 * ----------------------------------------------------------------
348 */
349void
351{
353
354 /* Collect IO stats for this process into shared instrumentation */
355 if (node->trss_sinstrument != NULL && IsParallelWorker())
356 {
358
359 Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers);
361
 /* Nothing to accumulate without a scan that carries instrumentation */
362 if (scan && scan->rs_instrument)
363 {
364 AccumulateIOStats(&si->stats.io, &scan->rs_instrument->io);
365 }
366 }
367
 /* Close the scan descriptor, if there is one */
368 if (scan != NULL)
369 table_endscan(scan);
370}
371
372/* ----------------------------------------------------------------
373 * ExecInitTidRangeScan
374 *
375 * Initializes the tid range scan's state information, creates
376 * scan keys, and opens the scan relation.
377 *
378 * Parameters:
379 * node: TidRangeScan node produced by the planner.
380 * estate: the execution state initialized in InitPlan.
381 * ----------------------------------------------------------------
382 */
384ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
385{
388
389 /*
390 * create state structure
391 */
393 tidrangestate->ss.ps.plan = (Plan *) node;
394 tidrangestate->ss.ps.state = estate;
395 tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan;
396
397 /*
398 * Miscellaneous initialization
399 *
400 * create expression context for node
401 */
402 ExecAssignExprContext(estate, &tidrangestate->ss.ps);
403
404 /*
405 * mark scan as not in progress, and TID range as not computed yet
406 */
407 tidrangestate->trss_inScan = false;
408
409 /*
410 * open the scan relation
411 */
412 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
413
414 tidrangestate->ss.ss_currentRelation = currentRelation;
415 tidrangestate->ss.ss_currentScanDesc = NULL; /* no table scan yet; TidRangeNext begins one when it finds NULL */
416
417 /*
418 * get the scan type from the relation descriptor.
419 */
424
425 /*
426 * Initialize result type and projection.
427 */
430
431 /*
432 * initialize child expressions
433 */
434 tidrangestate->ss.ps.qual =
435 ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate);
436
438
439 /*
440 * all done.
441 */
442 return tidrangestate;
443}
444
445/* ----------------------------------------------------------------
446 * Parallel Scan Support
447 * ----------------------------------------------------------------
448 */
449
450/* ----------------------------------------------------------------
451 * ExecTidRangeScanEstimate
452 *
453 * Compute the amount of space we'll need in the parallel
454 * query DSM, and inform pcxt->estimator about our needs.
455 * ----------------------------------------------------------------
456 */
457void
468
469/* ----------------------------------------------------------------
470 * ExecTidRangeScanInitializeDSM
471 *
472 * Set up a parallel TID range scan descriptor.
473 * ----------------------------------------------------------------
474 */
475void
477{
478 EState *estate = node->ss.ps.state;
480 uint32 flags = SO_NONE;
481
 /* Scan flags are chosen the same way as in TidRangeNext */
482 if (ScanRelIsReadOnly(&node->ss))
483 flags |= SO_HINT_REL_READ_ONLY;
484
485 if (estate->es_instrument & INSTRUMENT_IO)
486 flags |= SO_SCAN_INSTRUMENT;
487
 /*
  * Allocate the shared scan descriptor from the TOC (trss_pscanlen bytes)
  * and register it under this plan node's id.
  */
488 pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
490 pscan,
491 estate->es_snapshot);
492 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
493 node->ss.ss_currentScanDesc =
495 pscan, flags);
496}
497
498/* ----------------------------------------------------------------
499 * ExecTidRangeScanReInitializeDSM
500 *
501 * Reset shared state before beginning a fresh scan.
502 * ----------------------------------------------------------------
503 */
504void
513
514/* ----------------------------------------------------------------
515 * ExecTidRangeScanInitializeWorker
516 *
517 * Copy relevant information from TOC into planstate.
518 * ----------------------------------------------------------------
519 */
520void
538
539/*
540 * Compute the amount of space we'll need for the shared instrumentation and
541 * inform pcxt->estimator.
542 */
543void
545 ParallelContext *pcxt)
546{
547 EState *estate = node->ss.ps.state;
548 Size size;
549
 /* Reserve nothing unless IO instrumentation is on and there are workers */
550 if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
551 return;
552
555
556 shm_toc_estimate_chunk(&pcxt->estimator, size);
558}
559
560/*
561 * Set up parallel scan instrumentation.
562 */
563void
565 ParallelContext *pcxt)
566{
567 EState *estate = node->ss.ps.state;
569 Size size;
570
 /* Same condition as the estimate step: IO instrumentation and workers */
571 if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
572 return;
573
 /* Allocate the area, zero all of it, then record the worker count */
576 sinstrument = shm_toc_allocate(pcxt->toc, size);
577 memset(sinstrument, 0, size);
578 sinstrument->num_workers = pcxt->nworkers;
579 shm_toc_insert(pcxt->toc,
580 node->ss.ps.plan->plan_node_id +
582 sinstrument);
 /* Keep a pointer to the shared area in the scan state */
583 node->trss_sinstrument = sinstrument;
584}
585
586/*
587 * Look up and save the location of the shared instrumentation.
588 */
589void
592{
593 EState *estate = node->ss.ps.state;
594
 /* Without IO instrumentation there is nothing to look up */
595 if ((estate->es_instrument & INSTRUMENT_IO) == 0)
596 return;
597
599 node->ss.ps.plan->plan_node_id +
601 false);
602}
603
604/*
605 * Transfer scan instrumentation from DSM to private memory.
606 */
607void
609{
611 Size size;
612
 /* Nothing to copy when no shared instrumentation is present */
613 if (sinstrument == NULL)
614 return;
615
617 + sinstrument->num_workers * sizeof(TidRangeScanInstrumentation);
618
 /* Replace the pointer with a palloc'd copy holding the same bytes */
619 node->trss_sinstrument = palloc(size);
620 memcpy(node->trss_sinstrument, sinstrument, size);
621}
int ParallelWorkerNumber
Definition parallel.c:117
#define InvalidBlockNumber
Definition block.h:33
#define Assert(condition)
Definition c.h:943
uint32_t uint32
Definition c.h:624
#define PG_UINT16_MAX
Definition c.h:671
#define pg_fallthrough
Definition c.h:161
size_t Size
Definition c.h:689
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition execExpr.c:143
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:250
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition execScan.c:47
void ExecAssignScanProjectionInfo(ScanState *node)
Definition execScan.c:81
void ExecScanReScan(ScanState *node)
Definition execScan.c:108
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops, uint16 flags)
void ExecInitResultTypeTL(PlanState *planstate)
bool ScanRelIsReadOnly(ScanState *ss)
Definition execUtils.c:751
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition execUtils.c:490
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition execUtils.c:768
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition executor.h:590
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition executor.h:589
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition executor.h:446
#define palloc_object(type)
Definition fe_memutils.h:74
#define IsParallelWorker()
Definition parallel.h:62
@ INSTRUMENT_IO
Definition instrument.h:67
#define PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET
static void AccumulateIOStats(IOStats *dst, IOStats *src)
void ItemPointerDec(ItemPointer pointer)
Definition itemptr.c:114
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
void ItemPointerInc(ItemPointer pointer)
Definition itemptr.c:84
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
ItemPointerData * ItemPointer
Definition itemptr.h:49
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
List * lappend(List *list, void *datum)
Definition list.c:339
void * palloc(Size size)
Definition mcxt.c:1387
static Node * get_rightop(const void *clause)
Definition nodeFuncs.h:95
static Node * get_leftop(const void *clause)
Definition nodeFuncs.h:83
void ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node, ParallelContext *pcxt)
static void TidExprListCreate(TidRangeScanState *tidrangestate)
void ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt)
void ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node, ParallelContext *pcxt)
void ExecReScanTidRangeScan(TidRangeScanState *node)
void ExecEndTidRangeScan(TidRangeScanState *node)
static bool TidRangeEval(TidRangeScanState *node)
void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt)
static TidOpExpr * MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
TidRangeScanState * ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
static TupleTableSlot * TidRangeNext(TidRangeScanState *node)
void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node)
#define IsCTIDVar(node)
TidExprType
@ TIDEXPR_LOWER_BOUND
@ TIDEXPR_UPPER_BOUND
void ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt)
static TupleTableSlot * ExecTidRangeScan(PlanState *pstate)
static bool TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define makeNode(_type_)
Definition nodes.h:161
#define castNode(_type_, nodeptr)
Definition nodes.h:182
#define lfirst(lc)
Definition pg_list.h:172
#define NIL
Definition pg_list.h:68
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static int fb(int x)
#define RelationGetDescr(relation)
Definition rel.h:542
ScanDirection
Definition sdir.h:25
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:239
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
int es_instrument
Definition execnodes.h:756
ScanDirection es_direction
Definition execnodes.h:695
Snapshot es_snapshot
Definition execnodes.h:696
Definition pg_list.h:54
Definition nodes.h:135
Oid opno
Definition primnodes.h:851
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
Plan * plan
Definition execnodes.h:1201
EState * state
Definition execnodes.h:1203
ExprContext * ps_ExprContext
Definition execnodes.h:1242
int plan_node_id
Definition plannodes.h:233
Relation ss_currentRelation
Definition execnodes.h:1660
TupleTableSlot * ss_ScanTupleSlot
Definition execnodes.h:1662
PlanState ps
Definition execnodes.h:1659
struct TableScanDescData * ss_currentScanDesc
Definition execnodes.h:1661
Index scanrelid
Definition plannodes.h:544
TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]
struct TableScanInstrumentation * rs_instrument
Definition relscan.h:72
struct ParallelTableScanDescData * rs_parallel
Definition relscan.h:66
ExprState * exprstate
TidExprType exprtype
ItemPointerData trss_maxtid
Definition execnodes.h:1922
struct SharedTidRangeScanInstrumentation * trss_sinstrument
Definition execnodes.h:1925
ItemPointerData trss_mintid
Definition execnodes.h:1921
List * tidrangequals
Definition plannodes.h:743
ItemPointerData tts_tid
Definition tuptable.h:142
TableScanDesc table_beginscan_parallel_tidrange(Relation relation, ParallelTableScanDesc pscan, uint32 flags)
Definition tableam.c:193
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition tableam.c:131
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition tableam.c:146
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition tableam.c:59
@ SO_HINT_REL_READ_ONLY
Definition tableam.h:71
@ SO_NONE
Definition tableam.h:49
@ SO_SCAN_INSTRUMENT
Definition tableam.h:74
static void table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
Definition tableam.h:1141
static void table_endscan(TableScanDesc scan)
Definition tableam.h:1061
static TableScanDesc table_beginscan_tidrange(Relation rel, Snapshot snapshot, ItemPointer mintid, ItemPointer maxtid, uint32 flags)
Definition tableam.h:1119
static bool table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition tableam.h:1157
static void table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition tableam.h:1226
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
#define TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS
Definition tuptable.h:102