PostgreSQL Source Code git master
Loading...
Searching...
No Matches
nodeIndexonlyscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeIndexonlyscan.c
4 * Routines to support index-only scans
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeIndexonlyscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15/*
16 * INTERFACE ROUTINES
17 * ExecIndexOnlyScan scans an index
18 * IndexOnlyNext retrieve next tuple
19 * ExecInitIndexOnlyScan creates and initializes state info.
20 * ExecReScanIndexOnlyScan rescans the indexed relation.
21 * ExecEndIndexOnlyScan releases all storage.
22 * ExecIndexOnlyMarkPos marks scan position.
23 * ExecIndexOnlyRestrPos restores scan position.
24 * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 * parallel index-only scan
26 * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 * index-only scan
28 * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 */
31#include "postgres.h"
32
33#include "access/genam.h"
34#include "access/relscan.h"
35#include "access/tableam.h"
36#include "access/tupdesc.h"
38#include "catalog/pg_type.h"
39#include "executor/executor.h"
40#include "executor/instrument.h"
43#include "miscadmin.h"
44#include "storage/bufmgr.h"
45#include "storage/predicate.h"
46#include "utils/builtins.h"
47#include "utils/rel.h"
48
49
53
54
55/* ----------------------------------------------------------------
56 * IndexOnlyNext
57 *
58 * Retrieve a tuple from the IndexOnlyScan node's index.
59 * ----------------------------------------------------------------
60 */
61static TupleTableSlot *
/*
 * NOTE(review): the extraction dropped original line 62; per the trailer
 * declarations this is IndexOnlyNext(IndexOnlyScanState *node) — the
 * per-call workhorse that returns the next visible tuple, or an empty slot
 * at end of scan.
 */
63{
64 EState *estate;
65 ExprContext *econtext;
66 ScanDirection direction;
67 IndexScanDesc scandesc;
68 TupleTableSlot *slot;
69 ItemPointer tid;
70
71 /*
72 * extract necessary information from index scan node
73 */
74 estate = node->ss.ps.state;
75
76 /*
77 * Determine which direction to scan the index in based on the plan's scan
78 * direction and the current direction of execution.
79 */
80 direction = ScanDirectionCombine(estate->es_direction,
81 ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
82 scandesc = node->ioss_ScanDesc;
83 econtext = node->ss.ps.ps_ExprContext;
84 slot = node->ss.ss_ScanTupleSlot;
85
86 if (scandesc == NULL)
87 {
88 /*
89 * We reach here if the index only scan is not parallel, or if we're
90 * serially executing an index only scan that was planned to be
91 * parallel.
92 */
 /*
  * NOTE(review): extraction artifact — several argument lines of this
  * index_beginscan() call are missing (the index relation, the norderbys
  * count, and the flags value chosen by the ScanRelIsReadOnly() test).
  * Consult the upstream file before editing this call.
  */
93 scandesc = index_beginscan(node->ss.ss_currentRelation,
95 estate->es_snapshot,
96 node->ioss_Instrument,
97 node->ioss_NumScanKeys,
99 ScanRelIsReadOnly(&node->ss) ?
101
102 node->ioss_ScanDesc = scandesc;
103
104
105 /* Set it up for index-only scan */
106 node->ioss_ScanDesc->xs_want_itup = true;
108
109 /*
110 * If no run-time keys to calculate or they are ready, go ahead and
111 * pass the scankeys to the index AM.
112 */
113 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
114 index_rescan(scandesc,
115 node->ioss_ScanKeys,
116 node->ioss_NumScanKeys,
117 node->ioss_OrderByKeys,
118 node->ioss_NumOrderByKeys);
119 }
120
121 /*
122 * OK, now that we have what we need, fetch the next tuple.
123 */
124 while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
125 {
126 bool tuple_from_heap = false;
127
 /* NOTE(review): original line 128 (likely CHECK_FOR_INTERRUPTS()) is missing here. */
129
130 /*
131 * We can skip the heap fetch if the TID references a heap page on
132 * which all tuples are known visible to everybody. In any case,
133 * we'll use the index tuple not the heap tuple as the data source.
134 *
135 * Note on Memory Ordering Effects: visibilitymap_get_status does not
136 * lock the visibility map buffer, and therefore the result we read
137 * here could be slightly stale. However, it can't be stale enough to
138 * matter.
139 *
140 * We need to detect clearing a VM bit due to an insert right away,
141 * because the tuple is present in the index page but not visible. The
142 * reading of the TID by this scan (using a shared lock on the index
143 * buffer) is serialized with the insert of the TID into the index
144 * (using an exclusive lock on the index buffer). Because the VM bit
145 * is cleared before updating the index, and locking/unlocking of the
146 * index page acts as a full memory barrier, we are sure to see the
147 * cleared bit if we see a recently-inserted TID.
148 *
149 * Deletes do not update the index page (only VACUUM will clear out
150 * the TID), so the clearing of the VM bit by a delete is not
151 * serialized with this test below, and we may see a value that is
152 * significantly stale. However, we don't care about the delete right
153 * away, because the tuple is still visible until the deleting
154 * transaction commits or the statement ends (if it's our
155 * transaction). In either case, the lock on the VM buffer will have
156 * been released (acting as a write barrier) after clearing the bit.
157 * And for us to have a snapshot that includes the deleting
158 * transaction (making the tuple invisible), we must have acquired
159 * ProcArrayLock after that time, acting as a read barrier.
160 *
161 * It's worth going through this complexity to avoid needing to lock
162 * the VM buffer, which could cause significant contention.
163 */
 /* NOTE(review): the VM_ALL_VISIBLE block-number argument line (orig. 165) is missing. */
164 if (!VM_ALL_VISIBLE(scandesc->heapRelation,
166 &node->ioss_VMBuffer))
167 {
168 /*
169 * Rats, we have to visit the heap to check visibility.
170 */
171 InstrCountTuples2(node, 1);
172 if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
173 continue; /* no visible tuple, try next index entry */
174
176
177 /*
178 * Only MVCC snapshots are supported here, so there should be no
179 * need to keep following the HOT chain once a visible entry has
180 * been found. If we did want to allow that, we'd need to keep
181 * more state to remember not to call index_getnext_tid next time.
182 */
183 if (scandesc->xs_heap_continue)
184 elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
185
186 /*
187 * Note: at this point we are holding a pin on the heap page, as
188 * recorded in scandesc->xs_cbuf. We could release that pin now,
189 * but it's not clear whether it's a win to do so. The next index
190 * entry might require a visit to the same heap page.
191 */
192
193 tuple_from_heap = true;
194 }
195
196 /*
197 * Fill the scan tuple slot with data from the index. This might be
198 * provided in either HeapTuple or IndexTuple format. Conceivably an
199 * index AM might fill both fields, in which case we prefer the heap
200 * format, since it's probably a bit cheaper to fill a slot from.
201 */
202 if (scandesc->xs_hitup)
203 {
204 /*
205 * We don't take the trouble to verify that the provided tuple has
206 * exactly the slot's format, but it seems worth doing a quick
207 * check on the number of fields.
208 */
 /* NOTE(review): the Assert head comparing slot natts (orig. 209) is missing. */
210 scandesc->xs_hitupdesc->natts);
211 ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
212 }
213 else if (scandesc->xs_itup)
214 StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
215 else
216 elog(ERROR, "no data returned for index-only scan");
217
218 /*
219 * If the index was lossy, we have to recheck the index quals.
220 */
221 if (scandesc->xs_recheck)
222 {
223 econtext->ecxt_scantuple = slot;
224 if (!ExecQualAndReset(node->recheckqual, econtext))
225 {
226 /* Fails recheck, so drop it and loop back for another */
227 InstrCountFiltered2(node, 1);
228 continue;
229 }
230 }
231
232 /*
233 * We don't currently support rechecking ORDER BY distances. (In
234 * principle, if the index can support retrieval of the originally
235 * indexed value, it should be able to produce an exact distance
236 * calculation too. So it's not clear that adding code here for
237 * recheck/re-sort would be worth the trouble. But we should at least
238 * throw an error if someone tries it.)
239 */
 /* NOTE(review): the ereport(ERROR, (errcode(...) head lines (orig. 241-242) are missing. */
240 if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
243 errmsg("lossy distance functions are not supported in index-only scans")));
244
245 /*
246 * If we didn't access the heap, then we'll need to take a predicate
247 * lock explicitly, as if we had. For now we do that at page level.
248 */
 /* NOTE(review): the PredicateLockPage(...) call head (orig. 250-251) is missing. */
249 if (!tuple_from_heap)
252 estate->es_snapshot);
253
254 return slot;
255 }
256
257 /*
258 * if we get here it means the index scan failed so we are at the end of
259 * the scan..
260 */
261 return ExecClearTuple(slot);
262}
263
264/*
265 * StoreIndexTuple
266 * Fill the slot with data from the index tuple.
267 *
268 * At some point this might be generally-useful functionality, but
269 * right now we don't need it elsewhere.
270 */
271static void
274{
275 /*
276 * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
277 * not the slot's tupdesc, in case the latter has different datatypes
278 * (this happens for btree name_ops in particular). They'd better have
279 * the same number of columns though, as well as being datatype-compatible
280 * which is something we can't so easily check.
281 */
282 Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
283
284 ExecClearTuple(slot);
286
287 /*
288 * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
289 * sized allocation. We mark this branch as unlikely as generally "name"
290 * is used only for the system catalogs and this would have to be a user
291 * query running on those or some other user table with an index on a name
292 * column.
293 */
295 {
297
298 for (int idx = 0; idx < attcount; idx++)
299 {
301 Name name;
302
303 /* skip null Datums */
304 if (slot->tts_isnull[attnum])
305 continue;
306
307 /* allocate the NAMEDATALEN and copy the datum into that memory */
310
311 /* use namestrcpy to zero-pad all trailing bytes */
314 }
315 }
316
318}
319
320/*
321 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
322 *
323 * This can't really happen, since an index can't supply CTID which would
324 * be necessary data for any potential EvalPlanQual target relation. If it
325 * did happen, the EPQ code would pass us the wrong data, namely a heap
326 * tuple not an index tuple. So throw an error.
327 */
328static bool
330{
331 elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
332 return false; /* keep compiler quiet */
333}
334
335/* ----------------------------------------------------------------
336 * ExecIndexOnlyScan(node)
337 * ----------------------------------------------------------------
338 */
339static TupleTableSlot *
341{
343
344 /*
345 * If we have runtime keys and they've not already been set up, do it now.
346 */
347 if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
348 ExecReScan((PlanState *) node);
349
350 return ExecScan(&node->ss,
353}
354
355/* ----------------------------------------------------------------
356 * ExecReScanIndexOnlyScan(node)
357 *
358 * Recalculates the values of any scan keys whose value depends on
359 * information known at runtime, then rescans the indexed relation.
360 *
361 * Updating the scan key was formerly done separately in
362 * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
363 * rescans of indices and relations/general streams more uniform.
364 * ----------------------------------------------------------------
365 */
366void
368{
369 /*
370 * If we are doing runtime key calculations (ie, any of the index key
371 * values weren't simple Consts), compute the new key values. But first,
372 * reset the context so we don't leak memory as each outer tuple is
373 * scanned. Note this assumes that we will recalculate *all* runtime keys
374 * on each call.
375 */
376 if (node->ioss_NumRuntimeKeys != 0)
377 {
378 ExprContext *econtext = node->ioss_RuntimeContext;
379
380 ResetExprContext(econtext);
382 node->ioss_RuntimeKeys,
383 node->ioss_NumRuntimeKeys);
384 }
385 node->ioss_RuntimeKeysReady = true;
386
387 /* reset index scan */
388 if (node->ioss_ScanDesc)
390 node->ioss_ScanKeys, node->ioss_NumScanKeys,
392
393 ExecScanReScan(&node->ss);
394}
395
396
397/* ----------------------------------------------------------------
398 * ExecEndIndexOnlyScan
399 * ----------------------------------------------------------------
400 */
401void
403{
406
407 /*
408 * extract information from the node
409 */
412
413 /* Release VM buffer pin, if any. */
414 if (node->ioss_VMBuffer != InvalidBuffer)
415 {
418 }
419
420 /*
421 * When ending a parallel worker, copy the statistics gathered by the
422 * worker back into shared memory so that it can be picked up by the main
423 * process to report in EXPLAIN ANALYZE
424 */
425 if (node->ioss_SharedInfo != NULL && IsParallelWorker())
426 {
427 IndexScanInstrumentation *winstrument;
428
429 Assert(ParallelWorkerNumber < node->ioss_SharedInfo->num_workers);
430 winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];
431
432 /*
433 * We have to accumulate the stats rather than performing a memcpy.
434 * When a Gather/GatherMerge node finishes it will perform planner
435 * shutdown on the workers. On rescan it will spin up new workers
436 * which will have a new IndexOnlyScanState and zeroed stats.
437 */
438 winstrument->nsearches += node->ioss_Instrument->nsearches;
439 }
440
441 /*
442 * close the index relation (no-op if we didn't open it)
443 */
444 if (indexScanDesc)
448}
449
450/* ----------------------------------------------------------------
451 * ExecIndexOnlyMarkPos
452 *
453 * Note: we assume that no caller attempts to set a mark before having read
454 * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
455 * ----------------------------------------------------------------
456 */
457void
459{
460 EState *estate = node->ss.ps.state;
461 EPQState *epqstate = estate->es_epq_active;
462
463 if (epqstate != NULL)
464 {
465 /*
466 * We are inside an EvalPlanQual recheck. If a test tuple exists for
467 * this relation, then we shouldn't access the index at all. We would
468 * instead need to save, and later restore, the state of the
469 * relsubs_done flag, so that re-fetching the test tuple is possible.
470 * However, given the assumption that no caller sets a mark at the
471 * start of the scan, we can only get here with relsubs_done[i]
472 * already set, and so no state need be saved.
473 */
474 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
475
476 Assert(scanrelid > 0);
477 if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
478 epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
479 {
480 /* Verify the claim above */
481 if (!epqstate->relsubs_done[scanrelid - 1])
482 elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
483 return;
484 }
485 }
486
488}
489
490/* ----------------------------------------------------------------
491 * ExecIndexOnlyRestrPos
492 * ----------------------------------------------------------------
493 */
494void
496{
497 EState *estate = node->ss.ps.state;
498 EPQState *epqstate = estate->es_epq_active;
499
500 if (estate->es_epq_active != NULL)
501 {
502 /* See comments in ExecIndexMarkPos */
503 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
504
505 Assert(scanrelid > 0);
506 if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
507 epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
508 {
509 /* Verify the claim above */
510 if (!epqstate->relsubs_done[scanrelid - 1])
511 elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
512 return;
513 }
514 }
515
517}
518
519/* ----------------------------------------------------------------
520 * ExecInitIndexOnlyScan
521 *
522 * Initializes the index scan's state information, creates
523 * scan keys, and opens the base and index relations.
524 *
525 * Note: index scans have 2 sets of state information because
526 * we have to keep track of the base relation and the
527 * index relation.
528 * ----------------------------------------------------------------
529 */
/*
 * NOTE(review): extraction artifact — the return-type line
 * ("IndexOnlyScanState *", per the declaration in the capture trailer), the
 * declarations of "indexstate" and "currentRelation", and the
 * makeNode(IndexOnlyScanState) assignment (original lines 530, 533-534, 544)
 * are missing from this copy.
 */
531ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
532{
535 Relation indexRelation;
536 LOCKMODE lockmode;
537 TupleDesc tupDesc;
538 int indnkeyatts;
539 int namecount;
540
541 /*
542 * create state structure
543 */
545 indexstate->ss.ps.plan = (Plan *) node;
546 indexstate->ss.ps.state = estate;
547 indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
548
549 /*
550 * Miscellaneous initialization
551 *
552 * create expression context for node
553 */
554 ExecAssignExprContext(estate, &indexstate->ss.ps);
555
556 /*
557 * open the scan relation
558 */
559 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
560
561 indexstate->ss.ss_currentRelation = currentRelation;
562 indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
563
564 /*
565 * Build the scan tuple type using the indextlist generated by the
566 * planner. We use this, rather than the index's physical tuple
567 * descriptor, because the latter contains storage column types not the
568 * types of the original datums. (It's the AM's responsibility to return
569 * suitable data anyway.)
570 */
571 tupDesc = ExecTypeFromTL(node->indextlist);
 /* NOTE(review): the slot-ops argument line (orig. 573, presumably &TTSOpsVirtual) is missing. */
572 ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
574 0);
575
576 /*
577 * We need another slot, in a format that's suitable for the table AM, for
578 * when we need to fetch a tuple from the table for rechecking visibility.
579 */
 /* NOTE(review): the ExecAllocTableSlot(...) call (orig. 581-583) is missing. */
580 indexstate->ioss_TableSlot =
584
585 /*
586 * Initialize result type and projection info. The node's targetlist will
587 * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
588 */
 /* NOTE(review): the ExecInitResultTypeTL / ExecAssignScanProjectionInfoWithVarno calls (orig. 589-590) are missing. */
591
592 /*
593 * initialize child expressions
594 *
595 * Note: we don't initialize all of the indexorderby expression, only the
596 * sub-parts corresponding to runtime keys (see below).
597 */
598 indexstate->ss.ps.qual =
599 ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
600 indexstate->recheckqual =
 /* NOTE(review): the ExecInitQual(node->recheckqual, ...) line (orig. 601) is missing. */
602
603 /*
604 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
605 * here. This allows an index-advisor plugin to EXPLAIN a plan containing
606 * references to nonexistent indexes.
607 */
608 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
609 return indexstate;
610
611 /* Set up instrumentation of index-only scans if requested */
 /* NOTE(review): the ioss_Instrument allocation line (orig. 613, likely using palloc0_object) is missing. */
612 if (estate->es_instrument)
614
615 /* Open the index relation. */
616 lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
617 indexRelation = index_open(node->indexid, lockmode);
618 indexstate->ioss_RelationDesc = indexRelation;
619
620 /*
621 * Initialize index-specific scan state
622 */
623 indexstate->ioss_RuntimeKeysReady = false;
624 indexstate->ioss_RuntimeKeys = NULL;
625 indexstate->ioss_NumRuntimeKeys = 0;
626
627 /*
628 * build the index scan keys from the index qualification
629 */
 /* NOTE(review): the "ExecIndexBuildScanKeys((PlanState *) indexstate," call head (orig. 630) is missing. */
631 indexRelation,
632 node->indexqual,
633 false,
634 &indexstate->ioss_ScanKeys,
635 &indexstate->ioss_NumScanKeys,
636 &indexstate->ioss_RuntimeKeys,
637 &indexstate->ioss_NumRuntimeKeys,
638 NULL, /* no ArrayKeys */
639 NULL);
640
641 /*
642 * any ORDER BY exprs have to be turned into scankeys in the same way
643 */
 /* NOTE(review): the second ExecIndexBuildScanKeys call head (orig. 644) is missing. */
645 indexRelation,
646 node->indexorderby,
647 true,
648 &indexstate->ioss_OrderByKeys,
649 &indexstate->ioss_NumOrderByKeys,
650 &indexstate->ioss_RuntimeKeys,
651 &indexstate->ioss_NumRuntimeKeys,
652 NULL, /* no ArrayKeys */
653 NULL);
654
655 /*
656 * If we have runtime keys, we need an ExprContext to evaluate them. The
657 * node's standard context won't do because we want to reset that context
658 * for every tuple. So, build another context just like the other one...
659 * -tgl 7/11/00
660 */
661 if (indexstate->ioss_NumRuntimeKeys != 0)
662 {
663 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
664
665 ExecAssignExprContext(estate, &indexstate->ss.ps);
666 indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
667 indexstate->ss.ps.ps_ExprContext = stdecontext;
668 }
669 else
670 {
671 indexstate->ioss_RuntimeContext = NULL;
672 }
673
674 indexstate->ioss_NameCStringAttNums = NULL;
675 indnkeyatts = indexRelation->rd_index->indnkeyatts;
676 namecount = 0;
677
678 /*
679 * The "name" type for btree uses text_ops which results in storing
680 * cstrings in the indexed keys rather than names. Here we detect that in
681 * a generic way in case other index AMs want to do the same optimization.
682 * Check for opclasses with an opcintype of NAMEOID and an index tuple
683 * descriptor with CSTRINGOID. If any of these are found, create an array
684 * marking the index attribute number of each of them. StoreIndexTuple()
685 * handles copying the name Datums into a NAMEDATALEN-byte allocation.
686 */
687
688 /* First, count the number of such index keys */
689 for (int attnum = 0; attnum < indnkeyatts; attnum++)
690 {
691 if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
692 indexRelation->rd_opcintype[attnum] == NAMEOID)
693 namecount++;
694 }
695
696 if (namecount > 0)
697 {
698 int idx = 0;
699
700 /*
701 * Now create an array to mark the attribute numbers of the keys that
702 * need to be converted from cstring to name.
703 */
704 indexstate->ioss_NameCStringAttNums = palloc_array(AttrNumber, namecount);
705
706 for (int attnum = 0; attnum < indnkeyatts; attnum++)
707 {
708 if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
709 indexRelation->rd_opcintype[attnum] == NAMEOID)
710 indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
711 }
712 }
713
714 indexstate->ioss_NameCStringCount = namecount;
715
716 /*
717 * all done.
718 */
719 return indexstate;
720}
721
722/* ----------------------------------------------------------------
723 * Parallel Index-only Scan Support
724 * ----------------------------------------------------------------
725 */
726
727/* ----------------------------------------------------------------
728 * ExecIndexOnlyScanEstimate
729 *
730 * Compute the amount of space we'll need in the parallel
731 * query DSM, and inform pcxt->estimator about our needs.
732 * ----------------------------------------------------------------
733 */
734void
747
748/* ----------------------------------------------------------------
749 * ExecIndexOnlyScanInitializeDSM
750 *
751 * Set up a parallel index-only scan descriptor.
752 * ----------------------------------------------------------------
753 */
754void
/*
 * NOTE(review): extraction dropped the parameter line naming the
 * IndexOnlyScanState argument (orig. 755); the declaration appears in the
 * capture trailer as
 * ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt).
 */
756 ParallelContext *pcxt)
757{
758 EState *estate = node->ss.ps.state;
 /*
  * NOTE(review): the "piscan" declaration, the shm_toc_allocate() call, and
  * the index_parallelscan_initialize() call head (orig. 759, 761-762) are
  * missing from this copy.
  */
760
763 node->ioss_RelationDesc,
764 estate->es_snapshot,
765 piscan);
767
 /*
  * NOTE(review): this index_beginscan_parallel() call is also missing
  * argument lines (the heap relation, norderbys, and the flags value chosen
  * by the ScanRelIsReadOnly() test — orig. 769, 773, 776).
  */
768 node->ioss_ScanDesc =
770 node->ioss_RelationDesc,
771 node->ioss_Instrument,
772 node->ioss_NumScanKeys,
774 piscan,
775 ScanRelIsReadOnly(&node->ss) ?
777 node->ioss_ScanDesc->xs_want_itup = true;
779
780 /*
781 * If no run-time keys to calculate or they are ready, go ahead and pass
782 * the scankeys to the index AM.
783 */
 /* NOTE(review): the index_rescan() call head and its last argument line (orig. 785, 787) are missing. */
784 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
786 node->ioss_ScanKeys, node->ioss_NumScanKeys,
788}
789
790/* ----------------------------------------------------------------
791 * ExecIndexOnlyScanReInitializeDSM
792 *
793 * Reset shared state before beginning a fresh scan.
794 * ----------------------------------------------------------------
795 */
796void
803
804/* ----------------------------------------------------------------
805 * ExecIndexOnlyScanInitializeWorker
806 *
807 * Copy relevant information from TOC into planstate.
808 * ----------------------------------------------------------------
809 */
810void
/*
 * NOTE(review): extraction dropped the signature lines (orig. 811-812); per
 * the capture trailer this is
 * ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt),
 * and the "piscan" declaration (orig. 814) is also missing.
 */
813{
815
816 piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
817
 /*
  * NOTE(review): this index_beginscan_parallel() call is missing argument
  * lines (the heap relation, norderbys, and the flags value chosen by the
  * ScanRelIsReadOnly() test — orig. 819, 823, 826).
  */
818 node->ioss_ScanDesc =
820 node->ioss_RelationDesc,
821 node->ioss_Instrument,
822 node->ioss_NumScanKeys,
824 piscan,
825 ScanRelIsReadOnly(&node->ss) ?
827 node->ioss_ScanDesc->xs_want_itup = true;
828
829 /*
830 * If no run-time keys to calculate or they are ready, go ahead and pass
831 * the scankeys to the index AM.
832 */
 /* NOTE(review): the index_rescan() call head and its last argument line (orig. 834, 836) are missing. */
833 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
835 node->ioss_ScanKeys, node->ioss_NumScanKeys,
837}
838
839/*
840 * Compute the amount of space we'll need for the shared instrumentation and
841 * inform pcxt->estimator.
842 */
843void
845 ParallelContext *pcxt)
846{
847 Size size;
848
849 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
850 return;
851
852 /*
853 * This size calculation is trivial enough that we don't bother saving it
854 * in the IndexOnlyScanState. We'll recalculate the needed size in
855 * ExecIndexOnlyScanInstrumentInitDSM().
856 */
859 shm_toc_estimate_chunk(&pcxt->estimator, size);
861}
862
863/*
864 * Set up parallel index-only scan instrumentation.
865 */
866void
868 ParallelContext *pcxt)
869{
870 Size size;
871
872 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
873 return;
874
877 node->ioss_SharedInfo =
879
880 /* Each per-worker area must start out as zeroes */
881 memset(node->ioss_SharedInfo, 0, size);
882 node->ioss_SharedInfo->num_workers = pcxt->nworkers;
883 shm_toc_insert(pcxt->toc,
884 node->ss.ps.plan->plan_node_id +
886 node->ioss_SharedInfo);
887}
888
889/*
890 * Look up and save the location of the shared instrumentation.
891 */
892void
905
906/* ----------------------------------------------------------------
907 * ExecIndexOnlyScanRetrieveInstrumentation
908 *
909 * Transfer index-only scan statistics from DSM to private memory.
910 * ----------------------------------------------------------------
911 */
912void
914{
916 size_t size;
917
918 if (SharedInfo == NULL)
919 return;
920
921 /* Create a copy of SharedInfo in backend-local memory */
922 size = offsetof(SharedIndexScanInstrumentation, winstrument) +
924 node->ioss_SharedInfo = palloc(size);
925 memcpy(node->ioss_SharedInfo, SharedInfo, size);
926}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int16 AttrNumber
Definition attnum.h:21
int ParallelWorkerNumber
Definition parallel.c:117
#define InvalidBuffer
Definition buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5586
NameData * Name
Definition c.h:833
#define Assert(condition)
Definition c.h:943
#define unlikely(x)
Definition c.h:438
unsigned int Index
Definition c.h:698
size_t Size
Definition c.h:689
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
void ExecReScan(PlanState *node)
Definition execAmi.c:78
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:250
void ExecAssignScanProjectionInfoWithVarno(ScanState *node, int varno)
Definition execScan.c:94
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition execScan.c:47
void ExecScanReScan(ScanState *node)
Definition execScan.c:108
const TupleTableSlotOps TTSOpsVirtual
Definition execTuples.c:84
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops, uint16 flags)
void ExecInitResultTypeTL(PlanState *planstate)
TupleTableSlot * ExecAllocTableSlot(List **tupleTable, TupleDesc desc, const TupleTableSlotOps *tts_ops, uint16 flags)
TupleDesc ExecTypeFromTL(List *targetList)
void ExecForceStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
bool ScanRelIsReadOnly(ScanState *ss)
Definition execUtils.c:751
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition execUtils.c:490
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition execUtils.c:768
#define InstrCountTuples2(node, delta)
Definition execnodes.h:1302
#define InstrCountFiltered2(node, delta)
Definition execnodes.h:1312
static RangeTblEntry * exec_rt_fetch(Index rti, EState *estate)
Definition executor.h:710
#define ResetExprContext(econtext)
Definition executor.h:661
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition executor.h:590
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition executor.h:556
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition executor.h:589
#define EXEC_FLAG_EXPLAIN_ONLY
Definition executor.h:67
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
#define IsParallelWorker()
Definition parallel.h:62
void index_restrpos(IndexScanDesc scan)
Definition indexam.c:448
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, int nkeys, int norderbys, uint32 flags)
Definition indexam.c:257
IndexScanDesc index_beginscan_parallel(Relation heaprel, Relation indexrel, IndexScanInstrumentation *instrument, int nkeys, int norderbys, ParallelIndexScanDesc pscan, uint32 flags)
Definition indexam.c:560
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:178
ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Definition indexam.c:599
bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
Definition indexam.c:657
void index_markpos(IndexScanDesc scan)
Definition indexam.c:424
void index_endscan(IndexScanDesc scan)
Definition indexam.c:394
Size index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, Snapshot snapshot)
Definition indexam.c:470
void index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, Snapshot snapshot, ParallelIndexScanDesc target)
Definition indexam.c:505
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:134
void index_parallelrescan(IndexScanDesc scan)
Definition indexam.c:538
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition indexam.c:368
void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition indextuple.c:364
#define PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
int LOCKMODE
Definition lockdefs.h:26
#define NoLock
Definition lockdefs.h:34
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
void namestrcpy(Name name, const char *str)
Definition name.c:233
void ExecEndIndexOnlyScan(IndexOnlyScanState *node)
static TupleTableSlot * IndexOnlyNext(IndexOnlyScanState *node)
static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
void ExecIndexOnlyScanEstimate(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
void ExecIndexOnlyScanInstrumentInitWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt)
void ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
void ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
void ExecIndexOnlyScanInstrumentInitDSM(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt)
static TupleTableSlot * ExecIndexOnlyScan(PlanState *pstate)
void ExecIndexOnlyScanInstrumentEstimate(IndexOnlyScanState *node, ParallelContext *pcxt)
static bool IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
IndexOnlyScanState * ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
void ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
void ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
void ExecIndexEvalRuntimeKeys(ExprContext *econtext, IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
#define makeNode(_type_)
Definition nodes.h:161
#define castNode(_type_, nodeptr)
Definition nodes.h:182
static char * errmsg
int16 attnum
#define NAMEDATALEN
static char * DatumGetCString(Datum X)
Definition postgres.h:355
static Datum NameGetDatum(const NameData *X)
Definition postgres.h:393
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition predicate.c:2529
static int fb(int x)
#define INDEX_VAR
Definition primnodes.h:245
#define RelationGetDescr(relation)
Definition rel.h:542
#define ScanDirectionCombine(a, b)
Definition sdir.h:36
ScanDirection
Definition sdir.h:25
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition shm_toc.c:239
#define shm_toc_estimate_chunk(e, sz)
Definition shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition shm_toc.h:53
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
ExecAuxRowMark ** relsubs_rowmark
Definition execnodes.h:1378
TupleTableSlot ** relsubs_slot
Definition execnodes.h:1350
bool * relsubs_done
Definition execnodes.h:1385
int es_instrument
Definition execnodes.h:756
List * es_tupleTable
Definition execnodes.h:748
ScanDirection es_direction
Definition execnodes.h:695
struct EPQState * es_epq_active
Definition execnodes.h:778
Snapshot es_snapshot
Definition execnodes.h:696
MemoryContext ecxt_per_tuple_memory
Definition execnodes.h:295
TupleTableSlot * ecxt_scantuple
Definition execnodes.h:287
SharedIndexScanInstrumentation * ioss_SharedInfo
Definition execnodes.h:1814
TupleTableSlot * ioss_TableSlot
Definition execnodes.h:1815
IndexScanInstrumentation * ioss_Instrument
Definition execnodes.h:1813
ExprState * recheckqual
Definition execnodes.h:1802
struct IndexScanDescData * ioss_ScanDesc
Definition execnodes.h:1812
ScanKeyData * ioss_OrderByKeys
Definition execnodes.h:1805
ScanKeyData * ioss_ScanKeys
Definition execnodes.h:1803
ExprContext * ioss_RuntimeContext
Definition execnodes.h:1810
AttrNumber * ioss_NameCStringAttNums
Definition execnodes.h:1818
Relation ioss_RelationDesc
Definition execnodes.h:1811
IndexRuntimeKeyInfo * ioss_RuntimeKeys
Definition execnodes.h:1807
List * indexqual
Definition plannodes.h:660
List * recheckqual
Definition plannodes.h:662
List * indextlist
Definition plannodes.h:666
List * indexorderby
Definition plannodes.h:664
bool xs_heap_continue
Definition relscan.h:186
HeapTuple xs_hitup
Definition relscan.h:182
bool xs_recheckorderby
Definition relscan.h:201
IndexTuple xs_itup
Definition relscan.h:180
struct TupleDescData * xs_hitupdesc
Definition relscan.h:183
struct TupleDescData * xs_itupdesc
Definition relscan.h:181
Relation heapRelation
Definition relscan.h:149
shm_toc_estimator estimator
Definition parallel.h:43
shm_toc * toc
Definition parallel.h:46
Plan * plan
Definition execnodes.h:1201
EState * state
Definition execnodes.h:1203
NodeInstrumentation * instrument
Definition execnodes.h:1211
ExprContext * ps_ExprContext
Definition execnodes.h:1242
bool parallel_aware
Definition plannodes.h:219
int plan_node_id
Definition plannodes.h:233
Oid * rd_opcintype
Definition rel.h:208
TupleDesc rd_att
Definition rel.h:112
Form_pg_index rd_index
Definition rel.h:192
Relation ss_currentRelation
Definition execnodes.h:1660
TupleTableSlot * ss_ScanTupleSlot
Definition execnodes.h:1662
PlanState ps
Definition execnodes.h:1659
Index scanrelid
Definition plannodes.h:544
IndexScanInstrumentation winstrument[FLEXIBLE_ARRAY_MEMBER]
TupleDesc tts_tupleDescriptor
Definition tuptable.h:129
bool * tts_isnull
Definition tuptable.h:133
Datum * tts_values
Definition tuptable.h:131
Definition c.h:830
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition tableam.c:59
@ SO_HINT_REL_READ_ONLY
Definition tableam.h:71
@ SO_NONE
Definition tableam.h:49
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:178
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
#define VM_ALL_VISIBLE(r, b, v)
const char * name