PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
nodeIndexonlyscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * nodeIndexonlyscan.c
4 * Routines to support index-only scans
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeIndexonlyscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15/*
16 * INTERFACE ROUTINES
17 * ExecIndexOnlyScan scans an index
18 * IndexOnlyNext retrieve next tuple
19 * ExecInitIndexOnlyScan creates and initializes state info.
20 * ExecReScanIndexOnlyScan rescans the indexed relation.
21 * ExecEndIndexOnlyScan releases all storage.
22 * ExecIndexOnlyMarkPos marks scan position.
23 * ExecIndexOnlyRestrPos restores scan position.
24 * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 * parallel index-only scan
26 * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 * index-only scan
28 * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 */
31#include "postgres.h"
32
33#include "access/genam.h"
34#include "access/relscan.h"
35#include "access/tableam.h"
36#include "access/tupdesc.h"
38#include "catalog/pg_type.h"
39#include "executor/executor.h"
42#include "miscadmin.h"
43#include "storage/bufmgr.h"
44#include "storage/predicate.h"
45#include "utils/builtins.h"
46#include "utils/rel.h"
47
48
51 IndexTuple itup, TupleDesc itupdesc);
52
53
54/* ----------------------------------------------------------------
55 * IndexOnlyNext
56 *
57 * Retrieve a tuple from the IndexOnlyScan node's index.
58 * ----------------------------------------------------------------
59 */
60static TupleTableSlot *
62{
63 EState *estate;
64 ExprContext *econtext;
65 ScanDirection direction;
66 IndexScanDesc scandesc;
67 TupleTableSlot *slot;
68 ItemPointer tid;
69
70 /*
71 * extract necessary information from index scan node
72 */
73 estate = node->ss.ps.state;
74
75 /*
76 * Determine which direction to scan the index in based on the plan's scan
77 * direction and the current direction of execution.
78 */
79 direction = ScanDirectionCombine(estate->es_direction,
80 ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
81 scandesc = node->ioss_ScanDesc;
82 econtext = node->ss.ps.ps_ExprContext;
83 slot = node->ss.ss_ScanTupleSlot;
84
85 if (scandesc == NULL)
86 {
87 /*
88 * We reach here if the index only scan is not parallel, or if we're
89 * serially executing an index only scan that was planned to be
90 * parallel.
91 */
92 scandesc = index_beginscan(node->ss.ss_currentRelation,
94 estate->es_snapshot,
95 &node->ioss_Instrument,
96 node->ioss_NumScanKeys,
98
99 node->ioss_ScanDesc = scandesc;
100
101
102 /* Set it up for index-only scan */
103 node->ioss_ScanDesc->xs_want_itup = true;
105
106 /*
107 * If no run-time keys to calculate or they are ready, go ahead and
108 * pass the scankeys to the index AM.
109 */
110 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
111 index_rescan(scandesc,
112 node->ioss_ScanKeys,
113 node->ioss_NumScanKeys,
114 node->ioss_OrderByKeys,
115 node->ioss_NumOrderByKeys);
116 }
117
118 /*
119 * OK, now that we have what we need, fetch the next tuple.
120 */
121 while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
122 {
123 bool tuple_from_heap = false;
124
126
127 /*
128 * We can skip the heap fetch if the TID references a heap page on
129 * which all tuples are known visible to everybody. In any case,
130 * we'll use the index tuple not the heap tuple as the data source.
131 *
132 * Note on Memory Ordering Effects: visibilitymap_get_status does not
133 * lock the visibility map buffer, and therefore the result we read
134 * here could be slightly stale. However, it can't be stale enough to
135 * matter.
136 *
137 * We need to detect clearing a VM bit due to an insert right away,
138 * because the tuple is present in the index page but not visible. The
139 * reading of the TID by this scan (using a shared lock on the index
140 * buffer) is serialized with the insert of the TID into the index
141 * (using an exclusive lock on the index buffer). Because the VM bit
142 * is cleared before updating the index, and locking/unlocking of the
143 * index page acts as a full memory barrier, we are sure to see the
144 * cleared bit if we see a recently-inserted TID.
145 *
146 * Deletes do not update the index page (only VACUUM will clear out
147 * the TID), so the clearing of the VM bit by a delete is not
148 * serialized with this test below, and we may see a value that is
149 * significantly stale. However, we don't care about the delete right
150 * away, because the tuple is still visible until the deleting
151 * transaction commits or the statement ends (if it's our
152 * transaction). In either case, the lock on the VM buffer will have
153 * been released (acting as a write barrier) after clearing the bit.
154 * And for us to have a snapshot that includes the deleting
155 * transaction (making the tuple invisible), we must have acquired
156 * ProcArrayLock after that time, acting as a read barrier.
157 *
158 * It's worth going through this complexity to avoid needing to lock
159 * the VM buffer, which could cause significant contention.
160 */
161 if (!VM_ALL_VISIBLE(scandesc->heapRelation,
163 &node->ioss_VMBuffer))
164 {
165 /*
166 * Rats, we have to visit the heap to check visibility.
167 */
168 InstrCountTuples2(node, 1);
169 if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
170 continue; /* no visible tuple, try next index entry */
171
173
174 /*
175 * Only MVCC snapshots are supported here, so there should be no
176 * need to keep following the HOT chain once a visible entry has
177 * been found. If we did want to allow that, we'd need to keep
178 * more state to remember not to call index_getnext_tid next time.
179 */
180 if (scandesc->xs_heap_continue)
181 elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
182
183 /*
184 * Note: at this point we are holding a pin on the heap page, as
185 * recorded in scandesc->xs_cbuf. We could release that pin now,
186 * but it's not clear whether it's a win to do so. The next index
187 * entry might require a visit to the same heap page.
188 */
189
190 tuple_from_heap = true;
191 }
192
193 /*
194 * Fill the scan tuple slot with data from the index. This might be
195 * provided in either HeapTuple or IndexTuple format. Conceivably an
196 * index AM might fill both fields, in which case we prefer the heap
197 * format, since it's probably a bit cheaper to fill a slot from.
198 */
199 if (scandesc->xs_hitup)
200 {
201 /*
202 * We don't take the trouble to verify that the provided tuple has
203 * exactly the slot's format, but it seems worth doing a quick
204 * check on the number of fields.
205 */
207 scandesc->xs_hitupdesc->natts);
208 ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
209 }
210 else if (scandesc->xs_itup)
211 StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
212 else
213 elog(ERROR, "no data returned for index-only scan");
214
215 /*
216 * If the index was lossy, we have to recheck the index quals.
217 */
218 if (scandesc->xs_recheck)
219 {
220 econtext->ecxt_scantuple = slot;
221 if (!ExecQualAndReset(node->recheckqual, econtext))
222 {
223 /* Fails recheck, so drop it and loop back for another */
224 InstrCountFiltered2(node, 1);
225 continue;
226 }
227 }
228
229 /*
230 * We don't currently support rechecking ORDER BY distances. (In
231 * principle, if the index can support retrieval of the originally
232 * indexed value, it should be able to produce an exact distance
233 * calculation too. So it's not clear that adding code here for
234 * recheck/re-sort would be worth the trouble. But we should at least
235 * throw an error if someone tries it.)
236 */
237 if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
239 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
240 errmsg("lossy distance functions are not supported in index-only scans")));
241
242 /*
243 * If we didn't access the heap, then we'll need to take a predicate
244 * lock explicitly, as if we had. For now we do that at page level.
245 */
246 if (!tuple_from_heap)
249 estate->es_snapshot);
250
251 return slot;
252 }
253
254 /*
255 * If we get here, index_getnext_tid returned no further entries, so we
256 * are at the end of the scan.
257 */
258 return ExecClearTuple(slot);
259}
260
261/*
262 * StoreIndexTuple
263 * Fill the slot with data from the index tuple.
264 *
265 * At some point this might be generally-useful functionality, but
266 * right now we don't need it elsewhere.
267 */
268static void
270 IndexTuple itup, TupleDesc itupdesc)
271{
272 /*
273 * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
274 * not the slot's tupdesc, in case the latter has different datatypes
275 * (this happens for btree name_ops in particular). They'd better have
276 * the same number of columns though, as well as being datatype-compatible
277 * which is something we can't so easily check.
278 */
279 Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
280
281 ExecClearTuple(slot);
282 index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
283
284 /*
285 * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
286 * sized allocation. We mark this branch as unlikely as generally "name"
287 * is used only for the system catalogs and this would have to be a user
288 * query running on those or some other user table with an index on a name
289 * column.
290 */
291 if (unlikely(node->ioss_NameCStringAttNums != NULL))
292 {
293 int attcount = node->ioss_NameCStringCount;
294
295 for (int idx = 0; idx < attcount; idx++)
296 {
298 Name name;
299
300 /* skip null Datums */
301 if (slot->tts_isnull[attnum])
302 continue;
303
304 /* allocate the NAMEDATALEN and copy the datum into that memory */
307
308 /* use namestrcpy to zero-pad all trailing bytes */
311 }
312 }
313
315}
316
317/*
318 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
319 *
320 * This can't really happen, since an index can't supply CTID which would
321 * be necessary data for any potential EvalPlanQual target relation. If it
322 * did happen, the EPQ code would pass us the wrong data, namely a heap
323 * tuple not an index tuple. So throw an error.
324 */
325static bool
327{
328 elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
329 return false; /* keep compiler quiet */
330}
331
332/* ----------------------------------------------------------------
333 * ExecIndexOnlyScan(node)
334 * ----------------------------------------------------------------
335 */
336static TupleTableSlot *
338{
340
341 /*
342 * If we have runtime keys and they've not already been set up, do it now.
343 */
344 if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
345 ExecReScan((PlanState *) node);
346
347 return ExecScan(&node->ss,
350}
351
352/* ----------------------------------------------------------------
353 * ExecReScanIndexOnlyScan(node)
354 *
355 * Recalculates the values of any scan keys whose value depends on
356 * information known at runtime, then rescans the indexed relation.
357 *
358 * Updating the scan key was formerly done separately in
359 * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
360 * rescans of indices and relations/general streams more uniform.
361 * ----------------------------------------------------------------
362 */
363void
365{
366 /*
367 * If we are doing runtime key calculations (ie, any of the index key
368 * values weren't simple Consts), compute the new key values. But first,
369 * reset the context so we don't leak memory as each outer tuple is
370 * scanned. Note this assumes that we will recalculate *all* runtime keys
371 * on each call.
372 */
373 if (node->ioss_NumRuntimeKeys != 0)
374 {
375 ExprContext *econtext = node->ioss_RuntimeContext;
376
377 ResetExprContext(econtext);
379 node->ioss_RuntimeKeys,
380 node->ioss_NumRuntimeKeys);
381 }
382 node->ioss_RuntimeKeysReady = true;
383
384 /* reset index scan */
385 if (node->ioss_ScanDesc)
387 node->ioss_ScanKeys, node->ioss_NumScanKeys,
389
390 ExecScanReScan(&node->ss);
391}
392
393
394/* ----------------------------------------------------------------
395 * ExecEndIndexOnlyScan
396 * ----------------------------------------------------------------
397 */
398void
400{
401 Relation indexRelationDesc;
402 IndexScanDesc indexScanDesc;
403
404 /*
405 * extract information from the node
406 */
407 indexRelationDesc = node->ioss_RelationDesc;
408 indexScanDesc = node->ioss_ScanDesc;
409
410 /* Release VM buffer pin, if any. */
411 if (node->ioss_VMBuffer != InvalidBuffer)
412 {
415 }
416
417 /*
418 * When ending a parallel worker, copy the statistics gathered by the
419 * worker back into shared memory so that it can be picked up by the main
420 * process to report in EXPLAIN ANALYZE.
421 */
422 if (node->ioss_SharedInfo != NULL && IsParallelWorker())
423 {
424 IndexScanInstrumentation *winstrument;
425
426 Assert(ParallelWorkerNumber <= node->ioss_SharedInfo->num_workers);
427 winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];
428
429 /*
430 * We have to accumulate the stats rather than performing a memcpy.
431 * When a Gather/GatherMerge node finishes it will perform executor
432 * shutdown on the workers. On rescan it will spin up new workers
433 * which will have a new IndexOnlyScanState and zeroed stats.
434 */
435 winstrument->nsearches += node->ioss_Instrument.nsearches;
436 }
437
438 /*
439 * close the index relation (no-op if we didn't open it)
440 */
441 if (indexScanDesc)
442 index_endscan(indexScanDesc);
443 if (indexRelationDesc)
444 index_close(indexRelationDesc, NoLock);
445}
446
447/* ----------------------------------------------------------------
448 * ExecIndexOnlyMarkPos
449 *
450 * Note: we assume that no caller attempts to set a mark before having read
451 * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
452 * ----------------------------------------------------------------
453 */
454void
456{
457 EState *estate = node->ss.ps.state;
458 EPQState *epqstate = estate->es_epq_active;
459
460 if (epqstate != NULL)
461 {
462 /*
463 * We are inside an EvalPlanQual recheck. If a test tuple exists for
464 * this relation, then we shouldn't access the index at all. We would
465 * instead need to save, and later restore, the state of the
466 * relsubs_done flag, so that re-fetching the test tuple is possible.
467 * However, given the assumption that no caller sets a mark at the
468 * start of the scan, we can only get here with relsubs_done[i]
469 * already set, and so no state need be saved.
470 */
471 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
472
473 Assert(scanrelid > 0);
474 if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
475 epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
476 {
477 /* Verify the claim above */
478 if (!epqstate->relsubs_done[scanrelid - 1])
479 elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
480 return;
481 }
482 }
483
485}
486
487/* ----------------------------------------------------------------
488 * ExecIndexOnlyRestrPos
489 * ----------------------------------------------------------------
490 */
491void
493{
494 EState *estate = node->ss.ps.state;
495 EPQState *epqstate = estate->es_epq_active;
496
497 if (estate->es_epq_active != NULL)
498 {
499 /* See comments in ExecIndexOnlyMarkPos */
500 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
501
502 Assert(scanrelid > 0);
503 if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
504 epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
505 {
506 /* Verify the claim above */
507 if (!epqstate->relsubs_done[scanrelid - 1])
508 elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
509 return;
510 }
511 }
512
514}
515
516/* ----------------------------------------------------------------
517 * ExecInitIndexOnlyScan
518 *
519 * Initializes the index scan's state information, creates
520 * scan keys, and opens the base and index relations.
521 *
522 * Note: index scans have 2 sets of state information because
523 * we have to keep track of the base relation and the
524 * index relation.
525 * ----------------------------------------------------------------
526 */
528ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
529{
530 IndexOnlyScanState *indexstate;
531 Relation currentRelation;
532 Relation indexRelation;
533 LOCKMODE lockmode;
534 TupleDesc tupDesc;
535 int indnkeyatts;
536 int namecount;
537
538 /*
539 * create state structure
540 */
541 indexstate = makeNode(IndexOnlyScanState);
542 indexstate->ss.ps.plan = (Plan *) node;
543 indexstate->ss.ps.state = estate;
544 indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
545
546 /*
547 * Miscellaneous initialization
548 *
549 * create expression context for node
550 */
551 ExecAssignExprContext(estate, &indexstate->ss.ps);
552
553 /*
554 * open the scan relation
555 */
556 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
557
558 indexstate->ss.ss_currentRelation = currentRelation;
559 indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
560
561 /*
562 * Build the scan tuple type using the indextlist generated by the
563 * planner. We use this, rather than the index's physical tuple
564 * descriptor, because the latter contains storage column types not the
565 * types of the original datums. (It's the AM's responsibility to return
566 * suitable data anyway.)
567 */
568 tupDesc = ExecTypeFromTL(node->indextlist);
569 ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
571
572 /*
573 * We need another slot, in a format that's suitable for the table AM, for
574 * when we need to fetch a tuple from the table for rechecking visibility.
575 */
576 indexstate->ioss_TableSlot =
578 RelationGetDescr(currentRelation),
579 table_slot_callbacks(currentRelation));
580
581 /*
582 * Initialize result type and projection info. The node's targetlist will
583 * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
584 */
585 ExecInitResultTypeTL(&indexstate->ss.ps);
587
588 /*
589 * initialize child expressions
590 *
591 * Note: we don't initialize all of the indexorderby expression, only the
592 * sub-parts corresponding to runtime keys (see below).
593 */
594 indexstate->ss.ps.qual =
595 ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
596 indexstate->recheckqual =
597 ExecInitQual(node->recheckqual, (PlanState *) indexstate);
598
599 /*
600 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
601 * here. This allows an index-advisor plugin to EXPLAIN a plan containing
602 * references to nonexistent indexes.
603 */
604 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
605 return indexstate;
606
607 /* Open the index relation. */
608 lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
609 indexRelation = index_open(node->indexid, lockmode);
610 indexstate->ioss_RelationDesc = indexRelation;
611
612 /*
613 * Initialize index-specific scan state
614 */
615 indexstate->ioss_RuntimeKeysReady = false;
616 indexstate->ioss_RuntimeKeys = NULL;
617 indexstate->ioss_NumRuntimeKeys = 0;
618
619 /*
620 * build the index scan keys from the index qualification
621 */
622 ExecIndexBuildScanKeys((PlanState *) indexstate,
623 indexRelation,
624 node->indexqual,
625 false,
626 &indexstate->ioss_ScanKeys,
627 &indexstate->ioss_NumScanKeys,
628 &indexstate->ioss_RuntimeKeys,
629 &indexstate->ioss_NumRuntimeKeys,
630 NULL, /* no ArrayKeys */
631 NULL);
632
633 /*
634 * any ORDER BY exprs have to be turned into scankeys in the same way
635 */
636 ExecIndexBuildScanKeys((PlanState *) indexstate,
637 indexRelation,
638 node->indexorderby,
639 true,
640 &indexstate->ioss_OrderByKeys,
641 &indexstate->ioss_NumOrderByKeys,
642 &indexstate->ioss_RuntimeKeys,
643 &indexstate->ioss_NumRuntimeKeys,
644 NULL, /* no ArrayKeys */
645 NULL);
646
647 /*
648 * If we have runtime keys, we need an ExprContext to evaluate them. The
649 * node's standard context won't do because we want to reset that context
650 * for every tuple. So, build another context just like the other one...
651 * -tgl 7/11/00
652 */
653 if (indexstate->ioss_NumRuntimeKeys != 0)
654 {
655 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
656
657 ExecAssignExprContext(estate, &indexstate->ss.ps);
658 indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
659 indexstate->ss.ps.ps_ExprContext = stdecontext;
660 }
661 else
662 {
663 indexstate->ioss_RuntimeContext = NULL;
664 }
665
666 indexstate->ioss_NameCStringAttNums = NULL;
667 indnkeyatts = indexRelation->rd_index->indnkeyatts;
668 namecount = 0;
669
670 /*
671 * The "name" type for btree uses text_ops which results in storing
672 * cstrings in the indexed keys rather than names. Here we detect that in
673 * a generic way in case other index AMs want to do the same optimization.
674 * Check for opclasses with an opcintype of NAMEOID and an index tuple
675 * descriptor with CSTRINGOID. If any of these are found, create an array
676 * marking the index attribute number of each of them. StoreIndexTuple()
677 * handles copying the name Datums into a NAMEDATALEN-byte allocation.
678 */
679
680 /* First, count the number of such index keys */
681 for (int attnum = 0; attnum < indnkeyatts; attnum++)
682 {
683 if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
684 indexRelation->rd_opcintype[attnum] == NAMEOID)
685 namecount++;
686 }
687
688 if (namecount > 0)
689 {
690 int idx = 0;
691
692 /*
693 * Now create an array to mark the attribute numbers of the keys that
694 * need to be converted from cstring to name.
695 */
696 indexstate->ioss_NameCStringAttNums = (AttrNumber *)
697 palloc(sizeof(AttrNumber) * namecount);
698
699 for (int attnum = 0; attnum < indnkeyatts; attnum++)
700 {
701 if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
702 indexRelation->rd_opcintype[attnum] == NAMEOID)
703 indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
704 }
705 }
706
707 indexstate->ioss_NameCStringCount = namecount;
708
709 /*
710 * all done.
711 */
712 return indexstate;
713}
714
715/* ----------------------------------------------------------------
716 * Parallel Index-only Scan Support
717 * ----------------------------------------------------------------
718 */
719
720/* ----------------------------------------------------------------
721 * ExecIndexOnlyScanEstimate
722 *
723 * Compute the amount of space we'll need in the parallel
724 * query DSM, and inform pcxt->estimator about our needs.
725 * ----------------------------------------------------------------
726 */
727void
729 ParallelContext *pcxt)
730{
731 EState *estate = node->ss.ps.state;
732 bool instrument = (node->ss.ps.instrument != NULL);
733 bool parallel_aware = node->ss.ps.plan->parallel_aware;
734
735 if (!instrument && !parallel_aware)
736 {
737 /* No DSM required by the scan */
738 return;
739 }
740
742 node->ioss_NumScanKeys,
744 estate->es_snapshot,
745 instrument, parallel_aware,
746 pcxt->nworkers);
749}
750
751/* ----------------------------------------------------------------
752 * ExecIndexOnlyScanInitializeDSM
753 *
754 * Set up a parallel index-only scan descriptor.
755 * ----------------------------------------------------------------
756 */
757void
759 ParallelContext *pcxt)
760{
761 EState *estate = node->ss.ps.state;
763 bool instrument = node->ss.ps.instrument != NULL;
764 bool parallel_aware = node->ss.ps.plan->parallel_aware;
765
766 if (!instrument && !parallel_aware)
767 {
768 /* No DSM required by the scan */
769 return;
770 }
771
772 piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
774 node->ioss_RelationDesc,
775 estate->es_snapshot,
776 instrument, parallel_aware, pcxt->nworkers,
777 &node->ioss_SharedInfo, piscan);
778 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
779
780 if (!parallel_aware)
781 {
782 /* Only here to initialize SharedInfo in DSM */
783 return;
784 }
785
786 node->ioss_ScanDesc =
788 node->ioss_RelationDesc,
789 &node->ioss_Instrument,
790 node->ioss_NumScanKeys,
792 piscan);
793 node->ioss_ScanDesc->xs_want_itup = true;
795
796 /*
797 * If no run-time keys to calculate or they are ready, go ahead and pass
798 * the scankeys to the index AM.
799 */
800 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
802 node->ioss_ScanKeys, node->ioss_NumScanKeys,
804}
805
806/* ----------------------------------------------------------------
807 * ExecIndexOnlyScanReInitializeDSM
808 *
809 * Reset shared state before beginning a fresh scan.
810 * ----------------------------------------------------------------
811 */
812void
814 ParallelContext *pcxt)
815{
818}
819
820/* ----------------------------------------------------------------
821 * ExecIndexOnlyScanInitializeWorker
822 *
823 * Copy relevant information from TOC into planstate.
824 * ----------------------------------------------------------------
825 */
826void
829{
831 bool instrument = node->ss.ps.instrument != NULL;
832 bool parallel_aware = node->ss.ps.plan->parallel_aware;
833
834 if (!instrument && !parallel_aware)
835 {
836 /* No DSM required by the scan */
837 return;
838 }
839
840 piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
841
842 if (instrument)
844 OffsetToPointer(piscan, piscan->ps_offset_ins);
845
846 if (!parallel_aware)
847 {
848 /* Only here to set up worker node's SharedInfo */
849 return;
850 }
851
852 node->ioss_ScanDesc =
854 node->ioss_RelationDesc,
855 &node->ioss_Instrument,
856 node->ioss_NumScanKeys,
858 piscan);
859 node->ioss_ScanDesc->xs_want_itup = true;
860
861 /*
862 * If no run-time keys to calculate or they are ready, go ahead and pass
863 * the scankeys to the index AM.
864 */
865 if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
867 node->ioss_ScanKeys, node->ioss_NumScanKeys,
869}
870
871/* ----------------------------------------------------------------
872 * ExecIndexOnlyScanRetrieveInstrumentation
873 *
874 * Transfer index-only scan statistics from DSM to private memory.
875 * ----------------------------------------------------------------
876 */
877void
879{
881 size_t size;
882
883 if (SharedInfo == NULL)
884 return;
885
886 /* Create a copy of SharedInfo in backend-local memory */
887 size = offsetof(SharedIndexScanInstrumentation, winstrument) +
888 SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
889 node->ioss_SharedInfo = palloc(size);
890 memcpy(node->ioss_SharedInfo, SharedInfo, size);
891}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
int16 AttrNumber
Definition: attnum.h:21
int ParallelWorkerNumber
Definition: parallel.c:115
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5373
#define OffsetToPointer(base, offset)
Definition: c.h:743
NameData * Name
Definition: c.h:715
#define unlikely(x)
Definition: c.h:347
unsigned int Index
Definition: c.h:585
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
void ExecReScan(PlanState *node)
Definition: execAmi.c:77
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:229
void ExecAssignScanProjectionInfoWithVarno(ScanState *node, int varno)
Definition: execScan.c:94
TupleTableSlot * ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
Definition: execScan.c:47
void ExecScanReScan(ScanState *node)
Definition: execScan.c:108
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
TupleTableSlot * ExecStoreVirtualTuple(TupleTableSlot *slot)
Definition: execTuples.c:1741
TupleTableSlot * ExecAllocTableSlot(List **tupleTable, TupleDesc desc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1360
void ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:2000
void ExecInitResultTypeTL(PlanState *planstate)
Definition: execTuples.c:1944
TupleDesc ExecTypeFromTL(List *targetList)
Definition: execTuples.c:2127
void ExecForceStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1658
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:486
Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
Definition: execUtils.c:743
#define InstrCountTuples2(node, delta)
Definition: execnodes.h:1258
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1268
static RangeTblEntry * exec_rt_fetch(Index rti, EState *estate)
Definition: executor.h:719
#define ResetExprContext(econtext)
Definition: executor.h:672
bool(* ExecScanRecheckMtd)(ScanState *node, TupleTableSlot *slot)
Definition: executor.h:602
static bool ExecQualAndReset(ExprState *state, ExprContext *econtext)
Definition: executor.h:568
TupleTableSlot *(* ExecScanAccessMtd)(ScanState *node)
Definition: executor.h:601
#define EXEC_FLAG_EXPLAIN_ONLY
Definition: executor.h:66
struct IndexScanInstrumentation IndexScanInstrumentation
Assert(PointerIsAligned(start, uint64))
#define IsParallelWorker()
Definition: parallel.h:60
void index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, Snapshot snapshot, bool instrument, bool parallel_aware, int nworkers, SharedIndexScanInstrumentation **sharedinfo, ParallelIndexScanDesc target)
Definition: indexam.c:510
IndexScanDesc index_beginscan_parallel(Relation heaprel, Relation indexrel, IndexScanInstrumentation *instrument, int nkeys, int norderbys, ParallelIndexScanDesc pscan)
Definition: indexam.c:583
void index_restrpos(IndexScanDesc scan)
Definition: indexam.c:436
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, int nkeys, int norderbys)
Definition: indexam.c:256
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Definition: indexam.c:621
bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
Definition: indexam.c:679
void index_markpos(IndexScanDesc scan)
Definition: indexam.c:412
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:382
Size index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys, Snapshot snapshot, bool instrument, bool parallel_aware, int nworkers)
Definition: indexam.c:461
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
void index_parallelrescan(IndexScanDesc scan)
Definition: indexam.c:565
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:356
void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:456
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
int LOCKMODE
Definition: lockdefs.h:26
#define NoLock
Definition: lockdefs.h:34
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1260
void * palloc(Size size)
Definition: mcxt.c:1943
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void ExecEndIndexOnlyScan(IndexOnlyScanState *node)
static TupleTableSlot * IndexOnlyNext(IndexOnlyScanState *node)
static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
void ExecIndexOnlyScanEstimate(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
void ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
void ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
void ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelWorkerContext *pwcxt)
static TupleTableSlot * ExecIndexOnlyScan(PlanState *pstate)
static bool IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
IndexOnlyScanState * ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
void ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
void ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, ParallelContext *pcxt)
void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
void ExecIndexEvalRuntimeKeys(ExprContext *econtext, IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
#define makeNode(_type_)
Definition: nodes.h:161
#define castNode(_type_, nodeptr)
Definition: nodes.h:182
int16 attnum
Definition: pg_attribute.h:74
#define NAMEDATALEN
static char * DatumGetCString(Datum X)
Definition: postgres.h:340
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:378
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition: predicate.c:2599
#define INDEX_VAR
Definition: primnodes.h:244
#define RelationGetDescr(relation)
Definition: rel.h:542
#define ScanDirectionCombine(a, b)
Definition: sdir.h:36
ScanDirection
Definition: sdir.h:25
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
ExecAuxRowMark ** relsubs_rowmark
Definition: execnodes.h:1334
TupleTableSlot ** relsubs_slot
Definition: execnodes.h:1306
bool * relsubs_done
Definition: execnodes.h:1341
List * es_tupleTable
Definition: execnodes.h:710
ScanDirection es_direction
Definition: execnodes.h:656
struct EPQState * es_epq_active
Definition: execnodes.h:741
Snapshot es_snapshot
Definition: execnodes.h:657
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:276
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:268
SharedIndexScanInstrumentation * ioss_SharedInfo
Definition: execnodes.h:1769
TupleTableSlot * ioss_TableSlot
Definition: execnodes.h:1770
bool ioss_RuntimeKeysReady
Definition: execnodes.h:1764
struct ScanKeyData * ioss_ScanKeys
Definition: execnodes.h:1758
ExprState * recheckqual
Definition: execnodes.h:1757
struct ScanKeyData * ioss_OrderByKeys
Definition: execnodes.h:1760
struct IndexScanDescData * ioss_ScanDesc
Definition: execnodes.h:1767
ExprContext * ioss_RuntimeContext
Definition: execnodes.h:1765
AttrNumber * ioss_NameCStringAttNums
Definition: execnodes.h:1773
Relation ioss_RelationDesc
Definition: execnodes.h:1766
IndexScanInstrumentation ioss_Instrument
Definition: execnodes.h:1768
IndexRuntimeKeyInfo * ioss_RuntimeKeys
Definition: execnodes.h:1762
List * indexqual
Definition: plannodes.h:599
List * recheckqual
Definition: plannodes.h:601
List * indextlist
Definition: plannodes.h:605
List * indexorderby
Definition: plannodes.h:603
bool xs_heap_continue
Definition: relscan.h:173
HeapTuple xs_hitup
Definition: relscan.h:169
int numberOfOrderBys
Definition: relscan.h:140
bool xs_recheckorderby
Definition: relscan.h:188
IndexTuple xs_itup
Definition: relscan.h:167
struct TupleDescData * xs_hitupdesc
Definition: relscan.h:170
struct TupleDescData * xs_itupdesc
Definition: relscan.h:168
Relation heapRelation
Definition: relscan.h:136
shm_toc_estimator estimator
Definition: parallel.h:41
shm_toc * toc
Definition: parallel.h:44
Instrumentation * instrument
Definition: execnodes.h:1169
ExprState * qual
Definition: execnodes.h:1180
Plan * plan
Definition: execnodes.h:1159
EState * state
Definition: execnodes.h:1161
ExprContext * ps_ExprContext
Definition: execnodes.h:1198
ExecProcNodeMtd ExecProcNode
Definition: execnodes.h:1165
bool parallel_aware
Definition: plannodes.h:193
int plan_node_id
Definition: plannodes.h:207
Oid * rd_opcintype
Definition: rel.h:208
TupleDesc rd_att
Definition: rel.h:112
Form_pg_index rd_index
Definition: rel.h:192
Relation ss_currentRelation
Definition: execnodes.h:1616
TupleTableSlot * ss_ScanTupleSlot
Definition: execnodes.h:1618
PlanState ps
Definition: execnodes.h:1615
struct TableScanDescData * ss_currentScanDesc
Definition: execnodes.h:1617
Index scanrelid
Definition: plannodes.h:483
IndexScanInstrumentation winstrument[FLEXIBLE_ARRAY_MEMBER]
Definition: genam.h:47
TupleDesc tts_tupleDescriptor
Definition: tuptable.h:123
bool * tts_isnull
Definition: tuptable.h:127
Datum * tts_values
Definition: tuptable.h:125
Definition: c.h:712
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:59
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:160
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:458
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
const char * name