/*-------------------------------------------------------------------------
 *
 * tableam.h
 *	  POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *	  See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H
19
20#include "access/relscan.h"
21#include "access/sdir.h"
22#include "access/xact.h"
23#include "executor/tuptable.h"
24#include "storage/read_stream.h"
25#include "utils/rel.h"
26#include "utils/snapshot.h"
27
28
#define DEFAULT_TABLE_ACCESS_METHOD	"heap"

/* GUCs */
extern PGDLLIMPORT char *default_table_access_method;
extern PGDLLIMPORT bool synchronize_seqscans;

struct IndexInfo;
struct SampleScanState;
struct VacuumParams;
struct ValidateIndexState;

/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
	/* one of SO_TYPE_* may be specified */
	SO_TYPE_SEQSCAN = 1 << 0,
	SO_TYPE_BITMAPSCAN = 1 << 1,
	SO_TYPE_SAMPLESCAN = 1 << 2,
	SO_TYPE_TIDSCAN = 1 << 3,
	SO_TYPE_TIDRANGESCAN = 1 << 4,
	SO_TYPE_ANALYZE = 1 << 5,

	/* several of SO_ALLOW_* may be specified */
	/* allow or disallow use of access strategy */
	SO_ALLOW_STRAT = 1 << 6,
	/* report location to syncscan logic? */
	SO_ALLOW_SYNC = 1 << 7,
	/* verify visibility page-at-a-time? */
	SO_ALLOW_PAGEMODE = 1 << 8,

	/* unregister snapshot at scan end? */
	SO_TEMP_SNAPSHOT = 1 << 9,

	/*
	 * At the discretion of the table AM, bitmap table scans may be able to
	 * skip fetching a block from the table if none of the table data is
	 * needed.  If table data may be needed, set SO_NEED_TUPLES.
	 */
	SO_NEED_TUPLES = 1 << 10,
} ScanOptions;

/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
	/*
	 * Signals that the action succeeded (i.e. update/delete performed, lock
	 * was acquired)
	 */
	TM_Ok,

	/* The affected tuple wasn't visible to the relevant snapshot */
	TM_Invisible,

	/* The affected tuple was already modified by the calling backend */
	TM_SelfModified,

	/*
	 * The affected tuple was updated by another transaction.  This includes
	 * the case where the tuple was moved to another partition.
	 */
	TM_Updated,

	/* The affected tuple was deleted by another transaction */
	TM_Deleted,

	/*
	 * The affected tuple is currently being modified by another session.
	 * This will only be returned if table_(update/delete/lock_tuple) are
	 * instructed not to wait.
	 */
	TM_BeingModified,

	/* lock couldn't be acquired, action skipped.  Only used by lock_tuple */
	TM_WouldBlock,
} TM_Result;

/*
 * Result codes for table_tuple_update(..., update_indexes, ...).
 * Used to determine which indexes to update.
 */
typedef enum TU_UpdateIndexes
{
	/* No indexed columns were updated (incl. TID addressing of tuple) */
	TU_None,

	/* A non-summarizing indexed column was updated, or the TID has changed */
	TU_All,

	/* Only summarized columns were updated, TID is unchanged */
	TU_Summarizing,
} TU_UpdateIndexes;

/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID.  If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.  This is InvalidTransactionId if the target
 * was !LP_NORMAL (expected only for a TID retrieved from syscache).
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero.  (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
	ItemPointerData ctid;
	TransactionId xmax;
	CommandId	cmax;
	bool		traversed;
} TM_FailureData;

/*
 * State used when calling table_index_delete_tuples().
 *
 * Represents the status of table tuples, referenced by table TID and taken
 * by index AM from index tuples.  State consists of high level parameters of
 * the deletion operation, plus two mutable palloc()'d arrays for information
 * about the status of individual table tuples.  These are conceptually one
 * single array.  Using two arrays keeps the TM_IndexDelete struct small,
 * which makes sorting the first array (the deltids array) fast.
 *
 * Some index AM callers perform simple index tuple deletion (by specifying
 * bottomup = false), and include only known-dead deltids.  These known-dead
 * entries are all marked knowndeletable = true directly (typically these are
 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
 *
 * Callers that specify bottomup = true are "bottom-up index deletion"
 * callers.  The considerations for the tableam are more subtle with these
 * callers because they ask the tableam to perform highly speculative work,
 * and might only expect the tableam to check a small fraction of all entries.
 * Caller is not allowed to specify knowndeletable = true for any entry
 * because everything is highly speculative.  Bottom-up caller provides
 * context and hints to tableam -- see comments below for details on how index
 * AMs and tableams should coordinate during bottom-up index deletion.
 *
 * Simple index deletion callers may ask the tableam to perform speculative
 * work, too.  This is a little like bottom-up deletion, but not too much.
 * The tableam will only perform speculative work when it's practically free
 * to do so in passing for simple deletion caller (while always performing
 * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
 * be deleted within index AM).  This is the real reason why it's possible for
 * a simple index deletion caller to specify knowndeletable = false up front
 * (this means "check if it's possible for me to delete corresponding index
 * tuple when it's cheap to do so in passing").  The index AM should only
 * include "extra" entries for index tuples whose TIDs point to a table block
 * that tableam is expected to have to visit anyway (in the event of a block
 * oriented tableam).  The tableam isn't strictly obligated to check these
 * "extra" TIDs, but a block-based AM should always manage to do so in
 * practice.
 *
 * The final contents of the deltids/status arrays are interesting to callers
 * that ask tableam to perform speculative work (i.e. when _any_ items have
 * knowndeletable set to false up front).  These index AM callers will
 * naturally need to consult final state to determine which index tuples are
 * in fact deletable.
 *
 * The index AM can keep track of which index tuple relates to which deltid by
 * setting idxoffnum (and/or relying on each entry being uniquely identifiable
 * using tid), which is important when the final contents of the array will
 * need to be interpreted -- the array can shrink from initial size after
 * tableam processing and/or have entries in a new order (tableam may sort
 * deltids array for its own reasons).  Bottom-up callers may find that final
 * ndeltids is 0 on return from call to tableam, in which case no index tuple
 * deletions are possible.  Simple deletion callers can rely on any entries
 * they know to be deletable appearing in the final array as deletable.
 */
typedef struct TM_IndexDelete
{
	ItemPointerData tid;		/* table TID from index tuple */
	int16		id;				/* Offset into TM_IndexStatus array */
} TM_IndexDelete;

typedef struct TM_IndexStatus
{
	OffsetNumber idxoffnum;		/* Index AM page offset number */
	bool		knowndeletable; /* Currently known to be deletable? */

	/* Bottom-up index deletion specific fields follow */
	bool		promising;		/* Promising (duplicate) index tuple? */
	int16		freespace;		/* Space freed in index if deleted */
} TM_IndexStatus;

/*
 * Index AM/tableam coordination is central to the design of bottom-up index
 * deletion.  The index AM provides the tableam with hints about where to look
 * by marking some entries as "promising".  Index AM does this with duplicate
 * index tuples that are strongly suspected to be old versions left behind by
 * UPDATEs that did not logically modify indexed values.  Index AM may find it
 * helpful to only mark entries as promising when they're thought to have been
 * affected by such an UPDATE in the recent past.
 *
 * Bottom-up index deletion casts a wide net at first, usually by including
 * all TIDs on a target index page.  It is up to the tableam to worry about
 * the cost of checking transaction status information.  The tableam is in
 * control, but needs careful guidance from the index AM.  Index AM requests
 * that the bottomupfreespace target be met, while tableam measures progress
 * towards that goal by tallying the per-entry freespace value for known
 * deletable entries.  (All !bottomup callers can just set these space related
 * fields to zero.)
 */
typedef struct TM_IndexDeleteOp
{
	Relation	irel;			/* Target index relation */
	BlockNumber iblknum;		/* Index block number (for error reports) */
	bool		bottomup;		/* Bottom-up (not simple) deletion? */
	int			bottomupfreespace;	/* Bottom-up space target */

	/* Mutable per-TID information follows (index AM initializes entries) */
	int			ndeltids;		/* Current # of deltids/status elements */
	TM_IndexDelete *deltids;	/* Deletion request array */
	TM_IndexStatus *status;		/* Status/info for each deltid entry */
} TM_IndexDeleteOp;

/* "options" flag bits for table_tuple_insert */
/* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
#define TABLE_INSERT_SKIP_FSM		0x0002
#define TABLE_INSERT_FROZEN			0x0004
#define TABLE_INSERT_NO_LOGICAL		0x0008

/* flag bits for table_tuple_lock */
/* Follow tuples whose update is in progress if lock modes don't conflict */
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS	(1 << 0)
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION		(1 << 1)


/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
									ItemPointer tid,
									Datum *values,
									bool *isnull,
									bool tupleIsAlive,
									void *state);

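/*
 * For illustration, a sketch of how an index build might implement this
 * callback (hypothetical AM and state struct; not part of this header):
 *
 *	static void
 *	example_build_callback(Relation index, ItemPointer tid, Datum *values,
 *						   bool *isnull, bool tupleIsAlive, void *state)
 *	{
 *		ExampleBuildState *buildstate = (ExampleBuildState *) state;
 *
 *		form an index tuple from values/isnull here, and add an entry
 *		pointing at `tid` to the in-progress index
 *	}
 */
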
/*
 * API struct for a table AM.  Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then gets
 * returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly; use the
 * table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in; remember
 * to update it when adding a callback.
 */
typedef struct TableAmRoutine
{
	/* this must be set to T_TableAmRoutine */
	NodeTag		type;

	/* ------------------------------------------------------------------------
	 * Slot related callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Return slot implementation suitable for storing a tuple of this AM.
	 */
	const TupleTableSlotOps *(*slot_callbacks) (Relation rel);


	/* ------------------------------------------------------------------------
	 * Table scan callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Start a scan of `rel`.  The callback has to return a TableScanDesc,
	 * which will typically be embedded in a larger, AM specific, struct.
	 *
	 * If nkeys != 0, the results need to be filtered by those scan keys.
	 *
	 * pscan, if not NULL, will have already been initialized with
	 * parallelscan_initialize(), and has to be for the same relation.  Will
	 * only be set coming from table_beginscan_parallel().
	 *
	 * `flags` is a bitmask indicating the type of scan (ScanOptions's
	 * SO_TYPE_*, currently only one may be specified), options controlling
	 * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
	 * specified, an AM may ignore unsupported ones) and whether the snapshot
	 * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
	 */
	TableScanDesc (*scan_begin) (Relation rel,
								 Snapshot snapshot,
								 int nkeys, struct ScanKeyData *key,
								 ParallelTableScanDesc pscan,
								 uint32 flags);

	/*
	 * Release resources and deallocate scan.  If TableScanDesc.temp_snap,
	 * TableScanDesc.rs_snapshot needs to be unregistered.
	 */
	void		(*scan_end) (TableScanDesc scan);

	/*
	 * Restart relation scan.  If set_params is set to true, allow_{strat,
	 * sync, pagemode} (see scan_begin) changes should be taken into account.
	 */
	void		(*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
								bool set_params, bool allow_strat,
								bool allow_sync, bool allow_pagemode);

	/*
	 * Return next tuple from `scan`, store in slot.
	 */
	bool		(*scan_getnextslot) (TableScanDesc scan,
									 ScanDirection direction,
									 TupleTableSlot *slot);

	/*-----------
	 * Optional functions to provide scanning for ranges of ItemPointers.
	 * Implementations must either provide both of these functions, or neither
	 * of them.
	 *
	 * Implementations of scan_set_tidrange must themselves handle
	 * ItemPointers of any value.  I.e., they must handle each of the following:
	 *
	 * 1) mintid or maxtid is beyond the end of the table; and
	 * 2) mintid is above maxtid; and
	 * 3) item offset for mintid or maxtid is beyond the maximum offset
	 *    allowed by the AM.
	 *
	 * Implementations can assume that scan_set_tidrange is always called
	 * before scan_getnextslot_tidrange or after scan_rescan and before any
	 * further calls to scan_getnextslot_tidrange.
	 */
	void		(*scan_set_tidrange) (TableScanDesc scan,
									  ItemPointer mintid,
									  ItemPointer maxtid);

	/*
	 * Return next tuple from `scan` that's in the range of TIDs defined by
	 * scan_set_tidrange.
	 */
	bool		(*scan_getnextslot_tidrange) (TableScanDesc scan,
											  ScanDirection direction,
											  TupleTableSlot *slot);

	/* ------------------------------------------------------------------------
	 * Parallel table scan related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Estimate the size of shared memory needed for a parallel scan of this
	 * relation.  The snapshot does not need to be accounted for.
	 */
	Size		(*parallelscan_estimate) (Relation rel);

	/*
	 * Initialize ParallelTableScanDesc for a parallel scan of this relation.
	 * `pscan` will be sized according to parallelscan_estimate() for the same
	 * relation.
	 */
	Size		(*parallelscan_initialize) (Relation rel,
											ParallelTableScanDesc pscan);

	/*
	 * Reinitialize `pscan` for a new scan.  `rel` will be the same relation as
	 * when `pscan` was initialized by parallelscan_initialize.
	 */
	void		(*parallelscan_reinitialize) (Relation rel,
											  ParallelTableScanDesc pscan);

	/* ------------------------------------------------------------------------
	 * Index Scan Callbacks
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Prepare to fetch tuples from the relation, as needed when fetching
	 * tuples for an index scan.  The callback has to return an
	 * IndexFetchTableData, which the AM will typically embed in a larger
	 * structure with additional information.
	 *
	 * Tuples for an index scan can then be fetched via index_fetch_tuple.
	 */
	struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

	/*
	 * Reset index fetch.  Typically this will release cross index fetch
	 * resources held in IndexFetchTableData.
	 */
	void		(*index_fetch_reset) (struct IndexFetchTableData *data);

	/*
	 * Release resources and deallocate index fetch.
	 */
	void		(*index_fetch_end) (struct IndexFetchTableData *data);

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`.  If a tuple was found and passed the visibility
	 * test, return true, false otherwise.
	 *
	 * Note that AMs that do not necessarily update indexes when indexed
	 * columns do not change need to return the current/correct version of
	 * the tuple that is visible to the snapshot, even if the tid points to an
	 * older version of the tuple.
	 *
	 * *call_again is false on the first call to index_fetch_tuple for a tid.
	 * If there potentially is another tuple matching the tid, *call_again
	 * needs to be set to true by index_fetch_tuple, signaling to the caller
	 * that index_fetch_tuple should be called again for the same tid.
	 *
	 * *all_dead, if all_dead is not NULL, should be set to true by
	 * index_fetch_tuple iff it is guaranteed that no backend needs to see
	 * that tuple.  Index AMs can use that to avoid returning that tid in
	 * future searches.
	 */
	bool		(*index_fetch_tuple) (struct IndexFetchTableData *scan,
									  ItemPointer tid,
									  Snapshot snapshot,
									  TupleTableSlot *slot,
									  bool *call_again, bool *all_dead);

	/* ------------------------------------------------------------------------
	 * Callbacks for non-modifying operations on individual tuples
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`.  If a tuple was found and passed the visibility
	 * test, returns true, false otherwise.
	 */
	bool		(*tuple_fetch_row_version) (Relation rel,
											ItemPointer tid,
											Snapshot snapshot,
											TupleTableSlot *slot);

	/*
	 * Is tid valid for a scan of this relation?
	 */
	bool		(*tuple_tid_valid) (TableScanDesc scan,
									ItemPointer tid);

	/*
	 * Return the latest version of the tuple at `tid`, by updating `tid` to
	 * point at the newest version.
	 */
	void		(*tuple_get_latest_tid) (TableScanDesc scan,
										 ItemPointer tid);

	/*
	 * Does the tuple in `slot` satisfy `snapshot`?  The slot needs to be of
	 * the appropriate type for the AM.
	 */
	bool		(*tuple_satisfies_snapshot) (Relation rel,
											 TupleTableSlot *slot,
											 Snapshot snapshot);

	/* see table_index_delete_tuples() */
	TransactionId (*index_delete_tuples) (Relation rel,
										  TM_IndexDeleteOp *delstate);


	/* ------------------------------------------------------------------------
	 * Manipulations of physical tuples.
	 * ------------------------------------------------------------------------
	 */

	/* see table_tuple_insert() for reference about parameters */
	void		(*tuple_insert) (Relation rel, TupleTableSlot *slot,
								 CommandId cid, int options,
								 struct BulkInsertStateData *bistate);

	/* see table_tuple_insert_speculative() for reference about parameters */
	void		(*tuple_insert_speculative) (Relation rel,
											 TupleTableSlot *slot,
											 CommandId cid,
											 int options,
											 struct BulkInsertStateData *bistate,
											 uint32 specToken);

	/* see table_tuple_complete_speculative() for reference about parameters */
	void		(*tuple_complete_speculative) (Relation rel,
											   TupleTableSlot *slot,
											   uint32 specToken,
											   bool succeeded);

	/* see table_multi_insert() for reference about parameters */
	void		(*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
								 CommandId cid, int options,
								 struct BulkInsertStateData *bistate);

	/* see table_tuple_delete() for reference about parameters */
	TM_Result	(*tuple_delete) (Relation rel,
								 ItemPointer tid,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 bool changingPart);

	/* see table_tuple_update() for reference about parameters */
	TM_Result	(*tuple_update) (Relation rel,
								 ItemPointer otid,
								 TupleTableSlot *slot,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 LockTupleMode *lockmode,
								 TU_UpdateIndexes *update_indexes);

	/* see table_tuple_lock() for reference about parameters */
	TM_Result	(*tuple_lock) (Relation rel,
							   ItemPointer tid,
							   Snapshot snapshot,
							   TupleTableSlot *slot,
							   CommandId cid,
							   LockTupleMode mode,
							   LockWaitPolicy wait_policy,
							   uint8 flags,
							   TM_FailureData *tmfd);

	/*
	 * Perform operations necessary to complete insertions made via
	 * tuple_insert and multi_insert with a BulkInsertState specified.  In-tree
	 * access methods no longer use this.
	 *
	 * Typically callers of tuple_insert and multi_insert will just pass all
	 * the flags that apply to them, and each AM has to decide which of them
	 * make sense for it, and then only take actions in finish_bulk_insert for
	 * those flags, and ignore others.
	 *
	 * Optional callback.
	 */
	void		(*finish_bulk_insert) (Relation rel, int options);


	/* ------------------------------------------------------------------------
	 * DDL related functionality.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * This callback needs to create new relation storage for `rel`, with
	 * appropriate durability behaviour for `persistence`.
	 *
	 * Note that only the subset of the relcache filled by
	 * RelationBuildLocalRelation() can be relied upon, and that the relation's
	 * catalog entries will either not yet exist (new relation), or will still
	 * reference the old relfilelocator.
	 *
	 * As output, *freezeXid and *minmulti must be set to the values appropriate
	 * for pg_class.{relfrozenxid, relminmxid}.  For AMs that don't need those
	 * fields to be filled, they can be set to InvalidTransactionId and
	 * InvalidMultiXactId, respectively.
	 *
	 * See also table_relation_set_new_filelocator().
	 */
	void		(*relation_set_new_filelocator) (Relation rel,
												 const RelFileLocator *newrlocator,
												 char persistence,
												 TransactionId *freezeXid,
												 MultiXactId *minmulti);

	/*
	 * This callback needs to remove all contents from `rel`'s current
	 * relfilelocator.  No provisions for transactional behaviour need to be
	 * made.  Often this can be implemented by truncating the underlying
	 * storage to its minimal size.
	 *
	 * See also table_relation_nontransactional_truncate().
	 */
	void		(*relation_nontransactional_truncate) (Relation rel);

	/*
	 * See table_relation_copy_data().
	 *
	 * This can typically be implemented by directly copying the underlying
	 * storage, unless it contains references to the tablespace internally.
	 */
	void		(*relation_copy_data) (Relation rel,
									   const RelFileLocator *newrlocator);

	/* See table_relation_copy_for_cluster() */
	void		(*relation_copy_for_cluster) (Relation OldTable,
											  Relation NewTable,
											  Relation OldIndex,
											  bool use_sort,
											  TransactionId OldestXmin,
											  TransactionId *xid_cutoff,
											  MultiXactId *multi_cutoff,
											  double *num_tuples,
											  double *tups_vacuumed,
											  double *tups_recently_dead);

	/*
	 * React to VACUUM command on the relation.  The VACUUM can be triggered by
	 * a user or by autovacuum.  The specific actions performed by the AM will
	 * depend heavily on the individual AM.
	 *
	 * On entry a transaction is already established, and the relation is
	 * locked with a ShareUpdateExclusive lock.
	 *
	 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
	 * this routine, even if (for ANALYZE) it is part of the same VACUUM
	 * command.
	 *
	 * In the future there probably needs to be a separate callback to
	 * integrate with autovacuum's scheduling.
	 */
	void		(*relation_vacuum) (Relation rel,
									struct VacuumParams *params,
									BufferAccessStrategy bstrategy);

	/*
	 * Prepare to analyze the next block of `scan`, as provided by the read
	 * stream `stream`.  The scan has been started with
	 * table_beginscan_analyze().  See also table_scan_analyze_next_block().
	 *
	 * The callback may acquire resources like locks that are held until
	 * table_scan_analyze_next_tuple() returns false.  For example, it can make
	 * sense to hold a lock until all tuples on a block have been analyzed by
	 * scan_analyze_next_tuple.
	 *
	 * The callback can return false if the block is not suitable for
	 * sampling, e.g. because it's a metapage that could never contain tuples.
	 *
	 * XXX: This obviously is primarily suited for block-based AMs.  It's not
	 * clear what a good interface for non block based AMs would be, so there
	 * isn't one yet.
	 */
	bool		(*scan_analyze_next_block) (TableScanDesc scan,
											ReadStream *stream);

	/*
	 * See table_scan_analyze_next_tuple().
	 *
	 * Not every AM might have a meaningful concept of dead rows, in which
	 * case it's OK to not increment *deadrows - but note that this may
	 * influence autovacuum scheduling (see comment for relation_vacuum
	 * callback).
	 */
	bool		(*scan_analyze_next_tuple) (TableScanDesc scan,
											TransactionId OldestXmin,
											double *liverows,
											double *deadrows,
											TupleTableSlot *slot);

	/* see table_index_build_range_scan for reference about parameters */
	double		(*index_build_range_scan) (Relation table_rel,
										   Relation index_rel,
										   struct IndexInfo *index_info,
										   bool allow_sync,
										   bool anyvisible,
										   bool progress,
										   BlockNumber start_blockno,
										   BlockNumber numblocks,
										   IndexBuildCallback callback,
										   void *callback_state,
										   TableScanDesc scan);

	/* see table_index_validate_scan for reference about parameters */
	void		(*index_validate_scan) (Relation table_rel,
										Relation index_rel,
										struct IndexInfo *index_info,
										Snapshot snapshot,
										struct ValidateIndexState *state);


	/* ------------------------------------------------------------------------
	 * Miscellaneous functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_size().
	 *
	 * Note that currently a few callers use the MAIN_FORKNUM size to figure
	 * out the range of potentially interesting blocks (brin, analyze).  It's
	 * probable that we'll need to revise the interface for those at some
	 * point.
	 */
	uint64		(*relation_size) (Relation rel, ForkNumber forkNumber);


	/*
	 * This callback should return true if the relation requires a TOAST table
	 * and false if it does not.  It may wish to examine the relation's tuple
	 * descriptor before making a decision, but if it uses some other method
	 * of storing large values (or if it does not support them) it can simply
	 * return false.
	 */
	bool		(*relation_needs_toast_table) (Relation rel);

	/*
	 * This callback should return the OID of the table AM that implements
	 * TOAST tables for this AM.  If the relation_needs_toast_table callback
	 * always returns false, this callback is not required.
	 */
	Oid			(*relation_toast_am) (Relation rel);

	/*
	 * This callback is invoked when detoasting a value stored in a toast
	 * table implemented by this AM.  See table_relation_fetch_toast_slice()
	 * for more details.
	 */
	void		(*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
											   int32 attrsize,
											   int32 sliceoffset,
											   int32 slicelength,
											   struct varlena *result);


	/* ------------------------------------------------------------------------
	 * Planner related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_estimate_size().
	 *
	 * While block oriented, it shouldn't be too hard for an AM that doesn't
	 * internally use blocks to convert into a usable representation.
	 *
	 * This differs from the relation_size callback by returning size
	 * estimates (both relation size and tuple count) for planning purposes,
	 * rather than returning the currently correct value.
	 */
	void		(*relation_estimate_size) (Relation rel, int32 *attr_widths,
										   BlockNumber *pages, double *tuples,
										   double *allvisfrac);


	/* ------------------------------------------------------------------------
	 * Executor related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
	 * if a visible tuple was found, false otherwise.
	 *
	 * `lossy_pages` is incremented if the bitmap is lossy for the selected
	 * page; otherwise, `exact_pages` is incremented.  These are tracked for
	 * display in EXPLAIN ANALYZE output.
	 *
	 * Prefetching additional data from the bitmap is left to the table AM.
	 *
	 * This is an optional callback.
	 */
	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
										   TupleTableSlot *slot,
										   bool *recheck,
										   uint64 *lossy_pages,
										   uint64 *exact_pages);

	/*
	 * Prepare to fetch tuples from the next block in a sample scan.  Return
	 * false if the sample scan is finished, true otherwise.  `scan` was
	 * started via table_beginscan_sampling().
	 *
	 * Typically this will first determine the target block by calling the
	 * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
	 * perform a sequential scan over all blocks.  The determined block is
	 * then typically read and pinned.
	 *
	 * As the TsmRoutine interface is block based, a block needs to be passed
	 * to NextSampleBlock().  If that's not appropriate for an AM, it needs to
	 * internally map between its own and a block based representation.
	 *
	 * Note that it's not acceptable to hold deadlock prone resources such as
	 * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
	 * block - the tuple is likely to be returned to an upper query node, and
	 * the next call could be off a long while.  Holding buffer pins and such
	 * is obviously OK.
	 *
	 * Currently it is required to implement this interface, as there's no
	 * alternative way (unlike e.g. bitmap scans) to implement sample scans.
	 * If infeasible to implement, the AM may raise an error.
	 */
	bool		(*scan_sample_next_block) (TableScanDesc scan,
										   struct SampleScanState *scanstate);

	/*
	 * This callback, only called after scan_sample_next_block has returned
	 * true, should determine the next tuple to be returned from the selected
	 * block using the TsmRoutine's NextSampleTuple() callback.
	 *
	 * The callback needs to perform visibility checks, and only return
	 * visible tuples.  That obviously can mean calling NextSampleTuple()
	 * multiple times.
	 *
	 * The TsmRoutine interface assumes that there's a maximum offset on a
	 * given page, so if that doesn't apply to an AM, it needs to emulate that
	 * assumption somehow.
	 */
	bool		(*scan_sample_next_tuple) (TableScanDesc scan,
										   struct SampleScanState *scanstate,
										   TupleTableSlot *slot);
} TableAmRoutine;

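/*
 * For illustration, the usual shape of a table AM handler (hypothetical AM
 * names; heapam's handler follows this pattern).  The struct is static const
 * and server-lifetime, as required above, and the handler just returns a
 * pointer to it:
 *
 *	static const TableAmRoutine example_methods = {
 *		.type = T_TableAmRoutine,
 *		.slot_callbacks = example_slot_callbacks,
 *		.scan_begin = example_scan_begin,
 *		(... all required callbacks ...)
 *	};
 *
 *	Datum
 *	example_tableam_handler(PG_FUNCTION_ARGS)
 *	{
 *		PG_RETURN_POINTER(&example_methods);
 *	}
 */
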
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation.  Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);

/*
 * Returns a slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);


/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`.  Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
				int nkeys, struct ScanKeyData *key)
{
	uint32		flags = SO_TYPE_SEQSCAN |
		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning a catalog.  It'll automatically
 * use a snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
											 struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero).  Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
					  int nkeys, struct ScanKeyData *key,
					  bool allow_strat, bool allow_sync)
{
	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan.  Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality to
 * make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
				   int nkeys, struct ScanKeyData *key, bool need_tuple)
{
	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

	if (need_tuple)
		flags |= SO_NEED_TUPLES;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
									   NULL, flags);
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
						 int nkeys, struct ScanKeyData *key,
						 bool allow_strat, bool allow_sync,
						 bool allow_pagemode)
{
	uint32		flags = SO_TYPE_SAMPLESCAN;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;
	if (allow_pagemode)
		flags |= SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan.  As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
	uint32		flags = SO_TYPE_TIDSCAN;

	return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
	uint32		flags = SO_TYPE_ANALYZE;

	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
	scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
			 struct ScanKeyData *key)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan.  Note that although the actual use of
 * syncscan might change (effectively, enabling or disabling reporting), the
 * previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
						bool allow_strat, bool allow_sync, bool allow_pagemode)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
										 allow_strat, allow_sync,
										 allow_pagemode);
}

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction,
					   TupleTableSlot *slot)
{
	slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	/*
	 * We don't expect direct calls to table_scan_getnextslot with valid
	 * CheckXidAlive for catalog or regular tables.  See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");

	return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
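
/*
 * Example usage (hypothetical helper, not part of the API): count the tuples
 * visible to `snapshot` with a plain sequential scan.  A minimal sketch of
 * the begin/getnextslot/end cycle.
 */
static inline uint64
table_example_count_tuples(Relation rel, Snapshot snapshot)
{
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
	uint64		ntuples = 0;

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
		ntuples++;

	table_endscan(scan);
	ExecDropSingleTupleTableSlot(slot);

	return ntuples;
}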

/* ----------------------------------------------------------------------------
 * TID Range scanning related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
 * for a TID range scan.
 */
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
						 ItemPointer mintid,
						 ItemPointer maxtid)
{
	TableScanDesc sscan;
	uint32		flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;

	sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);

	/* Set the range of TIDs to scan */
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);

	return sscan;
}

/*
 * table_rescan_tidrange resets the scan position and sets the minimum and
 * maximum TID range to scan for a TableScanDesc created by
 * table_beginscan_tidrange.
 */
static inline void
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
					  ItemPointer maxtid)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
}

/*
 * Fetch the next tuple from `sscan` for a TID range scan created by
 * table_beginscan_tidrange().  Stores the tuple in `slot` and returns true,
 * or returns false if no more tuples exist in the range.
 */
static inline bool
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
								TupleTableSlot *slot)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
															   direction,
															   slot);
}
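
/*
 * Example usage (hypothetical helper, not part of the API): count the
 * visible tuples whose TIDs fall within [mintid, maxtid].
 */
static inline uint64
table_example_count_tidrange(Relation rel, Snapshot snapshot,
							 ItemPointer mintid, ItemPointer maxtid)
{
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	TableScanDesc scan = table_beginscan_tidrange(rel, snapshot,
												  mintid, maxtid);
	uint64		ntuples = 0;

	while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
		ntuples++;

	table_endscan(scan);
	ExecDropSingleTupleTableSlot(slot);

	return ntuples;
}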


/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this
 * relation.  `pscan` needs to be sized according to parallelscan_estimate()
 * for the same relation.  Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
										  ParallelTableScanDesc pscan,
										  Snapshot snapshot);

/*
 * Begin a parallel scan.  `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation.  The initialization
 * does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation relation,
											  ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan.  Call this in the leader process.  Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
	rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}
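
/*
 * Typical call sequence, as a sketch (shared-memory TOC setup elided; `toc`
 * is hypothetical here).  The leader sizes and initializes the shared scan
 * state once, then leader and workers each attach to it:
 *
 *	size = table_parallelscan_estimate(rel, snapshot);
 *	pscan = (ParallelTableScanDesc) shm_toc_allocate(toc, size);
 *	table_parallelscan_initialize(rel, pscan, snapshot);
 *	...
 *	scan = table_beginscan_parallel(rel, pscan);
 */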


/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
	return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch.  Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 * a visibility test according to `snapshot`.  If a tuple was found and passed
 * the visibility test, returns true, false otherwise.  Note that *tid may be
 * modified when we return true (see later remarks on multiple row versions
 * reachable via a single index entry).
 *
 * *call_again needs to be false on the first call to table_index_fetch_tuple()
 * for a tid.  If there potentially is another tuple matching the tid,
 * *call_again will be set to true, signaling that table_index_fetch_tuple()
 * should be called again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 * that tuple.  Index AMs can use that to avoid returning that tid in future
 * searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single index
 * entry (like heap's HOT).  Whereas table_tuple_fetch_row_version() only
 * evaluates the tuple exactly at `tid`.  Outside of index entry -> table
 * tuple lookups, table_tuple_fetch_row_version() is what's usually needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
						ItemPointer tid,
						Snapshot snapshot,
						TupleTableSlot *slot,
						bool *call_again, bool *all_dead)
{
	/*
	 * We don't expect direct calls to table_index_fetch_tuple with valid
	 * CheckXidAlive for catalog or regular tables.  See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");

	return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
													slot, call_again,
													all_dead);
}
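
/*
 * Example usage (hypothetical helper, not part of the API): probe one index
 * entry's TID for a visible row version, looping while the AM reports that
 * further versions may be reachable from the same TID (e.g. heap's HOT
 * chains).  Note *call_again must start out false, per the contract above.
 */
static inline bool
table_example_probe_index_entry(Relation rel, ItemPointer tid,
								Snapshot snapshot, TupleTableSlot *slot)
{
	IndexFetchTableData *fetch = table_index_fetch_begin(rel);
	bool		call_again = false;
	bool		all_dead = false;
	bool		found;

	do
	{
		found = table_index_fetch_tuple(fetch, tid, snapshot, slot,
										&call_again, &all_dead);
	} while (!found && call_again);

	table_index_fetch_end(fetch);

	return found;
}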

/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry.  This is likely only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
										  ItemPointer tid,
										  Snapshot snapshot,
										  bool *all_dead);


/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */

/*
 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
 * `snapshot`.  If a tuple was found and passed the visibility test, returns
 * true, false otherwise.
 *
 * See table_index_fetch_tuple's comment about what the difference between
 * these functions is.  It is correct to use this function outside of index
 * entry -> table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
							  ItemPointer tid,
							  Snapshot snapshot,
							  TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_tuple_fetch_row_version with
	 * valid CheckXidAlive for catalog or regular tables.  See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");

	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}

/*
 * Verify that `tid` is a potentially valid tuple identifier.  That doesn't
 * mean that the pointed-to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
	return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid);

/*
 * Return true iff tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect.  If so
 * they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
							   Snapshot snapshot)
{
	return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Determine which index tuples are safe to delete based on their table TID.
 *
 * Determines which entries from index AM caller's TM_IndexDeleteOp state
 * point to vacuumable table tuples.  Entries that are found by tableam to be
 * vacuumable are naturally safe for index AM to delete, and so get directly
 * marked as deletable.  See comments above TM_IndexDelete and comments above
 * TM_IndexDeleteOp for full details.
 *
 * Returns a snapshotConflictHorizon transaction ID that caller places in
 * its index deletion WAL record.  This might be used during subsequent REDO
 * of the WAL record when in Hot Standby mode -- a recovery conflict for the
 * index deletion operation might be required on the standby.
 */
static inline TransactionId
table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
{
	return rel->rd_tableam->index_delete_tuples(rel, delstate);
}

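/*
 * Example usage (hypothetical helper, not part of the API): simple index
 * tuple deletion.  The index AM passes caller-initialized deltids/status
 * arrays (e.g. built from LP_DEAD-marked index tuples) and gets back the
 * snapshotConflictHorizon XID for its WAL record.
 */
static inline TransactionId
table_example_simple_index_delete(Relation rel, Relation irel,
								  BlockNumber iblknum,
								  TM_IndexDelete *deltids,
								  TM_IndexStatus *status, int ndeltids)
{
	TM_IndexDeleteOp delstate;

	delstate.irel = irel;
	delstate.iblknum = iblknum;
	delstate.bottomup = false;	/* simple (not bottom-up) deletion */
	delstate.bottomupfreespace = 0; /* only used by bottom-up callers */
	delstate.ndeltids = ndeltids;
	delstate.deltids = deltids;
	delstate.status = status;

	return table_index_delete_tuples(rel, &delstate);
}
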

/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot into a table.
 *
 * The options bitmask allows the caller to specify options that may change the
 * behaviour of the AM.  The AM will ignore options that it does not support.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 * free space in the relation.  This can save some cycles when we know the
 * relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple.  See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relation storage created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by the user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple.  This should solely be used during table rewrites
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 * relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple.  If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion.  But note that any toasting of fields within the slot is NOT
 * reflected in the slot's contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
				   int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->tuple_insert(rel, slot, cid, options,
								  bistate);
}
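
/*
 * Example usage (hypothetical helper, not part of the API): insert into a
 * just-created relation, skipping the FSM since such a relation cannot yet
 * contain useful amounts of free space.
 */
static inline void
table_example_insert_into_new_rel(Relation rel, TupleTableSlot *slot,
								  CommandId cid)
{
	/* no BulkInsertState, so no table_finish_bulk_insert() call is needed */
	table_tuple_insert(rel, slot, cid, TABLE_INSERT_SKIP_FSM, NULL);
}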

/*
 * Perform a "speculative insertion".  These can be backed out afterwards
 * without aborting the whole transaction.  Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed.  Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
							   CommandId cid, int options,
							   struct BulkInsertStateData *bistate,
							   uint32 specToken)
{
	rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
											  bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction.  If
 * succeeded is true, the tuple is fully inserted; if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
								 uint32 specToken, bool succeeded)
{
	rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
												succeeded);
}

/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation.  That's often faster than calling table_tuple_insert() in a loop,
 * because e.g. the AM can reduce WAL logging and page locking overhead.
 *
 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context.  You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
				   CommandId cid, int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->multi_insert(rel, slots, nslots,
								  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions.  Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	tid - TID of tuple to be deleted
 *	cid - delete command ID (used for visibility test, and stored into
 *		cmax if successful)
 *	crosscheck - if not InvalidSnapshot, also check tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 *	changingPart - true iff the tuple is being moved to another partition
 *		table due to an update of the partition key; otherwise false
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax.  See comments for struct
 * TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
				   Snapshot snapshot, Snapshot crosscheck, bool wait,
				   TM_FailureData *tmfd, bool changingPart)
{
	return rel->rd_tableam->tuple_delete(rel, tid, cid,
										 snapshot, crosscheck,
										 wait, tmfd, changingPart);
}

/*
 * Update a tuple.
 *
 * NB: do not call this directly unless you are prepared to deal with
 * concurrent-update conditions.  Use simple_table_tuple_update instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	otid - TID of old tuple to be replaced
 *	slot - newly constructed tuple data to store
 *	cid - update command ID (used for visibility test, and stored into
 *		cmax/cmin if successful)
 *	crosscheck - if not InvalidSnapshot, also check old tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	lockmode - filled with lock mode acquired on tuple
 *	update_indexes - in success cases this is set to indicate whether, and
 *		which, new index entries are required for this tuple (see
 *		TU_UpdateIndexes)
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * On success, the slot's tts_tid and tts_tableOid are updated to match the new
 * stored tuple; in particular, slot->tts_tid is set to the TID where the
 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 * update was done.  However, any TOAST changes in the new tuple's
 * data are not reflected into *newtup.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax.  See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
				   TU_UpdateIndexes *update_indexes)
{
	return rel->rd_tableam->tuple_update(rel, otid, slot,
										 cid, snapshot, crosscheck,
										 wait, tmfd,
										 lockmode, update_indexes);
}

/*
 * Lock a tuple in the specified mode.
 *
 * Input parameters:
 *	relation: relation containing tuple (caller must hold suitable lock)
 *	tid: TID of tuple to lock
 *	snapshot: snapshot to use for visibility determinations
 *	cid: current command ID (used for visibility test, and stored into
 *		tuple's cmax if lock is successful)
 *	mode: lock mode desired
 *	wait_policy: what to do if tuple lock is not available
 *	flags:
 *		If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
 *		also lock descendant tuples if lock modes don't conflict.
 *		If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
 *		latest version.
 *
 * Output parameters:
 *	*slot: contains the target tuple
 *	*tmfd: filled in failure cases (see below)
 *
 * Function result may be:
 *	TM_Ok: lock was successfully acquired
 *	TM_Invisible: lock failed because tuple was never visible to us
 *	TM_SelfModified: lock failed because tuple updated by self
 *	TM_Updated: lock failed because tuple updated by other xact
 *	TM_Deleted: lock failed because tuple deleted by other xact
 *	TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
 *
 * In the failure cases other than TM_Invisible and TM_Deleted, the routine
 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax.  See
 * comments for struct TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
				 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
				 LockWaitPolicy wait_policy, uint8 flags,
				 TM_FailureData *tmfd)
{
	return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
									   cid, mode, wait_policy,
									   flags, tmfd);
}
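
/*
 * Example usage (hypothetical helper, not part of the API): lock the latest
 * version of a row for update and classify the TM_Result, roughly in the
 * style of the executor's row-locking code.  Returns true iff the lock was
 * acquired.
 */
static inline bool
table_example_lock_row(Relation rel, ItemPointer tid, Snapshot snapshot,
					   TupleTableSlot *slot, CommandId cid)
{
	TM_FailureData tmfd;
	TM_Result	res;

	res = table_tuple_lock(rel, tid, snapshot, slot, cid,
						   LockTupleExclusive, LockWaitBlock,
						   TUPLE_LOCK_FLAG_FIND_LAST_VERSION, &tmfd);

	switch (res)
	{
		case TM_Ok:
			return true;		/* tuple is locked and stored in slot */
		case TM_SelfModified:
		case TM_Updated:
		case TM_Deleted:
			return false;		/* caller decides: skip, retry, or error */
		case TM_Invisible:
			elog(ERROR, "attempted to lock invisible tuple");
			return false;		/* unreachable */
		default:
			elog(ERROR, "unrecognized table_tuple_lock status: %u", res);
			return false;		/* unreachable */
	}
}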

/*
 * Perform operations necessary to complete insertions made via
 * tuple_insert and multi_insert with a BulkInsertState specified.
 */
static inline void
table_finish_bulk_insert(Relation rel, int options)
{
	/* optional callback */
	if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
		rel->rd_tableam->finish_bulk_insert(rel, options);
}


/* ------------------------------------------------------------------------
 * DDL related functionality.
 * ------------------------------------------------------------------------
 */

/*
 * Create storage for `rel` in `newrlocator`, with persistence set to
 * `persistence`.
 *
 * This is used both during relation creation and various DDL operations to
 * create new rel storage that can be filled from scratch.  When creating
 * new storage for an existing relfilelocator, this should be called before the
 * relcache entry has been updated.
 *
 * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
 * that pg_class.{relfrozenxid, relminmxid} have to be set to.
 */
static inline void
table_relation_set_new_filelocator(Relation rel,
								   const RelFileLocator *newrlocator,
								   char persistence,
								   TransactionId *freezeXid,
								   MultiXactId *minmulti)
{
	rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
												  persistence, freezeXid,
												  minmulti);
}

/*
 * Remove all table contents from `rel`, in a non-transactional manner.
 * Non-transactional meaning that there's no need to support rollbacks.  This
 * is commonly only used to perform truncations for relation storage created
 * in the current transaction.
 */
static inline void
table_relation_nontransactional_truncate(Relation rel)
{
	rel->rd_tableam->relation_nontransactional_truncate(rel);
}

/*
 * Copy data from `rel` into the new relfilelocator `newrlocator`.  The new
 * relfilelocator may not have storage associated before this function is
 * called.  This is only supposed to be used for low level operations like
 * changing a relation's tablespace.
 */
static inline void
table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
{
	rel->rd_tableam->relation_copy_data(rel, newrlocator);
}

1626/*
1627 * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1628 * FULL.
1629 *
1630 * Additional Input parameters:
1631 * - use_sort - if true, the table contents are sorted appropriate for
1632 * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1633 * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1634 * performed
1635 * - OldIndex - see use_sort
1636 * - OldestXmin - computed by vacuum_get_cutoffs(), even when
1637 * not needed for the relation's AM
1638 * - *xid_cutoff - ditto
1639 * - *multi_cutoff - ditto
1640 *
1641 * Output parameters:
1642 * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1643 * - *multi_cutoff - rel's new relminmxid value, may be invalid
1644 * - *tups_vacuumed - stats, for logging, if appropriate for AM
1645 * - *tups_recently_dead - stats, for logging, if appropriate for AM
1646 */
1647static inline void
1648table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
1649 Relation OldIndex,
1650 bool use_sort,
1651 TransactionId OldestXmin,
1652 TransactionId *xid_cutoff,
1653 MultiXactId *multi_cutoff,
1654 double *num_tuples,
1655 double *tups_vacuumed,
1656 double *tups_recently_dead)
1657{
1658 OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1659 use_sort, OldestXmin,
1660 xid_cutoff, multi_cutoff,
1661 num_tuples, tups_vacuumed,
1662 tups_recently_dead);
1663}
1664
1665/*
1666 * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1667 * autovacuum. The specific actions performed by the AM will depend heavily on
1668 * the individual AM.
1669 *
1670 * On entry a transaction needs to have already been established, and the
1671 * table is locked with a ShareUpdateExclusive lock.
1672 *
1673 * Note that neither VACUUM FULL (and CLUSTER) nor ANALYZE goes through this
1674 * routine, even if (for ANALYZE) it is part of the same VACUUM command.
1675 */
1676static inline void
1677table_relation_vacuum(Relation rel, struct VacuumParams *params,
1678 BufferAccessStrategy bstrategy)
1679{
1680 rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1681}
1682
1683/*
1684 * Prepare to analyze the next block in the read stream. The scan needs to
1685 * have been started with table_beginscan_analyze(). Note that this routine
1686 * might acquire resources like locks that are held until
1687 * table_scan_analyze_next_tuple() returns false.
1688 *
1689 * Returns false if the block is unsuitable for sampling, true otherwise.
1690 */
1691static inline bool
1692table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
1693{
1694 return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
1695}
1696
1697/*
1698 * Iterate over tuples in the block selected with
1699 * table_scan_analyze_next_block() (which needs to have returned true, and
1700 * this routine may not have returned false for the same block before). If a
1701 * tuple that's suitable for sampling is found, true is returned and a tuple
1702 * is stored in `slot`.
1703 *
1704 * *liverows and *deadrows are incremented according to the encountered
1705 * tuples.
1706 */
1707static inline bool
1708table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1709 double *liverows, double *deadrows,
1710 TupleTableSlot *slot)
1711{
1712 return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1713 liverows, deadrows,
1714 slot);
1715}
1716
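/*
 * Illustrative sketch of the consumption side of ANALYZE block sampling
 * (cf. acquire_sample_rows() in commands/analyze.c).  Assumes `scan` was
 * started with table_beginscan_analyze() and `stream` was set up with
 * read_stream_begin_relation() to hand back the blocks chosen for sampling.
 */
static void
example_analyze_sample(TableScanDesc scan, ReadStream *stream,
					   TransactionId OldestXmin, TupleTableSlot *slot)
{
	double		liverows = 0,
				deadrows = 0;

	while (table_scan_analyze_next_block(scan, stream))
	{
		while (table_scan_analyze_next_tuple(scan, OldestXmin,
											 &liverows, &deadrows, slot))
		{
			/* feed the sampled tuple in `slot` into the row sample */
		}
	}
}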
1717/*
1718 * table_index_build_scan - scan the table to find tuples to be indexed
1719 *
1720 * This is called back from an access-method-specific index build procedure
1721 * after the AM has done whatever setup it needs. The parent table relation
1722 * is scanned to find tuples that should be entered into the index. Each
1723 * such tuple is passed to the AM's callback routine, which does the right
1724 * things to add it to the new index. After we return, the AM's index
1725 * build procedure does whatever cleanup it needs.
1726 *
1727 * The total count of live tuples is returned. This is for updating pg_class
1728 * statistics. (It's annoying not to be able to do that here, but we want to
1729 * merge that update with others; see index_update_stats.) Note that the
1730 * index AM itself must keep track of the number of index tuples; we don't do
1731 * so here because the AM might reject some of the tuples for its own reasons,
1732 * such as being unable to store NULLs.
1733 *
1734 * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1735 * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1736 *
1737 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1738 * any potentially broken HOT chains. Currently, we set this if there are any
1739 * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1740 * very hard to detect whether they're really incompatible with the chain tip.
1741 * This only really makes sense for the heap AM; it might need to be generalized
1742 * for other AMs later.
1743 */
1744static inline double
1745table_index_build_scan(Relation table_rel,
1746 Relation index_rel,
1747 struct IndexInfo *index_info,
1748 bool allow_sync,
1749 bool progress,
1750 IndexBuildCallback callback,
1751 void *callback_state,
1752 TableScanDesc scan)
1753{
1754 return table_rel->rd_tableam->index_build_range_scan(table_rel,
1755 index_rel,
1756 index_info,
1757 allow_sync,
1758 false,
1759 progress,
1760 0,
1761 InvalidBlockNumber,
1762 callback,
1763 callback_state,
1764 scan);
1765}
1766
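/*
 * Illustrative sketch: a minimal IndexBuildCallback of the sort an index
 * AM's build routine passes to table_index_build_scan().  This one merely
 * counts the tuples offered for indexing; a real callback forms an index
 * tuple from values/isnull and inserts it (cf. btbuildCallback in
 * nbtsort.c).
 */
static void
example_build_callback(Relation index, ItemPointer tid, Datum *values,
					   bool *isnull, bool tupleIsAlive, void *state)
{
	double	   *indexed = (double *) state;

	*indexed += 1;
}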
1767/*
1768 * As table_index_build_scan(), except that instead of scanning the complete
1769 * table, only the given number of blocks are scanned. Scan to end-of-rel can
1770 * be signaled by passing InvalidBlockNumber as numblocks. Note that
1771 * restricting the range to scan cannot be done when requesting syncscan.
1772 *
1773 * When "anyvisible" mode is requested, all tuples visible to any transaction
1774 * are indexed and counted as live, including those inserted or deleted by
1775 * transactions that are still in progress.
1776 */
1777static inline double
1778table_index_build_range_scan(Relation table_rel,
1779 Relation index_rel,
1780 struct IndexInfo *index_info,
1781 bool allow_sync,
1782 bool anyvisible,
1783 bool progress,
1784 BlockNumber start_blockno,
1785 BlockNumber numblocks,
1786 IndexBuildCallback callback,
1787 void *callback_state,
1788 TableScanDesc scan)
1789{
1790 return table_rel->rd_tableam->index_build_range_scan(table_rel,
1791 index_rel,
1792 index_info,
1793 allow_sync,
1794 anyvisible,
1795 progress,
1796 start_blockno,
1797 numblocks,
1798 callback,
1799 callback_state,
1800 scan);
1801}
1802
1803/*
1804 * table_index_validate_scan - second table scan for concurrent index build
1805 *
1806 * See validate_index() for an explanation.
1807 */
1808static inline void
1809table_index_validate_scan(Relation table_rel,
1810 Relation index_rel,
1811 struct IndexInfo *index_info,
1812 Snapshot snapshot,
1813 struct ValidateIndexState *state)
1814{
1815 table_rel->rd_tableam->index_validate_scan(table_rel,
1816 index_rel,
1817 index_info,
1818 snapshot,
1819 state);
1820}
1821
1822
1823/* ----------------------------------------------------------------------------
1824 * Miscellaneous functionality
1825 * ----------------------------------------------------------------------------
1826 */
1827
1828/*
1829 * Return the current size of `rel` in bytes. If `forkNumber` is
1830 * InvalidForkNumber, return the relation's overall size, otherwise the size
1831 * for the indicated fork.
1832 *
1833 * Note that the overall size might not be the equivalent of the sum of sizes
1834 * for the individual forks for some AMs, e.g. because the AM's storage does
1835 * not neatly map onto the builtin types of forks.
1836 */
1837static inline uint64
1838table_relation_size(Relation rel, ForkNumber forkNumber)
1839{
1840 return rel->rd_tableam->relation_size(rel, forkNumber);
1841}
1842
1843/*
1844 * table_relation_needs_toast_table - does this relation need a toast table?
1845 */
1846static inline bool
1847table_relation_needs_toast_table(Relation rel)
1848{
1849 return rel->rd_tableam->relation_needs_toast_table(rel);
1850}
1851
1852/*
1853 * Return the OID of the AM that should be used to implement the TOAST table
1854 * for this relation.
1855 */
1856static inline Oid
1857table_relation_toast_am(Relation rel)
1858{
1859 return rel->rd_tableam->relation_toast_am(rel);
1860}
1861
1862/*
1863 * Fetch all or part of a TOAST value from a TOAST table.
1864 *
1865 * If this AM is never used to implement a TOAST table, then this callback
1866 * is not needed. But if toasted values are ever stored in a table of this
1867 * type, then you will need this callback.
1868 *
1869 * toastrel is the relation in which the toasted value is stored.
1870 *
1871 * valueid identifies which toast value is to be fetched. For the heap,
1872 * this corresponds to the values stored in the chunk_id column.
1873 *
1874 * attrsize is the total size of the toast value to be fetched.
1875 *
1876 * sliceoffset is the offset within the toast value of the first byte that
1877 * should be fetched.
1878 *
1879 * slicelength is the number of bytes from the toast value that should be
1880 * fetched.
1881 *
1882 * result is caller-allocated space into which the fetched bytes should be
1883 * stored.
1884 */
1885static inline void
1886table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
1887 int32 attrsize, int32 sliceoffset,
1888 int32 slicelength, struct varlena *result)
1889{
1890 toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1891 attrsize,
1892 sliceoffset, slicelength,
1893 result);
1894}
1895
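/*
 * Illustrative sketch: fetching a complete TOAST value.  The caller
 * allocates the result varlena and sets its size up front (cf.
 * toast_fetch_datum() in access/common/detoast.c).
 */
static struct varlena *
example_fetch_whole_toast_value(Relation toastrel, Oid valueid, int32 attrsize)
{
	struct varlena *result = (struct varlena *) palloc(attrsize + VARHDRSZ);

	SET_VARSIZE(result, attrsize + VARHDRSZ);
	table_relation_fetch_toast_slice(toastrel, valueid, attrsize,
									 0, attrsize, result);
	return result;
}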
1896
1897/* ----------------------------------------------------------------------------
1898 * Planner related functionality
1899 * ----------------------------------------------------------------------------
1900 */
1901
1902/*
1903 * Estimate the current size of the relation, as an AM specific workhorse for
1904 * estimate_rel_size(). Look there for an explanation of the parameters.
1905 */
1906static inline void
1907table_relation_estimate_size(Relation rel, int32 *attr_widths,
1908 BlockNumber *pages, double *tuples,
1909 double *allvisfrac)
1910{
1911 rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1912 allvisfrac);
1913}
1914
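/*
 * Illustrative sketch: a block-oriented AM will typically implement its
 * relation_estimate_size callback by delegating to
 * table_block_relation_estimate_size() (declared further down in this
 * header), passing its own per-tuple overhead and usable-bytes-per-page
 * figures.  The two MY_AM_* constants here are hypothetical.
 */
#define MY_AM_OVERHEAD_BYTES_PER_TUPLE 24	/* hypothetical */
#define MY_AM_USABLE_BYTES_PER_PAGE 8100	/* hypothetical */

static void
example_am_estimate_size(Relation rel, int32 *attr_widths,
						 BlockNumber *pages, double *tuples,
						 double *allvisfrac)
{
	table_block_relation_estimate_size(rel, attr_widths, pages,
									   tuples, allvisfrac,
									   MY_AM_OVERHEAD_BYTES_PER_TUPLE,
									   MY_AM_USABLE_BYTES_PER_PAGE);
}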
1915
1916/* ----------------------------------------------------------------------------
1917 * Executor related functionality
1918 * ----------------------------------------------------------------------------
1919 */
1920
1921/*
1922 * Fetch / check / return tuples as part of a bitmap table scan. `scan` needs
1923 * to have been started via table_beginscan_bm(). Fetch the next tuple of a
1924 * bitmap table scan into `slot` and return true if a visible tuple was found,
1925 * false otherwise.
1926 *
1927 * `recheck` is set by the table AM to indicate whether or not the tuple in
1928 * `slot` should be rechecked. Tuples from lossy pages will always need to be
1929 * rechecked, but some non-lossy pages' tuples may also require recheck.
1930 *
1931 * `lossy_pages` is incremented if the block's representation in the bitmap is
1932 * lossy; otherwise, `exact_pages` is incremented.
1933 */
1934static inline bool
1935table_scan_bitmap_next_tuple(TableScanDesc scan,
1936 TupleTableSlot *slot,
1937 bool *recheck,
1938 uint64 *lossy_pages,
1939 uint64 *exact_pages)
1940{
1941 /*
1942 * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
1943 * CheckXidAlive for catalog or regular tables. See detailed comments in
1944 * xact.c where these variables are declared.
1945 */
1946 if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1947 elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
1948
1949 return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
1950 slot,
1951 recheck,
1952 lossy_pages,
1953 exact_pages);
1954}
1955
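/*
 * Illustrative sketch: consuming a bitmap table scan (cf. BitmapHeapNext()
 * in nodeBitmapHeapscan.c, much simplified).  `scan` must have been started
 * via table_beginscan_bm().
 */
static void
example_bitmap_scan(TableScanDesc scan, TupleTableSlot *slot)
{
	bool		recheck;
	uint64		lossy_pages = 0,
				exact_pages = 0;

	while (table_scan_bitmap_next_tuple(scan, slot, &recheck,
										&lossy_pages, &exact_pages))
	{
		if (recheck)
		{
			/* re-evaluate the original quals against the tuple in `slot` */
		}

		/* hand the tuple up to the caller */
	}
}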
1956/*
1957 * Prepare to fetch tuples from the next block in a sample scan. Returns false
1958 * if the sample scan is finished, true otherwise. `scan` needs to have been
1959 * started via table_beginscan_sampling().
1960 *
1961 * This will call the TsmRoutine's NextSampleBlock() callback if necessary
1962 * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
1963 * underlying relation.
1964 */
1965static inline bool
1966table_scan_sample_next_block(TableScanDesc scan,
1967 struct SampleScanState *scanstate)
1968{
1969 /*
1970 * We don't expect direct calls to table_scan_sample_next_block with valid
1971 * CheckXidAlive for catalog or regular tables. See detailed comments in
1972 * xact.c where these variables are declared.
1973 */
1974 if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1975 elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
1976 return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
1977}
1978
1979/*
1980 * Fetch the next sample tuple into `slot` and return true if a visible tuple
1981 * was found, false otherwise. table_scan_sample_next_block() needs to
1982 * previously have selected a block (i.e. returned true), and no previous
1983 * table_scan_sample_next_tuple() for the same block may have returned false.
1984 *
1985 * This will call the TsmRoutine's NextSampleTuple() callback.
1986 */
1987static inline bool
1988table_scan_sample_next_tuple(TableScanDesc scan,
1989 struct SampleScanState *scanstate,
1990 TupleTableSlot *slot)
1991{
1992 /*
1993 * We don't expect direct calls to table_scan_sample_next_tuple with valid
1994 * CheckXidAlive for catalog or regular tables. See detailed comments in
1995 * xact.c where these variables are declared.
1996 */
1997 if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1998 elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
1999 return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
2000 slot);
2001}
2002
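/*
 * Illustrative sketch: the driving loop of a sample scan, per the contract
 * documented above (cf. tablesample_getnext() in nodeSamplescan.c).
 */
static void
example_sample_scan(TableScanDesc scan, struct SampleScanState *scanstate,
					TupleTableSlot *slot)
{
	while (table_scan_sample_next_block(scan, scanstate))
	{
		while (table_scan_sample_next_tuple(scan, scanstate, slot))
		{
			/* process the sampled tuple in `slot` */
		}
	}
}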
2003
2004/* ----------------------------------------------------------------------------
2005 * Functions to make modifications a bit simpler.
2006 * ----------------------------------------------------------------------------
2007 */
2008
2009extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2010extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
2011 Snapshot snapshot);
2012extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2013 TupleTableSlot *slot, Snapshot snapshot,
2014 TU_UpdateIndexes *update_indexes);
2015
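/*
 * Illustrative sketch: the simple_ wrappers suit callers that don't expect
 * concurrency conflicts, e.g. logical replication apply (cf.
 * ExecSimpleRelationUpdate() in execReplication.c).  GetActiveSnapshot() is
 * from utils/snapmgr.h.
 */
static void
example_apply_update(Relation rel, ItemPointer otid, TupleTableSlot *slot)
{
	TU_UpdateIndexes update_indexes;

	simple_table_tuple_update(rel, otid, slot, GetActiveSnapshot(),
							  &update_indexes);

	if (update_indexes != TU_None)
	{
		/* insert index entries for the new tuple version */
	}
}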
2016
2017/* ----------------------------------------------------------------------------
2018 * Helper functions to implement parallel scans for block oriented AMs.
2019 * ----------------------------------------------------------------------------
2020 */
2021
2022extern Size table_block_parallelscan_estimate(Relation rel);
2023extern Size table_block_parallelscan_initialize(Relation rel,
2024 ParallelTableScanDesc pscan);
2025extern void table_block_parallelscan_reinitialize(Relation rel,
2026 ParallelTableScanDesc pscan);
2027extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
2028 ParallelBlockTableScanWorker pbscanwork,
2029 ParallelBlockTableScanDesc pbscan);
2030extern void table_block_parallelscan_startblock_init(Relation rel,
2031 ParallelBlockTableScanWorker pbscanwork,
2032 ParallelBlockTableScanDesc pbscan);
2033
2034
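/*
 * Illustrative sketch: a block-oriented AM can point its parallel-scan
 * callbacks straight at the helpers above, as heapam does in its
 * TableAmRoutine:
 *
 *	.parallelscan_estimate = table_block_parallelscan_estimate,
 *	.parallelscan_initialize = table_block_parallelscan_initialize,
 *	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
 */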
2035/* ----------------------------------------------------------------------------
2036 * Helper functions to implement relation sizing for block oriented AMs.
2037 * ----------------------------------------------------------------------------
2038 */
2039
2040extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2041extern void table_block_relation_estimate_size(Relation rel,
2042 int32 *attr_widths,
2043 BlockNumber *pages,
2044 double *tuples,
2045 double *allvisfrac,
2046 Size overhead_bytes_per_tuple,
2047 Size usable_bytes_per_page);
2048
2049/* ----------------------------------------------------------------------------
2050 * Functions in tableamapi.c
2051 * ----------------------------------------------------------------------------
2052 */
2053
2054extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2055
2056/* ----------------------------------------------------------------------------
2057 * Functions in heapam_handler.c
2058 * ----------------------------------------------------------------------------
2059 */
2060
2061extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2062
2063#endif /* TABLEAM_H */