/*-------------------------------------------------------------------------
 *
 * tableam.h
 *    POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *    See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H

#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "storage/read_stream.h"
#include "utils/rel.h"
#include "utils/snapshot.h"


#define DEFAULT_TABLE_ACCESS_METHOD "heap"

/* GUCs */
extern PGDLLIMPORT char *default_table_access_method;
extern PGDLLIMPORT bool synchronize_seqscans;


struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;

/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
    /* one of SO_TYPE_* may be specified */
    SO_TYPE_SEQSCAN = 1 << 0,
    SO_TYPE_BITMAPSCAN = 1 << 1,
    SO_TYPE_SAMPLESCAN = 1 << 2,
    SO_TYPE_TIDSCAN = 1 << 3,
    SO_TYPE_TIDRANGESCAN = 1 << 4,
    SO_TYPE_ANALYZE = 1 << 5,

    /* several of SO_ALLOW_* may be specified */
    /* allow or disallow use of access strategy */
    SO_ALLOW_STRAT = 1 << 6,
    /* report location to syncscan logic? */
    SO_ALLOW_SYNC = 1 << 7,
    /* verify visibility page-at-a-time? */
    SO_ALLOW_PAGEMODE = 1 << 8,

    /* unregister snapshot at scan end? */
    SO_TEMP_SNAPSHOT = 1 << 9,

    /*
     * At the discretion of the table AM, bitmap table scans may be able to
     * skip fetching a block from the table if none of the table data is
     * needed. If table data may be needed, set SO_NEED_TUPLES.
     */
    SO_NEED_TUPLES = 1 << 10,
} ScanOptions;

/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
    /*
     * Signals that the action succeeded (i.e. update/delete performed, lock
     * was acquired)
     */
    TM_Ok,

    /* The affected tuple wasn't visible to the relevant snapshot */
    TM_Invisible,

    /* The affected tuple was already modified by the calling backend */
    TM_SelfModified,

    /*
     * The affected tuple was updated by another transaction. This includes
     * the case where tuple was moved to another partition.
     */
    TM_Updated,

    /* The affected tuple was deleted by another transaction */
    TM_Deleted,

    /*
     * The affected tuple is currently being modified by another session. This
     * will only be returned if table_(update/delete/lock_tuple) are
     * instructed not to wait.
     */
    TM_BeingModified,

    /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
    TM_WouldBlock,
} TM_Result;

/*
 * Result codes for table_update(..., update_indexes*..).
 * Used to determine which indexes to update.
 */
typedef enum TU_UpdateIndexes
{
    /* No indexed columns were updated (incl. TID addressing of tuple) */
    TU_None,

    /* A non-summarizing indexed column was updated, or the TID has changed */
    TU_All,

    /* Only summarized columns were updated, TID is unchanged */
    TU_Summarizing,
} TU_UpdateIndexes;

/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID. If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero. (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
    ItemPointerData ctid;
    TransactionId xmax;
    CommandId   cmax;
    bool        traversed;
} TM_FailureData;

/*
 * State used when calling table_index_delete_tuples().
 *
 * Represents the status of table tuples, referenced by table TID and taken by
 * index AM from index tuples. State consists of high level parameters of the
 * deletion operation, plus two mutable palloc()'d arrays for information
 * about the status of individual table tuples. These are conceptually one
 * single array. Using two arrays keeps the TM_IndexDelete struct small,
 * which makes sorting the first array (the deltids array) fast.
 *
 * Some index AM callers perform simple index tuple deletion (by specifying
 * bottomup = false), and include only known-dead deltids. These known-dead
 * entries are all marked knowndeletable = true directly (typically these are
 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
 *
 * Callers that specify bottomup = true are "bottom-up index deletion"
 * callers. The considerations for the tableam are more subtle with these
 * callers because they ask the tableam to perform highly speculative work,
 * and might only expect the tableam to check a small fraction of all entries.
 * Caller is not allowed to specify knowndeletable = true for any entry
 * because everything is highly speculative. Bottom-up caller provides
 * context and hints to tableam -- see comments below for details on how index
 * AMs and tableams should coordinate during bottom-up index deletion.
 *
 * Simple index deletion callers may ask the tableam to perform speculative
 * work, too. This is a little like bottom-up deletion, but not too much.
 * The tableam will only perform speculative work when it's practically free
 * to do so in passing for simple deletion caller (while always performing
 * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
 * be deleted within index AM). This is the real reason why it's possible for
 * simple index deletion caller to specify knowndeletable = false up front
 * (this means "check if it's possible for me to delete corresponding index
 * tuple when it's cheap to do so in passing"). The index AM should only
 * include "extra" entries for index tuples whose TIDs point to a table block
 * that tableam is expected to have to visit anyway (in the event of a block
 * orientated tableam). The tableam isn't strictly obligated to check these
 * "extra" TIDs, but a block-based AM should always manage to do so in
 * practice.
 *
 * The final contents of the deltids/status arrays are interesting to callers
 * that ask tableam to perform speculative work (i.e. when _any_ items have
 * knowndeletable set to false up front). These index AM callers will
 * naturally need to consult final state to determine which index tuples are
 * in fact deletable.
 *
 * The index AM can keep track of which index tuple relates to which deltid by
 * setting idxoffnum (and/or relying on each entry being uniquely identifiable
 * using tid), which is important when the final contents of the array will
 * need to be interpreted -- the array can shrink from initial size after
 * tableam processing and/or have entries in a new order (tableam may sort
 * deltids array for its own reasons). Bottom-up callers may find that final
 * ndeltids is 0 on return from call to tableam, in which case no index tuple
 * deletions are possible. Simple deletion callers can rely on any entries
 * they know to be deletable appearing in the final array as deletable.
 */
typedef struct TM_IndexDelete
{
    ItemPointerData tid;        /* table TID from index tuple */
    int16       id;             /* Offset into TM_IndexStatus array */
} TM_IndexDelete;

typedef struct TM_IndexStatus
{
    OffsetNumber idxoffnum;     /* Index am page offset number */
    bool        knowndeletable; /* Currently known to be deletable? */

    /* Bottom-up index deletion specific fields follow */
    bool        promising;      /* Promising (duplicate) index tuple? */
    int16       freespace;      /* Space freed in index if deleted */
} TM_IndexStatus;

/*
 * Index AM/tableam coordination is central to the design of bottom-up index
 * deletion. The index AM provides hints about where to look to the tableam
 * by marking some entries as "promising". Index AM does this with duplicate
 * index tuples that are strongly suspected to be old versions left behind by
 * UPDATEs that did not logically modify indexed values. Index AM may find it
 * helpful to only mark entries as promising when they're thought to have been
 * affected by such an UPDATE in the recent past.
 *
 * Bottom-up index deletion casts a wide net at first, usually by including
 * all TIDs on a target index page. It is up to the tableam to worry about
 * the cost of checking transaction status information. The tableam is in
 * control, but needs careful guidance from the index AM. Index AM requests
 * that bottomupfreespace target be met, while tableam measures progress
 * towards that goal by tallying the per-entry freespace value for known
 * deletable entries. (All !bottomup callers can just set these space related
 * fields to zero.)
 */
typedef struct TM_IndexDeleteOp
{
    Relation    irel;           /* Target index relation */
    BlockNumber iblknum;        /* Index block number (for error reports) */
    bool        bottomup;       /* Bottom-up (not simple) deletion? */
    int         bottomupfreespace;  /* Bottom-up space target */

    /* Mutable per-TID information follows (index AM initializes entries) */
    int         ndeltids;       /* Current # of deltids/status elements */
    TM_IndexDelete *deltids;    /* Delete request array */
    TM_IndexStatus *status;     /* Status/mutable per-TID information */
} TM_IndexDeleteOp;

258 /* "options" flag bits for table_tuple_insert */
259 /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
260 #define TABLE_INSERT_SKIP_FSM 0x0002
261 #define TABLE_INSERT_FROZEN 0x0004
262 #define TABLE_INSERT_NO_LOGICAL 0x0008
263 
264 /* flag bits for table_tuple_lock */
265 /* Follow tuples whose update is in progress if lock modes don't conflict */
266 #define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
267 /* Follow update chain and lock latest version of tuple */
268 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
269 
270 
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
                                    ItemPointer tid,
                                    Datum *values,
                                    bool *isnull,
                                    bool tupleIsAlive,
                                    void *state);

/*
 * API struct for a table AM. Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then gets
 * returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly, use the
 * table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in, remember
 * to update when adding a callback.
 */
typedef struct TableAmRoutine
{
    /* this must be set to T_TableAmRoutine */
    NodeTag     type;


    /* ------------------------------------------------------------------------
     * Slot related callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Return slot implementation suitable for storing a tuple of this AM.
     */
    const TupleTableSlotOps *(*slot_callbacks) (Relation rel);


    /* ------------------------------------------------------------------------
     * Table scan callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Start a scan of `rel`. The callback has to return a TableScanDesc,
     * which will typically be embedded in a larger, AM specific, struct.
     *
     * If nkeys != 0, the results need to be filtered by those scan keys.
     *
     * pscan, if not NULL, will have already been initialized with
     * parallelscan_initialize(), and has to be for the same relation. Will
     * only be set coming from table_beginscan_parallel().
     *
     * `flags` is a bitmask indicating the type of scan (ScanOptions's
     * SO_TYPE_*, currently only one may be specified), options controlling
     * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
     * specified, an AM may ignore unsupported ones) and whether the snapshot
     * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
     */
    TableScanDesc (*scan_begin) (Relation rel,
                                 Snapshot snapshot,
                                 int nkeys, struct ScanKeyData *key,
                                 ParallelTableScanDesc pscan,
                                 uint32 flags);

    /*
     * Release resources and deallocate scan. If TableScanDesc.temp_snap,
     * TableScanDesc.rs_snapshot needs to be unregistered.
     */
    void        (*scan_end) (TableScanDesc scan);

    /*
     * Restart relation scan. If set_params is set to true, allow_{strat,
     * sync, pagemode} (see scan_begin) changes should be taken into account.
     */
    void        (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
                                bool set_params, bool allow_strat,
                                bool allow_sync, bool allow_pagemode);

    /*
     * Return next tuple from `scan`, store in slot.
     */
    bool        (*scan_getnextslot) (TableScanDesc scan,
                                     ScanDirection direction,
                                     TupleTableSlot *slot);

    /*-----------
     * Optional functions to provide scanning for ranges of ItemPointers.
     * Implementations must either provide both of these functions, or neither
     * of them.
     *
     * Implementations of scan_set_tidrange must themselves handle
     * ItemPointers of any value. i.e., they must handle each of the following:
     *
     * 1) mintid or maxtid is beyond the end of the table; and
     * 2) mintid is above maxtid; and
     * 3) item offset for mintid or maxtid is beyond the maximum offset
     *    allowed by the AM.
     *
     * Implementations can assume that scan_set_tidrange is always called
     * before scan_getnextslot_tidrange or after scan_rescan and before any
     * further calls to scan_getnextslot_tidrange.
     */
    void        (*scan_set_tidrange) (TableScanDesc scan,
                                      ItemPointer mintid,
                                      ItemPointer maxtid);

    /*
     * Return next tuple from `scan` that's in the range of TIDs defined by
     * scan_set_tidrange.
     */
    bool        (*scan_getnextslot_tidrange) (TableScanDesc scan,
                                              ScanDirection direction,
                                              TupleTableSlot *slot);

    /* ------------------------------------------------------------------------
     * Parallel table scan related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Estimate the size of shared memory needed for a parallel scan of this
     * relation. The snapshot does not need to be accounted for.
     */
    Size        (*parallelscan_estimate) (Relation rel);

    /*
     * Initialize ParallelTableScanDesc for a parallel scan of this relation.
     * `pscan` will be sized according to parallelscan_estimate() for the same
     * relation.
     */
    Size        (*parallelscan_initialize) (Relation rel,
                                            ParallelTableScanDesc pscan);

    /*
     * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
     * when `pscan` was initialized by parallelscan_initialize.
     */
    void        (*parallelscan_reinitialize) (Relation rel,
                                              ParallelTableScanDesc pscan);


    /* ------------------------------------------------------------------------
     * Index Scan Callbacks
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch tuples from the relation, as needed when fetching
     * tuples for an index scan. The callback has to return an
     * IndexFetchTableData, which the AM will typically embed in a larger
     * structure with additional information.
     *
     * Tuples for an index scan can then be fetched via index_fetch_tuple.
     */
    struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

    /*
     * Reset index fetch. Typically this will release cross index fetch
     * resources held in IndexFetchTableData.
     */
    void        (*index_fetch_reset) (struct IndexFetchTableData *data);

    /*
     * Release resources and deallocate index fetch.
     */
    void        (*index_fetch_end) (struct IndexFetchTableData *data);

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the visibility
     * test, return true, false otherwise.
     *
     * Note that AMs that do not necessarily update indexes when indexed
     * columns do not change, need to return the current/correct version of
     * the tuple that is visible to the snapshot, even if the tid points to an
     * older version of the tuple.
     *
     * *call_again is false on the first call to index_fetch_tuple for a tid.
     * If there potentially is another tuple matching the tid, *call_again
     * needs to be set to true by index_fetch_tuple, signaling to the caller
     * that index_fetch_tuple should be called again for the same tid.
     *
     * *all_dead, if all_dead is not NULL, should be set to true by
     * index_fetch_tuple iff it is guaranteed that no backend needs to see
     * that tuple. Index AMs can use that to avoid returning that tid in
     * future searches.
     */
    bool        (*index_fetch_tuple) (struct IndexFetchTableData *scan,
                                      ItemPointer tid,
                                      Snapshot snapshot,
                                      TupleTableSlot *slot,
                                      bool *call_again, bool *all_dead);


    /* ------------------------------------------------------------------------
     * Callbacks for non-modifying operations on individual tuples
     * ------------------------------------------------------------------------
     */

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the visibility
     * test, returns true, false otherwise.
     */
    bool        (*tuple_fetch_row_version) (Relation rel,
                                            ItemPointer tid,
                                            Snapshot snapshot,
                                            TupleTableSlot *slot);

    /*
     * Is tid valid for a scan of this relation?
     */
    bool        (*tuple_tid_valid) (TableScanDesc scan,
                                    ItemPointer tid);

    /*
     * Return the latest version of the tuple at `tid`, by updating `tid` to
     * point at the newest version.
     */
    void        (*tuple_get_latest_tid) (TableScanDesc sscan,
                                         ItemPointer tid);

    /*
     * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
     * the appropriate type for the AM.
     */
    bool        (*tuple_satisfies_snapshot) (Relation rel,
                                             TupleTableSlot *slot,
                                             Snapshot snapshot);

    /* see table_index_delete_tuples() */
    TransactionId (*index_delete_tuples) (Relation rel,
                                          TM_IndexDeleteOp *delstate);


    /* ------------------------------------------------------------------------
     * Manipulations of physical tuples.
     * ------------------------------------------------------------------------
     */

    /* see table_tuple_insert() for reference about parameters */
    void        (*tuple_insert) (Relation rel, TupleTableSlot *slot,
                                 CommandId cid, int options,
                                 struct BulkInsertStateData *bistate);

    /* see table_tuple_insert_speculative() for reference about parameters */
    void        (*tuple_insert_speculative) (Relation rel,
                                             TupleTableSlot *slot,
                                             CommandId cid,
                                             int options,
                                             struct BulkInsertStateData *bistate,
                                             uint32 specToken);

    /* see table_tuple_complete_speculative() for reference about parameters */
    void        (*tuple_complete_speculative) (Relation rel,
                                               TupleTableSlot *slot,
                                               uint32 specToken,
                                               bool succeeded);

    /* see table_multi_insert() for reference about parameters */
    void        (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
                                 CommandId cid, int options, struct BulkInsertStateData *bistate);

    /* see table_tuple_delete() for reference about parameters */
    TM_Result   (*tuple_delete) (Relation rel,
                                 ItemPointer tid,
                                 CommandId cid,
                                 Snapshot snapshot,
                                 Snapshot crosscheck,
                                 bool wait,
                                 TM_FailureData *tmfd,
                                 bool changingPart);

    /* see table_tuple_update() for reference about parameters */
    TM_Result   (*tuple_update) (Relation rel,
                                 ItemPointer otid,
                                 TupleTableSlot *slot,
                                 CommandId cid,
                                 Snapshot snapshot,
                                 Snapshot crosscheck,
                                 bool wait,
                                 TM_FailureData *tmfd,
                                 LockTupleMode *lockmode,
                                 TU_UpdateIndexes *update_indexes);

    /* see table_tuple_lock() for reference about parameters */
    TM_Result   (*tuple_lock) (Relation rel,
                               ItemPointer tid,
                               Snapshot snapshot,
                               TupleTableSlot *slot,
                               CommandId cid,
                               LockTupleMode mode,
                               LockWaitPolicy wait_policy,
                               uint8 flags,
                               TM_FailureData *tmfd);

    /*
     * Perform operations necessary to complete insertions made via
     * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
     * access methods ceased to use this.
     *
     * Typically callers of tuple_insert and multi_insert will just pass all
     * the flags that apply to them, and each AM has to decide which of them
     * make sense for it, and then only take actions in finish_bulk_insert for
     * those flags, and ignore others.
     *
     * Optional callback.
     */
    void        (*finish_bulk_insert) (Relation rel, int options);


    /* ------------------------------------------------------------------------
     * DDL related functionality.
     * ------------------------------------------------------------------------
     */

    /*
     * This callback needs to create new relation storage for `rel`, with
     * appropriate durability behaviour for `persistence`.
     *
     * Note that only the subset of the relcache filled by
     * RelationBuildLocalRelation() can be relied upon and that the relation's
     * catalog entries will either not yet exist (new relation), or will still
     * reference the old relfilelocator.
     *
     * As output *freezeXid, *minmulti must be set to the values appropriate
     * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
     * fields to be filled they can be set to InvalidTransactionId and
     * InvalidMultiXactId, respectively.
     *
     * See also table_relation_set_new_filelocator().
     */
    void        (*relation_set_new_filelocator) (Relation rel,
                                                 const RelFileLocator *newrlocator,
                                                 char persistence,
                                                 TransactionId *freezeXid,
                                                 MultiXactId *minmulti);

    /*
     * This callback needs to remove all contents from `rel`'s current
     * relfilelocator. No provisions for transactional behaviour need to be
     * made. Often this can be implemented by truncating the underlying
     * storage to its minimal size.
     *
     * See also table_relation_nontransactional_truncate().
     */
    void        (*relation_nontransactional_truncate) (Relation rel);

    /*
     * See table_relation_copy_data().
     *
     * This can typically be implemented by directly copying the underlying
     * storage, unless it contains references to the tablespace internally.
     */
    void        (*relation_copy_data) (Relation rel,
                                       const RelFileLocator *newrlocator);

    /* See table_relation_copy_for_cluster() */
    void        (*relation_copy_for_cluster) (Relation OldTable,
                                              Relation NewTable,
                                              Relation OldIndex,
                                              bool use_sort,
                                              TransactionId OldestXmin,
                                              TransactionId *xid_cutoff,
                                              MultiXactId *multi_cutoff,
                                              double *num_tuples,
                                              double *tups_vacuumed,
                                              double *tups_recently_dead);

    /*
     * React to VACUUM command on the relation. The VACUUM can be triggered by
     * a user or by autovacuum. The specific actions performed by the AM will
     * depend heavily on the individual AM.
     *
     * On entry a transaction is already established, and the relation is
     * locked with a ShareUpdateExclusive lock.
     *
     * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
     * this routine, even if (for ANALYZE) it is part of the same VACUUM
     * command.
     *
     * There probably, in the future, needs to be a separate callback to
     * integrate with autovacuum's scheduling.
     */
    void        (*relation_vacuum) (Relation rel,
                                    struct VacuumParams *params,
                                    BufferAccessStrategy bstrategy);

    /*
     * Prepare to analyze block `blockno` of `scan`. The scan has been started
     * with table_beginscan_analyze(). See also
     * table_scan_analyze_next_block().
     *
     * The callback may acquire resources like locks that are held until
     * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
     * to hold a lock until all tuples on a block have been analyzed by
     * scan_analyze_next_tuple.
     *
     * The callback can return false if the block is not suitable for
     * sampling, e.g. because it's a metapage that could never contain tuples.
     *
     * XXX: This obviously is primarily suited for block-based AMs. It's not
     * clear what a good interface for non block based AMs would be, so there
     * isn't one yet.
     */
    bool        (*scan_analyze_next_block) (TableScanDesc scan,
                                            ReadStream *stream);

    /*
     * See table_scan_analyze_next_tuple().
     *
     * Not every AM might have a meaningful concept of dead rows, in which
     * case it's OK to not increment *deadrows - but note that that may
     * influence autovacuum scheduling (see comment for relation_vacuum
     * callback).
     */
    bool        (*scan_analyze_next_tuple) (TableScanDesc scan,
                                            TransactionId OldestXmin,
                                            double *liverows,
                                            double *deadrows,
                                            TupleTableSlot *slot);

    /* see table_index_build_range_scan for reference about parameters */
    double      (*index_build_range_scan) (Relation table_rel,
                                           Relation index_rel,
                                           struct IndexInfo *index_info,
                                           bool allow_sync,
                                           bool anyvisible,
                                           bool progress,
                                           BlockNumber start_blockno,
                                           BlockNumber numblocks,
                                           IndexBuildCallback callback,
                                           void *callback_state,
                                           TableScanDesc scan);

    /* see table_index_validate_scan for reference about parameters */
    void        (*index_validate_scan) (Relation table_rel,
                                        Relation index_rel,
                                        struct IndexInfo *index_info,
                                        Snapshot snapshot,
                                        struct ValidateIndexState *state);


    /* ------------------------------------------------------------------------
     * Miscellaneous functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_size().
     *
     * Note that currently a few callers use the MAIN_FORKNUM size to figure
     * out the range of potentially interesting blocks (brin, analyze). It's
     * probable that we'll need to revise the interface for those at some
     * point.
     */
    uint64      (*relation_size) (Relation rel, ForkNumber forkNumber);


    /*
     * This callback should return true if the relation requires a TOAST table
     * and false if it does not. It may wish to examine the relation's tuple
     * descriptor before making a decision, but if it uses some other method
     * of storing large values (or if it does not support them) it can simply
     * return false.
     */
    bool        (*relation_needs_toast_table) (Relation rel);

    /*
     * This callback should return the OID of the table AM that implements
     * TOAST tables for this AM. If the relation_needs_toast_table callback
     * always returns false, this callback is not required.
     */
    Oid         (*relation_toast_am) (Relation rel);

    /*
     * This callback is invoked when detoasting a value stored in a toast
     * table implemented by this AM. See table_relation_fetch_toast_slice()
     * for more details.
     */
    void        (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
                                               int32 attrsize,
                                               int32 sliceoffset,
                                               int32 slicelength,
                                               struct varlena *result);


    /* ------------------------------------------------------------------------
     * Planner related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_estimate_size().
     *
     * While block oriented, it shouldn't be too hard for an AM that doesn't
     * internally use blocks to convert into a usable representation.
     *
     * This differs from the relation_size callback by returning size
     * estimates (both relation size and tuple count) for planning purposes,
     * rather than returning a currently correct estimate.
     */
    void        (*relation_estimate_size) (Relation rel, int32 *attr_widths,
                                           BlockNumber *pages, double *tuples,
                                           double *allvisfrac);


    /* ------------------------------------------------------------------------
     * Executor related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
     * of a bitmap table scan. `scan` was started via table_beginscan_bm().
     * Return false if there are no tuples to be found on the page, true
     * otherwise.
     *
     * This will typically read and pin the target block, and do the necessary
     * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
     * make sense to perform tuple visibility checks at this time). For some
     * AMs it will make more sense to do all the work referencing `tbmres`
     * contents here, for others it might be better to defer more work to
     * scan_bitmap_next_tuple.
     *
     * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
     * on the page have to be returned, otherwise the tuples at offsets in
     * `tbmres->offsets` need to be returned.
     *
     * XXX: Currently this may only be implemented if the AM uses md.c as its
     * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
     * blockids directly to the underlying storage. nodeBitmapHeapscan.c
     * performs prefetching directly using that interface. This probably
     * needs to be rectified at a later point.
     *
     * XXX: Currently this may only be implemented if the AM uses the
     * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
     * perform prefetching. This probably needs to be rectified at a later
     * point.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool        (*scan_bitmap_next_block) (TableScanDesc scan,
                                           struct TBMIterateResult *tbmres);

    /*
     * Fetch the next tuple of a bitmap table scan into `slot` and return true
     * if a visible tuple was found, false otherwise.
     *
     * For some AMs it will make more sense to do all the work referencing
     * `tbmres` contents in scan_bitmap_next_block, for others it might be
     * better to defer more work to this callback.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool        (*scan_bitmap_next_tuple) (TableScanDesc scan,
                                           struct TBMIterateResult *tbmres,
                                           TupleTableSlot *slot);

    /*
     * Prepare to fetch tuples from the next block in a sample scan. Return
     * false if the sample scan is finished, true otherwise. `scan` was
     * started via table_beginscan_sampling().
     *
     * Typically this will first determine the target block by calling the
     * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
     * perform a sequential scan over all blocks. The determined block is
     * then typically read and pinned.
     *
     * As the TsmRoutine interface is block based, a block needs to be passed
     * to NextSampleBlock(). If that's not appropriate for an AM, it
     * internally needs to perform mapping between the internal and a block
     * based representation.
     *
     * Note that it's not acceptable to hold deadlock prone resources such as
     * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
     * block - the tuple is likely to be returned to an upper query node, and
     * the next call could be off a long while. Holding buffer pins and such
     * is obviously OK.
     *
     * Currently it is required to implement this interface, as there's no
     * alternative way (contrary e.g. to bitmap scans) to implement sample
     * scans. If infeasible to implement, the AM may raise an error.
     */
    bool        (*scan_sample_next_block) (TableScanDesc scan,
                                           struct SampleScanState *scanstate);

    /*
     * This callback, only called after scan_sample_next_block has returned
     * true, should determine the next tuple to be returned from the selected
     * block using the TsmRoutine's NextSampleTuple() callback.
     *
     * The callback needs to perform visibility checks, and only return
     * visible tuples. That obviously can mean calling NextSampleTuple()
     * multiple times.
     *
     * The TsmRoutine interface assumes that there's a maximum offset on a
     * given page, so if that doesn't apply to an AM, it needs to emulate that
     * assumption somehow.
     */
    bool        (*scan_sample_next_tuple) (TableScanDesc scan,
                                           struct SampleScanState *scanstate,
                                           TupleTableSlot *slot);
} TableAmRoutine;


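/*
 * A minimal sketch of how an AM exposes this struct: the extension defines a
 * server-lifetime static const TableAmRoutine and a handler function that
 * pg_am.amhandler resolves to. The names "example_methods" and
 * "example_tableam_handler" (and every example_* callback) are hypothetical;
 * a real AM must fill in every required callback, since GetTableAmRoutine()
 * asserts that they are present. The handler itself needs fmgr.h:
 *
 *  static const TableAmRoutine example_methods = {
 *      .type = T_TableAmRoutine,
 *      .slot_callbacks = example_slot_callbacks,
 *      .scan_begin = example_scan_begin,
 *      .scan_end = example_scan_end,
 *      .scan_rescan = example_scan_rescan,
 *      ...                             (every other required callback)
 *  };
 *
 *  Datum
 *  example_tableam_handler(PG_FUNCTION_ARGS)
 *  {
 *      PG_RETURN_POINTER(&example_methods);
 *  }
 */
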
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation. Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);

/*
 * Returns slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);


/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`. Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
                int nkeys, struct ScanKeyData *key)
{
    uint32      flags = SO_TYPE_SEQSCAN |
        SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning catalog. It'll automatically use a
 * snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
                                             struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero). Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
                      int nkeys, struct ScanKeyData *key,
                      bool allow_strat, bool allow_sync)
{
    uint32      flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan. Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality to
 * make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
                   int nkeys, struct ScanKeyData *key, bool need_tuple)
{
    uint32      flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

    if (need_tuple)
        flags |= SO_NEED_TUPLES;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
                         int nkeys, struct ScanKeyData *key,
                         bool allow_strat, bool allow_sync,
                         bool allow_pagemode)
{
    uint32      flags = SO_TYPE_SAMPLESCAN;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;
    if (allow_pagemode)
        flags |= SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
    uint32      flags = SO_TYPE_TIDSCAN;

    return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
    uint32      flags = SO_TYPE_ANALYZE;

    return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
    scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
             struct ScanKeyData *key)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan. Note that although the actual use of
 * syncscan might change (effectively, enabling or disabling reporting), the
 * previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
                        bool allow_strat, bool allow_sync, bool allow_pagemode)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
                                         allow_strat, allow_sync,
                                         allow_pagemode);
}

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
{
    slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);

    /* We don't expect actual scans using NoMovementScanDirection */
    Assert(direction == ForwardScanDirection ||
           direction == BackwardScanDirection);

    /*
     * We don't expect direct calls to table_scan_getnextslot with valid
     * CheckXidAlive for catalog or regular tables. See detailed comments in
     * xact.c where these variables are declared.
     */
    if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
        elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");

    return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}

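/*
 * A minimal usage sketch of the scan API above: count every visible tuple in
 * a relation. This example function is not part of the tableam API; it
 * assumes the caller holds a suitable lock on `rel` and that `snapshot`
 * remains registered for the duration of the scan.
 */
static inline uint64
table_example_count_tuples(Relation rel, Snapshot snapshot)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
    uint64      ntuples = 0;

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
        ntuples++;

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);

    return ntuples;
}
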
/* ----------------------------------------------------------------------------
 * TID Range scanning related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
 * for a TID range scan.
 */
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
                         ItemPointer mintid,
                         ItemPointer maxtid)
{
    TableScanDesc sscan;
    uint32      flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;

    sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);

    /* Set the range of TIDs to scan */
    sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);

    return sscan;
}

/*
 * table_rescan_tidrange resets the scan position and sets the minimum and
 * maximum TID range to scan for a TableScanDesc created by
 * table_beginscan_tidrange.
 */
static inline void
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
                      ItemPointer maxtid)
{
    /* Ensure table_beginscan_tidrange() was used. */
    Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

    sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
    sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
}

/*
 * Fetch the next tuple from `sscan` for a TID range scan created by
 * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
 * or returns false if no more tuples exist in the range.
 */
static inline bool
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
                                TupleTableSlot *slot)
{
    /* Ensure table_beginscan_tidrange() was used. */
    Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

    /* We don't expect actual scans using NoMovementScanDirection */
    Assert(direction == ForwardScanDirection ||
           direction == BackwardScanDirection);

    return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
                                                               direction,
                                                               slot);
}


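/*
 * A usage sketch of the TID range API above: count the tuples stored in the
 * first ten blocks of a relation. Not part of the tableam API; the same
 * locking and snapshot assumptions as for a plain scan apply.
 */
static inline uint64
table_example_count_tidrange(Relation rel, Snapshot snapshot)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan;
    ItemPointerData mintid;
    ItemPointerData maxtid;
    uint64      ntuples = 0;

    /* block 0, first offset ... block 9, highest possible offset */
    ItemPointerSet(&mintid, 0, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 9, MaxOffsetNumber);

    scan = table_beginscan_tidrange(rel, snapshot, &mintid, &maxtid);
    while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
        ntuples++;
    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);

    return ntuples;
}
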
/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this
 * relation. `pscan` needs to be sized according to parallelscan_estimate()
 * for the same relation. Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
                                          ParallelTableScanDesc pscan,
                                          Snapshot snapshot);

/*
 * Begin a parallel scan. `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation. The initialization
 * does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation relation,
                                              ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan. Call this in the leader process. Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
    rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}


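/*
 * A sketch of the division of labor for a parallel scan. Not part of the
 * tableam API; `pscan` must point at shared memory of at least
 * table_parallelscan_estimate(rel, snapshot) bytes, typically carved out of
 * the query's DSM segment, and only the leader may run the initialization.
 */
static inline TableScanDesc
table_example_parallel_attach(Relation rel, Snapshot snapshot,
                              ParallelTableScanDesc pscan, bool is_leader)
{
    /* leader: initialize the shared descriptor exactly once */
    if (is_leader)
        table_parallelscan_initialize(rel, pscan, snapshot);

    /*
     * leader and every worker: attach to the shared state, then consume
     * tuples with table_scan_getnextslot() and finish with table_endscan().
     */
    return table_beginscan_parallel(rel, pscan);
}
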
/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
    return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch. Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 * a visibility test according to `snapshot`. If a tuple was found and passed
 * the visibility test, returns true, false otherwise. Note that *tid may be
 * modified when we return true (see later remarks on multiple row versions
 * reachable via a single index entry).
 *
 * *call_again needs to be false on the first call to table_index_fetch_tuple() for
 * a tid. If there potentially is another tuple matching the tid, *call_again
 * will be set to true, signaling that table_index_fetch_tuple() should be called
 * again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 * that tuple. Index AMs can use that to avoid returning that tid in future
 * searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single index
 * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
 * evaluates the tuple exactly at `tid`. Outside of index entry -> table tuple
 * lookups, table_tuple_fetch_row_version() is what's usually needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
                        ItemPointer tid,
                        Snapshot snapshot,
                        TupleTableSlot *slot,
                        bool *call_again, bool *all_dead)
{
    /*
     * We don't expect direct calls to table_index_fetch_tuple with valid
     * CheckXidAlive for catalog or regular tables. See detailed comments in
     * xact.c where these variables are declared.
     */
    if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
        elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");

    return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
                                                    slot, call_again,
                                                    all_dead);
}

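/*
 * A sketch of how an index AM caller drains all row versions reachable from
 * a single index TID (cf. the loop in index_fetch_heap()). Not part of the
 * tableam API; returns true as soon as a visible version is stored in
 * `slot`.
 */
static inline bool
table_example_fetch_via_index(struct IndexFetchTableData *fetch,
                              ItemPointer tid, Snapshot snapshot,
                              TupleTableSlot *slot)
{
    bool        call_again = false;
    bool        all_dead = false;

    do
    {
        if (table_index_fetch_tuple(fetch, tid, snapshot, slot,
                                    &call_again, &all_dead))
            return true;        /* visible version stored in slot */
    } while (call_again);

    /*
     * At this point all_dead, if set, would tell the index AM that no
     * backend can need the entry anymore (e.g. it could be marked LP_DEAD);
     * this sketch simply ignores it.
     */
    return false;
}
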
/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry. This likely is only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
                                          ItemPointer tid,
                                          Snapshot snapshot,
                                          bool *all_dead);


/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */


/*
 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
 * `snapshot`. If a tuple was found and passed the visibility test, returns
 * true, false otherwise.
 *
 * See table_index_fetch_tuple's comment about what the difference between
 * these functions is. It is correct to use this function outside of index
 * entry->table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
                              ItemPointer tid,
                              Snapshot snapshot,
                              TupleTableSlot *slot)
{
    /*
     * We don't expect direct calls to table_tuple_fetch_row_version with
     * valid CheckXidAlive for catalog or regular tables. See detailed
     * comments in xact.c where these variables are declared.
     */
    if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
        elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");

    return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}

/*
 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
 * mean that the pointed-to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
    return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc sscan, ItemPointer tid);

/*
 * Return true iff tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect. If so
 * they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                               Snapshot snapshot)
{
    return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Determine which index tuples are safe to delete based on their table TID.
 *
 * Determines which entries from index AM caller's TM_IndexDeleteOp state
 * point to vacuumable table tuples. Entries that are found by tableam to be
 * vacuumable are naturally safe for index AM to delete, and so get directly
 * marked as deletable. See comments above TM_IndexDelete and comments above
 * TM_IndexDeleteOp for full details.
 *
 * Returns a snapshotConflictHorizon transaction ID that caller places in
 * its index deletion WAL record. This might be used during subsequent REDO
 * of the WAL record when in Hot Standby mode -- a recovery conflict for the
 * index deletion operation might be required on the standby.
 */
static inline TransactionId
table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
{
    return rel->rd_tableam->index_delete_tuples(rel, delstate);
}

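/*
 * A sketch of how a simple-deletion (bottomup = false) index AM caller
 * assembles its request before calling table_index_delete_tuples(). Not
 * part of the tableam API; `tids`, `offnums`, and `ntids` are hypothetical
 * inputs that the index AM collected from one index page, here all assumed
 * to be known-dead (e.g. LP_DEAD-marked) already.
 */
static inline TransactionId
table_example_simple_index_delete(Relation table_rel, Relation index_rel,
                                  BlockNumber iblknum,
                                  const ItemPointerData *tids,
                                  const OffsetNumber *offnums, int ntids)
{
    TM_IndexDeleteOp delstate;

    delstate.irel = index_rel;
    delstate.iblknum = iblknum;
    delstate.bottomup = false;
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = ntids;
    delstate.deltids = (TM_IndexDelete *) palloc(ntids * sizeof(TM_IndexDelete));
    delstate.status = (TM_IndexStatus *) palloc(ntids * sizeof(TM_IndexStatus));

    for (int i = 0; i < ntids; i++)
    {
        delstate.deltids[i].tid = tids[i];
        delstate.deltids[i].id = (int16) i;
        delstate.status[i].idxoffnum = offnums[i];
        delstate.status[i].knowndeletable = true;   /* known dead up front */
        delstate.status[i].promising = false;       /* bottom-up only */
        delstate.status[i].freespace = 0;           /* bottom-up only */
    }

    /* the returned XID becomes snapshotConflictHorizon in the WAL record */
    return table_index_delete_tuples(table_rel, &delstate);
}
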

/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot into table AM routine.
 *
 * The options bitmask allows the caller to specify options that may change the
 * behaviour of the AM. The AM will ignore options that it does not support.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 * free space in the relation. This can save some cycles when we know the
 * relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple. See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relation storage created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple. This should solely be used during table rewrites
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 * relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple. If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion. But note that any toasting of fields within the slot is NOT
 * reflected in the slot's contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
                   int options, struct BulkInsertStateData *bistate)
{
    rel->rd_tableam->tuple_insert(rel, slot, cid, options,
                                  bistate);
}

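/*
 * A usage sketch of table_tuple_insert(): build one row in a virtual slot
 * and insert it. Not part of the tableam API; `values`/`isnull` must match
 * the relation's tuple descriptor, and `cid` would normally come from
 * GetCurrentCommandId(true).
 */
static inline void
table_example_insert_row(Relation rel, const Datum *values,
                         const bool *isnull, CommandId cid)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    int         natts = RelationGetDescr(rel)->natts;

    ExecClearTuple(slot);
    memcpy(slot->tts_values, values, natts * sizeof(Datum));
    memcpy(slot->tts_isnull, isnull, natts * sizeof(bool));
    ExecStoreVirtualTuple(slot);

    /* no options, no bulk-insert state: plain single-row insertion */
    table_tuple_insert(rel, slot, cid, 0, NULL);

    ExecDropSingleTupleTableSlot(slot);
}
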
/*
 * Perform a "speculative insertion". These can be backed out afterwards
 * without aborting the whole transaction. Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed. Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
                               CommandId cid, int options,
                               struct BulkInsertStateData *bistate,
                               uint32 specToken)
{
    rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
                                              bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction. If
 * succeeded is true, the tuple is fully inserted, if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
                                 uint32 specToken, bool succeeded)
{
    rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
                                                succeeded);
}

/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation. That's often faster than calling table_tuple_insert() in a loop,
 * because e.g. the AM can reduce WAL logging and page locking overhead.
 *
 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context. You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
                   CommandId cid, int options, struct BulkInsertStateData *bistate)
{
    rel->rd_tableam->multi_insert(rel, slots, nslots,
                                  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *  relation - table to be modified (caller must hold suitable lock)
 *  tid - TID of tuple to be deleted
 *  cid - delete command ID (used for visibility test, and stored into
 *      cmax if successful)
 *  crosscheck - if not InvalidSnapshot, also check tuple against this
 *  wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *  tmfd - filled in failure cases (see below)
 *  changingPart - true iff the tuple is being moved to another partition
 *      table due to an update of the partition key. Otherwise, false.
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct
 * TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
                   Snapshot snapshot, Snapshot crosscheck, bool wait,
                   TM_FailureData *tmfd, bool changingPart)
{
    return rel->rd_tableam->tuple_delete(rel, tid, cid,
                                         snapshot, crosscheck,
                                         wait, tmfd, changingPart);
}

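/*
 * A sketch of minimal TM_Result handling for table_tuple_delete(), in the
 * spirit of the executor's ExecDelete(). Not part of the tableam API; a
 * real caller must also honor tmfd, e.g. chase tmfd.ctid to the replacement
 * tuple after TM_Updated.
 */
static inline bool
table_example_delete_row(Relation rel, ItemPointer tid, CommandId cid,
                         Snapshot snapshot)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = table_tuple_delete(rel, tid, cid, snapshot, InvalidSnapshot,
                                true /* wait */ , &tmfd, false);

    switch (result)
    {
        case TM_Ok:
            return true;
        case TM_SelfModified:
            /* already deleted by a later command in our own transaction */
            return false;
        case TM_Updated:
        case TM_Deleted:
            /* concurrently updated/deleted; caller decides how to react */
            return false;
        default:
            elog(ERROR, "unexpected table_tuple_delete status: %d", result);
            return false;       /* keep compiler quiet */
    }
}
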
/*
 * Update a tuple.
 *
 * NB: do not call this directly unless you are prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_update instead.
 *
 * Input parameters:
 *  relation - table to be modified (caller must hold suitable lock)
 *  otid - TID of old tuple to be replaced
 *  slot - newly constructed tuple data to store
 *  cid - update command ID (used for visibility test, and stored into
 *      cmax/cmin if successful)
 *  crosscheck - if not InvalidSnapshot, also check old tuple against this
 *  wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *  tmfd - filled in failure cases (see below)
 *  lockmode - filled with lock mode acquired on tuple
 *  update_indexes - in success cases this is set to true if new index entries
 *      are required for this tuple
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * update it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * On success, the slot's tts_tid and tts_tableOid are updated to match the new
 * stored tuple; in particular, slot->tts_tid is set to the TID where the
 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 * update was done. However, any TOAST changes in the new tuple's
 * data are not reflected into *newtup.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
                   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
                   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
                   TU_UpdateIndexes *update_indexes)
{
    return rel->rd_tableam->tuple_update(rel, otid, slot,
                                         cid, snapshot, crosscheck,
                                         wait, tmfd,
                                         lockmode, update_indexes);
}

1546 /*
1547  * Lock a tuple in the specified mode.
1548  *
1549  * Input parameters:
1550  * relation: relation containing tuple (caller must hold suitable lock)
1551  * tid: TID of tuple to lock
1552  * snapshot: snapshot to use for visibility determinations
1553  * cid: current command ID (used for visibility test, and stored into
1554  * tuple's cmax if lock is successful)
1555  * mode: lock mode desired
1556  * wait_policy: what to do if tuple lock is not available
1557  * flags:
1558  * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
1559  * also lock descendant tuples if lock modes don't conflict.
1560  * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
1561  * latest version.
1562  *
1563  * Output parameters:
1564  * *slot: contains the target tuple
1565  * *tmfd: filled in failure cases (see below)
1566  *
1567  * Function result may be:
1568  * TM_Ok: lock was successfully acquired
1569  * TM_Invisible: lock failed because tuple was never visible to us
1570  * TM_SelfModified: lock failed because tuple updated by self
1571  * TM_Updated: lock failed because tuple updated by other xact
1572  * TM_Deleted: lock failed because tuple deleted by other xact
1573  * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
1574  *
1575  * In the failure cases other than TM_Invisible and TM_Deleted, the routine
1576  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
1577  * comments for struct TM_FailureData for additional info.
1578  */
1579 static inline TM_Result
1580 table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
1581  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
1582  LockWaitPolicy wait_policy, uint8 flags,
1583  TM_FailureData *tmfd)
1584 {
1585  return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
1586  cid, mode, wait_policy,
1587  flags, tmfd);
1588 }
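
/*
 * Illustrative sketch, not part of tableam.h itself. Locks the newest
 * version of the tuple originally at `tid` in exclusive mode, following the
 * update chain as described above; the locked version is returned in `slot`.
 */
static TM_Result
example_lock_latest(Relation rel, ItemPointer tid, Snapshot snapshot,
					TupleTableSlot *slot)
{
	TM_FailureData tmfd;

	return table_tuple_lock(rel, tid, snapshot, slot,
							GetCurrentCommandId(true),
							LockTupleExclusive,
							LockWaitBlock,	/* wait for conflicting lockers */
							TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
							&tmfd);
}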
1589 
1590 /*
1591  * Perform operations necessary to complete insertions made via
1592  * tuple_insert and multi_insert with a BulkInsertState specified.
1593  */
1594 static inline void
1595 table_finish_bulk_insert(Relation rel, int options)
1596 {
1597  /* optional callback */
1598  if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
1599  rel->rd_tableam->finish_bulk_insert(rel, options);
1600 }
1601 
1602 
1603 /* ------------------------------------------------------------------------
1604  * DDL related functionality.
1605  * ------------------------------------------------------------------------
1606  */
1607 
1608 /*
1609  * Create storage for `rel` in `newrlocator`, with persistence set to
1610  * `persistence`.
1611  *
1612  * This is used both during relation creation and various DDL operations to
1613  * create new rel storage that can be filled from scratch. When creating
1614  * new storage for an existing relfilelocator, this should be called before the
1615  * relcache entry has been updated.
1616  *
1617  * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
1618  * that pg_class.{relfrozenxid, relminmxid} have to be set to.
1619  */
1620 static inline void
1621 table_relation_set_new_filelocator(Relation rel,
1622  const RelFileLocator *newrlocator,
1623  char persistence,
1624  TransactionId *freezeXid,
1625  MultiXactId *minmulti)
1626 {
1627  rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
1628  persistence, freezeXid,
1629  minmulti);
1630 }
1631 
1632 /*
1633  * Remove all table contents from `rel`, in a non-transactional manner.
1634  * Non-transactional means that there's no need to support rollbacks. This
1635  * is commonly used only to truncate relation storage created in the
1636  * current transaction.
1637  */
1638 static inline void
1639 table_relation_nontransactional_truncate(Relation rel)
1640 {
1641  rel->rd_tableam->relation_nontransactional_truncate(rel);
1642 }
1643 
1644 /*
1645  * Copy data from `rel` into the new relfilelocator `newrlocator`. The new
1646  * relfilelocator must not have any storage associated with it before this
1647  * function is called. This is only supposed to be used for low-level
1648  * operations like changing a relation's tablespace.
1649  */
1650 static inline void
1651 table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
1652 {
1653  rel->rd_tableam->relation_copy_data(rel, newrlocator);
1654 }
1655 
1656 /*
1657  * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1658  * FULL.
1659  *
1660  * Additional Input parameters:
1661  * - use_sort - if true, the table contents are sorted appropriately for
1662  * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1663  * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1664  * performed
1665  * - OldIndex - see use_sort
1666  * - OldestXmin - computed by vacuum_get_cutoffs(), even when
1667  * not needed for the relation's AM
1668  * - *xid_cutoff - ditto
1669  * - *multi_cutoff - ditto
1670  *
1671  * Output parameters:
1672  * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1673  * - *multi_cutoff - rel's new relminmxid value, may be invalid
1674  * - *tups_vacuumed - stats, for logging, if appropriate for AM
1675  * - *tups_recently_dead - stats, for logging, if appropriate for AM
1676  */
1677 static inline void
1678 table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
1679  Relation OldIndex,
1680  bool use_sort,
1681  TransactionId OldestXmin,
1682  TransactionId *xid_cutoff,
1683  MultiXactId *multi_cutoff,
1684  double *num_tuples,
1685  double *tups_vacuumed,
1686  double *tups_recently_dead)
1687 {
1688  OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1689  use_sort, OldestXmin,
1690  xid_cutoff, multi_cutoff,
1691  num_tuples, tups_vacuumed,
1692  tups_recently_dead);
1693 }
1694 
1695 /*
1696  * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1697  * autovacuum. The specific actions performed by the AM will depend heavily on
1698  * the individual AM.
1699  *
1700  * On entry a transaction needs to already have been established, and the
1701  * table is locked with a ShareUpdateExclusive lock.
1702  *
1703  * Note that neither VACUUM FULL (and CLUSTER) nor ANALYZE goes through this
1704  * routine, even if (for ANALYZE) it is part of the same VACUUM command.
1705  */
1706 static inline void
1707 table_relation_vacuum(Relation rel, struct VacuumParams *params,
1708  BufferAccessStrategy bstrategy)
1709 {
1710  rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1711 }
1712 
1713 /*
1714  * Prepare to analyze the next block in the read stream. The scan needs to
1715  * have been started with table_beginscan_analyze(). Note that this routine
1716  * might acquire resources like locks that are held until
1717  * table_scan_analyze_next_tuple() returns false.
1718  *
1719  * Returns false if the block is unsuitable for sampling, true otherwise.
1720  */
1721 static inline bool
1722 table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
1723 {
1724  return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
1725 }
1726 
1727 /*
1728  * Iterate over tuples in the block selected with
1729  * table_scan_analyze_next_block() (which needs to have returned true, and
1730  * this routine may not have returned false for the same block before). If a
1731  * tuple that's suitable for sampling is found, true is returned and a tuple
1732  * is stored in `slot`.
1733  *
1734  * *liverows and *deadrows are incremented according to the encountered
1735  * tuples.
1736  */
1737 static inline bool
1738 table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1739  double *liverows, double *deadrows,
1740  TupleTableSlot *slot)
1741 {
1742  return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1743  liverows, deadrows,
1744  slot);
1745 }
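
/*
 * Illustrative sketch of the ANALYZE block/tuple protocol described above,
 * not part of tableam.h itself. `scan` must come from
 * table_beginscan_analyze(); the read stream, slot, and OldestXmin are
 * assumed to be set up by the caller, as acquire_sample_rows() does.
 */
static void
example_analyze_loop(TableScanDesc scan, ReadStream *stream,
					 TransactionId OldestXmin, TupleTableSlot *slot)
{
	double		liverows = 0;
	double		deadrows = 0;

	while (table_scan_analyze_next_block(scan, stream))
	{
		while (table_scan_analyze_next_tuple(scan, OldestXmin,
											 &liverows, &deadrows, slot))
		{
			/* feed the sampled tuple in `slot` into the row sample here */
		}
	}
}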
1746 
1747 /*
1748  * table_index_build_scan - scan the table to find tuples to be indexed
1749  *
1750  * This is called back from an access-method-specific index build procedure
1751  * after the AM has done whatever setup it needs. The parent table relation
1752  * is scanned to find tuples that should be entered into the index. Each
1753  * such tuple is passed to the AM's callback routine, which does the right
1754  * things to add it to the new index. After we return, the AM's index
1755  * build procedure does whatever cleanup it needs.
1756  *
1757  * The total count of live tuples is returned. This is for updating pg_class
1758  * statistics. (It's annoying not to be able to do that here, but we want to
1759  * merge that update with others; see index_update_stats.) Note that the
1760  * index AM itself must keep track of the number of index tuples; we don't do
1761  * so here because the AM might reject some of the tuples for its own reasons,
1762  * such as being unable to store NULLs.
1763  *
1764  * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1765  * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1766  *
1767  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1768  * any potentially broken HOT chains. Currently, we set this if there are any
1769  * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1770  * very hard to detect whether they're really incompatible with the chain tip.
1771  * This only really makes sense for the heap AM; it might need to be
1772  * generalized for other AMs later.
1773  */
1774 static inline double
1775 table_index_build_scan(Relation table_rel,
1776  Relation index_rel,
1777  struct IndexInfo *index_info,
1778  bool allow_sync,
1779  bool progress,
1780  IndexBuildCallback callback,
1781  void *callback_state,
1782  TableScanDesc scan)
1783 {
1784  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1785  index_rel,
1786  index_info,
1787  allow_sync,
1788  false,
1789  progress,
1790  0,
1791  InvalidBlockNumber,
1792  callback,
1793  callback_state,
1794  scan);
1795 }
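
/*
 * Illustrative sketch, not part of tableam.h itself. A hypothetical index
 * AM's build routine supplies a callback of IndexBuildCallback shape, which
 * table_index_build_scan() invokes once per tuple to be indexed.
 */
static void
example_build_callback(Relation index, ItemPointer tid, Datum *values,
					   bool *isnull, bool tupleIsAlive, void *state)
{
	/* form an index entry from values/isnull and insert it into `index` */
}

static double
example_index_build(Relation table_rel, Relation index_rel,
					struct IndexInfo *index_info, void *build_state)
{
	return table_index_build_scan(table_rel, index_rel, index_info,
								  true,	/* allow_sync */
								  true,	/* report progress */
								  example_build_callback,
								  build_state,
								  NULL);	/* let the AM begin the scan */
}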
1796 
1797 /*
1798  * As table_index_build_scan(), except that instead of scanning the complete
1799  * table, only the given number of blocks are scanned. Scan to end-of-rel can
1800  * be signaled by passing InvalidBlockNumber as numblocks. Note that
1801  * restricting the range to scan cannot be done when requesting syncscan.
1802  *
1803  * When "anyvisible" mode is requested, all tuples visible to any transaction
1804  * are indexed and counted as live, including those inserted or deleted by
1805  * transactions that are still in progress.
1806  */
1807 static inline double
1808 table_index_build_range_scan(Relation table_rel,
1809  Relation index_rel,
1810  struct IndexInfo *index_info,
1811  bool allow_sync,
1812  bool anyvisible,
1813  bool progress,
1814  BlockNumber start_blockno,
1815  BlockNumber numblocks,
1816  IndexBuildCallback callback,
1817  void *callback_state,
1818  TableScanDesc scan)
1819 {
1820  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1821  index_rel,
1822  index_info,
1823  allow_sync,
1824  anyvisible,
1825  progress,
1826  start_blockno,
1827  numblocks,
1828  callback,
1829  callback_state,
1830  scan);
1831 }
1832 
1833 /*
1834  * table_index_validate_scan - second table scan for concurrent index build
1835  *
1836  * See validate_index() for an explanation.
1837  */
1838 static inline void
1839 table_index_validate_scan(Relation table_rel,
1840  Relation index_rel,
1841  struct IndexInfo *index_info,
1842  Snapshot snapshot,
1843  struct ValidateIndexState *state)
1844 {
1845  table_rel->rd_tableam->index_validate_scan(table_rel,
1846  index_rel,
1847  index_info,
1848  snapshot,
1849  state);
1850 }
1851 
1852 
1853 /* ----------------------------------------------------------------------------
1854  * Miscellaneous functionality
1855  * ----------------------------------------------------------------------------
1856  */
1857 
1858 /*
1859  * Return the current size of `rel` in bytes. If `forkNumber` is
1860  * InvalidForkNumber, return the relation's overall size, otherwise the size
1861  * for the indicated fork.
1862  *
1863  * Note that the overall size might not be the equivalent of the sum of sizes
1864  * for the individual forks for some AMs, e.g. because the AM's storage does
1865  * not neatly map onto the builtin types of forks.
1866  */
1867 static inline uint64
1868 table_relation_size(Relation rel, ForkNumber forkNumber)
1869 {
1870  return rel->rd_tableam->relation_size(rel, forkNumber);
1871 }
1872 
1873 /*
1874  * table_relation_needs_toast_table - does this relation need a toast table?
1875  */
1876 static inline bool
1877 table_relation_needs_toast_table(Relation rel)
1878 {
1879  return rel->rd_tableam->relation_needs_toast_table(rel);
1880 }
1881 
1882 /*
1883  * Return the OID of the AM that should be used to implement the TOAST table
1884  * for this relation.
1885  */
1886 static inline Oid
1887 table_relation_toast_am(Relation rel)
1888 {
1889  return rel->rd_tableam->relation_toast_am(rel);
1890 }
1891 
1892 /*
1893  * Fetch all or part of a TOAST value from a TOAST table.
1894  *
1895  * If this AM is never used to implement a TOAST table, then this callback
1896  * is not needed. But if toasted values are ever stored in a table of this
1897  * type, then you will need this callback.
1898  *
1899  * toastrel is the relation in which the toasted value is stored.
1900  *
1901  * valueid identifies which toast value is to be fetched. For the heap,
1902  * this corresponds to the values stored in the chunk_id column.
1903  *
1904  * attrsize is the total size of the toast value to be fetched.
1905  *
1906  * sliceoffset is the offset within the toast value of the first byte that
1907  * should be fetched.
1908  *
1909  * slicelength is the number of bytes from the toast value that should be
1910  * fetched.
1911  *
1912  * result is caller-allocated space into which the fetched bytes should be
1913  * stored.
1914  */
1915 static inline void
1916 table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
1917  int32 attrsize, int32 sliceoffset,
1918  int32 slicelength, struct varlena *result)
1919 {
1920  toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1921  attrsize,
1922  sliceoffset, slicelength,
1923  result);
1924 }
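
/*
 * Illustrative sketch, not part of tableam.h itself, loosely modeled on the
 * way detoasting code drives this callback. It fetches the first (up to)
 * 1kB of a toasted value; `toastrel` and `valueid` are assumed to come from
 * the TOAST pointer being detoasted.
 */
static struct varlena *
example_fetch_slice(Relation toastrel, Oid valueid, int32 attrsize)
{
	int32		slicelength = Min(attrsize, 1024);
	struct varlena *result;

	/* caller allocates and sizes the result buffer */
	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
	SET_VARSIZE(result, slicelength + VARHDRSZ);

	table_relation_fetch_toast_slice(toastrel, valueid, attrsize,
									 0, slicelength, result);
	return result;
}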
1925 
1926 
1927 /* ----------------------------------------------------------------------------
1928  * Planner related functionality
1929  * ----------------------------------------------------------------------------
1930  */
1931 
1932 /*
1933  * Estimate the current size of the relation, as an AM specific workhorse for
1934  * estimate_rel_size(). Look there for an explanation of the parameters.
1935  */
1936 static inline void
1937 table_relation_estimate_size(Relation rel, int32 *attr_widths,
1938  BlockNumber *pages, double *tuples,
1939  double *allvisfrac)
1940 {
1941  rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1942  allvisfrac);
1943 }
1944 
1945 
1946 /* ----------------------------------------------------------------------------
1947  * Executor related functionality
1948  * ----------------------------------------------------------------------------
1949  */
1950 
1951 /*
1952  * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
1953  * a bitmap table scan. `scan` needs to have been started via
1954  * table_beginscan_bm(). Returns false if there are no tuples to be found on
1955  * the page, true otherwise.
1956  *
1957  * Note that this is an optional callback; it should therefore only be
1958  * used after verifying its presence (at plan time or the like).
1959  */
1960 static inline bool
1961 table_scan_bitmap_next_block(TableScanDesc scan,
1962  struct TBMIterateResult *tbmres)
1963 {
1964  /*
1965  * We don't expect direct calls to table_scan_bitmap_next_block with valid
1966  * CheckXidAlive for catalog or regular tables. See detailed comments in
1967  * xact.c where these variables are declared.
1968  */
1970  elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
1971 
1972  return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
1973  tbmres);
1974 }
1975 
1976 /*
1977  * Fetch the next tuple of a bitmap table scan into `slot` and return true if
1978  * a visible tuple was found, false otherwise.
1979  * table_scan_bitmap_next_block() needs to previously have selected a
1980  * block (i.e. returned true), and no previous
1981  * table_scan_bitmap_next_tuple() for the same block may have
1982  * returned false.
1983  */
1984 static inline bool
1985 table_scan_bitmap_next_tuple(TableScanDesc scan,
1986  struct TBMIterateResult *tbmres,
1987  TupleTableSlot *slot)
1988 {
1989  /*
1990  * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
1991  * CheckXidAlive for catalog or regular tables. See detailed comments in
1992  * xact.c where these variables are declared.
1993  */
1995  elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
1996 
1997  return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
1998  tbmres,
1999  slot);
2000 }
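
/*
 * Illustrative sketch of the bitmap scan protocol described above, not part
 * of tableam.h itself. `scan` must come from table_beginscan_bm(); the TBM
 * iterator result and `slot` are assumed to be managed by the caller, as
 * the bitmap heap scan executor node does.
 */
static void
example_bitmap_page(TableScanDesc scan, struct TBMIterateResult *tbmres,
					TupleTableSlot *slot)
{
	if (!table_scan_bitmap_next_block(scan, tbmres))
		return;					/* no tuples to fetch on this page */

	while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
	{
		/* process the visible tuple now stored in `slot` */
	}
}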
2001 
2002 /*
2003  * Prepare to fetch tuples from the next block in a sample scan. Returns false
2004  * if the sample scan is finished, true otherwise. `scan` needs to have been
2005  * started via table_beginscan_sampling().
2006  *
2007  * This will call the TsmRoutine's NextSampleBlock() callback if necessary
2008  * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
2009  * underlying relation.
2010  */
2011 static inline bool
2012 table_scan_sample_next_block(TableScanDesc scan,
2013  struct SampleScanState *scanstate)
2014 {
2015  /*
2016  * We don't expect direct calls to table_scan_sample_next_block with valid
2017  * CheckXidAlive for catalog or regular tables. See detailed comments in
2018  * xact.c where these variables are declared.
2019  */
2021  elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
2022  return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
2023 }
2024 
2025 /*
2026  * Fetch the next sample tuple into `slot` and return true if a visible tuple
2027  * was found, false otherwise. table_scan_sample_next_block() needs to
2028  * previously have selected a block (i.e. returned true), and no previous
2029  * table_scan_sample_next_tuple() for the same block may have returned false.
2030  *
2031  * This will call the TsmRoutine's NextSampleTuple() callback.
2032  */
2033 static inline bool
2034 table_scan_sample_next_tuple(TableScanDesc scan,
2035  struct SampleScanState *scanstate,
2036  TupleTableSlot *slot)
2037 {
2038  /*
2039  * We don't expect direct calls to table_scan_sample_next_tuple with valid
2040  * CheckXidAlive for catalog or regular tables. See detailed comments in
2041  * xact.c where these variables are declared.
2042  */
2044  elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
2045  return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
2046  slot);
2047 }
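
/*
 * Illustrative sketch of the sample scan protocol described above, not part
 * of tableam.h itself. `scan` must come from table_beginscan_sampling(),
 * and `scanstate` carries the TsmRoutine state set up by the executor.
 */
static void
example_sample_loop(TableScanDesc scan, struct SampleScanState *scanstate,
					TupleTableSlot *slot)
{
	while (table_scan_sample_next_block(scan, scanstate))
	{
		while (table_scan_sample_next_tuple(scan, scanstate, slot))
		{
			/* process the sampled tuple in `slot` */
		}
	}
}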
2048 
2049 
2050 /* ----------------------------------------------------------------------------
2051  * Functions to make modifications a bit simpler.
2052  * ----------------------------------------------------------------------------
2053  */
2054 
2055 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2056 extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
2057  Snapshot snapshot);
2058 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2059  TupleTableSlot *slot, Snapshot snapshot,
2060  TU_UpdateIndexes *update_indexes);
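
/*
 * Illustrative sketch, not part of tableam.h itself. The simple_ variants
 * raise an error on concurrent-update conflicts instead of returning a
 * TM_Result, which suits callers (e.g. table-rewrite code) that hold locks
 * strong enough to rule such conflicts out.
 */
static void
example_simple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
					  Snapshot snapshot)
{
	TU_UpdateIndexes update_indexes;

	simple_table_tuple_update(rel, otid, slot, snapshot, &update_indexes);
	if (update_indexes != TU_None)
	{
		/* the caller must still insert the required index entries */
	}
}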
2061 
2062 
2063 /* ----------------------------------------------------------------------------
2064  * Helper functions to implement parallel scans for block oriented AMs.
2065  * ----------------------------------------------------------------------------
2066  */
2067 
2068 extern Size table_block_parallelscan_estimate(Relation rel);
2069 extern Size table_block_parallelscan_initialize(Relation rel,
2070  ParallelTableScanDesc pscan);
2071 extern void table_block_parallelscan_reinitialize(Relation rel,
2072  ParallelTableScanDesc pscan);
2073 extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
2074  ParallelBlockTableScanWorker pbscanwork,
2075  ParallelBlockTableScanDesc pbscan);
2076 extern void table_block_parallelscan_startblock_init(Relation rel,
2077  ParallelBlockTableScanWorker pbscanwork,
2078  ParallelBlockTableScanDesc pbscan);
2079 
2080 
2081 /* ----------------------------------------------------------------------------
2082  * Helper functions to implement relation sizing for block oriented AMs.
2083  * ----------------------------------------------------------------------------
2084  */
2085 
2086 extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2087 extern void table_block_relation_estimate_size(Relation rel,
2088  int32 *attr_widths,
2089  BlockNumber *pages,
2090  double *tuples,
2091  double *allvisfrac,
2092  Size overhead_bytes_per_tuple,
2093  Size usable_bytes_per_page);
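
/*
 * Illustrative sketch, not part of tableam.h itself. A hypothetical
 * block-oriented AM could implement its relation_estimate_size callback by
 * delegating to the helper above; the per-tuple and per-page overheads
 * below are illustrative values, not the heap AM's.
 */
static void
example_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages,
					  double *tuples, double *allvisfrac)
{
	table_block_relation_estimate_size(rel, attr_widths, pages, tuples,
									   allvisfrac,
									   24,	/* overhead bytes per tuple */
									   8168);	/* usable bytes per page */
}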
2094 
2095 /* ----------------------------------------------------------------------------
2096  * Functions in tableamapi.c
2097  * ----------------------------------------------------------------------------
2098  */
2099 
2100 extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2101 
2102 /* ----------------------------------------------------------------------------
2103  * Functions in heapam_handler.c
2104  * ----------------------------------------------------------------------------
2105  */
2106 
2107 extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2108 
2109 #endif /* TABLEAM_H */