/*-------------------------------------------------------------------------
 *
 * tableam.h
 *	  POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *		See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H

#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "storage/read_stream.h"
#include "utils/rel.h"
#include "utils/snapshot.h"


#define DEFAULT_TABLE_ACCESS_METHOD "heap"

/* GUCs */
extern PGDLLIMPORT char *default_table_access_method;
extern PGDLLIMPORT bool synchronize_seqscans;


struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;

/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
	/* one of SO_TYPE_* may be specified */
	SO_TYPE_SEQSCAN = 1 << 0,
	SO_TYPE_BITMAPSCAN = 1 << 1,
	SO_TYPE_SAMPLESCAN = 1 << 2,
	SO_TYPE_TIDSCAN = 1 << 3,
	SO_TYPE_TIDRANGESCAN = 1 << 4,
	SO_TYPE_ANALYZE = 1 << 5,

	/* several of SO_ALLOW_* may be specified */
	/* allow or disallow use of access strategy */
	SO_ALLOW_STRAT = 1 << 6,
	/* report location to syncscan logic? */
	SO_ALLOW_SYNC = 1 << 7,
	/* verify visibility page-at-a-time? */
	SO_ALLOW_PAGEMODE = 1 << 8,

	/* unregister snapshot at scan end? */
	SO_TEMP_SNAPSHOT = 1 << 9,

	/*
	 * At the discretion of the table AM, bitmap table scans may be able to
	 * skip fetching a block from the table if none of the table data is
	 * needed.  If table data may be needed, set SO_NEED_TUPLES.
	 */
	SO_NEED_TUPLES = 1 << 10,
} ScanOptions;

/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
	/*
	 * Signals that the action succeeded (i.e. update/delete performed, lock
	 * was acquired)
	 */
	TM_Ok,

	/* The affected tuple wasn't visible to the relevant snapshot */
	TM_Invisible,

	/* The affected tuple was already modified by the calling backend */
	TM_SelfModified,

	/*
	 * The affected tuple was updated by another transaction. This includes
	 * the case where tuple was moved to another partition.
	 */
	TM_Updated,

	/* The affected tuple was deleted by another transaction */
	TM_Deleted,

	/*
	 * The affected tuple is currently being modified by another session. This
	 * will only be returned if table_(update/delete/lock_tuple) are
	 * instructed not to wait.
	 */
	TM_BeingModified,

	/* lock couldn't be acquired, action skipped. Only used by lock_tuple */
	TM_WouldBlock,
} TM_Result;

/*
 * Result codes for table_update(..., update_indexes*..).
 * Used to determine which indexes to update.
 */
typedef enum TU_UpdateIndexes
{
	/* No indexed columns were updated (incl. TID addressing of tuple) */
	TU_None,

	/* A non-summarizing indexed column was updated, or the TID has changed */
	TU_All,

	/* Only summarized columns were updated, TID is unchanged */
	TU_Summarizing,
} TU_UpdateIndexes;

/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID.  If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero.  (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
	ItemPointerData ctid;
	TransactionId xmax;
	CommandId	cmax;
	bool		traversed;
} TM_FailureData;

/*
 * State used when calling table_index_delete_tuples().
 *
 * Represents the status of table tuples, referenced by table TID and taken by
 * index AM from index tuples.  State consists of high level parameters of the
 * deletion operation, plus two mutable palloc()'d arrays for information
 * about the status of individual table tuples.  These are conceptually one
 * single array.  Using two arrays keeps the TM_IndexDelete struct small,
 * which makes sorting the first array (the deltids array) fast.
 *
 * Some index AM callers perform simple index tuple deletion (by specifying
 * bottomup = false), and include only known-dead deltids.  These known-dead
 * entries are all marked knowndeletable = true directly (typically these are
 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
 *
 * Callers that specify bottomup = true are "bottom-up index deletion"
 * callers.  The considerations for the tableam are more subtle with these
 * callers because they ask the tableam to perform highly speculative work,
 * and might only expect the tableam to check a small fraction of all entries.
 * Caller is not allowed to specify knowndeletable = true for any entry
 * because everything is highly speculative.  Bottom-up caller provides
 * context and hints to tableam -- see comments below for details on how index
 * AMs and tableams should coordinate during bottom-up index deletion.
 *
 * Simple index deletion callers may ask the tableam to perform speculative
 * work, too.  This is a little like bottom-up deletion, but not too much.
 * The tableam will only perform speculative work when it's practically free
 * to do so in passing for simple deletion caller (while always performing
 * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
 * be deleted within index AM).  This is the real reason why it's possible for
 * simple index deletion caller to specify knowndeletable = false up front
 * (this means "check if it's possible for me to delete corresponding index
 * tuple when it's cheap to do so in passing").  The index AM should only
 * include "extra" entries for index tuples whose TIDs point to a table block
 * that tableam is expected to have to visit anyway (in the event of a block
 * orientated tableam).  The tableam isn't strictly obligated to check these
 * "extra" TIDs, but a block-based AM should always manage to do so in
 * practice.
 *
 * The final contents of the deltids/status arrays are interesting to callers
 * that ask tableam to perform speculative work (i.e. when _any_ items have
 * knowndeletable set to false up front).  These index AM callers will
 * naturally need to consult final state to determine which index tuples are
 * in fact deletable.
 *
 * The index AM can keep track of which index tuple relates to which deltid by
 * setting idxoffnum (and/or relying on each entry being uniquely identifiable
 * using tid), which is important when the final contents of the array will
 * need to be interpreted -- the array can shrink from initial size after
 * tableam processing and/or have entries in a new order (tableam may sort
 * deltids array for its own reasons).  Bottom-up callers may find that final
 * ndeltids is 0 on return from call to tableam, in which case no index tuple
 * deletions are possible.  Simple deletion callers can rely on any entries
 * they know to be deletable appearing in the final array as deletable.
 */
typedef struct TM_IndexDelete
{
	ItemPointerData tid;		/* table TID from index tuple */
	int16		id;				/* Offset into TM_IndexStatus array */
} TM_IndexDelete;

typedef struct TM_IndexStatus
{
	OffsetNumber idxoffnum;		/* Index am page offset number */
	bool		knowndeletable; /* Currently known to be deletable? */

	/* Bottom-up index deletion specific fields follow */
	bool		promising;		/* Promising (duplicate) index tuple? */
	int16		freespace;		/* Space freed in index if deleted */
} TM_IndexStatus;

/*
 * Index AM/tableam coordination is central to the design of bottom-up index
 * deletion.  The index AM provides hints about where to look to the tableam
 * by marking some entries as "promising".  Index AM does this with duplicate
 * index tuples that are strongly suspected to be old versions left behind by
 * UPDATEs that did not logically modify indexed values.  Index AM may find it
 * helpful to only mark entries as promising when they're thought to have been
 * affected by such an UPDATE in the recent past.
 *
 * Bottom-up index deletion casts a wide net at first, usually by including
 * all TIDs on a target index page.  It is up to the tableam to worry about
 * the cost of checking transaction status information.  The tableam is in
 * control, but needs careful guidance from the index AM.  Index AM requests
 * that bottomupfreespace target be met, while tableam measures progress
 * towards that goal by tallying the per-entry freespace value for known
 * deletable entries.  (All !bottomup callers can just set these space related
 * fields to zero.)
 */
typedef struct TM_IndexDeleteOp
{
	Relation	irel;			/* Target index relation */
	BlockNumber iblknum;		/* Index block number (for error reports) */
	bool		bottomup;		/* Bottom-up (not simple) deletion? */
	int			bottomupfreespace;	/* Bottom-up space target */

	/* Mutable per-TID information follows (index AM initializes entries) */
	int			ndeltids;		/* Current # of deltids/status elements */
	TM_IndexDelete *deltids;
	TM_IndexStatus *status;
} TM_IndexDeleteOp;

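/*
 * Illustrative sketch (not part of the API): how a simple-deletion index AM
 * caller might fill in the state above before handing it to the tableam via
 * table_index_delete_tuples().  The array length "ndeletable" and the
 * per-entry inputs "itup_tids"/"itup_offnums" are hypothetical; see nbtree's
 * simple and bottom-up deletion passes for in-tree callers.
 *
 *		TM_IndexDeleteOp delstate;
 *
 *		delstate.irel = irel;
 *		delstate.iblknum = BufferGetBlockNumber(buf);
 *		delstate.bottomup = false;
 *		delstate.bottomupfreespace = 0;
 *		delstate.ndeltids = 0;
 *		delstate.deltids = palloc(ndeletable * sizeof(TM_IndexDelete));
 *		delstate.status = palloc(ndeletable * sizeof(TM_IndexStatus));
 *
 *		for (int i = 0; i < ndeletable; i++)
 *		{
 *			TM_IndexDelete *deltid = &delstate.deltids[delstate.ndeltids];
 *			TM_IndexStatus *status = &delstate.status[delstate.ndeltids];
 *
 *			deltid->tid = itup_tids[i];
 *			deltid->id = delstate.ndeltids;
 *			status->idxoffnum = itup_offnums[i];
 *			status->knowndeletable = true;	(e.g. from an LP_DEAD bit)
 *			status->promising = false;		(only used by bottomup callers)
 *			status->freespace = 0;			(only used by bottomup callers)
 *			delstate.ndeltids++;
 *		}
 *
 *		conflict_xid = table_index_delete_tuples(heaprel, &delstate);
 */
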
258 /* "options" flag bits for table_tuple_insert */
259 /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
260 #define TABLE_INSERT_SKIP_FSM 0x0002
261 #define TABLE_INSERT_FROZEN 0x0004
262 #define TABLE_INSERT_NO_LOGICAL 0x0008
263 
264 /* flag bits for table_tuple_lock */
265 /* Follow tuples whose update is in progress if lock modes don't conflict */
266 #define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
267 /* Follow update chain and lock latest version of tuple */
268 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
269 
270 
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
									ItemPointer tid,
									Datum *values,
									bool *isnull,
									bool tupleIsAlive,
									void *state);

/*
 * API struct for a table AM.  Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then gets
 * returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly, use the
 * table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in, remember
 * to update when adding a callback.
 */
typedef struct TableAmRoutine
{
	/* this must be set to T_TableAmRoutine */
	NodeTag		type;

	/* ------------------------------------------------------------------------
	 * Slot related callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Return slot implementation suitable for storing a tuple of this AM.
	 */
	const TupleTableSlotOps *(*slot_callbacks) (Relation rel);


	/* ------------------------------------------------------------------------
	 * Table scan callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Start a scan of `rel`.  The callback has to return a TableScanDesc,
	 * which will typically be embedded in a larger, AM specific, struct.
	 *
	 * If nkeys != 0, the results need to be filtered by those scan keys.
	 *
	 * pscan, if not NULL, will have already been initialized with
	 * parallelscan_initialize(), and has to be for the same relation. Will
	 * only be set coming from table_beginscan_parallel().
	 *
	 * `flags` is a bitmask indicating the type of scan (ScanOptions's
	 * SO_TYPE_*, currently only one may be specified), options controlling
	 * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
	 * specified, an AM may ignore unsupported ones) and whether the snapshot
	 * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
	 */
	TableScanDesc (*scan_begin) (Relation rel,
								 Snapshot snapshot,
								 int nkeys, struct ScanKeyData *key,
								 ParallelTableScanDesc pscan,
								 uint32 flags);

	/*
	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
	 * TableScanDesc.rs_snapshot needs to be unregistered.
	 */
	void		(*scan_end) (TableScanDesc scan);

	/*
	 * Restart relation scan.  If set_params is set to true, allow_{strat,
	 * sync, pagemode} (see scan_begin) changes should be taken into account.
	 */
	void		(*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
								bool set_params, bool allow_strat,
								bool allow_sync, bool allow_pagemode);

	/*
	 * Return next tuple from `scan`, store in slot.
	 */
	bool		(*scan_getnextslot) (TableScanDesc scan,
									 ScanDirection direction,
									 TupleTableSlot *slot);

	/*-----------
	 * Optional functions to provide scanning for ranges of ItemPointers.
	 * Implementations must either provide both of these functions, or neither
	 * of them.
	 *
	 * Implementations of scan_set_tidrange must themselves handle
	 * ItemPointers of any value, i.e., they must handle each of the following:
	 *
	 * 1) mintid or maxtid is beyond the end of the table; and
	 * 2) mintid is above maxtid; and
	 * 3) item offset for mintid or maxtid is beyond the maximum offset
	 *    allowed by the AM.
	 *
	 * Implementations can assume that scan_set_tidrange is always called
	 * before scan_getnextslot_tidrange or after scan_rescan and before any
	 * further calls to scan_getnextslot_tidrange.
	 */
	void		(*scan_set_tidrange) (TableScanDesc scan,
									  ItemPointer mintid,
									  ItemPointer maxtid);

	/*
	 * Return next tuple from `scan` that's in the range of TIDs defined by
	 * scan_set_tidrange.
	 */
	bool		(*scan_getnextslot_tidrange) (TableScanDesc scan,
											  ScanDirection direction,
											  TupleTableSlot *slot);

	/* ------------------------------------------------------------------------
	 * Parallel table scan related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Estimate the size of shared memory needed for a parallel scan of this
	 * relation. The snapshot does not need to be accounted for.
	 */
	Size		(*parallelscan_estimate) (Relation rel);

	/*
	 * Initialize ParallelTableScanDesc for a parallel scan of this relation.
	 * `pscan` will be sized according to parallelscan_estimate() for the same
	 * relation.
	 */
	Size		(*parallelscan_initialize) (Relation rel,
											ParallelTableScanDesc pscan);

	/*
	 * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
	 * when `pscan` was initialized by parallelscan_initialize.
	 */
	void		(*parallelscan_reinitialize) (Relation rel,
											  ParallelTableScanDesc pscan);


	/* ------------------------------------------------------------------------
	 * Index Scan Callbacks
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Prepare to fetch tuples from the relation, as needed when fetching
	 * tuples for an index scan.  The callback has to return an
	 * IndexFetchTableData, which the AM will typically embed in a larger
	 * structure with additional information.
	 *
	 * Tuples for an index scan can then be fetched via index_fetch_tuple.
	 */
	struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

	/*
	 * Reset index fetch. Typically this will release cross index fetch
	 * resources held in IndexFetchTableData.
	 */
	void		(*index_fetch_reset) (struct IndexFetchTableData *data);

	/*
	 * Release resources and deallocate index fetch.
	 */
	void		(*index_fetch_end) (struct IndexFetchTableData *data);

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`. If a tuple was found and passed the visibility
	 * test, return true, false otherwise.
	 *
	 * Note that AMs that do not necessarily update indexes when indexed
	 * columns do not change, need to return the current/correct version of
	 * the tuple that is visible to the snapshot, even if the tid points to an
	 * older version of the tuple.
	 *
	 * *call_again is false on the first call to index_fetch_tuple for a tid.
	 * If there potentially is another tuple matching the tid, *call_again
	 * needs to be set to true by index_fetch_tuple, signaling to the caller
	 * that index_fetch_tuple should be called again for the same tid.
	 *
	 * *all_dead, if all_dead is not NULL, should be set to true by
	 * index_fetch_tuple iff it is guaranteed that no backend needs to see
	 * that tuple. Index AMs can use that to avoid returning that tid in
	 * future searches.
	 */
	bool		(*index_fetch_tuple) (struct IndexFetchTableData *scan,
									  ItemPointer tid,
									  Snapshot snapshot,
									  TupleTableSlot *slot,
									  bool *call_again, bool *all_dead);


	/* ------------------------------------------------------------------------
	 * Callbacks for non-modifying operations on individual tuples
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`. If a tuple was found and passed the visibility
	 * test, returns true, false otherwise.
	 */
	bool		(*tuple_fetch_row_version) (Relation rel,
											ItemPointer tid,
											Snapshot snapshot,
											TupleTableSlot *slot);

	/*
	 * Is tid valid for a scan of this relation.
	 */
	bool		(*tuple_tid_valid) (TableScanDesc scan,
									ItemPointer tid);

	/*
	 * Return the latest version of the tuple at `tid`, by updating `tid` to
	 * point at the newest version.
	 */
	void		(*tuple_get_latest_tid) (TableScanDesc sscan,
										 ItemPointer tid);

	/*
	 * Does the tuple in `slot` satisfy `snapshot`?  The slot needs to be of
	 * the appropriate type for the AM.
	 */
	bool		(*tuple_satisfies_snapshot) (Relation rel,
											 TupleTableSlot *slot,
											 Snapshot snapshot);

	/* see table_index_delete_tuples() */
	TransactionId (*index_delete_tuples) (Relation rel,
										  TM_IndexDeleteOp *delstate);


	/* ------------------------------------------------------------------------
	 * Manipulations of physical tuples.
	 * ------------------------------------------------------------------------
	 */

	/* see table_tuple_insert() for reference about parameters */
	void		(*tuple_insert) (Relation rel, TupleTableSlot *slot,
								 CommandId cid, int options,
								 struct BulkInsertStateData *bistate);

	/* see table_tuple_insert_speculative() for reference about parameters */
	void		(*tuple_insert_speculative) (Relation rel,
											 TupleTableSlot *slot,
											 CommandId cid,
											 int options,
											 struct BulkInsertStateData *bistate,
											 uint32 specToken);

	/* see table_tuple_complete_speculative() for reference about parameters */
	void		(*tuple_complete_speculative) (Relation rel,
											   TupleTableSlot *slot,
											   uint32 specToken,
											   bool succeeded);

	/* see table_multi_insert() for reference about parameters */
	void		(*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
								 CommandId cid, int options, struct BulkInsertStateData *bistate);

	/* see table_tuple_delete() for reference about parameters */
	TM_Result	(*tuple_delete) (Relation rel,
								 ItemPointer tid,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 bool changingPart);

	/* see table_tuple_update() for reference about parameters */
	TM_Result	(*tuple_update) (Relation rel,
								 ItemPointer otid,
								 TupleTableSlot *slot,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 LockTupleMode *lockmode,
								 TU_UpdateIndexes *update_indexes);

	/* see table_tuple_lock() for reference about parameters */
	TM_Result	(*tuple_lock) (Relation rel,
							   ItemPointer tid,
							   Snapshot snapshot,
							   TupleTableSlot *slot,
							   CommandId cid,
							   LockTupleMode mode,
							   LockWaitPolicy wait_policy,
							   uint8 flags,
							   TM_FailureData *tmfd);

	/*
	 * Perform operations necessary to complete insertions made via
	 * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
	 * access methods ceased to use this.
	 *
	 * Typically callers of tuple_insert and multi_insert will just pass all
	 * the flags that apply to them, and each AM has to decide which of them
	 * make sense for it, and then only take actions in finish_bulk_insert for
	 * those flags, and ignore others.
	 *
	 * Optional callback.
	 */
	void		(*finish_bulk_insert) (Relation rel, int options);


	/* ------------------------------------------------------------------------
	 * DDL related functionality.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * This callback needs to create new relation storage for `rel`, with
	 * appropriate durability behaviour for `persistence`.
	 *
	 * Note that only the subset of the relcache filled by
	 * RelationBuildLocalRelation() can be relied upon and that the relation's
	 * catalog entries will either not yet exist (new relation), or will still
	 * reference the old relfilelocator.
	 *
	 * As output *freezeXid, *minmulti must be set to the values appropriate
	 * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
	 * fields to be filled they can be set to InvalidTransactionId and
	 * InvalidMultiXactId, respectively.
	 *
	 * See also table_relation_set_new_filelocator().
	 */
	void		(*relation_set_new_filelocator) (Relation rel,
												 const RelFileLocator *newrlocator,
												 char persistence,
												 TransactionId *freezeXid,
												 MultiXactId *minmulti);

	/*
	 * This callback needs to remove all contents from `rel`'s current
	 * relfilelocator. No provisions for transactional behaviour need to be
	 * made.  Often this can be implemented by truncating the underlying
	 * storage to its minimal size.
	 *
	 * See also table_relation_nontransactional_truncate().
	 */
	void		(*relation_nontransactional_truncate) (Relation rel);

	/*
	 * See table_relation_copy_data().
	 *
	 * This can typically be implemented by directly copying the underlying
	 * storage, unless it contains references to the tablespace internally.
	 */
	void		(*relation_copy_data) (Relation rel,
									   const RelFileLocator *newrlocator);

	/* See table_relation_copy_for_cluster() */
	void		(*relation_copy_for_cluster) (Relation OldTable,
											  Relation NewTable,
											  Relation OldIndex,
											  bool use_sort,
											  TransactionId OldestXmin,
											  TransactionId *xid_cutoff,
											  MultiXactId *multi_cutoff,
											  double *num_tuples,
											  double *tups_vacuumed,
											  double *tups_recently_dead);

	/*
	 * React to VACUUM command on the relation. The VACUUM can be triggered by
	 * a user or by autovacuum. The specific actions performed by the AM will
	 * depend heavily on the individual AM.
	 *
	 * On entry a transaction is already established, and the relation is
	 * locked with a ShareUpdateExclusive lock.
	 *
	 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
	 * this routine, even if (for ANALYZE) it is part of the same VACUUM
	 * command.
	 *
	 * There probably, in the future, needs to be a separate callback to
	 * integrate with autovacuum's scheduling.
	 */
	void		(*relation_vacuum) (Relation rel,
									struct VacuumParams *params,
									BufferAccessStrategy bstrategy);

	/*
	 * Prepare to analyze the next block in the read stream.  Returns false if
	 * the stream is exhausted and true otherwise. The scan must have been
	 * started with SO_TYPE_ANALYZE option.
	 *
	 * This routine holds a buffer pin and lock on the heap page.  They are
	 * held until heapam_scan_analyze_next_tuple() returns false.  That is
	 * until all the items of the heap page are analyzed.
	 */

	/*
	 * Prepare to analyze block `blockno` of `scan`. The scan has been started
	 * with table_beginscan_analyze().  See also
	 * table_scan_analyze_next_block().
	 *
	 * The callback may acquire resources like locks that are held until
	 * table_scan_analyze_next_tuple() returns false.  It e.g. can make sense
	 * to hold a lock until all tuples on a block have been analyzed by
	 * scan_analyze_next_tuple.
	 *
	 * The callback can return false if the block is not suitable for
	 * sampling, e.g. because it's a metapage that could never contain tuples.
	 *
	 * XXX: This obviously is primarily suited for block-based AMs. It's not
	 * clear what a good interface for non block based AMs would be, so there
	 * isn't one yet.
	 */
	bool		(*scan_analyze_next_block) (TableScanDesc scan,
											ReadStream *stream);

	/*
	 * See table_scan_analyze_next_tuple().
	 *
	 * Not every AM might have a meaningful concept of dead rows, in which
	 * case it's OK to not increment *deadrows - but note that that may
	 * influence autovacuum scheduling (see comment for relation_vacuum
	 * callback).
	 */
	bool		(*scan_analyze_next_tuple) (TableScanDesc scan,
											TransactionId OldestXmin,
											double *liverows,
											double *deadrows,
											TupleTableSlot *slot);

	/* see table_index_build_range_scan for reference about parameters */
	double		(*index_build_range_scan) (Relation table_rel,
										   Relation index_rel,
										   struct IndexInfo *index_info,
										   bool allow_sync,
										   bool anyvisible,
										   bool progress,
										   BlockNumber start_blockno,
										   BlockNumber numblocks,
										   IndexBuildCallback callback,
										   void *callback_state,
										   TableScanDesc scan);

	/* see table_index_validate_scan for reference about parameters */
	void		(*index_validate_scan) (Relation table_rel,
										Relation index_rel,
										struct IndexInfo *index_info,
										Snapshot snapshot,
										struct ValidateIndexState *state);


	/* ------------------------------------------------------------------------
	 * Miscellaneous functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_size().
	 *
	 * Note that currently a few callers use the MAIN_FORKNUM size to figure
	 * out the range of potentially interesting blocks (brin, analyze). It's
	 * probable that we'll need to revise the interface for those at some
	 * point.
	 */
	uint64		(*relation_size) (Relation rel, ForkNumber forkNumber);


	/*
	 * This callback should return true if the relation requires a TOAST table
	 * and false if it does not.  It may wish to examine the relation's tuple
	 * descriptor before making a decision, but if it uses some other method
	 * of storing large values (or if it does not support them) it can simply
	 * return false.
	 */
	bool		(*relation_needs_toast_table) (Relation rel);

	/*
	 * This callback should return the OID of the table AM that implements
	 * TOAST tables for this AM.  If the relation_needs_toast_table callback
	 * always returns false, this callback is not required.
	 */
	Oid			(*relation_toast_am) (Relation rel);

	/*
	 * This callback is invoked when detoasting a value stored in a toast
	 * table implemented by this AM.  See table_relation_fetch_toast_slice()
	 * for more details.
	 */
	void		(*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
											   int32 attrsize,
											   int32 sliceoffset,
											   int32 slicelength,
											   struct varlena *result);


	/* ------------------------------------------------------------------------
	 * Planner related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_estimate_size().
	 *
	 * While block oriented, it shouldn't be too hard for an AM that doesn't
	 * internally use blocks to convert into a usable representation.
	 *
	 * This differs from the relation_size callback by returning size
	 * estimates (both relation size and tuple count) for planning purposes,
	 * rather than returning a currently correct estimate.
	 */
	void		(*relation_estimate_size) (Relation rel, int32 *attr_widths,
										   BlockNumber *pages, double *tuples,
										   double *allvisfrac);


	/* ------------------------------------------------------------------------
	 * Executor related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
	 * of a bitmap table scan. `scan` was started via table_beginscan_bm().
	 * Return false if there are no tuples to be found on the page, true
	 * otherwise.
	 *
	 * This will typically read and pin the target block, and do the necessary
	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
	 * make sense to perform tuple visibility checks at this time). For some
	 * AMs it will make more sense to do all the work referencing `tbmres`
	 * contents here, for others it might be better to defer more work to
	 * scan_bitmap_next_tuple.
	 *
	 * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
	 * on the page have to be returned, otherwise the tuples at offsets in
	 * `tbmres->offsets` need to be returned.
	 *
	 * XXX: Currently this may only be implemented if the AM uses md.c as its
	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
	 * performs prefetching directly using that interface.  This probably
	 * needs to be rectified at a later point.
	 *
	 * XXX: Currently this may only be implemented if the AM uses the
	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
	 * perform prefetching.  This probably needs to be rectified at a later
	 * point.
	 *
	 * Optional callback, but either both scan_bitmap_next_block and
	 * scan_bitmap_next_tuple need to exist, or neither.
	 */
	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
										   struct TBMIterateResult *tbmres);

	/*
	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
	 * if a visible tuple was found, false otherwise.
	 *
	 * For some AMs it will make more sense to do all the work referencing
	 * `tbmres` contents in scan_bitmap_next_block, for others it might be
	 * better to defer more work to this callback.
	 *
	 * Optional callback, but either both scan_bitmap_next_block and
	 * scan_bitmap_next_tuple need to exist, or neither.
	 */
	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
										   struct TBMIterateResult *tbmres,
										   TupleTableSlot *slot);

	/*
	 * Prepare to fetch tuples from the next block in a sample scan. Return
	 * false if the sample scan is finished, true otherwise. `scan` was
	 * started via table_beginscan_sampling().
	 *
	 * Typically this will first determine the target block by calling the
	 * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
	 * perform a sequential scan over all blocks.  The determined block is
	 * then typically read and pinned.
	 *
	 * As the TsmRoutine interface is block based, a block needs to be passed
	 * to NextSampleBlock(). If that's not appropriate for an AM, it
	 * internally needs to perform mapping between the internal and a block
	 * based representation.
	 *
	 * Note that it's not acceptable to hold deadlock prone resources such as
	 * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
	 * block - the tuple is likely to be returned to an upper query node, and
	 * the next call could be off a long while. Holding buffer pins and such
	 * is obviously OK.
	 *
	 * Currently it is required to implement this interface, as there's no
	 * alternative way (contrary e.g. to bitmap scans) to implement sample
	 * scans. If infeasible to implement, the AM may raise an error.
	 */
	bool		(*scan_sample_next_block) (TableScanDesc scan,
										   struct SampleScanState *scanstate);

	/*
	 * This callback, only called after scan_sample_next_block has returned
	 * true, should determine the next tuple to be returned from the selected
	 * block using the TsmRoutine's NextSampleTuple() callback.
	 *
	 * The callback needs to perform visibility checks, and only return
	 * visible tuples. That obviously can mean calling NextSampleTuple()
	 * multiple times.
	 *
	 * The TsmRoutine interface assumes that there's a maximum offset on a
	 * given page, so if that doesn't apply to an AM, it needs to emulate that
	 * assumption somehow.
	 */
	bool		(*scan_sample_next_tuple) (TableScanDesc scan,
										   struct SampleScanState *scanstate,
										   TupleTableSlot *slot);

} TableAmRoutine;


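/*
 * Illustrative sketch (not itself part of this header): the conventional
 * shape of a table AM handler.  The routine struct is statically allocated,
 * every required callback is filled in, and the SQL-callable handler just
 * returns a pointer to it.  "my_am_methods"/"my_am_handler" are hypothetical
 * names; see heapam_handler.c for the in-tree implementation.
 *
 *		static const TableAmRoutine my_am_methods = {
 *			.type = T_TableAmRoutine,
 *			.slot_callbacks = my_slot_callbacks,
 *			.scan_begin = my_scan_begin,
 *			.scan_end = my_scan_end,
 *			.scan_rescan = my_scan_rescan,
 *			(... remaining required callbacks ...)
 *		};
 *
 *		Datum
 *		my_am_handler(PG_FUNCTION_ARGS)
 *		{
 *			PG_RETURN_POINTER(&my_am_methods);
 *		}
 */
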
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation.  Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);

/*
 * Returns slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);


/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`. Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
				int nkeys, struct ScanKeyData *key)
{
	uint32		flags = SO_TYPE_SEQSCAN |
		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning catalog. It'll automatically use a
 * snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
											 struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero). Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
					  int nkeys, struct ScanKeyData *key,
					  bool allow_strat, bool allow_sync)
{
	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan.  Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality to
 * make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
				   int nkeys, struct ScanKeyData *key, bool need_tuple)
{
	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

	if (need_tuple)
		flags |= SO_NEED_TUPLES;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
						 int nkeys, struct ScanKeyData *key,
						 bool allow_strat, bool allow_sync,
						 bool allow_pagemode)
{
	uint32		flags = SO_TYPE_SAMPLESCAN;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;
	if (allow_pagemode)
		flags |= SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
	uint32		flags = SO_TYPE_TIDSCAN;

	return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
	uint32		flags = SO_TYPE_ANALYZE;

	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
	scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
			 struct ScanKeyData *key)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan.  Note that although the actual use of
 * syncscan might change (effectively, enabling or disabling reporting), the
 * previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
						bool allow_strat, bool allow_sync, bool allow_pagemode)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
										 allow_strat, allow_sync,
										 allow_pagemode);
}

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
{
	slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	/*
	 * We don't expect direct calls to table_scan_getnextslot with valid
	 * CheckXidAlive for catalog or regular tables.  See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");

	return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}

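/*
 * Example: a minimal sequential scan loop built from the wrappers above.
 * This is an illustrative sketch only; error handling, snapshot management
 * and the surrounding function are elided.
 *
 *		TupleTableSlot *slot = table_slot_create(rel, NULL);
 *		TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(),
 *											 0, NULL);
 *
 *		while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
 *		{
 *			(... process the tuple stored in slot ...)
 *		}
 *
 *		table_endscan(scan);
 *		ExecDropSingleTupleTableSlot(slot);
 */
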
/* ----------------------------------------------------------------------------
 * TID Range scanning related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
 * for a TID range scan.
 */
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
						 ItemPointer mintid,
						 ItemPointer maxtid)
{
	TableScanDesc sscan;
	uint32		flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;

	sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);

	/* Set the range of TIDs to scan */
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);

	return sscan;
}

/*
 * table_rescan_tidrange resets the scan position and sets the minimum and
 * maximum TID range to scan for a TableScanDesc created by
 * table_beginscan_tidrange.
 */
static inline void
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
					  ItemPointer maxtid)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
}

/*
 * Fetch the next tuple from `sscan` for a TID range scan created by
 * table_beginscan_tidrange().  Stores the tuple in `slot` and returns true,
 * or returns false if no more tuples exist in the range.
 */
static inline bool
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
								TupleTableSlot *slot)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
															   direction,
															   slot);
}

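/*
 * Example: scanning every tuple whose TID falls in [mintid, maxtid], using
 * the TID range functions above.  Illustrative sketch only; the snapshot and
 * the mintid/maxtid ItemPointers are assumed to be set up by the caller.
 *
 *		TupleTableSlot *slot = table_slot_create(rel, NULL);
 *		TableScanDesc sscan = table_beginscan_tidrange(rel, snapshot,
 *													   &mintid, &maxtid);
 *
 *		while (table_scan_getnextslot_tidrange(sscan, ForwardScanDirection,
 *											   slot))
 *		{
 *			(... process the tuple stored in slot ...)
 *		}
 *
 *		table_endscan(sscan);
 *		ExecDropSingleTupleTableSlot(slot);
 */
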
/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this
 * relation. `pscan` needs to be sized according to parallelscan_estimate()
 * for the same relation.  Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
										  ParallelTableScanDesc pscan,
										  Snapshot snapshot);

/*
 * Begin a parallel scan. `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation. The initialization
 * does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation relation,
											  ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan.  Call this in the leader process.  Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
	rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}

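/*
 * Example: the division of labor for a parallel scan.  Illustrative sketch
 * only; in practice `pscan` lives in dynamic shared memory set up by the
 * parallel infrastructure (see nodeSeqscan.c), not in a plain palloc'd chunk
 * as shown here.
 *
 * Leader, before launching workers:
 *		Size sz = table_parallelscan_estimate(rel, snapshot);
 *		ParallelTableScanDesc pscan = (ParallelTableScanDesc) palloc(sz);
 *		table_parallelscan_initialize(rel, pscan, snapshot);
 *
 * Each worker (and typically the leader, too):
 *		TableScanDesc scan = table_beginscan_parallel(rel, pscan);
 *		(... drive the scan with table_scan_getnextslot() ...)
 *		table_endscan(scan);
 */
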
/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
	return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch. Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 * a visibility test according to `snapshot`. If a tuple was found and passed
 * the visibility test, returns true, false otherwise. Note that *tid may be
 * modified when we return true (see later remarks on multiple row versions
 * reachable via a single index entry).
 *
 * *call_again needs to be false on the first call to table_index_fetch_tuple() for
 * a tid. If there potentially is another tuple matching the tid, *call_again
 * will be set to true, signaling that table_index_fetch_tuple() should be called
 * again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 * that tuple. Index AMs can use that to avoid returning that tid in future
 * searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single index
 * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
 * evaluates the tuple exactly at `tid`. Outside of index entry ->table tuple
 * lookups, table_tuple_fetch_row_version() is what's usually needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
						ItemPointer tid,
						Snapshot snapshot,
						TupleTableSlot *slot,
						bool *call_again, bool *all_dead)
{
	/*
	 * We don't expect direct calls to table_index_fetch_tuple with valid
	 * CheckXidAlive for catalog or regular tables.  See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");

	return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
													slot, call_again,
													all_dead);
}

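/*
 * Example: fetching all row versions reachable from a single index entry's
 * TID.  Illustrative sketch only, loosely following index_fetch_heap(); the
 * tid, snapshot and slot are assumed to come from the enclosing index scan.
 *
 *		IndexFetchTableData *fetch = table_index_fetch_begin(rel);
 *		bool		call_again = false;
 *		bool		all_dead = false;
 *
 *		do
 *		{
 *			if (table_index_fetch_tuple(fetch, &tid, snapshot, slot,
 *										&call_again, &all_dead))
 *			{
 *				(... process the visible tuple stored in slot ...)
 *			}
 *		} while (call_again);
 *
 *		table_index_fetch_end(fetch);
 */
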
/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry.  This likely is only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
										  ItemPointer tid,
										  Snapshot snapshot,
										  bool *all_dead);


/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */


/*
 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
 * `snapshot`. If a tuple was found and passed the visibility test, returns
 * true, false otherwise.
 *
 * See table_index_fetch_tuple's comment about what the difference between
 * these functions is. It is correct to use this function outside of index
 * entry->table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
							  ItemPointer tid,
							  Snapshot snapshot,
							  TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_tuple_fetch_row_version with
	 * valid CheckXidAlive for catalog or regular tables.  See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");

	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}

/*
 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
 * mean that the pointed to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
	return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc sscan, ItemPointer tid);

/*
 * Return true iff tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect. If so
 * they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
							   Snapshot snapshot)
{
	return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Determine which index tuples are safe to delete based on their table TID.
 *
 * Determines which entries from index AM caller's TM_IndexDeleteOp state
 * point to vacuumable table tuples.  Entries that are found by tableam to be
 * vacuumable are naturally safe for index AM to delete, and so get directly
 * marked as deletable.  See comments above TM_IndexDelete and comments above
 * TM_IndexDeleteOp for full details.
 *
 * Returns a snapshotConflictHorizon transaction ID that caller places in
 * its index deletion WAL record.  This might be used during subsequent REDO
 * of the WAL record when in Hot Standby mode -- a recovery conflict for the
 * index deletion operation might be required on the standby.
 */
static inline TransactionId
table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
{
	return rel->rd_tableam->index_delete_tuples(rel, delstate);
}


/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot into table AM routine.
 *
 * The options bitmask allows the caller to specify options that may change the
 * behaviour of the AM. The AM will ignore options that it does not support.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 * free space in the relation. This can save some cycles when we know the
 * relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple. See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relation storage created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple. This should solely be used during table rewrites
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 * relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple. If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion. But note that any toasting of fields within the slot is NOT
 * reflected in the slots contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
				   int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->tuple_insert(rel, slot, cid, options,
								  bistate);
}

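/*
 * Example: building a virtual tuple in a slot and inserting it.  Illustrative
 * sketch only; it assumes a single-column relation and omits index
 * maintenance (see ExecInsertIndexTuples()) and error handling.
 *
 *		TupleTableSlot *slot = table_slot_create(rel, NULL);
 *
 *		ExecClearTuple(slot);
 *		slot->tts_values[0] = Int32GetDatum(42);
 *		slot->tts_isnull[0] = false;
 *		ExecStoreVirtualTuple(slot);
 *
 *		table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
 *		ExecDropSingleTupleTableSlot(slot);
 */
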
/*
 * Perform a "speculative insertion". These can be backed out afterwards
 * without aborting the whole transaction. Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed. Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
							   CommandId cid, int options,
							   struct BulkInsertStateData *bistate,
							   uint32 specToken)
{
	rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
											  bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction. If
 * succeeded is true, the tuple is fully inserted, if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
								 uint32 specToken, bool succeeded)
{
	rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
												succeeded);
}

/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation. That's often faster than calling table_tuple_insert() in a loop,
 * because e.g. the AM can reduce WAL logging and page locking overhead.
 *
 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context. You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
				   CommandId cid, int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->multi_insert(rel, slots, nslots,
								  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions.  Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	tid - TID of tuple to be deleted
 *	cid - delete command ID (used for visibility test, and stored into
 *		cmax if successful)
 *	crosscheck - if not InvalidSnapshot, also check tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	changingPart - true iff the tuple is being moved to another partition
 *		table due to an update of the partition key. Otherwise, false.
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax.  See comments for struct
 * TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
				   Snapshot snapshot, Snapshot crosscheck, bool wait,
				   TM_FailureData *tmfd, bool changingPart)
{
	return rel->rd_tableam->tuple_delete(rel, tid, cid,
										 snapshot, crosscheck,
										 wait, tmfd, changingPart);
}

1510 /*
1511  * Update a tuple.
1512  *
1513  * NB: do not call this directly unless you are prepared to deal with
1514  * concurrent-update conditions. Use simple_table_tuple_update instead.
1515  *
1516  * Input parameters:
1517  * relation - table to be modified (caller must hold suitable lock)
1518  * otid - TID of old tuple to be replaced
1519  * slot - newly constructed tuple data to store
1520  * cid - update command ID (used for visibility test, and stored into
1521  * cmax/cmin if successful)
1522  * crosscheck - if not InvalidSnapshot, also check old tuple against this
1523  * wait - true if should wait for any conflicting update to commit/abort
1524  * Output parameters:
1525  * tmfd - filled in failure cases (see below)
1526  * lockmode - filled with lock mode acquired on tuple
1527  * update_indexes - in success cases this is set to true if new index entries
1528  * are required for this tuple
1529  *
1530  * Normal, successful return value is TM_Ok, which means we did actually
1531  * update it. Failure return codes are TM_SelfModified, TM_Updated, and
1532  * TM_BeingModified (the last only possible if wait == false).
1533  *
1534  * On success, the slot's tts_tid and tts_tableOid are updated to match the new
1535  * stored tuple; in particular, slot->tts_tid is set to the TID where the
1536  * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
1537  * update was done. However, any TOAST changes in the new tuple's
1538  * data are not reflected into *newtup.
1539  *
1540  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
1541  * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
1542  * for additional info.
1543  */
1544 static inline TM_Result
1546  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
1547  bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
1548  TU_UpdateIndexes *update_indexes)
1549 {
1550  return rel->rd_tableam->tuple_update(rel, otid, slot,
1551  cid, snapshot, crosscheck,
1552  wait, tmfd,
1553  lockmode, update_indexes);
1554 }
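
A minimal sketch, not part of the header, of how a caller might consume update_indexes on success; real callers live in the executor (see ExecUpdate). The helper name and the command/snapshot context are assumptions, as in the delete example above.

/* Hypothetical sketch; assumes the same includes as the delete example. */
static void
update_tuple_checked(Relation rel, ItemPointer otid, TupleTableSlot *newslot)
{
    TM_FailureData tmfd;
    LockTupleMode lockmode;
    TU_UpdateIndexes update_indexes;
    TM_Result   result;

    result = table_tuple_update(rel, otid, newslot,
                                GetCurrentCommandId(true),
                                GetActiveSnapshot(), InvalidSnapshot,
                                true, &tmfd, &lockmode, &update_indexes);

    if (result != TM_Ok)
        elog(ERROR, "tuple concurrently modified");

    switch (update_indexes)
    {
        case TU_All:
            /* insert new entries into all indexes */
            break;
        case TU_Summarizing:
            /* only summarizing (e.g. BRIN) indexes need new entries */
            break;
        case TU_None:
            /* HOT update: no index entries required */
            break;
    }
}
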
1555 
1556 /*
1557  * Lock a tuple in the specified mode.
1558  *
1559  * Input parameters:
1560  * relation: relation containing tuple (caller must hold suitable lock)
1561  * tid: TID of tuple to lock
1562  * snapshot: snapshot to use for visibility determinations
1563  * cid: current command ID (used for visibility test, and stored into
1564  * tuple's cmax if lock is successful)
1565  * mode: lock mode desired
1566  * wait_policy: what to do if tuple lock is not available
1567  * flags:
1568  * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
1569  * also lock descendant tuples if lock modes don't conflict.
1570  * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
1571  * latest version.
1572  *
1573  * Output parameters:
1574  * *slot: contains the target tuple
1575  * *tmfd: filled in failure cases (see below)
1576  *
1577  * Function result may be:
1578  * TM_Ok: lock was successfully acquired
1579  * TM_Invisible: lock failed because tuple was never visible to us
1580  * TM_SelfModified: lock failed because tuple updated by self
1581  * TM_Updated: lock failed because tuple updated by other xact
1582  * TM_Deleted: lock failed because tuple deleted by other xact
1583  * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
1584  *
1585  * In the failure cases other than TM_Invisible and TM_Deleted, the routine
1586  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
1587  * comments for struct TM_FailureData for additional info.
1588  */
1589 static inline TM_Result
1590 table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
1591  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
1592  LockWaitPolicy wait_policy, uint8 flags,
1593  TM_FailureData *tmfd)
1594 {
1595  return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
1596  cid, mode, wait_policy,
1597  flags, tmfd);
1598 }
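
A minimal sketch, not part of the header, of SKIP LOCKED-style locking using the flags and result codes above. The helper name is invented; LockTupleExclusive and LockWaitSkip come from nodes/lockoptions.h.

/* Hypothetical sketch: try to lock the latest version of the row;
 * give up rather than block. */
static bool
try_lock_tuple(Relation rel, ItemPointer tid, TupleTableSlot *slot)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = table_tuple_lock(rel, tid, GetActiveSnapshot(), slot,
                              GetCurrentCommandId(false),
                              LockTupleExclusive, LockWaitSkip,
                              TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
                              &tmfd);

    /* TM_WouldBlock (and the other failure codes) => caller skips the row */
    return result == TM_Ok;
}
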
1599 
1600 /*
1601  * Perform operations necessary to complete insertions made via
1602  * tuple_insert and multi_insert with a BulkInsertState specified.
1603  */
1604 static inline void
1605 table_finish_bulk_insert(Relation rel, int options)
1606 {
1607  /* optional callback */
1608  if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
1609  rel->rd_tableam->finish_bulk_insert(rel, options);
1610 }
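
A minimal sketch, not part of the header, of the bulk-insert pattern this completes. GetBulkInsertState()/FreeBulkInsertState() are provided by the heap AM (access/heapam.h); the helper name and the zero options are assumptions.

#include "access/heapam.h"      /* GetBulkInsertState() is heap-provided */

/* Hypothetical sketch: insert a batch with a bulk-insert state, then let
 * the AM flush whatever it buffered. */
static void
bulk_insert(Relation rel, TupleTableSlot **slots, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        table_tuple_insert(rel, slots[i], cid, 0 /* options */, bistate);

    FreeBulkInsertState(bistate);
    table_finish_bulk_insert(rel, 0 /* options */);
}
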
1611 
1612 
1613 /* ------------------------------------------------------------------------
1614  * DDL related functionality.
1615  * ------------------------------------------------------------------------
1616  */
1617 
1618 /*
1619  * Create storage for `rel` in `newrlocator`, with persistence set to
1620  * `persistence`.
1621  *
1622  * This is used both during relation creation and various DDL operations to
1623  * create new rel storage that can be filled from scratch. When creating
1624  * new storage for an existing relfilelocator, this should be called before the
1625  * relcache entry has been updated.
1626  *
1627  * *freezeXid and *minmulti are set to the xid / multixact horizons that
1628  * pg_class.{relfrozenxid, relminmxid} have to be set to for the table.
1629  */
1630 static inline void
1631 table_relation_set_new_filelocator(Relation rel,
1632  const RelFileLocator *newrlocator,
1633  char persistence,
1634  TransactionId *freezeXid,
1635  MultiXactId *minmulti)
1636 {
1637  rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
1638  persistence, freezeXid,
1639  minmulti);
1640 }
1641 
1642 /*
1643  * Remove all table contents from `rel`, in a non-transactional manner.
1644  * Non-transactional meaning that there's no need to support rollbacks. This
1645  * is commonly only used to perform truncations for relation storage created
1646  * in the current transaction.
1647  */
1648 static inline void
1649 table_relation_nontransactional_truncate(Relation rel)
1650 {
1651  rel->rd_tableam->relation_nontransactional_truncate(rel);
1652 }
1653 
1654 /*
1655  * Copy data from `rel` into the new relfilelocator `newrlocator`. The new
1656  * relfilelocator must not have storage associated with it before this function
1657  * is called. This is only supposed to be used for low level operations like
1658  * changing a relation's tablespace.
1659  */
1660 static inline void
1661 table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
1662 {
1663  rel->rd_tableam->relation_copy_data(rel, newrlocator);
1664 }
1665 
1666 /*
1667  * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1668  * FULL.
1669  *
1670  * Additional Input parameters:
1671  * - use_sort - if true, the table contents are sorted appropriately for
1672  * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1673  * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1674  * performed
1675  * - OldIndex - see use_sort
1676  * - OldestXmin - computed by vacuum_get_cutoffs(), even when
1677  * not needed for the relation's AM
1678  * - *xid_cutoff - ditto
1679  * - *multi_cutoff - ditto
1680  *
1681  * Output parameters:
1682  * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1683  * - *multi_cutoff - rel's new relminmxid value, may be invalid
1684  * - *tups_vacuumed - stats, for logging, if appropriate for AM
1685  * - *tups_recently_dead - stats, for logging, if appropriate for AM
1686  */
1687 static inline void
1688 table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
1689  Relation OldIndex,
1690  bool use_sort,
1691  TransactionId OldestXmin,
1692  TransactionId *xid_cutoff,
1693  MultiXactId *multi_cutoff,
1694  double *num_tuples,
1695  double *tups_vacuumed,
1696  double *tups_recently_dead)
1697 {
1698  OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1699  use_sort, OldestXmin,
1700  xid_cutoff, multi_cutoff,
1701  num_tuples, tups_vacuumed,
1702  tups_recently_dead);
1703 }
1704 
1705 /*
1706  * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1707  * autovacuum. The specific actions performed by the AM will depend heavily on
1708  * the individual AM.
1709  *
1710  * On entry a transaction needs to already have been established, and the
1711  * table is locked with a ShareUpdateExclusive lock.
1712  *
1713  * Note that neither VACUUM FULL (nor CLUSTER), nor ANALYZE goes through this
1714  * routine, even if (for ANALYZE) it is part of the same VACUUM command.
1715  */
1716 static inline void
1717 table_relation_vacuum(Relation rel, struct VacuumParams *params,
1718  BufferAccessStrategy bstrategy)
1719 {
1720  rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1721 }
1722 
1723 /*
1724  * Prepare to analyze the next block in the read stream. The scan needs to
1725  * have been started with table_beginscan_analyze(). Note that this routine
1726  * might acquire resources like locks that are held until
1727  * table_scan_analyze_next_tuple() returns false.
1728  *
1729  * Returns false if block is unsuitable for sampling, true otherwise.
1730  */
1731 static inline bool
1732 table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
1733 {
1734  return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
1735 }
1736 
1737 /*
1738  * Iterate over tuples in the block selected with
1739  * table_scan_analyze_next_block() (which needs to have returned true, and
1740  * this routine may not have returned false for the same block before). If a
1741  * tuple that's suitable for sampling is found, true is returned and a tuple
1742  * is stored in `slot`.
1743  *
1744  * *liverows and *deadrows are incremented according to the encountered
1745  * tuples.
1746  */
1747 static inline bool
1748 table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1749  double *liverows, double *deadrows,
1750  TupleTableSlot *slot)
1751 {
1752  return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1753  liverows, deadrows,
1754  slot);
1755 }
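
A minimal sketch, not part of the header, of how the two ANALYZE callbacks nest. It assumes `stream` was already created over `rel` with read_stream_begin_relation() and a block-sampling callback; the real loop is acquire_sample_rows() in analyze.c.

/* Hypothetical sketch of the ANALYZE sampling loop. */
static void
sample_rows(Relation rel, ReadStream *stream, TransactionId OldestXmin,
            TupleTableSlot *slot, double *liverows, double *deadrows)
{
    TableScanDesc scan = table_beginscan_analyze(rel);

    while (table_scan_analyze_next_block(scan, stream))
    {
        while (table_scan_analyze_next_tuple(scan, OldestXmin,
                                             liverows, deadrows, slot))
        {
            /* hand the sampled tuple in `slot` to the statistics code */
        }
    }

    table_endscan(scan);
}
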
1756 
1757 /*
1758  * table_index_build_scan - scan the table to find tuples to be indexed
1759  *
1760  * This is called back from an access-method-specific index build procedure
1761  * after the AM has done whatever setup it needs. The parent table relation
1762  * is scanned to find tuples that should be entered into the index. Each
1763  * such tuple is passed to the AM's callback routine, which does the right
1764  * things to add it to the new index. After we return, the AM's index
1765  * build procedure does whatever cleanup it needs.
1766  *
1767  * The total count of live tuples is returned. This is for updating pg_class
1768  * statistics. (It's annoying not to be able to do that here, but we want to
1769  * merge that update with others; see index_update_stats.) Note that the
1770  * index AM itself must keep track of the number of index tuples; we don't do
1771  * so here because the AM might reject some of the tuples for its own reasons,
1772  * such as being unable to store NULLs.
1773  *
1774  * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1775  * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1776  *
1777  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1778  * any potentially broken HOT chains. Currently, we set this if there are any
1779  * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1780  * very hard to detect whether they're really incompatible with the chain tip.
1781  * This only really makes sense for the heap AM; it might need to be
1782  * generalized for other AMs later.
1783  */
1784 static inline double
1785 table_index_build_scan(Relation table_rel,
1786  Relation index_rel,
1787  struct IndexInfo *index_info,
1788  bool allow_sync,
1789  bool progress,
1790  IndexBuildCallback callback,
1791  void *callback_state,
1792  TableScanDesc scan)
1793 {
1794  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1795  index_rel,
1796  index_info,
1797  allow_sync,
1798  false,
1799  progress,
1800  0,
1801  InvalidBlockNumber,
1802  callback,
1803  callback_state,
1804  scan);
1805 }
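
A minimal sketch, not part of the header, of the index-build pattern the comment describes (compare btbuild in nbtree.c). MyBuildState, my_build_callback, and my_build are invented for the example.

/* Hypothetical index AM build state. */
typedef struct MyBuildState
{
    double      indtuples;      /* # tuples accepted into the index */
} MyBuildState;

static void
my_build_callback(Relation index, ItemPointer tid, Datum *values,
                  bool *isnull, bool tupleIsAlive, void *state)
{
    MyBuildState *buildstate = (MyBuildState *) state;

    /* ... form an index tuple from (values, isnull, tid) and insert it ... */
    buildstate->indtuples += 1;
}

static double
my_build(Relation heap, Relation index, struct IndexInfo *indexInfo)
{
    MyBuildState buildstate = {0};

    /* Returns the number of live heap tuples, for pg_class statistics. */
    return table_index_build_scan(heap, index, indexInfo,
                                  true,     /* allow_sync */
                                  true,     /* progress */
                                  my_build_callback,
                                  &buildstate,
                                  NULL);    /* let the AM begin the scan */
}
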
1806 
1807 /*
1808  * As table_index_build_scan(), except that instead of scanning the complete
1809  * table, only the given number of blocks are scanned. Scan to end-of-rel can
1810  * be signaled by passing InvalidBlockNumber as numblocks. Note that
1811  * restricting the range to scan cannot be done when requesting syncscan.
1812  *
1813  * When "anyvisible" mode is requested, all tuples visible to any transaction
1814  * are indexed and counted as live, including those inserted or deleted by
1815  * transactions that are still in progress.
1816  */
1817 static inline double
1818 table_index_build_range_scan(Relation table_rel,
1819  Relation index_rel,
1820  struct IndexInfo *index_info,
1821  bool allow_sync,
1822  bool anyvisible,
1823  bool progress,
1824  BlockNumber start_blockno,
1825  BlockNumber numblocks,
1826  IndexBuildCallback callback,
1827  void *callback_state,
1828  TableScanDesc scan)
1829 {
1830  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1831  index_rel,
1832  index_info,
1833  allow_sync,
1834  anyvisible,
1835  progress,
1836  start_blockno,
1837  numblocks,
1838  callback,
1839  callback_state,
1840  scan);
1841 }
1842 
1843 /*
1844  * table_index_validate_scan - second table scan for concurrent index build
1845  *
1846  * See validate_index() for an explanation.
1847  */
1848 static inline void
1849 table_index_validate_scan(Relation table_rel,
1850  Relation index_rel,
1851  struct IndexInfo *index_info,
1852  Snapshot snapshot,
1853  struct ValidateIndexState *state)
1854 {
1855  table_rel->rd_tableam->index_validate_scan(table_rel,
1856  index_rel,
1857  index_info,
1858  snapshot,
1859  state);
1860 }
1861 
1862 
1863 /* ----------------------------------------------------------------------------
1864  * Miscellaneous functionality
1865  * ----------------------------------------------------------------------------
1866  */
1867 
1868 /*
1869  * Return the current size of `rel` in bytes. If `forkNumber` is
1870  * InvalidForkNumber, return the relation's overall size, otherwise the size
1871  * for the indicated fork.
1872  *
1873  * Note that the overall size might not equal the sum of the sizes of the
1874  * individual forks for some AMs, e.g. because the AM's storage does
1875  * not neatly map onto the builtin types of forks.
1876  */
1877 static inline uint64
1878 table_relation_size(Relation rel, ForkNumber forkNumber)
1879 {
1880  return rel->rd_tableam->relation_size(rel, forkNumber);
1881 }
1882 
1883 /*
1884  * table_relation_needs_toast_table - does this relation need a toast table?
1885  */
1886 static inline bool
1887 table_relation_needs_toast_table(Relation rel)
1888 {
1889  return rel->rd_tableam->relation_needs_toast_table(rel);
1890 }
1891 
1892 /*
1893  * Return the OID of the AM that should be used to implement the TOAST table
1894  * for this relation.
1895  */
1896 static inline Oid
1897 table_relation_toast_am(Relation rel)
1898 {
1899  return rel->rd_tableam->relation_toast_am(rel);
1900 }
1901 
1902 /*
1903  * Fetch all or part of a TOAST value from a TOAST table.
1904  *
1905  * If this AM is never used to implement a TOAST table, then this callback
1906  * is not needed. But, if toasted values are ever stored in a table of this
1907  * type, then you will need this callback.
1908  *
1909  * toastrel is the relation in which the toasted value is stored.
1910  *
1911  * valueid identifies which toast value is to be fetched. For the heap,
1912  * this corresponds to the values stored in the chunk_id column.
1913  *
1914  * attrsize is the total size of the toast value to be fetched.
1915  *
1916  * sliceoffset is the offset within the toast value of the first byte that
1917  * should be fetched.
1918  *
1919  * slicelength is the number of bytes from the toast value that should be
1920  * fetched.
1921  *
1922  * result is caller-allocated space into which the fetched bytes should be
1923  * stored.
1924  */
1925 static inline void
1926 table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
1927  int32 attrsize, int32 sliceoffset,
1928  int32 slicelength, struct varlena *result)
1929 {
1930  toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1931  attrsize,
1932  sliceoffset, slicelength,
1933  result);
1934 }
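
A minimal sketch, not part of the header, of slicing a toast value with the parameters described above (compare toast_fetch_datum_slice in detoast.c). The helper name and the 100-byte prefix are assumptions.

/* Hypothetical sketch: fetch the first (up to) 100 bytes of a toast value
 * into caller-allocated space; `toastrel` must already be open and locked,
 * and `attrsize` is the value's total size. */
static struct varlena *
fetch_toast_prefix(Relation toastrel, Oid valueid, int32 attrsize)
{
    int32       slicelength = Min(attrsize, 100);
    struct varlena *result = (struct varlena *) palloc(VARHDRSZ + slicelength);

    SET_VARSIZE(result, VARHDRSZ + slicelength);
    table_relation_fetch_toast_slice(toastrel, valueid, attrsize,
                                     0 /* sliceoffset */, slicelength,
                                     result);
    return result;
}
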
1935 
1936 
1937 /* ----------------------------------------------------------------------------
1938  * Planner related functionality
1939  * ----------------------------------------------------------------------------
1940  */
1941 
1942 /*
1943  * Estimate the current size of the relation, as an AM specific workhorse for
1944  * estimate_rel_size(). Look there for an explanation of the parameters.
1945  */
1946 static inline void
1947 table_relation_estimate_size(Relation rel, int32 *attr_widths,
1948  BlockNumber *pages, double *tuples,
1949  double *allvisfrac)
1950 {
1951  rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1952  allvisfrac);
1953 }
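
A minimal sketch, not part of the header: a block-oriented AM can implement this callback by delegating to table_block_relation_estimate_size() (declared later in this file), as the heap AM does. The overhead constants below are illustrative assumptions, not heap's actual values.

/* Hypothetical AM callback delegating to the block-oriented helper. */
static void
myam_estimate_rel_size(Relation rel, int32 *attr_widths,
                       BlockNumber *pages, double *tuples,
                       double *allvisfrac)
{
    table_block_relation_estimate_size(rel, attr_widths, pages,
                                       tuples, allvisfrac,
                                       24,              /* per-tuple overhead */
                                       BLCKSZ - 24);    /* usable page bytes */
}
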
1954 
1955 
1956 /* ----------------------------------------------------------------------------
1957  * Executor related functionality
1958  * ----------------------------------------------------------------------------
1959  */
1960 
1961 /*
1962  * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
1963  * a bitmap table scan. `scan` needs to have been started via
1964  * table_beginscan_bm(). Returns false if there are no tuples to be found on
1965  * the page, true otherwise.
1966  *
1967  * Note that this is an optional callback; it should therefore only be
1968  * used after verifying its presence (at plan time or similar).
1969  */
1970 static inline bool
1971 table_scan_bitmap_next_block(TableScanDesc scan,
1972  struct TBMIterateResult *tbmres)
1973 {
1974  /*
1975  * We don't expect direct calls to table_scan_bitmap_next_block with valid
1976  * CheckXidAlive for catalog or regular tables. See detailed comments in
1977  * xact.c where these variables are declared.
1978  */
1979  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1980  elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
1981 
1982  return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
1983  tbmres);
1984 }
1985 
1986 /*
1987  * Fetch the next tuple of a bitmap table scan into `slot` and return true if
1988  * a visible tuple was found, false otherwise.
1989  * table_scan_bitmap_next_block() needs to previously have selected a
1990  * block (i.e. returned true), and no previous
1991  * table_scan_bitmap_next_tuple() for the same block may have
1992  * returned false.
1993  */
1994 static inline bool
1995 table_scan_bitmap_next_tuple(TableScanDesc scan,
1996  struct TBMIterateResult *tbmres,
1997  TupleTableSlot *slot)
1998 {
1999  /*
2000  * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
2001  * CheckXidAlive for catalog or regular tables. See detailed comments in
2002  * xact.c where these variables are declared.
2003  */
2004  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2005  elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
2006 
2007  return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
2008  tbmres,
2009  slot);
2010 }
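
A minimal sketch, not part of the header, of how the two bitmap-scan callbacks nest per page. It assumes `tbmres` was filled in by a TIDBitmap iterator (see tidbitmap.h); the real loop lives in nodeBitmapHeapscan.c.

/* Hypothetical per-page driving loop for a bitmap table scan. */
static void
scan_bitmap_page(TableScanDesc scan, struct TBMIterateResult *tbmres,
                 TupleTableSlot *slot)
{
    if (!table_scan_bitmap_next_block(scan, tbmres))
        return;                 /* no tuples on this page */

    while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
    {
        /* process the visible tuple in `slot` */
    }
}
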
2011 
2012 /*
2013  * Prepare to fetch tuples from the next block in a sample scan. Returns false
2014  * if the sample scan is finished, true otherwise. `scan` needs to have been
2015  * started via table_beginscan_sampling().
2016  *
2017  * This will call the TsmRoutine's NextSampleBlock() callback if necessary
2018  * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
2019  * underlying relation.
2020  */
2021 static inline bool
2022 table_scan_sample_next_block(TableScanDesc scan,
2023  struct SampleScanState *scanstate)
2024 {
2025  /*
2026  * We don't expect direct calls to table_scan_sample_next_block with valid
2027  * CheckXidAlive for catalog or regular tables. See detailed comments in
2028  * xact.c where these variables are declared.
2029  */
2030  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2031  elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
2032  return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
2033 }
2034 
2035 /*
2036  * Fetch the next sample tuple into `slot` and return true if a visible tuple
2037  * was found, false otherwise. table_scan_sample_next_block() needs to
2038  * previously have selected a block (i.e. returned true), and no previous
2039  * table_scan_sample_next_tuple() for the same block may have returned false.
2040  *
2041  * This will call the TsmRoutine's NextSampleTuple() callback.
2042  */
2043 static inline bool
2044 table_scan_sample_next_tuple(TableScanDesc scan,
2045  struct SampleScanState *scanstate,
2046  TupleTableSlot *slot)
2047 {
2048  /*
2049  * We don't expect direct calls to table_scan_sample_next_tuple with valid
2050  * CheckXidAlive for catalog or regular tables. See detailed comments in
2051  * xact.c where these variables are declared.
2052  */
2053  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2054  elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
2055  return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
2056  slot);
2057 }
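
A minimal sketch, not part of the header, of the sample-scan nesting. It assumes `scan` came from table_beginscan_sampling() and that `scanstate` carries the TsmRoutine state; the real loop is in nodeSamplescan.c.

/* Hypothetical driving loop for a sample scan. */
static void
run_sample_scan(TableScanDesc scan, struct SampleScanState *scanstate,
                TupleTableSlot *slot)
{
    while (table_scan_sample_next_block(scan, scanstate))
    {
        while (table_scan_sample_next_tuple(scan, scanstate, slot))
        {
            /* process the sampled tuple in `slot` */
        }
    }
}
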
2058 
2059 
2060 /* ----------------------------------------------------------------------------
2061  * Functions to make modifications a bit simpler.
2062  * ----------------------------------------------------------------------------
2063  */
2064 
2065 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2066 extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
2067  Snapshot snapshot);
2068 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2069  TupleTableSlot *slot, Snapshot snapshot,
2070  TU_UpdateIndexes *update_indexes);
2071 
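A minimal sketch, not part of the header: the simple_ variants raise an error on concurrent changes instead of returning a TM_Result, which suits logical-replication apply. The helper name is invented, and the GetLatestSnapshot() choice is illustrative; real callers pass the executor's snapshot.

/* Hypothetical apply of a remote update via the simple_ interface. */
static void
apply_remote_update(Relation rel, ItemPointer otid, TupleTableSlot *newslot)
{
    TU_UpdateIndexes update_indexes;

    simple_table_tuple_update(rel, otid, newslot,
                              GetLatestSnapshot(),
                              &update_indexes);
    /* then add index entries as indicated by update_indexes */
}
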
2072 
2073 /* ----------------------------------------------------------------------------
2074  * Helper functions to implement parallel scans for block oriented AMs.
2075  * ----------------------------------------------------------------------------
2076  */
2077 
2078 extern Size table_block_parallelscan_estimate(Relation rel);
2079 extern Size table_block_parallelscan_initialize(Relation rel,
2080  ParallelTableScanDesc pscan);
2081 extern void table_block_parallelscan_reinitialize(Relation rel,
2082  ParallelTableScanDesc pscan);
2083 extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
2084  ParallelBlockTableScanWorker pbscanwork,
2085  ParallelBlockTableScanDesc pbscan);
2086 extern void table_block_parallelscan_startblock_init(Relation rel,
2087  ParallelBlockTableScanWorker pbscanwork,
2088  ParallelBlockTableScanDesc pbscan);
2089 
2090 
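A minimal sketch, not part of the header: a block-oriented AM can wire its parallel-scan callbacks directly to the helpers above, as the heap AM does in heapam_handler.c. `myam_methods` is a hypothetical, deliberately incomplete TableAmRoutine.

/* Hypothetical table AM routine delegating parallel-scan support. */
static const TableAmRoutine myam_methods = {
    .type = T_TableAmRoutine,
    /* ... scan, modify, and DDL callbacks elided ... */
    .parallelscan_estimate = table_block_parallelscan_estimate,
    .parallelscan_initialize = table_block_parallelscan_initialize,
    .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
};
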
2091 /* ----------------------------------------------------------------------------
2092  * Helper functions to implement relation sizing for block oriented AMs.
2093  * ----------------------------------------------------------------------------
2094  */
2095 
2096 extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2097 extern void table_block_relation_estimate_size(Relation rel,
2098  int32 *attr_widths,
2099  BlockNumber *pages,
2100  double *tuples,
2101  double *allvisfrac,
2102  Size overhead_bytes_per_tuple,
2103  Size usable_bytes_per_page);
2104 
2105 /* ----------------------------------------------------------------------------
2106  * Functions in tableamapi.c
2107  * ----------------------------------------------------------------------------
2108  */
2109 
2110 extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2111 
2112 /* ----------------------------------------------------------------------------
2113  * Functions in heapam_handler.c
2114  * ----------------------------------------------------------------------------
2115  */
2116 
2117 extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2118 
2119 #endif /* TABLEAM_H */