/*-------------------------------------------------------------------------
 *
 * tableam.h
 *	  POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *	  See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H

#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "storage/read_stream.h"
#include "utils/rel.h"
#include "utils/snapshot.h"


#define DEFAULT_TABLE_ACCESS_METHOD	"heap"

/* GUCs */
extern PGDLLIMPORT char *default_table_access_method;
extern PGDLLIMPORT bool synchronize_seqscans;


struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
struct VacuumParams;
struct ValidateIndexState;

/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
	/* one of SO_TYPE_* may be specified */
	SO_TYPE_SEQSCAN = 1 << 0,
	SO_TYPE_BITMAPSCAN = 1 << 1,
	SO_TYPE_SAMPLESCAN = 1 << 2,
	SO_TYPE_TIDSCAN = 1 << 3,
	SO_TYPE_TIDRANGESCAN = 1 << 4,
	SO_TYPE_ANALYZE = 1 << 5,

	/* several of SO_ALLOW_* may be specified */
	/* allow or disallow use of access strategy */
	SO_ALLOW_STRAT = 1 << 6,
	/* report location to syncscan logic? */
	SO_ALLOW_SYNC = 1 << 7,
	/* verify visibility page-at-a-time? */
	SO_ALLOW_PAGEMODE = 1 << 8,

	/* unregister snapshot at scan end? */
	SO_TEMP_SNAPSHOT = 1 << 9,

	/*
	 * At the discretion of the table AM, bitmap table scans may be able to
	 * skip fetching a block from the table if none of the table data is
	 * needed. If table data may be needed, set SO_NEED_TUPLES.
	 */
	SO_NEED_TUPLES = 1 << 10,
} ScanOptions;

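/*
 * Illustrative sketch (hypothetical helper, not part of this header): how
 * the table_beginscan* wrappers below compose a flags value -- exactly one
 * SO_TYPE_* bit plus whichever SO_ALLOW_* behaviors the caller permits.
 */
static inline uint32
example_seqscan_flags(bool allow_sync)
{
	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_PAGEMODE;

	if (allow_sync)
		flags |= SO_ALLOW_SYNC;	/* report scan position to syncscan logic */
	return flags;
}
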
/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
	/*
	 * Signals that the action succeeded (i.e. update/delete performed, lock
	 * was acquired)
	 */
	TM_Ok,

	/* The affected tuple wasn't visible to the relevant snapshot */
	TM_Invisible,

	/* The affected tuple was already modified by the calling backend */
	TM_SelfModified,

	/*
	 * The affected tuple was updated by another transaction. This includes
	 * the case where the tuple was moved to another partition.
	 */
	TM_Updated,

	/* The affected tuple was deleted by another transaction */
	TM_Deleted,

	/*
	 * The affected tuple is currently being modified by another session.
	 * This will only be returned if table_(update/delete/lock_tuple) are
	 * instructed not to wait.
	 */
	TM_BeingModified,

	/* lock couldn't be acquired, action skipped. Only used by lock_tuple */
	TM_WouldBlock,
} TM_Result;

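/*
 * Illustrative sketch (hypothetical helper, not part of this header): the
 * shape of a caller's dispatch on TM_Result after table_tuple_delete() or
 * table_tuple_update(); compare the real handling in nodeModifyTable.c.
 */
static inline bool
example_modify_succeeded(TM_Result result)
{
	switch (result)
	{
		case TM_Ok:
			return true;		/* action was performed */
		case TM_SelfModified:
			return false;		/* already modified by this backend */
		case TM_Updated:
		case TM_Deleted:
			return false;		/* concurrent change; consult TM_FailureData */
		case TM_BeingModified:
			return false;		/* only possible when wait == false */
		default:
			return false;
	}
}
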
/*
 * Result codes for table_update(..., update_indexes*..).
 * Used to determine which indexes to update.
 */
typedef enum TU_UpdateIndexes
{
	/* No indexed columns were updated (incl. TID addressing of tuple) */
	TU_None,

	/* A non-summarizing indexed column was updated, or the TID has changed */
	TU_All,

	/* Only summarized columns were updated, TID is unchanged */
	TU_Summarizing,
} TU_UpdateIndexes;

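/*
 * Illustrative sketch (hypothetical helper): mapping table_tuple_update()'s
 * update_indexes result onto the two decisions a caller makes -- whether to
 * insert any new index entries at all, and whether only summarizing indexes
 * (e.g. BRIN) are affected.
 */
static inline void
example_interpret_update_indexes(TU_UpdateIndexes update_indexes,
								 bool *insert_entries,
								 bool *only_summarizing)
{
	*insert_entries = (update_indexes != TU_None);
	*only_summarizing = (update_indexes == TU_Summarizing);
}
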
/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID. If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero. (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
	ItemPointerData ctid;
	TransactionId xmax;
	CommandId	cmax;
	bool		traversed;
} TM_FailureData;

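/*
 * Illustrative sketch (hypothetical helper): after a TM_Updated failure, a
 * caller that wants to chase the update chain first checks whether the
 * target was deleted outright (ctid unchanged); it must separately compare
 * tmfd->xmax against the replacement tuple's creating transaction before
 * trusting the link, as the comment above describes.
 */
static inline bool
example_has_replacement_tuple(TM_FailureData *tmfd, ItemPointer tid)
{
	if (ItemPointerEquals(&tmfd->ctid, tid))
		return false;			/* tuple was deleted, not updated */
	*tid = tmfd->ctid;			/* follow ctid link to the newer version */
	return true;
}
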
/*
 * State used when calling table_index_delete_tuples().
 *
 * Represents the status of table tuples, referenced by table TID and taken by
 * index AM from index tuples. State consists of high level parameters of the
 * deletion operation, plus two mutable palloc()'d arrays for information
 * about the status of individual table tuples. These are conceptually one
 * single array. Using two arrays keeps the TM_IndexDelete struct small,
 * which makes sorting the first array (the deltids array) fast.
 *
 * Some index AM callers perform simple index tuple deletion (by specifying
 * bottomup = false), and include only known-dead deltids. These known-dead
 * entries are all marked knowndeletable = true directly (typically these are
 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
 *
 * Callers that specify bottomup = true are "bottom-up index deletion"
 * callers. The considerations for the tableam are more subtle with these
 * callers because they ask the tableam to perform highly speculative work,
 * and might only expect the tableam to check a small fraction of all entries.
 * Caller is not allowed to specify knowndeletable = true for any entry
 * because everything is highly speculative. Bottom-up caller provides
 * context and hints to tableam -- see comments below for details on how index
 * AMs and tableams should coordinate during bottom-up index deletion.
 *
 * Simple index deletion callers may ask the tableam to perform speculative
 * work, too. This is a little like bottom-up deletion, but not too much.
 * The tableam will only perform speculative work when it's practically free
 * to do so in passing for simple deletion caller (while always performing
 * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
 * be deleted within index AM). This is the real reason why it's possible for
 * simple index deletion caller to specify knowndeletable = false up front
 * (this means "check if it's possible for me to delete corresponding index
 * tuple when it's cheap to do so in passing"). The index AM should only
 * include "extra" entries for index tuples whose TIDs point to a table block
 * that tableam is expected to have to visit anyway (in the event of a
 * block-oriented tableam). The tableam isn't strictly obligated to check
 * these "extra" TIDs, but a block-based AM should always manage to do so in
 * practice.
 *
 * The final contents of the deltids/status arrays are interesting to callers
 * that ask tableam to perform speculative work (i.e. when _any_ items have
 * knowndeletable set to false up front). These index AM callers will
 * naturally need to consult final state to determine which index tuples are
 * in fact deletable.
 *
 * The index AM can keep track of which index tuple relates to which deltid by
 * setting idxoffnum (and/or relying on each entry being uniquely identifiable
 * using tid), which is important when the final contents of the array will
 * need to be interpreted -- the array can shrink from initial size after
 * tableam processing and/or have entries in a new order (tableam may sort
 * deltids array for its own reasons). Bottom-up callers may find that final
 * ndeltids is 0 on return from call to tableam, in which case no index tuple
 * deletions are possible. Simple deletion callers can rely on any entries
 * they know to be deletable appearing in the final array as deletable.
 */
typedef struct TM_IndexDelete
{
	ItemPointerData tid;		/* table TID from index tuple */
	int16		id;				/* Offset into TM_IndexStatus array */
} TM_IndexDelete;

typedef struct TM_IndexStatus
{
	OffsetNumber idxoffnum;		/* Index am page offset number */
	bool		knowndeletable; /* Currently known to be deletable? */

	/* Bottom-up index deletion specific fields follow */
	bool		promising;		/* Promising (duplicate) index tuple? */
	int16		freespace;		/* Space freed in index if deleted */
} TM_IndexStatus;

/*
 * Index AM/tableam coordination is central to the design of bottom-up index
 * deletion. The index AM provides hints about where to look to the tableam
 * by marking some entries as "promising". Index AM does this with duplicate
 * index tuples that are strongly suspected to be old versions left behind by
 * UPDATEs that did not logically modify indexed values. Index AM may find it
 * helpful to only mark entries as promising when they're thought to have been
 * affected by such an UPDATE in the recent past.
 *
 * Bottom-up index deletion casts a wide net at first, usually by including
 * all TIDs on a target index page. It is up to the tableam to worry about
 * the cost of checking transaction status information. The tableam is in
 * control, but needs careful guidance from the index AM. Index AM requests
 * that bottomupfreespace target be met, while tableam measures progress
 * towards that goal by tallying the per-entry freespace value for known
 * deletable entries. (All !bottomup callers can just set these space related
 * fields to zero.)
 */
typedef struct TM_IndexDeleteOp
{
	Relation	irel;			/* Target index relation */
	BlockNumber iblknum;		/* Index block number (for error reports) */
	bool		bottomup;		/* Bottom-up (not simple) deletion? */
	int			bottomupfreespace;	/* Bottom-up space target */

	/* Mutable per-TID information follows (index AM initializes entries) */
	int			ndeltids;		/* Current # of deltids/status elements */
	TM_IndexDelete *deltids;
	TM_IndexStatus *status;
} TM_IndexDeleteOp;

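/*
 * Illustrative sketch (hypothetical helper): an index AM setting up a simple
 * (not bottom-up) deletion request for one known-dead TID; compare nbtree's
 * real usage around _bt_delitems_delete_check(). The caller supplies the
 * palloc()'d deltids/status arrays.
 */
static inline void
example_init_simple_delete(TM_IndexDeleteOp *delstate, Relation irel,
						   BlockNumber iblknum, TM_IndexDelete *deltids,
						   TM_IndexStatus *status, ItemPointer dead_tid,
						   OffsetNumber offnum)
{
	delstate->irel = irel;
	delstate->iblknum = iblknum;
	delstate->bottomup = false;
	delstate->bottomupfreespace = 0;	/* only used by bottom-up callers */
	delstate->ndeltids = 0;
	delstate->deltids = deltids;
	delstate->status = status;

	deltids[0].tid = *dead_tid;
	deltids[0].id = delstate->ndeltids;
	status[0].idxoffnum = offnum;
	status[0].knowndeletable = true;	/* e.g. from an LP_DEAD index tuple */
	status[0].promising = false;		/* bottom-up-only hint */
	status[0].freespace = 0;			/* bottom-up-only accounting */
	delstate->ndeltids++;
}
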
/* "options" flag bits for table_tuple_insert */
/* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
#define TABLE_INSERT_SKIP_FSM		0x0002
#define TABLE_INSERT_FROZEN			0x0004
#define TABLE_INSERT_NO_LOGICAL		0x0008

/* flag bits for table_tuple_lock */
/* Follow tuples whose update is in progress if lock modes don't conflict  */
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS	(1 << 0)
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION		(1 << 1)


/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
									ItemPointer tid,
									Datum *values,
									bool *isnull,
									bool tupleIsAlive,
									void *state);

/*
 * API struct for a table AM. Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then gets
 * returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly; use the
 * table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in; remember
 * to update it when adding a callback.
 */
typedef struct TableAmRoutine
{
	/* this must be set to T_TableAmRoutine */
	NodeTag		type;


	/* ------------------------------------------------------------------------
	 * Slot related callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Return slot implementation suitable for storing a tuple of this AM.
	 */
	const TupleTableSlotOps *(*slot_callbacks) (Relation rel);

	/* ------------------------------------------------------------------------
	 * Table scan callbacks.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Start a scan of `rel`. The callback has to return a TableScanDesc,
	 * which will typically be embedded in a larger, AM specific, struct.
	 *
	 * If nkeys != 0, the results need to be filtered by those scan keys.
	 *
	 * pscan, if not NULL, will have already been initialized with
	 * parallelscan_initialize(), and has to be for the same relation. Will
	 * only be set coming from table_beginscan_parallel().
	 *
	 * `flags` is a bitmask indicating the type of scan (ScanOptions's
	 * SO_TYPE_*, currently only one may be specified), options controlling
	 * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
	 * specified, an AM may ignore unsupported ones) and whether the snapshot
	 * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
	 */
	TableScanDesc (*scan_begin) (Relation rel,
								 Snapshot snapshot,
								 int nkeys, struct ScanKeyData *key,
								 ParallelTableScanDesc pscan,
								 uint32 flags);

	/*
	 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
	 * TableScanDesc.rs_snapshot needs to be unregistered.
	 */
	void		(*scan_end) (TableScanDesc scan);

	/*
	 * Restart relation scan. If set_params is set to true, allow_{strat,
	 * sync, pagemode} (see scan_begin) changes should be taken into account.
	 */
	void		(*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
								bool set_params, bool allow_strat,
								bool allow_sync, bool allow_pagemode);

	/*
	 * Return next tuple from `scan`, store in slot.
	 */
	bool		(*scan_getnextslot) (TableScanDesc scan,
									 ScanDirection direction,
									 TupleTableSlot *slot);

	/*-----------
	 * Optional functions to provide scanning for ranges of ItemPointers.
	 * Implementations must either provide both of these functions, or neither
	 * of them.
	 *
	 * Implementations of scan_set_tidrange must themselves handle
	 * ItemPointers of any value, i.e., they must handle each of the following:
	 *
	 * 1) mintid or maxtid is beyond the end of the table; and
	 * 2) mintid is above maxtid; and
	 * 3) item offset for mintid or maxtid is beyond the maximum offset
	 *    allowed by the AM.
	 *
	 * Implementations can assume that scan_set_tidrange is always called
	 * before scan_getnextslot_tidrange or after scan_rescan and before any
	 * further calls to scan_getnextslot_tidrange.
	 */
	void		(*scan_set_tidrange) (TableScanDesc scan,
									  ItemPointer mintid,
									  ItemPointer maxtid);

	/*
	 * Return next tuple from `scan` that's in the range of TIDs defined by
	 * scan_set_tidrange.
	 */
	bool		(*scan_getnextslot_tidrange) (TableScanDesc scan,
											  ScanDirection direction,
											  TupleTableSlot *slot);

	/* ------------------------------------------------------------------------
	 * Parallel table scan related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Estimate the size of shared memory needed for a parallel scan of this
	 * relation. The snapshot does not need to be accounted for.
	 */
	Size		(*parallelscan_estimate) (Relation rel);

	/*
	 * Initialize ParallelTableScanDesc for a parallel scan of this relation.
	 * `pscan` will be sized according to parallelscan_estimate() for the same
	 * relation.
	 */
	Size		(*parallelscan_initialize) (Relation rel,
											ParallelTableScanDesc pscan);

	/*
	 * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
	 * when `pscan` was initialized by parallelscan_initialize.
	 */
	void		(*parallelscan_reinitialize) (Relation rel,
											  ParallelTableScanDesc pscan);


	/* ------------------------------------------------------------------------
	 * Index Scan Callbacks
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Prepare to fetch tuples from the relation, as needed when fetching
	 * tuples for an index scan. The callback has to return an
	 * IndexFetchTableData, which the AM will typically embed in a larger
	 * structure with additional information.
	 *
	 * Tuples for an index scan can then be fetched via index_fetch_tuple.
	 */
	struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

	/*
	 * Reset index fetch. Typically this will release cross index fetch
	 * resources held in IndexFetchTableData.
	 */
	void		(*index_fetch_reset) (struct IndexFetchTableData *data);

	/*
	 * Release resources and deallocate index fetch.
	 */
	void		(*index_fetch_end) (struct IndexFetchTableData *data);

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`. If a tuple was found and passed the visibility
	 * test, return true, false otherwise.
	 *
	 * Note that AMs that do not necessarily update indexes when indexed
	 * columns do not change need to return the current/correct version of
	 * the tuple that is visible to the snapshot, even if the tid points to an
	 * older version of the tuple.
	 *
	 * *call_again is false on the first call to index_fetch_tuple for a tid.
	 * If there potentially is another tuple matching the tid, *call_again
	 * needs to be set to true by index_fetch_tuple, signaling to the caller
	 * that index_fetch_tuple should be called again for the same tid.
	 *
	 * *all_dead, if all_dead is not NULL, should be set to true by
	 * index_fetch_tuple iff it is guaranteed that no backend needs to see
	 * that tuple. Index AMs can use that to avoid returning that tid in
	 * future searches.
	 */
	bool		(*index_fetch_tuple) (struct IndexFetchTableData *scan,
									  ItemPointer tid,
									  Snapshot snapshot,
									  TupleTableSlot *slot,
									  bool *call_again, bool *all_dead);


	/* ------------------------------------------------------------------------
	 * Callbacks for non-modifying operations on individual tuples
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Fetch tuple at `tid` into `slot`, after doing a visibility test
	 * according to `snapshot`. If a tuple was found and passed the visibility
	 * test, returns true, false otherwise.
	 */
	bool		(*tuple_fetch_row_version) (Relation rel,
											ItemPointer tid,
											Snapshot snapshot,
											TupleTableSlot *slot);

	/*
	 * Is tid valid for a scan of this relation?
	 */
	bool		(*tuple_tid_valid) (TableScanDesc scan,
									ItemPointer tid);

	/*
	 * Return the latest version of the tuple at `tid`, by updating `tid` to
	 * point at the newest version.
	 */
	void		(*tuple_get_latest_tid) (TableScanDesc scan,
										 ItemPointer tid);

	/*
	 * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
	 * the appropriate type for the AM.
	 */
	bool		(*tuple_satisfies_snapshot) (Relation rel,
											 TupleTableSlot *slot,
											 Snapshot snapshot);

	/* see table_index_delete_tuples() */
	TransactionId (*index_delete_tuples) (Relation rel,
										  TM_IndexDeleteOp *delstate);


	/* ------------------------------------------------------------------------
	 * Manipulations of physical tuples.
	 * ------------------------------------------------------------------------
	 */

	/* see table_tuple_insert() for reference about parameters */
	void		(*tuple_insert) (Relation rel, TupleTableSlot *slot,
								 CommandId cid, int options,
								 struct BulkInsertStateData *bistate);

	/* see table_tuple_insert_speculative() for reference about parameters */
	void		(*tuple_insert_speculative) (Relation rel,
											 TupleTableSlot *slot,
											 CommandId cid,
											 int options,
											 struct BulkInsertStateData *bistate,
											 uint32 specToken);

	/* see table_tuple_complete_speculative() for reference about parameters */
	void		(*tuple_complete_speculative) (Relation rel,
											   TupleTableSlot *slot,
											   uint32 specToken,
											   bool succeeded);

	/* see table_multi_insert() for reference about parameters */
	void		(*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
								 CommandId cid, int options,
								 struct BulkInsertStateData *bistate);

	/* see table_tuple_delete() for reference about parameters */
	TM_Result	(*tuple_delete) (Relation rel,
								 ItemPointer tid,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 bool changingPart);

	/* see table_tuple_update() for reference about parameters */
	TM_Result	(*tuple_update) (Relation rel,
								 ItemPointer otid,
								 TupleTableSlot *slot,
								 CommandId cid,
								 Snapshot snapshot,
								 Snapshot crosscheck,
								 bool wait,
								 TM_FailureData *tmfd,
								 LockTupleMode *lockmode,
								 TU_UpdateIndexes *update_indexes);

	/* see table_tuple_lock() for reference about parameters */
	TM_Result	(*tuple_lock) (Relation rel,
							   ItemPointer tid,
							   Snapshot snapshot,
							   TupleTableSlot *slot,
							   CommandId cid,
							   LockTupleMode mode,
							   LockWaitPolicy wait_policy,
							   uint8 flags,
							   TM_FailureData *tmfd);

	/*
	 * Perform operations necessary to complete insertions made via
	 * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
	 * access methods ceased to use this.
	 *
	 * Typically callers of tuple_insert and multi_insert will just pass all
	 * the flags that apply to them, and each AM has to decide which of them
	 * make sense for it, and then only take actions in finish_bulk_insert for
	 * those flags, and ignore others.
	 *
	 * Optional callback.
	 */
	void		(*finish_bulk_insert) (Relation rel, int options);


	/* ------------------------------------------------------------------------
	 * DDL related functionality.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * This callback needs to create new relation storage for `rel`, with
	 * appropriate durability behaviour for `persistence`.
	 *
	 * Note that only the subset of the relcache filled by
	 * RelationBuildLocalRelation() can be relied upon and that the relation's
	 * catalog entries will either not yet exist (new relation), or will still
	 * reference the old relfilelocator.
	 *
	 * As output *freezeXid, *minmulti must be set to the values appropriate
	 * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
	 * fields to be filled they can be set to InvalidTransactionId and
	 * InvalidMultiXactId, respectively.
	 *
	 * See also table_relation_set_new_filelocator().
	 */
	void		(*relation_set_new_filelocator) (Relation rel,
												 const RelFileLocator *newrlocator,
												 char persistence,
												 TransactionId *freezeXid,
												 MultiXactId *minmulti);

	/*
	 * This callback needs to remove all contents from `rel`'s current
	 * relfilelocator. No provisions for transactional behaviour need to be
	 * made. Often this can be implemented by truncating the underlying
	 * storage to its minimal size.
	 *
	 * See also table_relation_nontransactional_truncate().
	 */
	void		(*relation_nontransactional_truncate) (Relation rel);

	/*
	 * See table_relation_copy_data().
	 *
	 * This can typically be implemented by directly copying the underlying
	 * storage, unless it contains references to the tablespace internally.
	 */
	void		(*relation_copy_data) (Relation rel,
									   const RelFileLocator *newrlocator);

	/* See table_relation_copy_for_cluster() */
	void		(*relation_copy_for_cluster) (Relation OldTable,
											  Relation NewTable,
											  Relation OldIndex,
											  bool use_sort,
											  TransactionId OldestXmin,
											  TransactionId *xid_cutoff,
											  MultiXactId *multi_cutoff,
											  double *num_tuples,
											  double *tups_vacuumed,
											  double *tups_recently_dead);

	/*
	 * React to VACUUM command on the relation. The VACUUM can be triggered by
	 * a user or by autovacuum. The specific actions performed by the AM will
	 * depend heavily on the individual AM.
	 *
	 * On entry a transaction is already established, and the relation is
	 * locked with a ShareUpdateExclusive lock.
	 *
	 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
	 * this routine, even if (for ANALYZE) it is part of the same VACUUM
	 * command.
	 *
	 * There probably, in the future, needs to be a separate callback to
	 * integrate with autovacuum's scheduling.
	 */
	void		(*relation_vacuum) (Relation rel,
									struct VacuumParams *params,
									BufferAccessStrategy bstrategy);

	/*
	 * Prepare to analyze the next block of `scan`. The scan has been started
	 * with table_beginscan_analyze(). See also
	 * table_scan_analyze_next_block().
	 *
	 * The callback may acquire resources like locks that are held until
	 * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
	 * to hold a lock until all tuples on a block have been analyzed by
	 * scan_analyze_next_tuple.
	 *
	 * The callback can return false if the block is not suitable for
	 * sampling, e.g. because it's a metapage that could never contain tuples.
	 *
	 * XXX: This obviously is primarily suited for block-based AMs. It's not
	 * clear what a good interface for non block based AMs would be, so there
	 * isn't one yet.
	 */
	bool		(*scan_analyze_next_block) (TableScanDesc scan,
											ReadStream *stream);

	/*
	 * See table_scan_analyze_next_tuple().
	 *
	 * Not every AM might have a meaningful concept of dead rows, in which
	 * case it's OK to not increment *deadrows - but note that that may
	 * influence autovacuum scheduling (see comment for relation_vacuum
	 * callback).
	 */
	bool		(*scan_analyze_next_tuple) (TableScanDesc scan,
											TransactionId OldestXmin,
											double *liverows,
											double *deadrows,
											TupleTableSlot *slot);

	/* see table_index_build_range_scan for reference about parameters */
	double		(*index_build_range_scan) (Relation table_rel,
										   Relation index_rel,
										   struct IndexInfo *index_info,
										   bool allow_sync,
										   bool anyvisible,
										   bool progress,
										   BlockNumber start_blockno,
										   BlockNumber numblocks,
										   IndexBuildCallback callback,
										   void *callback_state,
										   TableScanDesc scan);

	/* see table_index_validate_scan for reference about parameters */
	void		(*index_validate_scan) (Relation table_rel,
										Relation index_rel,
										struct IndexInfo *index_info,
										Snapshot snapshot,
										struct ValidateIndexState *state);


	/* ------------------------------------------------------------------------
	 * Miscellaneous functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_size().
	 *
	 * Note that currently a few callers use the MAIN_FORKNUM size to figure
	 * out the range of potentially interesting blocks (brin, analyze). It's
	 * probable that we'll need to revise the interface for those at some
	 * point.
	 */
	uint64		(*relation_size) (Relation rel, ForkNumber forkNumber);


	/*
	 * This callback should return true if the relation requires a TOAST table
	 * and false if it does not. It may wish to examine the relation's tuple
	 * descriptor before making a decision, but if it uses some other method
	 * of storing large values (or if it does not support them) it can simply
	 * return false.
	 */
	bool		(*relation_needs_toast_table) (Relation rel);

	/*
	 * This callback should return the OID of the table AM that implements
	 * TOAST tables for this AM. If the relation_needs_toast_table callback
	 * always returns false, this callback is not required.
	 */
	Oid			(*relation_toast_am) (Relation rel);

	/*
	 * This callback is invoked when detoasting a value stored in a toast
	 * table implemented by this AM. See table_relation_fetch_toast_slice()
	 * for more details.
	 */
	void		(*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
											   int32 attrsize,
											   int32 sliceoffset,
											   int32 slicelength,
											   struct varlena *result);


	/* ------------------------------------------------------------------------
	 * Planner related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * See table_relation_estimate_size().
	 *
	 * While block oriented, it shouldn't be too hard for an AM that doesn't
	 * internally use blocks to convert into a usable representation.
	 *
	 * This differs from the relation_size callback by returning size
	 * estimates (both relation size and tuple count) for planning purposes,
	 * rather than returning a currently correct estimate.
	 */
	void		(*relation_estimate_size) (Relation rel, int32 *attr_widths,
										   BlockNumber *pages, double *tuples,
										   double *allvisfrac);


	/* ------------------------------------------------------------------------
	 * Executor related functions.
	 * ------------------------------------------------------------------------
	 */

	/*
	 * Prepare to fetch / check / return tuples from `blockno` as part of a
	 * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
	 * false if the bitmap is exhausted and true otherwise.
	 *
	 * This will typically read and pin the target block, and do the necessary
	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
	 * make sense to perform tuple visibility checks at this time).
	 *
	 * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
	 * incremented by the table AM to indicate whether or not the block's
	 * representation in the bitmap is lossy.
	 *
	 * `recheck` is set by the table AM to indicate whether or not the tuples
	 * from this block should be rechecked. Tuples from lossy pages will
	 * always need to be rechecked, but some non-lossy pages' tuples may also
	 * require recheck.
	 *
	 * `blockno` is the current block and is set by the table AM. The table AM
	 * is responsible for advancing the main iterator, but the bitmap table
	 * scan code still advances the prefetch iterator. `blockno` is used by
	 * bitmap table scan code to validate that the prefetch block stays ahead
	 * of the current block.
	 *
	 * XXX: Currently this may only be implemented if the AM uses md.c as its
	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
	 * performs prefetching directly using that interface. This probably
	 * needs to be rectified at a later point.
	 *
	 * XXX: Currently this may only be implemented if the AM uses the
	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
	 * perform prefetching. This probably needs to be rectified at a later
	 * point.
	 *
	 * Optional callback, but either both scan_bitmap_next_block and
	 * scan_bitmap_next_tuple need to exist, or neither.
	 */
	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
										   BlockNumber *blockno,
										   bool *recheck,
										   uint64 *lossy_pages,
										   uint64 *exact_pages);

	/*
	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
	 * if a visible tuple was found, false otherwise.
	 *
	 * Optional callback, but either both scan_bitmap_next_block and
	 * scan_bitmap_next_tuple need to exist, or neither.
	 */
	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
										   TupleTableSlot *slot);

	/*
	 * Prepare to fetch tuples from the next block in a sample scan. Return
	 * false if the sample scan is finished, true otherwise. `scan` was
	 * started via table_beginscan_sampling().
	 *
	 * Typically this will first determine the target block by calling the
	 * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
	 * perform a sequential scan over all blocks. The determined block is
	 * then typically read and pinned.
	 *
	 * As the TsmRoutine interface is block based, a block needs to be passed
	 * to NextSampleBlock(). If that's not appropriate for an AM, it
	 * internally needs to perform mapping between the internal and a block
	 * based representation.
	 *
	 * Note that it's not acceptable to hold deadlock prone resources such as
	 * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
	 * block - the tuple is likely to be returned to an upper query node, and
	 * the next call could be off a long while. Holding buffer pins and such
	 * is obviously OK.
	 *
	 * Currently it is required to implement this interface, as there's no
	 * alternative way (contrary e.g. to bitmap scans) to implement sample
	 * scans. If infeasible to implement, the AM may raise an error.
	 */
	bool		(*scan_sample_next_block) (TableScanDesc scan,
										   struct SampleScanState *scanstate);

	/*
	 * This callback, only called after scan_sample_next_block has returned
	 * true, should determine the next tuple to be returned from the selected
	 * block using the TsmRoutine's NextSampleTuple() callback.
	 *
	 * The callback needs to perform visibility checks, and only return
	 * visible tuples. That obviously can mean calling NextSampleTuple()
	 * multiple times.
	 *
	 * The TsmRoutine interface assumes that there's a maximum offset on a
	 * given page, so if that doesn't apply to an AM, it needs to emulate that
	 * assumption somehow.
	 */
	bool		(*scan_sample_next_tuple) (TableScanDesc scan,
										   struct SampleScanState *scanstate,
										   TupleTableSlot *slot);

} TableAmRoutine;

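/*
 * Illustrative sketch (not part of this header): the minimal shape of a
 * table AM handler. All names prefixed with example_ are hypothetical;
 * compare heap's real handler, heap_tableam_handler() in heapam_handler.c.
 * Assumes fmgr.h for the Datum/PG_FUNCTION_ARGS machinery, and that every
 * required callback is filled in -- GetTableAmRoutine() asserts
 * completeness. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLE_ONLY
static const TableAmRoutine example_methods = {
	.type = T_TableAmRoutine,

	.slot_callbacks = example_slot_callbacks,
	.scan_begin = example_scan_begin,
	.scan_end = example_scan_end,
	.scan_rescan = example_scan_rescan,
	.scan_getnextslot = example_scan_getnextslot,
	/* ... all remaining required callbacks ... */
};

Datum
example_tableam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&example_methods);
}
#endif
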
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation. Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);

/*
 * Returns slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);


/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`. Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
				int nkeys, struct ScanKeyData *key)
{
	uint32		flags = SO_TYPE_SEQSCAN |
		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning catalogs. It'll automatically use
 * a snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
											 struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero). Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
					  int nkeys, struct ScanKeyData *key,
					  bool allow_strat, bool allow_sync)
{
	uint32		flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan. Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality to
 * make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
				   int nkeys, struct ScanKeyData *key, bool need_tuple)
{
	TableScanDesc result;
	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

	if (need_tuple)
		flags |= SO_NEED_TUPLES;

	result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
										 NULL, flags);
	result->st.bitmap.rs_shared_iterator = NULL;
	result->st.bitmap.rs_iterator = NULL;
	return result;
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
						 int nkeys, struct ScanKeyData *key,
						 bool allow_strat, bool allow_sync,
						 bool allow_pagemode)
{
	uint32		flags = SO_TYPE_SAMPLESCAN;

	if (allow_strat)
		flags |= SO_ALLOW_STRAT;
	if (allow_sync)
		flags |= SO_ALLOW_SYNC;
	if (allow_pagemode)
		flags |= SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
	uint32		flags = SO_TYPE_TIDSCAN;

	return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
	uint32		flags = SO_TYPE_ANALYZE;

	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
	scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
			 struct ScanKeyData *key)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan. Note that although the actual use of
 * syncscan might change (effectively, enabling or disabling reporting), the
 * previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
						bool allow_strat, bool allow_sync, bool allow_pagemode)
{
	scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
										 allow_strat, allow_sync,
										 allow_pagemode);
}

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction,
					   TupleTableSlot *slot)
{
	slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	/*
	 * We don't expect direct calls to table_scan_getnextslot with valid
	 * CheckXidAlive for catalog or regular tables. See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");

	return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
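
/*
 * Illustrative sketch (hypothetical helper, not part of this header): a
 * complete sequential scan using the wrappers above. Assumes the caller
 * holds a suitable lock on the relation and that `slot` came from
 * table_slot_create().
 */
static inline void
example_scan_relation(Relation rel, Snapshot snapshot, TupleTableSlot *slot)
{
	TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
	{
		/* ... process the tuple stored in slot ... */
	}
	table_endscan(scan);
}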

/* ----------------------------------------------------------------------------
 * TID Range scanning related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
 * for a TID range scan.
 */
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
						 ItemPointer mintid,
						 ItemPointer maxtid)
{
	TableScanDesc sscan;
	uint32		flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;

	sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);

	/* Set the range of TIDs to scan */
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);

	return sscan;
}

/*
 * table_rescan_tidrange resets the scan position and sets the minimum and
 * maximum TID range to scan for a TableScanDesc created by
 * table_beginscan_tidrange.
 */
static inline void
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
					  ItemPointer maxtid)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
	sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
}

/*
 * Fetch the next tuple from `sscan` for a TID range scan created by
 * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
 * or returns false if no more tuples exist in the range.
 */
static inline bool
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
								TupleTableSlot *slot)
{
	/* Ensure table_beginscan_tidrange() was used. */
	Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

	/* We don't expect actual scans using NoMovementScanDirection */
	Assert(direction == ForwardScanDirection ||
		   direction == BackwardScanDirection);

	return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
															   direction,
															   slot);
}
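
/*
 * Illustrative sketch (hypothetical helper): scanning a bounded TID range
 * with the wrappers above. mintid/maxtid may take any value; the AM's
 * scan_set_tidrange callback must cope with out-of-range bounds itself.
 */
static inline void
example_scan_tidrange(Relation rel, Snapshot snapshot,
					  ItemPointer mintid, ItemPointer maxtid,
					  TupleTableSlot *slot)
{
	TableScanDesc scan = table_beginscan_tidrange(rel, snapshot,
												  mintid, maxtid);

	while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
	{
		/* ... process the tuple stored in slot ... */
	}
	table_endscan(scan);
}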


/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this
 * relation. `pscan` needs to be sized according to parallelscan_estimate()
 * for the same relation. Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
										  ParallelTableScanDesc pscan,
										  Snapshot snapshot);

/*
 * Begin a parallel scan. `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation. The initialization
 * does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation relation,
											  ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan. Call this in the leader process. Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
	rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}
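
/*
 * Illustrative sketch (hypothetical helper): leader-side setup of a
 * parallel scan. Assumes `shmspace` points at shared memory of at least
 * table_parallelscan_estimate(rel, snapshot) bytes; each worker then
 * attaches with table_beginscan_parallel(rel, pscan).
 */
static inline TableScanDesc
example_parallel_leader_begin(Relation rel, Snapshot snapshot, void *shmspace)
{
	ParallelTableScanDesc pscan = (ParallelTableScanDesc) shmspace;

	table_parallelscan_initialize(rel, pscan, snapshot);
	return table_beginscan_parallel(rel, pscan);
}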


/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
	return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch. Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
	scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 * a visibility test according to `snapshot`. If a tuple was found and passed
 * the visibility test, returns true, false otherwise. Note that *tid may be
 * modified when we return true (see later remarks on multiple row versions
 * reachable via a single index entry).
 *
 * *call_again needs to be false on the first call to table_index_fetch_tuple()
 * for a tid. If there potentially is another tuple matching the tid,
 * *call_again will be set to true, signaling that table_index_fetch_tuple()
 * should be called again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 * that tuple. Index AMs can use that to avoid returning that tid in future
 * searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single index
 * entry (like heap's HOT), whereas table_tuple_fetch_row_version() only
 * evaluates the tuple exactly at `tid`. Outside of index entry -> table tuple
 * lookups, table_tuple_fetch_row_version() is what's usually needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
						ItemPointer tid,
						Snapshot snapshot,
						TupleTableSlot *slot,
						bool *call_again, bool *all_dead)
{
	/*
	 * We don't expect direct calls to table_index_fetch_tuple with valid
	 * CheckXidAlive for catalog or regular tables. See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");

	return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
													slot, call_again,
													all_dead);
}
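
/*
 * Illustrative sketch (hypothetical helper): fetching the visible row
 * version reachable from one index entry, looping while the AM reports
 * that further versions may match the same tid (e.g. heap's HOT chains).
 */
static inline bool
example_fetch_from_index_entry(struct IndexFetchTableData *fetch,
							   ItemPointer tid, Snapshot snapshot,
							   TupleTableSlot *slot)
{
	bool		call_again = false;
	bool		all_dead = false;

	do
	{
		if (table_index_fetch_tuple(fetch, tid, snapshot, slot,
									&call_again, &all_dead))
			return true;		/* found a visible version */
	} while (call_again);

	return false;				/* none visible; all_dead may hint cleanup */
}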

/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry. This likely is only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
										  ItemPointer tid,
										  Snapshot snapshot,
										  bool *all_dead);


/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */


/*
 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
 * `snapshot`. If a tuple was found and passed the visibility test, returns
 * true, false otherwise.
 *
 * See table_index_fetch_tuple's comment about what the difference between
 * these functions is. It is correct to use this function outside of index
 * entry->table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
							  ItemPointer tid,
							  Snapshot snapshot,
							  TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_tuple_fetch_row_version with
	 * valid CheckXidAlive for catalog or regular tables. See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");

	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}

/*
 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
 * mean that the pointed to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
	return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc sscan, ItemPointer tid);

/*
 * Return true iff tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect. If so
 * they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
							   Snapshot snapshot)
{
	return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Determine which index tuples are safe to delete based on their table TID.
 *
 * Determines which entries from index AM caller's TM_IndexDeleteOp state
 * point to vacuumable table tuples. Entries that are found by tableam to be
 * vacuumable are naturally safe for index AM to delete, and so get directly
 * marked as deletable. See comments above TM_IndexDelete and comments above
 * TM_IndexDeleteOp for full details.
 *
 * Returns a snapshotConflictHorizon transaction ID that caller places in
 * its index deletion WAL record. This might be used during subsequent REDO
 * of the WAL record when in Hot Standby mode -- a recovery conflict for the
 * index deletion operation might be required on the standby.
 */
static inline TransactionId
table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
{
	return rel->rd_tableam->index_delete_tuples(rel, delstate);
}


/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot into table AM routine.
 *
 * The options bitmask allows the caller to specify options that may change the
 * behaviour of the AM. The AM will ignore options that it does not support.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 * free space in the relation. This can save some cycles when we know the
 * relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple. See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relation storage created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple. This should solely be used during table rewrites
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 * relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple. If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion. But note that any toasting of fields within the slot is NOT
 * reflected in the slot's contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
				   int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->tuple_insert(rel, slot, cid, options,
								  bistate);
}
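
/*
 * Illustrative sketch (hypothetical helper): inserting into storage known
 * to be new, skipping the FSM lookup. GetCurrentCommandId(true) is the
 * usual way callers obtain the command ID; a NULL bistate selects default
 * (non-bulk) behavior.
 */
static inline void
example_insert_into_new_storage(Relation rel, TupleTableSlot *slot)
{
	table_tuple_insert(rel, slot, GetCurrentCommandId(true),
					   TABLE_INSERT_SKIP_FSM, NULL);
}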

/*
 * Perform a "speculative insertion". These can be backed out afterwards
 * without aborting the whole transaction. Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed. Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
							   CommandId cid, int options,
							   struct BulkInsertStateData *bistate,
							   uint32 specToken)
{
	rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
											  bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction. If
 * succeeded is true, the tuple is fully inserted, if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
								 uint32 specToken, bool succeeded)
{
	rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
												succeeded);
}

/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation. That's often faster than calling table_tuple_insert() in a loop,
 * because e.g. the AM can reduce WAL logging and page locking overhead.
 *
 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context. You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
				   CommandId cid, int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->multi_insert(rel, slots, nslots,
								  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	tid - TID of tuple to be deleted
 *	cid - delete command ID (used for visibility test, and stored into
 *		cmax if successful)
 *	crosscheck - if not InvalidSnapshot, also check tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	changingPart - true iff the tuple is being moved to another partition
 *		table due to an update of the partition key. Otherwise, false.
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct
 * TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
				   Snapshot snapshot, Snapshot crosscheck, bool wait,
				   TM_FailureData *tmfd, bool changingPart)
{
	return rel->rd_tableam->tuple_delete(rel, tid, cid,
										 snapshot, crosscheck,
										 wait, tmfd, changingPart);
}

/*
 * Update a tuple.
 *
 * NB: do not call this directly unless you are prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_update instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	otid - TID of old tuple to be replaced
 *	slot - newly constructed tuple data to store
 *	cid - update command ID (used for visibility test, and stored into
 *		cmax/cmin if successful)
 *	crosscheck - if not InvalidSnapshot, also check old tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	lockmode - filled with lock mode acquired on tuple
 *	update_indexes - in success cases this is set to indicate whether (and
 *		which) new index entries are required for this tuple
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * update it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * On success, the slot's tts_tid and tts_tableOid are updated to match the new
 * stored tuple; in particular, slot->tts_tid is set to the TID where the
 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 * update was done. However, any TOAST changes in the new tuple's
 * data are not reflected into *newtup.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
				   TU_UpdateIndexes *update_indexes)
{
	return rel->rd_tableam->tuple_update(rel, otid, slot,
										 cid, snapshot, crosscheck,
										 wait, tmfd,
										 lockmode, update_indexes);
}

1555 /*
1556  * Lock a tuple in the specified mode.
1557  *
1558  * Input parameters:
1559  * relation: relation containing tuple (caller must hold suitable lock)
1560  * tid: TID of tuple to lock
1561  * snapshot: snapshot to use for visibility determinations
1562  * cid: current command ID (used for visibility test, and stored into
1563  * tuple's cmax if lock is successful)
1564  * mode: lock mode desired
1565  * wait_policy: what to do if tuple lock is not available
1566  * flags:
1567  * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
1568  * also lock descendant tuples if lock modes don't conflict.
1569  * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
1570  * latest version.
1571  *
1572  * Output parameters:
1573  * *slot: contains the target tuple
1574  * *tmfd: filled in failure cases (see below)
1575  *
1576  * Function result may be:
1577  * TM_Ok: lock was successfully acquired
1578  * TM_Invisible: lock failed because tuple was never visible to us
1579  * TM_SelfModified: lock failed because tuple updated by self
1580  * TM_Updated: lock failed because tuple updated by other xact
1581  * TM_Deleted: lock failed because tuple deleted by other xact
1582  * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
1583  *
1584  * In the failure cases other than TM_Invisible and TM_Deleted, the routine
1585  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
1586  * comments for struct TM_FailureData for additional info.
1587  */
1588 static inline TM_Result
1589 table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
1590  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
1591  LockWaitPolicy wait_policy, uint8 flags,
1592  TM_FailureData *tmfd)
1593 {
1594  return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
1595  cid, mode, wait_policy,
1596  flags, tmfd);
1597 }
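
/*
 * Editorial example (not part of tableam.h): locking a tuple with a
 * skip-style wait policy, similar in spirit to SELECT ... FOR UPDATE SKIP
 * LOCKED. Assumes `slot` was created with table_slot_create() for `rel`.
 */
static bool
example_lock_tuple_skip(Relation rel, ItemPointer tid, Snapshot snapshot,
						TupleTableSlot *slot)
{
	TM_FailureData tmfd;
	TM_Result	result;

	result = table_tuple_lock(rel, tid, snapshot, slot,
							  GetCurrentCommandId(true),
							  LockTupleExclusive, LockWaitSkip,
							  0 /* flags */ , &tmfd);
	if (result == TM_WouldBlock)
		return false;			/* tuple is locked by someone else; skip it */
	if (result != TM_Ok)
		elog(ERROR, "could not lock tuple");
	return true;				/* *slot now contains the locked tuple */
}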
1598 
1599 /*
1600  * Perform operations necessary to complete insertions made via
1601  * tuple_insert and multi_insert with a BulkInsertState specified.
1602  */
1603 static inline void
1604 table_finish_bulk_insert(Relation rel, int options)
1605 {
1606  /* optional callback */
1607  if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
1608  rel->rd_tableam->finish_bulk_insert(rel, options);
1609 }
1610 
1611 
1612 /* ------------------------------------------------------------------------
1613  * DDL related functionality.
1614  * ------------------------------------------------------------------------
1615  */
1616 
1617 /*
1618  * Create storage for `rel` in `newrlocator`, with persistence set to
1619  * `persistence`.
1620  *
1621  * This is used both during relation creation and various DDL operations to
1622  * create new rel storage that can be filled from scratch. When creating
1623  * new storage for an existing relfilelocator, this should be called before the
1624  * relcache entry has been updated.
1625  *
1626  * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
1627  * that pg_class.{relfrozenxid, relminmxid} have to be set to.
1628  */
1629 static inline void
1630 table_relation_set_new_filelocator(Relation rel,
1631  const RelFileLocator *newrlocator,
1632  char persistence,
1633  TransactionId *freezeXid,
1634  MultiXactId *minmulti)
1635 {
1636  rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
1637  persistence, freezeXid,
1638  minmulti);
1639 }
1640 
1641 /*
1642  * Remove all table contents from `rel`, in a non-transactional manner.
1643  * Non-transactional means that there is no need to support rollbacks. This
1644  * is commonly only used to truncate relation storage created in
1645  * the current transaction.
1646  */
1647 static inline void
1648 table_relation_nontransactional_truncate(Relation rel)
1649 {
1650  rel->rd_tableam->relation_nontransactional_truncate(rel);
1651 }
1652 
1653 /*
1654  * Copy data from `rel` into the new relfilelocator `newrlocator`. The new
1655  * relfilelocator must not have any storage associated with it before this
1656  * function is called. This is only intended for low-level operations like
1657  * changing a relation's tablespace.
1658  */
1659 static inline void
1660 table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
1661 {
1662  rel->rd_tableam->relation_copy_data(rel, newrlocator);
1663 }
1664 
1665 /*
1666  * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1667  * FULL.
1668  *
1669  * Additional Input parameters:
1670  * - use_sort - if true, the table contents are sorted appropriately for
1671  * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1672  * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1673  * performed
1674  * - OldIndex - see use_sort
1675  * - OldestXmin - computed by vacuum_get_cutoffs(), even when
1676  * not needed for the relation's AM
1677  * - *xid_cutoff - ditto
1678  * - *multi_cutoff - ditto
1679  *
1680  * Output parameters:
1681  * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1682  * - *multi_cutoff - rel's new relminmxid value, may be invalid
1683  * - *tups_vacuumed - stats, for logging, if appropriate for AM
1684  * - *tups_recently_dead - stats, for logging, if appropriate for AM
1685  */
1686 static inline void
1687 table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
1688  Relation OldIndex,
1689  bool use_sort,
1690  TransactionId OldestXmin,
1691  TransactionId *xid_cutoff,
1692  MultiXactId *multi_cutoff,
1693  double *num_tuples,
1694  double *tups_vacuumed,
1695  double *tups_recently_dead)
1696 {
1697  OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1698  use_sort, OldestXmin,
1699  xid_cutoff, multi_cutoff,
1700  num_tuples, tups_vacuumed,
1701  tups_recently_dead);
1702 }
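
/*
 * Editorial example (not part of tableam.h): a CLUSTER-style rewrite that
 * sorts the old table's contents by an index. The cutoff values would
 * normally come from vacuum_get_cutoffs(); here they are caller-supplied
 * placeholders.
 */
static void
example_cluster_copy(Relation OldTable, Relation NewTable, Relation OldIndex,
					 TransactionId OldestXmin, TransactionId xid_cutoff,
					 MultiXactId multi_cutoff)
{
	double		num_tuples = 0;
	double		tups_vacuumed = 0;
	double		tups_recently_dead = 0;

	table_relation_copy_for_cluster(OldTable, NewTable, OldIndex,
									true /* use_sort */ , OldestXmin,
									&xid_cutoff, &multi_cutoff,
									&num_tuples, &tups_vacuumed,
									&tups_recently_dead);

	elog(DEBUG1, "copied %.0f tuples (%.0f recently dead)",
		 num_tuples, tups_recently_dead);
}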
1703 
1704 /*
1705  * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1706  * autovacuum. The specific actions performed will depend heavily on the
1707  * individual AM.
1708  *
1709  * On entry, a transaction needs to already have been established, and the
1710  * table is locked with a ShareUpdateExclusive lock.
1711  *
1712  * Note that VACUUM FULL (and CLUSTER), as well as ANALYZE, do not go through
1713  * this routine, even if (for ANALYZE) it is part of the same VACUUM command.
1714  */
1715 static inline void
1716 table_relation_vacuum(Relation rel, struct VacuumParams *params,
1717  BufferAccessStrategy bstrategy)
1718 {
1719  rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1720 }
1721 
1722 /*
1723  * Prepare to analyze the next block in the read stream. The scan needs to
1724  * have been started with table_beginscan_analyze(). Note that this routine
1725  * might acquire resources like locks that are held until
1726  * table_scan_analyze_next_tuple() returns false.
1727  *
1728  * Returns false if block is unsuitable for sampling, true otherwise.
1729  */
1730 static inline bool
1731 table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
1732 {
1733  return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
1734 }
1735 
1736 /*
1737  * Iterate over tuples in the block selected with
1738  * table_scan_analyze_next_block() (which needs to have returned true, and
1739  * this routine may not have returned false for the same block before). If a
1740  * tuple that's suitable for sampling is found, true is returned and a tuple
1741  * is stored in `slot`.
1742  *
1743  * *liverows and *deadrows are incremented according to the encountered
1744  * tuples.
1745  */
1746 static inline bool
1747 table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1748  double *liverows, double *deadrows,
1749  TupleTableSlot *slot)
1750 {
1751  return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1752  liverows, deadrows,
1753  slot);
1754 }
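
/*
 * Editorial example (not part of tableam.h): the shape of the ANALYZE
 * sampling loop. Setting up the ReadStream (see read_stream.h) and the
 * block sampler is elided; this only shows how the two callbacks nest.
 */
static void
example_analyze_loop(TableScanDesc scan, ReadStream *stream,
					 TransactionId OldestXmin, TupleTableSlot *slot)
{
	double		liverows = 0;
	double		deadrows = 0;

	while (table_scan_analyze_next_block(scan, stream))
	{
		while (table_scan_analyze_next_tuple(scan, OldestXmin,
											 &liverows, &deadrows, slot))
		{
			/* feed the sampled tuple in `slot` to the stats machinery */
		}
	}
}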
1755 
1756 /*
1757  * table_index_build_scan - scan the table to find tuples to be indexed
1758  *
1759  * This is called back from an access-method-specific index build procedure
1760  * after the AM has done whatever setup it needs. The parent table relation
1761  * is scanned to find tuples that should be entered into the index. Each
1762  * such tuple is passed to the AM's callback routine, which does the right
1763  * things to add it to the new index. After we return, the AM's index
1764  * build procedure does whatever cleanup it needs.
1765  *
1766  * The total count of live tuples is returned. This is for updating pg_class
1767  * statistics. (It's annoying not to be able to do that here, but we want to
1768  * merge that update with others; see index_update_stats.) Note that the
1769  * index AM itself must keep track of the number of index tuples; we don't do
1770  * so here because the AM might reject some of the tuples for its own reasons,
1771  * such as being unable to store NULLs.
1772  *
1773  * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1774  * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1775  *
1776  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1777  * any potentially broken HOT chains. Currently, we set this if there are any
1778  * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1779  * very hard to detect whether they're really incompatible with the chain tip.
1780  * This only really makes sense for the heap AM; it might need to be generalized
1781  * for other AMs later.
1782  */
1783 static inline double
1784 table_index_build_scan(Relation table_rel,
1785  Relation index_rel,
1786  struct IndexInfo *index_info,
1787  bool allow_sync,
1788  bool progress,
1789  IndexBuildCallback callback,
1790  void *callback_state,
1791  TableScanDesc scan)
1792 {
1793  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1794  index_rel,
1795  index_info,
1796  allow_sync,
1797  false,
1798  progress,
1799  0,
1800  InvalidBlockNumber,
1801  callback,
1802  callback_state,
1803  scan);
1804 }
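
/*
 * Editorial example (not part of tableam.h): skeleton of an index AM's build
 * procedure using table_index_build_scan(). The callback follows the
 * IndexBuildCallback typedef; `example_build_callback` and its missing body
 * are hypothetical.
 */
static void
example_build_callback(Relation index, ItemPointer tid, Datum *values,
					   bool *isnull, bool tupleIsAlive, void *state)
{
	/* form an index tuple from values/isnull and insert it into `index` */
}

static double
example_build(Relation heap, Relation index, struct IndexInfo *indexInfo)
{
	return table_index_build_scan(heap, index, indexInfo,
								  true /* allow_sync */ ,
								  true /* progress */ ,
								  example_build_callback,
								  NULL /* callback_state */ ,
								  NULL /* let the AM start the scan */ );
}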
1805 
1806 /*
1807  * As table_index_build_scan(), except that instead of scanning the complete
1808  * table, only the given number of blocks are scanned. Scan to end-of-rel can
1809  * be signaled by passing InvalidBlockNumber as numblocks. Note that
1810  * restricting the range to scan cannot be done when requesting syncscan.
1811  *
1812  * When "anyvisible" mode is requested, all tuples visible to any transaction
1813  * are indexed and counted as live, including those inserted or deleted by
1814  * transactions that are still in progress.
1815  */
1816 static inline double
1817 table_index_build_range_scan(Relation table_rel,
1818  Relation index_rel,
1819  struct IndexInfo *index_info,
1820  bool allow_sync,
1821  bool anyvisible,
1822  bool progress,
1823  BlockNumber start_blockno,
1824  BlockNumber numblocks,
1825  IndexBuildCallback callback,
1826  void *callback_state,
1827  TableScanDesc scan)
1828 {
1829  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1830  index_rel,
1831  index_info,
1832  allow_sync,
1833  anyvisible,
1834  progress,
1835  start_blockno,
1836  numblocks,
1837  callback,
1838  callback_state,
1839  scan);
1840 }
1841 
1842 /*
1843  * table_index_validate_scan - second table scan for concurrent index build
1844  *
1845  * See validate_index() for an explanation.
1846  */
1847 static inline void
1848 table_index_validate_scan(Relation table_rel,
1849  Relation index_rel,
1850  struct IndexInfo *index_info,
1851  Snapshot snapshot,
1852  struct ValidateIndexState *state)
1853 {
1854  table_rel->rd_tableam->index_validate_scan(table_rel,
1855  index_rel,
1856  index_info,
1857  snapshot,
1858  state);
1859 }
1860 
1861 
1862 /* ----------------------------------------------------------------------------
1863  * Miscellaneous functionality
1864  * ----------------------------------------------------------------------------
1865  */
1866 
1867 /*
1868  * Return the current size of `rel` in bytes. If `forkNumber` is
1869  * InvalidForkNumber, return the relation's overall size, otherwise the size
1870  * for the indicated fork.
1871  *
1872  * Note that the overall size might not equal the sum of the sizes of the
1873  * individual forks for some AMs, e.g. because the AM's storage does
1874  * not neatly map onto the builtin types of forks.
1875  */
1876 static inline uint64
1877 table_relation_size(Relation rel, ForkNumber forkNumber)
1878 {
1879  return rel->rd_tableam->relation_size(rel, forkNumber);
1880 }
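
/*
 * Editorial example (not part of tableam.h): deriving a block count for the
 * main fork from the byte size, as block-oriented callers commonly do.
 */
static BlockNumber
example_relation_blocks(Relation rel)
{
	return (BlockNumber) (table_relation_size(rel, MAIN_FORKNUM) / BLCKSZ);
}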
1881 
1882 /*
1883  * table_relation_needs_toast_table - does this relation need a toast table?
1884  */
1885 static inline bool
1886 table_relation_needs_toast_table(Relation rel)
1887 {
1888  return rel->rd_tableam->relation_needs_toast_table(rel);
1889 }
1890 
1891 /*
1892  * Return the OID of the AM that should be used to implement the TOAST table
1893  * for this relation.
1894  */
1895 static inline Oid
1896 table_relation_toast_am(Relation rel)
1897 {
1898  return rel->rd_tableam->relation_toast_am(rel);
1899 }
1900 
1901 /*
1902  * Fetch all or part of a TOAST value from a TOAST table.
1903  *
1904  * If this AM is never used to implement a TOAST table, then this callback
1905  * is not needed. But if toasted values are ever stored in a table of this
1906  * type, then you will need this callback.
1907  *
1908  * toastrel is the relation in which the toasted value is stored.
1909  *
1910  * valueid identifies which toast value is to be fetched. For the heap,
1911  * this corresponds to the values stored in the chunk_id column.
1912  *
1913  * attrsize is the total size of the toast value to be fetched.
1914  *
1915  * sliceoffset is the offset within the toast value of the first byte that
1916  * should be fetched.
1917  *
1918  * slicelength is the number of bytes from the toast value that should be
1919  * fetched.
1920  *
1921  * result is caller-allocated space into which the fetched bytes should be
1922  * stored.
1923  */
1924 static inline void
1925 table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
1926  int32 attrsize, int32 sliceoffset,
1927  int32 slicelength, struct varlena *result)
1928 {
1929  toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1930  attrsize,
1931  sliceoffset, slicelength,
1932  result);
1933 }
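
/*
 * Editorial example (not part of tableam.h): fetching the first 1 kB of a
 * toasted value. The caller allocates the result varlena, sized for the
 * slice plus its header, before calling in.
 */
static struct varlena *
example_fetch_slice(Relation toastrel, Oid valueid, int32 attrsize)
{
	int32		slicelength = Min(attrsize, 1024);
	struct varlena *result;

	result = (struct varlena *) palloc(VARHDRSZ + slicelength);
	SET_VARSIZE(result, VARHDRSZ + slicelength);

	table_relation_fetch_toast_slice(toastrel, valueid, attrsize,
									 0 /* sliceoffset */ , slicelength,
									 result);
	return result;
}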
1934 
1935 
1936 /* ----------------------------------------------------------------------------
1937  * Planner related functionality
1938  * ----------------------------------------------------------------------------
1939  */
1940 
1941 /*
1942  * Estimate the current size of the relation, as an AM specific workhorse for
1943  * estimate_rel_size(). Look there for an explanation of the parameters.
1944  */
1945 static inline void
1946 table_relation_estimate_size(Relation rel, int32 *attr_widths,
1947  BlockNumber *pages, double *tuples,
1948  double *allvisfrac)
1949 {
1950  rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1951  allvisfrac);
1952 }
1953 
1954 
1955 /* ----------------------------------------------------------------------------
1956  * Executor related functionality
1957  * ----------------------------------------------------------------------------
1958  */
1959 
1960 /*
1961  * Prepare to fetch / check / return tuples as part of a bitmap table scan.
1962  * `scan` needs to have been started via table_beginscan_bm(). Returns false
1963  * if there are no more blocks in the bitmap, true otherwise.
1964  *
1965  * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
1966  * incremented by the table AM to indicate whether or not the block's
1967  * representation in the bitmap is lossy.
1968  *
1969  * `recheck` is set by the table AM to indicate whether or not the tuples
1970  * from this block should be rechecked.
1971  *
1972  * `blockno` is set by the table AM to the current block. It is used by the
1973  * bitmap table scan code to validate that the prefetch block stays ahead of
1974  * the current block.
1975  *
1976  * Note that this is an optional callback; it should only be used after
1977  * verifying that the AM implements it (at plan time or such).
1978  */
1979 static inline bool
1980 table_scan_bitmap_next_block(TableScanDesc scan,
1981  BlockNumber *blockno,
1982  bool *recheck,
1983  uint64 *lossy_pages,
1984  uint64 *exact_pages)
1985 {
1986  /*
1987  * We don't expect direct calls to table_scan_bitmap_next_block with valid
1988  * CheckXidAlive for catalog or regular tables. See detailed comments in
1989  * xact.c where these variables are declared.
1990  */
1991  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1992  elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
1993 
1994  return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
1995  blockno, recheck,
1996  lossy_pages,
1997  exact_pages);
1998 }
1999 
2000 /*
2001  * Fetch the next tuple of a bitmap table scan into `slot` and return true if
2002  * a visible tuple was found, false otherwise.
2003  * table_scan_bitmap_next_block() needs to previously have selected a
2004  * block (i.e. returned true), and no previous
2005  * table_scan_bitmap_next_tuple() for the same block may have
2006  * returned false.
2007  */
2008 static inline bool
2009 table_scan_bitmap_next_tuple(TableScanDesc scan,
2010  TupleTableSlot *slot)
2011 {
2012  /*
2013  * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
2014  * CheckXidAlive for catalog or regular tables. See detailed comments in
2015  * xact.c where these variables are declared.
2016  */
2017  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2018  elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
2019 
2020  return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
2021  slot);
2022 }
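
/*
 * Editorial example (not part of tableam.h): the nesting of the bitmap scan
 * callbacks, roughly as the executor's bitmap table scan node drives them.
 * Bitmap iteration setup is elided.
 */
static void
example_bitmap_scan(TableScanDesc scan, TupleTableSlot *slot)
{
	BlockNumber blockno;
	bool		recheck;
	uint64		lossy_pages = 0;
	uint64		exact_pages = 0;

	while (table_scan_bitmap_next_block(scan, &blockno, &recheck,
										&lossy_pages, &exact_pages))
	{
		while (table_scan_bitmap_next_tuple(scan, slot))
		{
			/* if `recheck`, re-evaluate the original quals against slot */
		}
	}
}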
2023 
2024 /*
2025  * Prepare to fetch tuples from the next block in a sample scan. Returns false
2026  * if the sample scan is finished, true otherwise. `scan` needs to have been
2027  * started via table_beginscan_sampling().
2028  *
2029  * This will call the TsmRoutine's NextSampleBlock() callback if necessary
2030  * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
2031  * underlying relation.
2032  */
2033 static inline bool
2034 table_scan_sample_next_block(TableScanDesc scan,
2035  struct SampleScanState *scanstate)
2036 {
2037  /*
2038  * We don't expect direct calls to table_scan_sample_next_block with valid
2039  * CheckXidAlive for catalog or regular tables. See detailed comments in
2040  * xact.c where these variables are declared.
2041  */
2042  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2043  elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
2044  return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
2045 }
2046 
2047 /*
2048  * Fetch the next sample tuple into `slot` and return true if a visible tuple
2049  * was found, false otherwise. table_scan_sample_next_block() needs to
2050  * previously have selected a block (i.e. returned true), and no previous
2051  * table_scan_sample_next_tuple() for the same block may have returned false.
2052  *
2053  * This will call the TsmRoutine's NextSampleTuple() callback.
2054  */
2055 static inline bool
2056 table_scan_sample_next_tuple(TableScanDesc scan,
2057  struct SampleScanState *scanstate,
2058  TupleTableSlot *slot)
2059 {
2060  /*
2061  * We don't expect direct calls to table_scan_sample_next_tuple with valid
2062  * CheckXidAlive for catalog or regular tables. See detailed comments in
2063  * xact.c where these variables are declared.
2064  */
2065  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
2066  elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
2067  return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
2068  slot);
2069 }
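
/*
 * Editorial example (not part of tableam.h): the sample scan loop as driven
 * by the executor's SampleScan node; `scanstate` carries the TsmRoutine
 * chosen by the TABLESAMPLE clause.
 */
static void
example_sample_scan(TableScanDesc scan, struct SampleScanState *scanstate,
					TupleTableSlot *slot)
{
	while (table_scan_sample_next_block(scan, scanstate))
	{
		while (table_scan_sample_next_tuple(scan, scanstate, slot))
		{
			/* process the sampled tuple in `slot` */
		}
	}
}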
2070 
2071 
2072 /* ----------------------------------------------------------------------------
2073  * Functions to make modifications a bit simpler.
2074  * ----------------------------------------------------------------------------
2075  */
2076 
2077 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2078 extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
2079  Snapshot snapshot);
2080 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2081  TupleTableSlot *slot, Snapshot snapshot,
2082  TU_UpdateIndexes *update_indexes);
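
/*
 * Editorial example (not part of tableam.h): the simple_ variants wrap the
 * concurrency-aware calls above and simply ERROR out on unexpected
 * concurrent changes, which suits utility code such as replication apply.
 * GetActiveSnapshot() is declared in utils/snapmgr.h.
 */
static void
example_simple_delete(Relation rel, ItemPointer tid)
{
	simple_table_tuple_delete(rel, tid, GetActiveSnapshot());
}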
2083 
2084 
2085 /* ----------------------------------------------------------------------------
2086  * Helper functions to implement parallel scans for block oriented AMs.
2087  * ----------------------------------------------------------------------------
2088  */
2089 
2090 extern Size table_block_parallelscan_estimate(Relation rel);
2091 extern Size table_block_parallelscan_initialize(Relation rel,
2092  ParallelTableScanDesc pscan);
2093 extern void table_block_parallelscan_reinitialize(Relation rel,
2094  ParallelTableScanDesc pscan);
2095 extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
2096  ParallelBlockTableScanWorker pbscanwork,
2097  ParallelBlockTableScanDesc pbscan);
2098 extern void table_block_parallelscan_startblock_init(Relation rel,
2099  ParallelBlockTableScanWorker pbscanwork,
2100  ParallelBlockTableScanDesc pbscan);
2101 
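/*
 * Editorial example (not part of tableam.h): a block-oriented AM can point
 * its parallel-scan callbacks straight at these helpers, as the heap AM
 * does; all other required callbacks are elided from this sketch.
 */
static const TableAmRoutine example_block_am_methods = {
	.type = T_TableAmRoutine,
	.parallelscan_estimate = table_block_parallelscan_estimate,
	.parallelscan_initialize = table_block_parallelscan_initialize,
	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
	/* ... scan, modify, and DDL callbacks omitted ... */
};
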
2102 
2103 /* ----------------------------------------------------------------------------
2104  * Helper functions to implement relation sizing for block oriented AMs.
2105  * ----------------------------------------------------------------------------
2106  */
2107 
2108 extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2109 extern void table_block_relation_estimate_size(Relation rel,
2110  int32 *attr_widths,
2111  BlockNumber *pages,
2112  double *tuples,
2113  double *allvisfrac,
2114  Size overhead_bytes_per_tuple,
2115  Size usable_bytes_per_page);
2116 
2117 /* ----------------------------------------------------------------------------
2118  * Functions in tableamapi.c
2119  * ----------------------------------------------------------------------------
2120  */
2121 
2122 extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2123 
2124 /* ----------------------------------------------------------------------------
2125  * Functions in heapam_handler.c
2126  * ----------------------------------------------------------------------------
2127  */
2128 
2129 extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2130 
2131 #endif /* TABLEAM_H */