PostgreSQL Source Code  git master
tableam.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tableam.h
4  * POSTGRES table access method definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/access/tableam.h
11  *
12  * NOTES
13  * See tableam.sgml for higher level documentation.
14  *
15  *-------------------------------------------------------------------------
16  */
17 #ifndef TABLEAM_H
18 #define TABLEAM_H
19 
20 #include "access/relscan.h"
21 #include "access/sdir.h"
22 #include "access/xact.h"
23 #include "utils/guc.h"
24 #include "utils/rel.h"
25 #include "utils/snapshot.h"
26 
27 
28 #define DEFAULT_TABLE_ACCESS_METHOD "heap"
29 
30 /* GUCs */
33 
34 
35 struct BulkInsertStateData;
36 struct IndexInfo;
37 struct SampleScanState;
38 struct TBMIterateResult;
39 struct VacuumParams;
40 struct ValidateIndexState;
41 
/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
	/* one of SO_TYPE_* may be specified */
	SO_TYPE_SEQSCAN = 1 << 0,
	SO_TYPE_BITMAPSCAN = 1 << 1,
	SO_TYPE_SAMPLESCAN = 1 << 2,
	SO_TYPE_TIDSCAN = 1 << 3,
	SO_TYPE_TIDRANGESCAN = 1 << 4,
	SO_TYPE_ANALYZE = 1 << 5,

	/* several of SO_ALLOW_* may be specified */
	/* allow or disallow use of access strategy */
	SO_ALLOW_STRAT = 1 << 6,
	/* report location to syncscan logic? */
	SO_ALLOW_SYNC = 1 << 7,
	/* verify visibility page-at-a-time? */
	SO_ALLOW_PAGEMODE = 1 << 8,

	/* unregister snapshot at scan end? */
	SO_TEMP_SNAPSHOT = 1 << 9
} ScanOptions;
/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
	/*
	 * Signals that the action succeeded (i.e. update/delete performed, lock
	 * was acquired)
	 */
	TM_Ok,

	/* The affected tuple wasn't visible to the relevant snapshot */
	TM_Invisible,

	/* The affected tuple was already modified by the calling backend */
	TM_SelfModified,

	/*
	 * The affected tuple was updated by another transaction. This includes
	 * the case where tuple was moved to another partition.
	 */
	TM_Updated,

	/* The affected tuple was deleted by another transaction */
	TM_Deleted,

	/*
	 * The affected tuple is currently being modified by another session. This
	 * will only be returned if table_(update/delete/lock_tuple) are
	 * instructed not to wait.
	 */
	TM_BeingModified,

	/* lock couldn't be acquired, action skipped. Only used by lock_tuple */
	TM_WouldBlock
} TM_Result;
105 /*
106  * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
107  * because the target tuple is already outdated, they fill in this struct to
108  * provide information to the caller about what happened.
109  *
110  * ctid is the target's ctid link: it is the same as the target's TID if the
111  * target was deleted, or the location of the replacement tuple if the target
112  * was updated.
113  *
114  * xmax is the outdating transaction's XID. If the caller wants to visit the
115  * replacement tuple, it must check that this matches before believing the
116  * replacement is really a match.
117  *
118  * cmax is the outdating command's CID, but only when the failure code is
119  * TM_SelfModified (i.e., something in the current transaction outdated the
120  * tuple); otherwise cmax is zero. (We make this restriction because
121  * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
122  * transactions.)
123  */
124 typedef struct TM_FailureData
125 {
129  bool traversed;
131 
132 /*
133  * State used when calling table_index_delete_tuples().
134  *
135  * Represents the status of table tuples, referenced by table TID and taken by
136  * index AM from index tuples. State consists of high level parameters of the
137  * deletion operation, plus two mutable palloc()'d arrays for information
138  * about the status of individual table tuples. These are conceptually one
139  * single array. Using two arrays keeps the TM_IndexDelete struct small,
140  * which makes sorting the first array (the deltids array) fast.
141  *
142  * Some index AM callers perform simple index tuple deletion (by specifying
143  * bottomup = false), and include only known-dead deltids. These known-dead
144  * entries are all marked knowndeletable = true directly (typically these are
145  * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
146  *
147  * Callers that specify bottomup = true are "bottom-up index deletion"
148  * callers. The considerations for the tableam are more subtle with these
149  * callers because they ask the tableam to perform highly speculative work,
150  * and might only expect the tableam to check a small fraction of all entries.
151  * Caller is not allowed to specify knowndeletable = true for any entry
152  * because everything is highly speculative. Bottom-up caller provides
153  * context and hints to tableam -- see comments below for details on how index
154  * AMs and tableams should coordinate during bottom-up index deletion.
155  *
156  * Simple index deletion callers may ask the tableam to perform speculative
157  * work, too. This is a little like bottom-up deletion, but not too much.
158  * The tableam will only perform speculative work when it's practically free
159  * to do so in passing for simple deletion caller (while always performing
160  * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
161  * be deleted within index AM). This is the real reason why it's possible for
162  * simple index deletion caller to specify knowndeletable = false up front
163  * (this means "check if it's possible for me to delete corresponding index
164  * tuple when it's cheap to do so in passing"). The index AM should only
165  * include "extra" entries for index tuples whose TIDs point to a table block
166  * that tableam is expected to have to visit anyway (in the event of a block
167  * orientated tableam). The tableam isn't strictly obligated to check these
168  * "extra" TIDs, but a block-based AM should always manage to do so in
169  * practice.
170  *
171  * The final contents of the deltids/status arrays are interesting to callers
172  * that ask tableam to perform speculative work (i.e. when _any_ items have
173  * knowndeletable set to false up front). These index AM callers will
174  * naturally need to consult final state to determine which index tuples are
175  * in fact deletable.
176  *
177  * The index AM can keep track of which index tuple relates to which deltid by
178  * setting idxoffnum (and/or relying on each entry being uniquely identifiable
179  * using tid), which is important when the final contents of the array will
180  * need to be interpreted -- the array can shrink from initial size after
181  * tableam processing and/or have entries in a new order (tableam may sort
182  * deltids array for its own reasons). Bottom-up callers may find that final
183  * ndeltids is 0 on return from call to tableam, in which case no index tuple
184  * deletions are possible. Simple deletion callers can rely on any entries
185  * they know to be deletable appearing in the final array as deletable.
186  */
187 typedef struct TM_IndexDelete
188 {
189  ItemPointerData tid; /* table TID from index tuple */
190  int16 id; /* Offset into TM_IndexStatus array */
192 
193 typedef struct TM_IndexStatus
194 {
195  OffsetNumber idxoffnum; /* Index am page offset number */
196  bool knowndeletable; /* Currently known to be deletable? */
197 
198  /* Bottom-up index deletion specific fields follow */
199  bool promising; /* Promising (duplicate) index tuple? */
200  int16 freespace; /* Space freed in index if deleted */
202 
203 /*
204  * Index AM/tableam coordination is central to the design of bottom-up index
205  * deletion. The index AM provides hints about where to look to the tableam
206  * by marking some entries as "promising". Index AM does this with duplicate
207  * index tuples that are strongly suspected to be old versions left behind by
208  * UPDATEs that did not logically modify indexed values. Index AM may find it
209  * helpful to only mark entries as promising when they're thought to have been
210  * affected by such an UPDATE in the recent past.
211  *
212  * Bottom-up index deletion casts a wide net at first, usually by including
213  * all TIDs on a target index page. It is up to the tableam to worry about
214  * the cost of checking transaction status information. The tableam is in
215  * control, but needs careful guidance from the index AM. Index AM requests
216  * that bottomupfreespace target be met, while tableam measures progress
217  * towards that goal by tallying the per-entry freespace value for known
218  * deletable entries. (All !bottomup callers can just set these space related
219  * fields to zero.)
220  */
221 typedef struct TM_IndexDeleteOp
222 {
223  Relation irel; /* Target index relation */
224  BlockNumber iblknum; /* Index block number (for error reports) */
225  bool bottomup; /* Bottom-up (not simple) deletion? */
226  int bottomupfreespace; /* Bottom-up space target */
227 
228  /* Mutable per-TID information follows (index AM initializes entries) */
229  int ndeltids; /* Current # of deltids/status elements */
233 
234 /* "options" flag bits for table_tuple_insert */
235 /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
236 #define TABLE_INSERT_SKIP_FSM 0x0002
237 #define TABLE_INSERT_FROZEN 0x0004
238 #define TABLE_INSERT_NO_LOGICAL 0x0008
239 
240 /* flag bits for table_tuple_lock */
241 /* Follow tuples whose update is in progress if lock modes don't conflict */
242 #define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
243 /* Follow update chain and lock latest version of tuple */
244 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
245 
246 
247 /* Typedef for callback function for table_index_build_scan */
249  ItemPointer tid,
250  Datum *values,
251  bool *isnull,
252  bool tupleIsAlive,
253  void *state);
254 
255 /*
256  * API struct for a table AM. Note this must be allocated in a
257  * server-lifetime manner, typically as a static const struct, which then gets
258  * returned by FormData_pg_am.amhandler.
259  *
260  * In most cases it's not appropriate to call the callbacks directly, use the
261  * table_* wrapper functions instead.
262  *
263  * GetTableAmRoutine() asserts that required callbacks are filled in, remember
264  * to update when adding a callback.
265  */
266 typedef struct TableAmRoutine
267 {
268  /* this must be set to T_TableAmRoutine */
270 
271 
272  /* ------------------------------------------------------------------------
273  * Slot related callbacks.
274  * ------------------------------------------------------------------------
275  */
276 
277  /*
278  * Return slot implementation suitable for storing a tuple of this AM.
279  */
280  const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
281 
282 
283  /* ------------------------------------------------------------------------
284  * Table scan callbacks.
285  * ------------------------------------------------------------------------
286  */
287 
288  /*
289  * Start a scan of `rel`. The callback has to return a TableScanDesc,
290  * which will typically be embedded in a larger, AM specific, struct.
291  *
292  * If nkeys != 0, the results need to be filtered by those scan keys.
293  *
294  * pscan, if not NULL, will have already been initialized with
295  * parallelscan_initialize(), and has to be for the same relation. Will
296  * only be set coming from table_beginscan_parallel().
297  *
298  * `flags` is a bitmask indicating the type of scan (ScanOptions's
299  * SO_TYPE_*, currently only one may be specified), options controlling
300  * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
301  * specified, an AM may ignore unsupported ones) and whether the snapshot
302  * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
303  */
305  Snapshot snapshot,
306  int nkeys, struct ScanKeyData *key,
307  ParallelTableScanDesc pscan,
308  uint32 flags);
309 
310  /*
311  * Release resources and deallocate scan. If TableScanDesc.temp_snap,
312  * TableScanDesc.rs_snapshot needs to be unregistered.
313  */
314  void (*scan_end) (TableScanDesc scan);
315 
316  /*
317  * Restart relation scan. If set_params is set to true, allow_{strat,
318  * sync, pagemode} (see scan_begin) changes should be taken into account.
319  */
320  void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
321  bool set_params, bool allow_strat,
322  bool allow_sync, bool allow_pagemode);
323 
324  /*
325  * Return next tuple from `scan`, store in slot.
326  */
328  ScanDirection direction,
329  TupleTableSlot *slot);
330 
331  /*-----------
332  * Optional functions to provide scanning for ranges of ItemPointers.
333  * Implementations must either provide both of these functions, or neither
334  * of them.
335  *
336  * Implementations of scan_set_tidrange must themselves handle
337  * ItemPointers of any value. i.e, they must handle each of the following:
338  *
339  * 1) mintid or maxtid is beyond the end of the table; and
340  * 2) mintid is above maxtid; and
341  * 3) item offset for mintid or maxtid is beyond the maximum offset
342  * allowed by the AM.
343  *
344  * Implementations can assume that scan_set_tidrange is always called
345  * before can_getnextslot_tidrange or after scan_rescan and before any
346  * further calls to scan_getnextslot_tidrange.
347  */
349  ItemPointer mintid,
350  ItemPointer maxtid);
351 
352  /*
353  * Return next tuple from `scan` that's in the range of TIDs defined by
354  * scan_set_tidrange.
355  */
357  ScanDirection direction,
358  TupleTableSlot *slot);
359 
360  /* ------------------------------------------------------------------------
361  * Parallel table scan related functions.
362  * ------------------------------------------------------------------------
363  */
364 
365  /*
366  * Estimate the size of shared memory needed for a parallel scan of this
367  * relation. The snapshot does not need to be accounted for.
368  */
370 
371  /*
372  * Initialize ParallelTableScanDesc for a parallel scan of this relation.
373  * `pscan` will be sized according to parallelscan_estimate() for the same
374  * relation.
375  */
377  ParallelTableScanDesc pscan);
378 
379  /*
380  * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
381  * when `pscan` was initialized by parallelscan_initialize.
382  */
384  ParallelTableScanDesc pscan);
385 
386 
387  /* ------------------------------------------------------------------------
388  * Index Scan Callbacks
389  * ------------------------------------------------------------------------
390  */
391 
392  /*
393  * Prepare to fetch tuples from the relation, as needed when fetching
394  * tuples for an index scan. The callback has to return an
395  * IndexFetchTableData, which the AM will typically embed in a larger
396  * structure with additional information.
397  *
398  * Tuples for an index scan can then be fetched via index_fetch_tuple.
399  */
400  struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
401 
402  /*
403  * Reset index fetch. Typically this will release cross index fetch
404  * resources held in IndexFetchTableData.
405  */
407 
408  /*
409  * Release resources and deallocate index fetch.
410  */
412 
413  /*
414  * Fetch tuple at `tid` into `slot`, after doing a visibility test
415  * according to `snapshot`. If a tuple was found and passed the visibility
416  * test, return true, false otherwise.
417  *
418  * Note that AMs that do not necessarily update indexes when indexed
419  * columns do not change, need to return the current/correct version of
420  * the tuple that is visible to the snapshot, even if the tid points to an
421  * older version of the tuple.
422  *
423  * *call_again is false on the first call to index_fetch_tuple for a tid.
424  * If there potentially is another tuple matching the tid, *call_again
425  * needs to be set to true by index_fetch_tuple, signaling to the caller
426  * that index_fetch_tuple should be called again for the same tid.
427  *
428  * *all_dead, if all_dead is not NULL, should be set to true by
429  * index_fetch_tuple iff it is guaranteed that no backend needs to see
430  * that tuple. Index AMs can use that to avoid returning that tid in
431  * future searches.
432  */
434  ItemPointer tid,
435  Snapshot snapshot,
436  TupleTableSlot *slot,
437  bool *call_again, bool *all_dead);
438 
439 
440  /* ------------------------------------------------------------------------
441  * Callbacks for non-modifying operations on individual tuples
442  * ------------------------------------------------------------------------
443  */
444 
445  /*
446  * Fetch tuple at `tid` into `slot`, after doing a visibility test
447  * according to `snapshot`. If a tuple was found and passed the visibility
448  * test, returns true, false otherwise.
449  */
451  ItemPointer tid,
452  Snapshot snapshot,
453  TupleTableSlot *slot);
454 
455  /*
456  * Is tid valid for a scan of this relation.
457  */
459  ItemPointer tid);
460 
461  /*
462  * Return the latest version of the tuple at `tid`, by updating `tid` to
463  * point at the newest version.
464  */
466  ItemPointer tid);
467 
468  /*
469  * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
470  * the appropriate type for the AM.
471  */
473  TupleTableSlot *slot,
474  Snapshot snapshot);
475 
476  /* see table_index_delete_tuples() */
478  TM_IndexDeleteOp *delstate);
479 
480 
481  /* ------------------------------------------------------------------------
482  * Manipulations of physical tuples.
483  * ------------------------------------------------------------------------
484  */
485 
486  /* see table_tuple_insert() for reference about parameters */
488  CommandId cid, int options,
489  struct BulkInsertStateData *bistate);
490 
491  /* see table_tuple_insert_speculative() for reference about parameters */
493  TupleTableSlot *slot,
494  CommandId cid,
495  int options,
496  struct BulkInsertStateData *bistate,
497  uint32 specToken);
498 
499  /* see table_tuple_complete_speculative() for reference about parameters */
501  TupleTableSlot *slot,
502  uint32 specToken,
503  bool succeeded);
504 
505  /* see table_multi_insert() for reference about parameters */
506  void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
507  CommandId cid, int options, struct BulkInsertStateData *bistate);
508 
509  /* see table_tuple_delete() for reference about parameters */
511  ItemPointer tid,
512  CommandId cid,
513  Snapshot snapshot,
514  Snapshot crosscheck,
515  bool wait,
516  TM_FailureData *tmfd,
517  bool changingPart);
518 
519  /* see table_tuple_update() for reference about parameters */
521  ItemPointer otid,
522  TupleTableSlot *slot,
523  CommandId cid,
524  Snapshot snapshot,
525  Snapshot crosscheck,
526  bool wait,
527  TM_FailureData *tmfd,
528  LockTupleMode *lockmode,
529  bool *update_indexes);
530 
531  /* see table_tuple_lock() for reference about parameters */
533  ItemPointer tid,
534  Snapshot snapshot,
535  TupleTableSlot *slot,
536  CommandId cid,
538  LockWaitPolicy wait_policy,
539  uint8 flags,
540  TM_FailureData *tmfd);
541 
542  /*
543  * Perform operations necessary to complete insertions made via
544  * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
545  * access methods ceased to use this.
546  *
547  * Typically callers of tuple_insert and multi_insert will just pass all
548  * the flags that apply to them, and each AM has to decide which of them
549  * make sense for it, and then only take actions in finish_bulk_insert for
550  * those flags, and ignore others.
551  *
552  * Optional callback.
553  */
554  void (*finish_bulk_insert) (Relation rel, int options);
555 
556 
557  /* ------------------------------------------------------------------------
558  * DDL related functionality.
559  * ------------------------------------------------------------------------
560  */
561 
562  /*
563  * This callback needs to create a new relation filenode for `rel`, with
564  * appropriate durability behaviour for `persistence`.
565  *
566  * Note that only the subset of the relcache filled by
567  * RelationBuildLocalRelation() can be relied upon and that the relation's
568  * catalog entries will either not yet exist (new relation), or will still
569  * reference the old relfilenode.
570  *
571  * As output *freezeXid, *minmulti must be set to the values appropriate
572  * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
573  * fields to be filled they can be set to InvalidTransactionId and
574  * InvalidMultiXactId, respectively.
575  *
576  * See also table_relation_set_new_filenode().
577  */
579  const RelFileNode *newrnode,
580  char persistence,
581  TransactionId *freezeXid,
582  MultiXactId *minmulti);
583 
584  /*
585  * This callback needs to remove all contents from `rel`'s current
586  * relfilenode. No provisions for transactional behaviour need to be made.
587  * Often this can be implemented by truncating the underlying storage to
588  * its minimal size.
589  *
590  * See also table_relation_nontransactional_truncate().
591  */
593 
594  /*
595  * See table_relation_copy_data().
596  *
597  * This can typically be implemented by directly copying the underlying
598  * storage, unless it contains references to the tablespace internally.
599  */
601  const RelFileNode *newrnode);
602 
603  /* See table_relation_copy_for_cluster() */
605  Relation OldTable,
606  Relation OldIndex,
607  bool use_sort,
608  TransactionId OldestXmin,
609  TransactionId *xid_cutoff,
610  MultiXactId *multi_cutoff,
611  double *num_tuples,
612  double *tups_vacuumed,
613  double *tups_recently_dead);
614 
615  /*
616  * React to VACUUM command on the relation. The VACUUM can be triggered by
617  * a user or by autovacuum. The specific actions performed by the AM will
618  * depend heavily on the individual AM.
619  *
620  * On entry a transaction is already established, and the relation is
621  * locked with a ShareUpdateExclusive lock.
622  *
623  * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
624  * this routine, even if (for ANALYZE) it is part of the same VACUUM
625  * command.
626  *
627  * There probably, in the future, needs to be a separate callback to
628  * integrate with autovacuum's scheduling.
629  */
630  void (*relation_vacuum) (Relation rel,
631  struct VacuumParams *params,
632  BufferAccessStrategy bstrategy);
633 
634  /*
635  * Prepare to analyze block `blockno` of `scan`. The scan has been started
636  * with table_beginscan_analyze(). See also
637  * table_scan_analyze_next_block().
638  *
639  * The callback may acquire resources like locks that are held until
640  * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
641  * to hold a lock until all tuples on a block have been analyzed by
642  * scan_analyze_next_tuple.
643  *
644  * The callback can return false if the block is not suitable for
645  * sampling, e.g. because it's a metapage that could never contain tuples.
646  *
647  * XXX: This obviously is primarily suited for block-based AMs. It's not
648  * clear what a good interface for non block based AMs would be, so there
649  * isn't one yet.
650  */
652  BlockNumber blockno,
653  BufferAccessStrategy bstrategy);
654 
655  /*
656  * See table_scan_analyze_next_tuple().
657  *
658  * Not every AM might have a meaningful concept of dead rows, in which
659  * case it's OK to not increment *deadrows - but note that that may
660  * influence autovacuum scheduling (see comment for relation_vacuum
661  * callback).
662  */
664  TransactionId OldestXmin,
665  double *liverows,
666  double *deadrows,
667  TupleTableSlot *slot);
668 
669  /* see table_index_build_range_scan for reference about parameters */
670  double (*index_build_range_scan) (Relation table_rel,
671  Relation index_rel,
672  struct IndexInfo *index_info,
673  bool allow_sync,
674  bool anyvisible,
675  bool progress,
676  BlockNumber start_blockno,
677  BlockNumber numblocks,
679  void *callback_state,
680  TableScanDesc scan);
681 
682  /* see table_index_validate_scan for reference about parameters */
683  void (*index_validate_scan) (Relation table_rel,
684  Relation index_rel,
685  struct IndexInfo *index_info,
686  Snapshot snapshot,
687  struct ValidateIndexState *state);
688 
689 
690  /* ------------------------------------------------------------------------
691  * Miscellaneous functions.
692  * ------------------------------------------------------------------------
693  */
694 
695  /*
696  * See table_relation_size().
697  *
698  * Note that currently a few callers use the MAIN_FORKNUM size to figure
699  * out the range of potentially interesting blocks (brin, analyze). It's
700  * probable that we'll need to revise the interface for those at some
701  * point.
702  */
703  uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
704 
705 
706  /*
707  * This callback should return true if the relation requires a TOAST table
708  * and false if it does not. It may wish to examine the relation's tuple
709  * descriptor before making a decision, but if it uses some other method
710  * of storing large values (or if it does not support them) it can simply
711  * return false.
712  */
714 
715  /*
716  * This callback should return the OID of the table AM that implements
717  * TOAST tables for this AM. If the relation_needs_toast_table callback
718  * always returns false, this callback is not required.
719  */
721 
722  /*
723  * This callback is invoked when detoasting a value stored in a toast
724  * table implemented by this AM. See table_relation_fetch_toast_slice()
725  * for more details.
726  */
727  void (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
728  int32 attrsize,
729  int32 sliceoffset,
730  int32 slicelength,
731  struct varlena *result);
732 
733 
734  /* ------------------------------------------------------------------------
735  * Planner related functions.
736  * ------------------------------------------------------------------------
737  */
738 
739  /*
740  * See table_relation_estimate_size().
741  *
742  * While block oriented, it shouldn't be too hard for an AM that doesn't
743  * internally use blocks to convert into a usable representation.
744  *
745  * This differs from the relation_size callback by returning size
746  * estimates (both relation size and tuple count) for planning purposes,
747  * rather than returning a currently correct estimate.
748  */
749  void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
750  BlockNumber *pages, double *tuples,
751  double *allvisfrac);
752 
753 
754  /* ------------------------------------------------------------------------
755  * Executor related functions.
756  * ------------------------------------------------------------------------
757  */
758 
759  /*
760  * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
761  * of a bitmap table scan. `scan` was started via table_beginscan_bm().
762  * Return false if there are no tuples to be found on the page, true
763  * otherwise.
764  *
765  * This will typically read and pin the target block, and do the necessary
766  * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
767  * make sense to perform tuple visibility checks at this time). For some
768  * AMs it will make more sense to do all the work referencing `tbmres`
769  * contents here, for others it might be better to defer more work to
770  * scan_bitmap_next_tuple.
771  *
772  * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
773  * on the page have to be returned, otherwise the tuples at offsets in
774  * `tbmres->offsets` need to be returned.
775  *
776  * XXX: Currently this may only be implemented if the AM uses md.c as its
777  * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
778  * blockids directly to the underlying storage. nodeBitmapHeapscan.c
779  * performs prefetching directly using that interface. This probably
780  * needs to be rectified at a later point.
781  *
782  * XXX: Currently this may only be implemented if the AM uses the
783  * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
784  * perform prefetching. This probably needs to be rectified at a later
785  * point.
786  *
787  * Optional callback, but either both scan_bitmap_next_block and
788  * scan_bitmap_next_tuple need to exist, or neither.
789  */
791  struct TBMIterateResult *tbmres);
792 
793  /*
794  * Fetch the next tuple of a bitmap table scan into `slot` and return true
795  * if a visible tuple was found, false otherwise.
796  *
797  * For some AMs it will make more sense to do all the work referencing
798  * `tbmres` contents in scan_bitmap_next_block, for others it might be
799  * better to defer more work to this callback.
800  *
801  * Optional callback, but either both scan_bitmap_next_block and
802  * scan_bitmap_next_tuple need to exist, or neither.
803  */
805  struct TBMIterateResult *tbmres,
806  TupleTableSlot *slot);
807 
808  /*
809  * Prepare to fetch tuples from the next block in a sample scan. Return
810  * false if the sample scan is finished, true otherwise. `scan` was
811  * started via table_beginscan_sampling().
812  *
813  * Typically this will first determine the target block by calling the
814  * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
815  * perform a sequential scan over all blocks. The determined block is
816  * then typically read and pinned.
817  *
818  * As the TsmRoutine interface is block based, a block needs to be passed
819  * to NextSampleBlock(). If that's not appropriate for an AM, it
820  * internally needs to perform mapping between the internal and a block
821  * based representation.
822  *
823  * Note that it's not acceptable to hold deadlock prone resources such as
824  * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
825  * block - the tuple is likely to be returned to an upper query node, and
826  * the next call could be off a long while. Holding buffer pins and such
827  * is obviously OK.
828  *
829  * Currently it is required to implement this interface, as there's no
830  * alternative way (contrary e.g. to bitmap scans) to implement sample
831  * scans. If infeasible to implement, the AM may raise an error.
832  */
834  struct SampleScanState *scanstate);
835 
836  /*
837  * This callback, only called after scan_sample_next_block has returned
838  * true, should determine the next tuple to be returned from the selected
839  * block using the TsmRoutine's NextSampleTuple() callback.
840  *
841  * The callback needs to perform visibility checks, and only return
842  * visible tuples. That obviously can mean calling NextSampleTuple()
843  * multiple times.
844  *
845  * The TsmRoutine interface assumes that there's a maximum offset on a
846  * given page, so if that doesn't apply to an AM, it needs to emulate that
847  * assumption somehow.
848  */
850  struct SampleScanState *scanstate,
851  TupleTableSlot *slot);
852 
854 
855 
856 /* ----------------------------------------------------------------------------
857  * Slot functions.
858  * ----------------------------------------------------------------------------
859  */
860 
861 /*
862  * Returns slot callbacks suitable for holding tuples of the appropriate type
863  * for the relation. Works for tables, views, foreign tables and partitioned
864  * tables.
865  */
867 
868 /*
869  * Returns slot using the callbacks returned by table_slot_callbacks(), and
870  * registers it on *reglist.
871  */
872 extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);
873 
874 
875 /* ----------------------------------------------------------------------------
876  * Table scan functions.
877  * ----------------------------------------------------------------------------
878  */
879 
880 /*
881  * Start a scan of `rel`. Returned tuples pass a visibility test of
882  * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
883  */
884 static inline TableScanDesc
886  int nkeys, struct ScanKeyData *key)
887 {
888  uint32 flags = SO_TYPE_SEQSCAN |
890 
891  return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
892 }
893 
894 /*
895  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
896  * snapshot appropriate for scanning catalog relations.
897  */
898 extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
899  struct ScanKeyData *key);
900 
901 /*
902  * Like table_beginscan(), but table_beginscan_strat() offers an extended API
903  * that lets the caller control whether a nondefault buffer access strategy
904  * can be used, and whether syncscan can be chosen (possibly resulting in the
905  * scan not starting from block zero). Both of these default to true with
906  * plain table_beginscan.
907  */
908 static inline TableScanDesc
910  int nkeys, struct ScanKeyData *key,
911  bool allow_strat, bool allow_sync)
912 {
914 
915  if (allow_strat)
916  flags |= SO_ALLOW_STRAT;
917  if (allow_sync)
918  flags |= SO_ALLOW_SYNC;
919 
920  return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
921 }
922 
923 /*
924  * table_beginscan_bm is an alternative entry point for setting up a
925  * TableScanDesc for a bitmap heap scan. Although that scan technology is
926  * really quite unlike a standard seqscan, there is just enough commonality to
927  * make it worth using the same data structure.
928  */
929 static inline TableScanDesc
931  int nkeys, struct ScanKeyData *key)
932 {
934 
935  return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
936 }
937 
938 /*
939  * table_beginscan_sampling is an alternative entry point for setting up a
940  * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
941  * using the same data structure although the behavior is rather different.
942  * In addition to the options offered by table_beginscan_strat, this call
943  * also allows control of whether page-mode visibility checking is used.
944  */
945 static inline TableScanDesc
947  int nkeys, struct ScanKeyData *key,
948  bool allow_strat, bool allow_sync,
949  bool allow_pagemode)
950 {
951  uint32 flags = SO_TYPE_SAMPLESCAN;
952 
953  if (allow_strat)
954  flags |= SO_ALLOW_STRAT;
955  if (allow_sync)
956  flags |= SO_ALLOW_SYNC;
957  if (allow_pagemode)
958  flags |= SO_ALLOW_PAGEMODE;
959 
960  return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
961 }
962 
963 /*
964  * table_beginscan_tid is an alternative entry point for setting up a
965  * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
966  * the same data structure although the behavior is rather different.
967  */
968 static inline TableScanDesc
970 {
971  uint32 flags = SO_TYPE_TIDSCAN;
972 
973  return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
974 }
975 
976 /*
977  * table_beginscan_analyze is an alternative entry point for setting up a
978  * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
979  * the same data structure although the behavior is rather different.
980  */
981 static inline TableScanDesc
983 {
984  uint32 flags = SO_TYPE_ANALYZE;
985 
986  return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
987 }
988 
989 /*
990  * End relation scan.
991  */
992 static inline void
994 {
995  scan->rs_rd->rd_tableam->scan_end(scan);
996 }
997 
998 /*
999  * Restart a relation scan.
1000  */
1001 static inline void
1003  struct ScanKeyData *key)
1004 {
1005  scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
1006 }
1007 
1008 /*
1009  * Restart a relation scan after changing params.
1010  *
1011  * This call allows changing the buffer strategy, syncscan, and pagemode
1012  * options before starting a fresh scan. Note that although the actual use of
1013  * syncscan might change (effectively, enabling or disabling reporting), the
1014  * previously selected startblock will be kept.
1015  */
1016 static inline void
1018  bool allow_strat, bool allow_sync, bool allow_pagemode)
1019 {
1020  scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
1021  allow_strat, allow_sync,
1022  allow_pagemode);
1023 }
1024 
1025 /*
1026  * Update snapshot used by the scan.
1027  */
1028 extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
1029 
1030 /*
1031  * Return next tuple from `scan`, store in slot.
1032  */
1033 static inline bool
1035 {
1036  slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
1037 
1038  /*
1039  * We don't expect direct calls to table_scan_getnextslot with valid
1040  * CheckXidAlive for catalog or regular tables. See detailed comments in
1041  * xact.c where these variables are declared.
1042  */
1044  elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");
1045 
1046  return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
1047 }
1048 
1049 /* ----------------------------------------------------------------------------
1050  * TID Range scanning related functions.
1051  * ----------------------------------------------------------------------------
1052  */
1053 
1054 /*
1055  * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
1056  * for a TID range scan.
1057  */
1058 static inline TableScanDesc
1060  ItemPointer mintid,
1061  ItemPointer maxtid)
1062 {
1063  TableScanDesc sscan;
1065 
1066  sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
1067 
1068  /* Set the range of TIDs to scan */
1069  sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
1070 
1071  return sscan;
1072 }
1073 
1074 /*
1075  * table_rescan_tidrange resets the scan position and sets the minimum and
1076  * maximum TID range to scan for a TableScanDesc created by
1077  * table_beginscan_tidrange.
1078  */
1079 static inline void
1081  ItemPointer maxtid)
1082 {
1083  /* Ensure table_beginscan_tidrange() was used. */
1084  Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
1085 
1086  sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
1087  sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
1088 }
1089 
1090 /*
1091  * Fetch the next tuple from `sscan` for a TID range scan created by
1092  * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
1093  * or returns false if no more tuples exist in the range.
1094  */
1095 static inline bool
1097  TupleTableSlot *slot)
1098 {
1099  /* Ensure table_beginscan_tidrange() was used. */
1100  Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
1101 
1102  return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
1103  direction,
1104  slot);
1105 }
1106 
1107 
1108 /* ----------------------------------------------------------------------------
1109  * Parallel table scan related functions.
1110  * ----------------------------------------------------------------------------
1111  */
1112 
1113 /*
1114  * Estimate the size of shared memory needed for a parallel scan of this
1115  * relation.
1116  */
1117 extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);
1118 
1119 /*
1120  * Initialize ParallelTableScanDesc for a parallel scan of this
1121  * relation. `pscan` needs to be sized according to parallelscan_estimate()
1122  * for the same relation. Call this just once in the leader process; then,
1123  * individual workers attach via table_beginscan_parallel.
1124  */
1125 extern void table_parallelscan_initialize(Relation rel,
1126  ParallelTableScanDesc pscan,
1127  Snapshot snapshot);
1128 
1129 /*
1130  * Begin a parallel scan. `pscan` needs to have been initialized with
1131  * table_parallelscan_initialize(), for the same relation. The initialization
1132  * does not need to have happened in this backend.
1133  *
1134  * Caller must hold a suitable lock on the relation.
1135  */
1137  ParallelTableScanDesc pscan);
1138 
1139 /*
1140  * Restart a parallel scan. Call this in the leader process. Caller is
1141  * responsible for making sure that all workers have finished the scan
1142  * beforehand.
1143  */
1144 static inline void
1146 {
1147  rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
1148 }
1149 
1150 
1151 /* ----------------------------------------------------------------------------
1152  * Index scan related functions.
1153  * ----------------------------------------------------------------------------
1154  */
1155 
1156 /*
1157  * Prepare to fetch tuples from the relation, as needed when fetching tuples
1158  * for an index scan.
1159  *
1160  * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
1161  */
1162 static inline IndexFetchTableData *
1164 {
1165  return rel->rd_tableam->index_fetch_begin(rel);
1166 }
1167 
1168 /*
1169  * Reset index fetch. Typically this will release cross index fetch resources
1170  * held in IndexFetchTableData.
1171  */
1172 static inline void
1174 {
1175  scan->rel->rd_tableam->index_fetch_reset(scan);
1176 }
1177 
1178 /*
1179  * Release resources and deallocate index fetch.
1180  */
1181 static inline void
1183 {
1184  scan->rel->rd_tableam->index_fetch_end(scan);
1185 }
1186 
1187 /*
1188  * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
1189  * a visibility test according to `snapshot`. If a tuple was found and passed
1190  * the visibility test, returns true, false otherwise. Note that *tid may be
1191  * modified when we return true (see later remarks on multiple row versions
1192  * reachable via a single index entry).
1193  *
1194  * *call_again needs to be false on the first call to table_index_fetch_tuple() for
1195  * a tid. If there potentially is another tuple matching the tid, *call_again
1196  * will be set to true, signaling that table_index_fetch_tuple() should be called
1197  * again for the same tid.
1198  *
1199  * *all_dead, if all_dead is not NULL, will be set to true by
1200  * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
1201  * that tuple. Index AMs can use that to avoid returning that tid in future
1202  * searches.
1203  *
1204  * The difference between this function and table_tuple_fetch_row_version()
1205  * is that this function returns the currently visible version of a row if
1206  * the AM supports storing multiple row versions reachable via a single index
1207  * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
1208  * evaluates the tuple exactly at `tid`. Outside of index entry ->table tuple
1209  * lookups, table_tuple_fetch_row_version() is what's usually needed.
1210  */
1211 static inline bool
1213  ItemPointer tid,
1214  Snapshot snapshot,
1215  TupleTableSlot *slot,
1216  bool *call_again, bool *all_dead)
1217 {
1218  /*
1219  * We don't expect direct calls to table_index_fetch_tuple with valid
1220  * CheckXidAlive for catalog or regular tables. See detailed comments in
1221  * xact.c where these variables are declared.
1222  */
1224  elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
1225 
1226  return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
1227  slot, call_again,
1228  all_dead);
1229 }
1230 
1231 /*
1232  * This is a convenience wrapper around table_index_fetch_tuple() which
1233  * returns whether there are table tuple items corresponding to an index
1234  * entry. This likely is only useful to verify if there's a conflict in a
1235  * unique index.
1236  */
1237 extern bool table_index_fetch_tuple_check(Relation rel,
1238  ItemPointer tid,
1239  Snapshot snapshot,
1240  bool *all_dead);
1241 
1242 
1243 /* ------------------------------------------------------------------------
1244  * Functions for non-modifying operations on individual tuples
1245  * ------------------------------------------------------------------------
1246  */
1247 
1248 
1249 /*
1250  * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
1251  * `snapshot`. If a tuple was found and passed the visibility test, returns
1252  * true, false otherwise.
1253  *
1254  * See table_index_fetch_tuple's comment about what the difference between
1255  * these functions is. It is correct to use this function outside of index
1256  * entry->table tuple lookups.
1257  */
1258 static inline bool
1260  ItemPointer tid,
1261  Snapshot snapshot,
1262  TupleTableSlot *slot)
1263 {
1264  /*
1265  * We don't expect direct calls to table_tuple_fetch_row_version with
1266  * valid CheckXidAlive for catalog or regular tables. See detailed
1267  * comments in xact.c where these variables are declared.
1268  */
1270  elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
1271 
1272  return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
1273 }
1274 
1275 /*
1276  * Verify that `tid` is a potentially valid tuple identifier. That doesn't
1277  * mean that the pointed to row needs to exist or be visible, but that
1278  * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
1279  * table_tuple_fetch_row_version()) should not error out if called with that
1280  * tid.
1281  *
1282  * `scan` needs to have been started via table_beginscan().
1283  */
1284 static inline bool
1286 {
1287  return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
1288 }
1289 
1290 /*
1291  * Return the latest version of the tuple at `tid`, by updating `tid` to
1292  * point at the newest version.
1293  */
1295 
1296 /*
1297  * Return true iff tuple in slot satisfies the snapshot.
1298  *
1299  * This assumes the slot's tuple is valid, and of the appropriate type for the
1300  * AM.
1301  *
1302  * Some AMs might modify the data underlying the tuple as a side-effect. If so
1303  * they ought to mark the relevant buffer dirty.
1304  */
1305 static inline bool
1307  Snapshot snapshot)
1308 {
1309  return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
1310 }
1311 
1312 /*
1313  * Determine which index tuples are safe to delete based on their table TID.
1314  *
1315  * Determines which entries from index AM caller's TM_IndexDeleteOp state
1316  * point to vacuumable table tuples. Entries that are found by tableam to be
1317  * vacuumable are naturally safe for index AM to delete, and so get directly
1318  * marked as deletable. See comments above TM_IndexDelete and comments above
1319  * TM_IndexDeleteOp for full details.
1320  *
1321  * Returns a latestRemovedXid transaction ID that caller generally places in
1322  * its index deletion WAL record. This might be used during subsequent REDO
1323  * of the WAL record when in Hot Standby mode -- a recovery conflict for the
1324  * index deletion operation might be required on the standby.
1325  */
1326 static inline TransactionId
1328 {
1329  return rel->rd_tableam->index_delete_tuples(rel, delstate);
1330 }
1331 
1332 
1333 /* ----------------------------------------------------------------------------
1334  * Functions for manipulations of physical tuples.
1335  * ----------------------------------------------------------------------------
1336  */
1337 
1338 /*
1339  * Insert a tuple from a slot into table AM routine.
1340  *
1341  * The options bitmask allows the caller to specify options that may change the
1342  * behaviour of the AM. The AM will ignore options that it does not support.
1343  *
1344  * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
1345  * free space in the relation. This can save some cycles when we know the
1346  * relation is new and doesn't contain useful amounts of free space.
1347  * TABLE_INSERT_SKIP_FSM is commonly passed directly to
1348  * RelationGetBufferForTuple. See that method for more information.
1349  *
1350  * TABLE_INSERT_FROZEN should only be specified for inserts into
1351  * relfilenodes created during the current subtransaction and when
1352  * there are no prior snapshots or pre-existing portals open.
1353  * This causes rows to be frozen, which is an MVCC violation and
1354  * requires explicit options chosen by user.
1355  *
1356  * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
1357  * information for the tuple. This should solely be used during table rewrites
1358  * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
1359  * relation.
1360  *
1361  * Note that most of these options will be applied when inserting into the
1362  * heap's TOAST table, too, if the tuple requires any out-of-line data.
1363  *
1364  * The BulkInsertState object (if any; bistate can be NULL for default
1365  * behavior) is also just passed through to RelationGetBufferForTuple. If
1366  * `bistate` is provided, table_finish_bulk_insert() needs to be called.
1367  *
1368  * On return the slot's tts_tid and tts_tableOid are updated to reflect the
1369  * insertion. But note that any toasting of fields within the slot is NOT
1370  * reflected in the slots contents.
1371  */
1372 static inline void
1374  int options, struct BulkInsertStateData *bistate)
1375 {
1376  rel->rd_tableam->tuple_insert(rel, slot, cid, options,
1377  bistate);
1378 }
1379 
1380 /*
1381  * Perform a "speculative insertion". These can be backed out afterwards
1382  * without aborting the whole transaction. Other sessions can wait for the
1383  * speculative insertion to be confirmed, turning it into a regular tuple, or
1384  * aborted, as if it never existed. Speculatively inserted tuples behave as
1385  * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
1386  *
1387  * A transaction having performed a speculative insertion has to either abort,
1388  * or finish the speculative insertion with
1389  * table_tuple_complete_speculative(succeeded = ...).
1390  */
1391 static inline void
1393  CommandId cid, int options,
1394  struct BulkInsertStateData *bistate,
1395  uint32 specToken)
1396 {
1397  rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
1398  bistate, specToken);
1399 }
1400 
1401 /*
1402  * Complete "speculative insertion" started in the same transaction. If
1403  * succeeded is true, the tuple is fully inserted, if false, it's removed.
1404  */
1405 static inline void
1407  uint32 specToken, bool succeeded)
1408 {
1409  rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
1410  succeeded);
1411 }
1412 
1413 /*
1414  * Insert multiple tuples into a table.
1415  *
1416  * This is like table_tuple_insert(), but inserts multiple tuples in one
1417  * operation. That's often faster than calling table_tuple_insert() in a loop,
1418  * because e.g. the AM can reduce WAL logging and page locking overhead.
1419  *
1420  * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
1421  * in `slots`, the parameters for table_multi_insert() are the same as for
1422  * table_tuple_insert().
1423  *
1424  * Note: this leaks memory into the current memory context. You can create a
1425  * temporary context before calling this, if that's a problem.
1426  */
1427 static inline void
1428 table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
1429  CommandId cid, int options, struct BulkInsertStateData *bistate)
1430 {
1431  rel->rd_tableam->multi_insert(rel, slots, nslots,
1432  cid, options, bistate);
1433 }
1434 
1435 /*
1436  * Delete a tuple.
1437  *
1438  * NB: do not call this directly unless prepared to deal with
1439  * concurrent-update conditions. Use simple_table_tuple_delete instead.
1440  *
1441  * Input parameters:
1442  * relation - table to be modified (caller must hold suitable lock)
1443  * tid - TID of tuple to be deleted
1444  * cid - delete command ID (used for visibility test, and stored into
1445  * cmax if successful)
1446  * crosscheck - if not InvalidSnapshot, also check tuple against this
1447  * wait - true if should wait for any conflicting update to commit/abort
1448  * Output parameters:
1449  * tmfd - filled in failure cases (see below)
1450  * changingPart - true iff the tuple is being moved to another partition
1451  * table due to an update of the partition key. Otherwise, false.
1452  *
1453  * Normal, successful return value is TM_Ok, which means we did actually
1454  * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
1455  * TM_BeingModified (the last only possible if wait == false).
1456  *
1457  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
1458  * t_xmax, and, if possible, and, if possible, t_cmax. See comments for
1459  * struct TM_FailureData for additional info.
1460  */
1461 static inline TM_Result
1463  Snapshot snapshot, Snapshot crosscheck, bool wait,
1464  TM_FailureData *tmfd, bool changingPart)
1465 {
1466  return rel->rd_tableam->tuple_delete(rel, tid, cid,
1467  snapshot, crosscheck,
1468  wait, tmfd, changingPart);
1469 }
1470 
1471 /*
1472  * Update a tuple.
1473  *
1474  * NB: do not call this directly unless you are prepared to deal with
1475  * concurrent-update conditions. Use simple_table_tuple_update instead.
1476  *
1477  * Input parameters:
1478  * relation - table to be modified (caller must hold suitable lock)
1479  * otid - TID of old tuple to be replaced
1480  * slot - newly constructed tuple data to store
1481  * cid - update command ID (used for visibility test, and stored into
1482  * cmax/cmin if successful)
1483  * crosscheck - if not InvalidSnapshot, also check old tuple against this
1484  * wait - true if should wait for any conflicting update to commit/abort
1485  * Output parameters:
1486  * tmfd - filled in failure cases (see below)
1487  * lockmode - filled with lock mode acquired on tuple
1488  * update_indexes - in success cases this is set to true if new index entries
1489  * are required for this tuple
1490  *
1491  * Normal, successful return value is TM_Ok, which means we did actually
1492  * update it. Failure return codes are TM_SelfModified, TM_Updated, and
1493  * TM_BeingModified (the last only possible if wait == false).
1494  *
1495  * On success, the slot's tts_tid and tts_tableOid are updated to match the new
1496  * stored tuple; in particular, slot->tts_tid is set to the TID where the
1497  * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
1498  * update was done. However, any TOAST changes in the new tuple's
1499  * data are not reflected into *newtup.
1500  *
1501  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
1502  * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
1503  * for additional info.
1504  */
1505 static inline TM_Result
1507  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
1508  bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
1509  bool *update_indexes)
1510 {
1511  return rel->rd_tableam->tuple_update(rel, otid, slot,
1512  cid, snapshot, crosscheck,
1513  wait, tmfd,
1514  lockmode, update_indexes);
1515 }
1516 
1517 /*
1518  * Lock a tuple in the specified mode.
1519  *
1520  * Input parameters:
1521  * relation: relation containing tuple (caller must hold suitable lock)
1522  * tid: TID of tuple to lock
1523  * snapshot: snapshot to use for visibility determinations
1524  * cid: current command ID (used for visibility test, and stored into
1525  * tuple's cmax if lock is successful)
1526  * mode: lock mode desired
1527  * wait_policy: what to do if tuple lock is not available
1528  * flags:
1529  * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
1530  * also lock descendant tuples if lock modes don't conflict.
1531  * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
1532  * latest version.
1533  *
1534  * Output parameters:
1535  * *slot: contains the target tuple
1536  * *tmfd: filled in failure cases (see below)
1537  *
1538  * Function result may be:
1539  * TM_Ok: lock was successfully acquired
1540  * TM_Invisible: lock failed because tuple was never visible to us
1541  * TM_SelfModified: lock failed because tuple updated by self
1542  * TM_Updated: lock failed because tuple updated by other xact
1543  * TM_Deleted: lock failed because tuple deleted by other xact
1544  * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
1545  *
1546  * In the failure cases other than TM_Invisible and TM_Deleted, the routine
1547  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
1548  * comments for struct TM_FailureData for additional info.
1549  */
1550 static inline TM_Result
1553  LockWaitPolicy wait_policy, uint8 flags,
1554  TM_FailureData *tmfd)
1555 {
1556  return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
1557  cid, mode, wait_policy,
1558  flags, tmfd);
1559 }
1560 
1561 /*
1562  * Perform operations necessary to complete insertions made via
1563  * tuple_insert and multi_insert with a BulkInsertState specified.
1564  */
1565 static inline void
1567 {
1568  /* optional callback */
1569  if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
1571 }
1572 
1573 
1574 /* ------------------------------------------------------------------------
1575  * DDL related functionality.
1576  * ------------------------------------------------------------------------
1577  */
1578 
1579 /*
1580  * Create storage for `rel` in `newrnode`, with persistence set to
1581  * `persistence`.
1582  *
1583  * This is used both during relation creation and various DDL operations to
1584  * create a new relfilenode that can be filled from scratch. When creating
1585  * new storage for an existing relfilenode, this should be called before the
1586  * relcache entry has been updated.
1587  *
1588  * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
1589  * that pg_class.{relfrozenxid, relminmxid} have to be set to.
1590  */
1591 static inline void
1593  const RelFileNode *newrnode,
1594  char persistence,
1595  TransactionId *freezeXid,
1596  MultiXactId *minmulti)
1597 {
1598  rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence,
1599  freezeXid, minmulti);
1600 }
1601 
1602 /*
1603  * Remove all table contents from `rel`, in a non-transactional manner.
1604  * Non-transactional meaning that there's no need to support rollbacks. This
1605  * commonly only is used to perform truncations for relfilenodes created in the
1606  * current transaction.
1607  */
1608 static inline void
1610 {
1612 }
1613 
1614 /*
1615  * Copy data from `rel` into the new relfilenode `newrnode`. The new
1616  * relfilenode may not have storage associated before this function is
1617  * called. This is only supposed to be used for low level operations like
1618  * changing a relation's tablespace.
1619  */
1620 static inline void
1622 {
1623  rel->rd_tableam->relation_copy_data(rel, newrnode);
1624 }
1625 
1626 /*
1627  * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1628  * FULL.
1629  *
1630  * Additional Input parameters:
1631  * - use_sort - if true, the table contents are sorted appropriate for
1632  * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1633  * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1634  * performed
1635  * - OldIndex - see use_sort
1636  * - OldestXmin - computed by vacuum_set_xid_limits(), even when
1637  * not needed for the relation's AM
1638  * - *xid_cutoff - ditto
1639  * - *multi_cutoff - ditto
1640  *
1641  * Output parameters:
1642  * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1643  * - *multi_cutoff - rel's new relminmxid value, may be invalid
1644  * - *tups_vacuumed - stats, for logging, if appropriate for AM
1645  * - *tups_recently_dead - stats, for logging, if appropriate for AM
1646  */
1647 static inline void
1649  Relation OldIndex,
1650  bool use_sort,
1651  TransactionId OldestXmin,
1652  TransactionId *xid_cutoff,
1653  MultiXactId *multi_cutoff,
1654  double *num_tuples,
1655  double *tups_vacuumed,
1656  double *tups_recently_dead)
1657 {
1658  OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1659  use_sort, OldestXmin,
1660  xid_cutoff, multi_cutoff,
1661  num_tuples, tups_vacuumed,
1662  tups_recently_dead);
1663 }
1664 
1665 /*
1666  * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1667  * autovacuum. The specific actions performed by the AM will depend heavily on
1668  * the individual AM.
1669  *
1670  * On entry a transaction needs to already been established, and the
1671  * table is locked with a ShareUpdateExclusive lock.
1672  *
1673  * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this
1674  * routine, even if (for ANALYZE) it is part of the same VACUUM command.
1675  */
1676 static inline void
1678  BufferAccessStrategy bstrategy)
1679 {
1680  rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1681 }
1682 
1683 /*
1684  * Prepare to analyze block `blockno` of `scan`. The scan needs to have been
1685  * started with table_beginscan_analyze(). Note that this routine might
1686  * acquire resources like locks that are held until
1687  * table_scan_analyze_next_tuple() returns false.
1688  *
1689  * Returns false if block is unsuitable for sampling, true otherwise.
1690  */
1691 static inline bool
1693  BufferAccessStrategy bstrategy)
1694 {
1695  return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno,
1696  bstrategy);
1697 }
1698 
1699 /*
1700  * Iterate over tuples in the block selected with
1701  * table_scan_analyze_next_block() (which needs to have returned true, and
1702  * this routine may not have returned false for the same block before). If a
1703  * tuple that's suitable for sampling is found, true is returned and a tuple
1704  * is stored in `slot`.
1705  *
1706  * *liverows and *deadrows are incremented according to the encountered
1707  * tuples.
1708  */
1709 static inline bool
1711  double *liverows, double *deadrows,
1712  TupleTableSlot *slot)
1713 {
1714  return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1715  liverows, deadrows,
1716  slot);
1717 }
1718 
1719 /*
1720  * table_index_build_scan - scan the table to find tuples to be indexed
1721  *
1722  * This is called back from an access-method-specific index build procedure
1723  * after the AM has done whatever setup it needs. The parent table relation
1724  * is scanned to find tuples that should be entered into the index. Each
1725  * such tuple is passed to the AM's callback routine, which does the right
1726  * things to add it to the new index. After we return, the AM's index
1727  * build procedure does whatever cleanup it needs.
1728  *
1729  * The total count of live tuples is returned. This is for updating pg_class
1730  * statistics. (It's annoying not to be able to do that here, but we want to
1731  * merge that update with others; see index_update_stats.) Note that the
1732  * index AM itself must keep track of the number of index tuples; we don't do
1733  * so here because the AM might reject some of the tuples for its own reasons,
1734  * such as being unable to store NULLs.
1735  *
1736  * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1737  * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1738  *
1739  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1740  * any potentially broken HOT chains. Currently, we set this if there are any
1741  * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1742  * very hard to detect whether they're really incompatible with the chain tip.
1743  * This only really makes sense for heap AM, it might need to be generalized
1744  * for other AMs later.
1745  */
1746 static inline double
1748  Relation index_rel,
1749  struct IndexInfo *index_info,
1750  bool allow_sync,
1751  bool progress,
1753  void *callback_state,
1754  TableScanDesc scan)
1755 {
1756  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1757  index_rel,
1758  index_info,
1759  allow_sync,
1760  false,
1761  progress,
1762  0,
1764  callback,
1765  callback_state,
1766  scan);
1767 }
1768 
1769 /*
1770  * As table_index_build_scan(), except that instead of scanning the complete
1771  * table, only the given number of blocks are scanned. Scan to end-of-rel can
1772  * be signaled by passing InvalidBlockNumber as numblocks. Note that
1773  * restricting the range to scan cannot be done when requesting syncscan.
1774  *
1775  * When "anyvisible" mode is requested, all tuples visible to any transaction
1776  * are indexed and counted as live, including those inserted or deleted by
1777  * transactions that are still in progress.
1778  */
1779 static inline double
1781  Relation index_rel,
1782  struct IndexInfo *index_info,
1783  bool allow_sync,
1784  bool anyvisible,
1785  bool progress,
1786  BlockNumber start_blockno,
1787  BlockNumber numblocks,
1789  void *callback_state,
1790  TableScanDesc scan)
1791 {
1792  return table_rel->rd_tableam->index_build_range_scan(table_rel,
1793  index_rel,
1794  index_info,
1795  allow_sync,
1796  anyvisible,
1797  progress,
1798  start_blockno,
1799  numblocks,
1800  callback,
1801  callback_state,
1802  scan);
1803 }
1804 
1805 /*
1806  * table_index_validate_scan - second table scan for concurrent index build
1807  *
1808  * See validate_index() for an explanation.
1809  */
1810 static inline void
1812  Relation index_rel,
1813  struct IndexInfo *index_info,
1814  Snapshot snapshot,
1815  struct ValidateIndexState *state)
1816 {
1817  table_rel->rd_tableam->index_validate_scan(table_rel,
1818  index_rel,
1819  index_info,
1820  snapshot,
1821  state);
1822 }
1823 
1824 
1825 /* ----------------------------------------------------------------------------
1826  * Miscellaneous functionality
1827  * ----------------------------------------------------------------------------
1828  */
1829 
1830 /*
1831  * Return the current size of `rel` in bytes. If `forkNumber` is
1832  * InvalidForkNumber, return the relation's overall size, otherwise the size
1833  * for the indicated fork.
1834  *
1835  * Note that the overall size might not be the equivalent of the sum of sizes
1836  * for the individual forks for some AMs, e.g. because the AMs storage does
1837  * not neatly map onto the builtin types of forks.
1838  */
1839 static inline uint64
1841 {
1842  return rel->rd_tableam->relation_size(rel, forkNumber);
1843 }
1844 
1845 /*
1846  * table_relation_needs_toast_table - does this relation need a toast table?
1847  */
1848 static inline bool
1850 {
1851  return rel->rd_tableam->relation_needs_toast_table(rel);
1852 }
1853 
1854 /*
1855  * Return the OID of the AM that should be used to implement the TOAST table
1856  * for this relation.
1857  */
1858 static inline Oid
1860 {
1861  return rel->rd_tableam->relation_toast_am(rel);
1862 }
1863 
1864 /*
1865  * Fetch all or part of a TOAST value from a TOAST table.
1866  *
1867  * If this AM is never used to implement a TOAST table, then this callback
1868  * is not needed. But, if toasted values are ever stored in a table of this
1869  * type, then you will need this callback.
1870  *
1871  * toastrel is the relation in which the toasted value is stored.
1872  *
1873  * valueid identifes which toast value is to be fetched. For the heap,
1874  * this corresponds to the values stored in the chunk_id column.
1875  *
1876  * attrsize is the total size of the toast value to be fetched.
1877  *
1878  * sliceoffset is the offset within the toast value of the first byte that
1879  * should be fetched.
1880  *
1881  * slicelength is the number of bytes from the toast value that should be
1882  * fetched.
1883  *
1884  * result is caller-allocated space into which the fetched bytes should be
1885  * stored.
1886  */
1887 static inline void
1889  int32 attrsize, int32 sliceoffset,
1890  int32 slicelength, struct varlena *result)
1891 {
1892  toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1893  attrsize,
1894  sliceoffset, slicelength,
1895  result);
1896 }
1897 
1898 
1899 /* ----------------------------------------------------------------------------
1900  * Planner related functionality
1901  * ----------------------------------------------------------------------------
1902  */
1903 
1904 /*
1905  * Estimate the current size of the relation, as an AM specific workhorse for
1906  * estimate_rel_size(). Look there for an explanation of the parameters.
1907  */
1908 static inline void
1910  BlockNumber *pages, double *tuples,
1911  double *allvisfrac)
1912 {
1913  rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1914  allvisfrac);
1915 }
1916 
1917 
1918 /* ----------------------------------------------------------------------------
1919  * Executor related functionality
1920  * ----------------------------------------------------------------------------
1921  */
1922 
1923 /*
1924  * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
1925  * a bitmap table scan. `scan` needs to have been started via
1926  * table_beginscan_bm(). Returns false if there are no tuples to be found on
1927  * the page, true otherwise.
1928  *
1929  * Note, this is an optionally implemented function, therefore should only be
1930  * used after verifying the presence (at plan time or such).
1931  */
1932 static inline bool
1934  struct TBMIterateResult *tbmres)
1935 {
1936  /*
1937  * We don't expect direct calls to table_scan_bitmap_next_block with valid
1938  * CheckXidAlive for catalog or regular tables. See detailed comments in
1939  * xact.c where these variables are declared.
1940  */
1942  elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
1943 
1944  return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
1945  tbmres);
1946 }
1947 
1948 /*
1949  * Fetch the next tuple of a bitmap table scan into `slot` and return true if
1950  * a visible tuple was found, false otherwise.
1951  * table_scan_bitmap_next_block() needs to previously have selected a
1952  * block (i.e. returned true), and no previous
1953  * table_scan_bitmap_next_tuple() for the same block may have
1954  * returned false.
1955  */
1956 static inline bool
1958  struct TBMIterateResult *tbmres,
1959  TupleTableSlot *slot)
1960 {
1961  /*
1962  * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
1963  * CheckXidAlive for catalog or regular tables. See detailed comments in
1964  * xact.c where these variables are declared.
1965  */
1967  elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
1968 
1969  return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
1970  tbmres,
1971  slot);
1972 }
1973 
1974 /*
1975  * Prepare to fetch tuples from the next block in a sample scan. Returns false
1976  * if the sample scan is finished, true otherwise. `scan` needs to have been
1977  * started via table_beginscan_sampling().
1978  *
1979  * This will call the TsmRoutine's NextSampleBlock() callback if necessary
1980  * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
1981  * underlying relation.
1982  */
1983 static inline bool
1985  struct SampleScanState *scanstate)
1986 {
1987  /*
1988  * We don't expect direct calls to table_scan_sample_next_block with valid
1989  * CheckXidAlive for catalog or regular tables. See detailed comments in
1990  * xact.c where these variables are declared.
1991  */
1993  elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
1994  return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
1995 }
1996 
1997 /*
1998  * Fetch the next sample tuple into `slot` and return true if a visible tuple
1999  * was found, false otherwise. table_scan_sample_next_block() needs to
2000  * previously have selected a block (i.e. returned true), and no previous
2001  * table_scan_sample_next_tuple() for the same block may have returned false.
2002  *
2003  * This will call the TsmRoutine's NextSampleTuple() callback.
2004  */
2005 static inline bool
2007  struct SampleScanState *scanstate,
2008  TupleTableSlot *slot)
2009 {
2010  /*
2011  * We don't expect direct calls to table_scan_sample_next_tuple with valid
2012  * CheckXidAlive for catalog or regular tables. See detailed comments in
2013  * xact.c where these variables are declared.
2014  */
2016  elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
2017  return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
2018  slot);
2019 }
2020 
2021 
2022 /* ----------------------------------------------------------------------------
2023  * Functions to make modifications a bit simpler.
2024  * ----------------------------------------------------------------------------
2025  */
2026 
2027 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2028 extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
2029  Snapshot snapshot);
2030 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2031  TupleTableSlot *slot, Snapshot snapshot,
2032  bool *update_indexes);
2033 
2034 
2035 /* ----------------------------------------------------------------------------
2036  * Helper functions to implement parallel scans for block oriented AMs.
2037  * ----------------------------------------------------------------------------
2038  */
2039 
2042  ParallelTableScanDesc pscan);
2044  ParallelTableScanDesc pscan);
2046  ParallelBlockTableScanWorker pbscanwork,
2049  ParallelBlockTableScanWorker pbscanwork,
2051 
2052 
2053 /* ----------------------------------------------------------------------------
2054  * Helper functions to implement relation sizing for block oriented AMs.
2055  * ----------------------------------------------------------------------------
2056  */
2057 
2058 extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2060  int32 *attr_widths,
2061  BlockNumber *pages,
2062  double *tuples,
2063  double *allvisfrac,
2064  Size overhead_bytes_per_tuple,
2065  Size usable_bytes_per_page);
2066 
2067 /* ----------------------------------------------------------------------------
2068  * Functions in tableamapi.c
2069  * ----------------------------------------------------------------------------
2070  */
2071 
2072 extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2073 extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2074 extern bool check_default_table_access_method(char **newval, void **extra,
2075  GucSource source);
2076 
2077 #endif /* TABLEAM_H */
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:156
unsigned int uint32
Definition: c.h:441
#define PGDLLIMPORT
Definition: c.h:1331
signed short int16
Definition: c.h:428
signed int int32
Definition: c.h:429
TransactionId MultiXactId
Definition: c.h:597
unsigned char bool
Definition: c.h:391
#define unlikely(x)
Definition: c.h:273
unsigned char uint8
Definition: c.h:439
uint32 CommandId
Definition: c.h:601
uint32 TransactionId
Definition: c.h:587
size_t Size
Definition: c.h:540
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define newval
GucSource
Definition: guc.h:105
Assert(fmt[strlen(fmt) - 1] !='\n')
LockWaitPolicy
Definition: lockoptions.h:37
LockTupleMode
Definition: lockoptions.h:50
NodeTag
Definition: nodes.h:27
uint16 OffsetNumber
Definition: off.h:24
static PgChecksumMode mode
Definition: pg_checksums.c:65
const void * data
static char ** options
static rewind_source * source
Definition: pg_rewind.c:81
int progress
Definition: pgbench.c:274
uintptr_t Datum
Definition: postgres.h:411
unsigned int Oid
Definition: postgres_ext.h:31
#define RelationGetRelid(relation)
Definition: rel.h:489
ForkNumber
Definition: relpath.h:41
struct TableScanDescData * TableScanDesc
Definition: relscan.h:52
ScanDirection
Definition: sdir.h:23
Definition: pg_list.h:51
const struct TableAmRoutine * rd_tableam
Definition: rel.h:185
bool traversed
Definition: tableam.h:129
TransactionId xmax
Definition: tableam.h:127
CommandId cmax
Definition: tableam.h:128
ItemPointerData ctid
Definition: tableam.h:126
TM_IndexStatus * status
Definition: tableam.h:231
int bottomupfreespace
Definition: tableam.h:226
Relation irel
Definition: tableam.h:223
TM_IndexDelete * deltids
Definition: tableam.h:230
BlockNumber iblknum
Definition: tableam.h:224
ItemPointerData tid
Definition: tableam.h:189
bool knowndeletable
Definition: tableam.h:196
bool promising
Definition: tableam.h:199
int16 freespace
Definition: tableam.h:200
OffsetNumber idxoffnum
Definition: tableam.h:195
Size(* parallelscan_initialize)(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:376
bool(* scan_sample_next_tuple)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:849
void(* index_fetch_reset)(struct IndexFetchTableData *data)
Definition: tableam.h:406
void(* tuple_complete_speculative)(Relation rel, TupleTableSlot *slot, uint32 specToken, bool succeeded)
Definition: tableam.h:500
bool(* scan_bitmap_next_tuple)(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:804
void(* parallelscan_reinitialize)(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:383
void(* tuple_get_latest_tid)(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:465
struct IndexFetchTableData *(* index_fetch_begin)(Relation rel)
Definition: tableam.h:400
bool(* scan_analyze_next_block)(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
Definition: tableam.h:651
bool(* scan_getnextslot_tidrange)(TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:356
void(* relation_estimate_size)(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: tableam.h:749
double(* index_build_range_scan)(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:670
TableScanDesc(* scan_begin)(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, uint32 flags)
Definition: tableam.h:304
bool(* relation_needs_toast_table)(Relation rel)
Definition: tableam.h:713
bool(* tuple_tid_valid)(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:458
void(* multi_insert)(Relation rel, TupleTableSlot **slots, int nslots, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:506
TM_Result(* tuple_update)(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, bool *update_indexes)
Definition: tableam.h:520
void(* scan_end)(TableScanDesc scan)
Definition: tableam.h:314
uint64(* relation_size)(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:703
TM_Result(* tuple_lock)(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
Definition: tableam.h:532
bool(* scan_sample_next_block)(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:833
void(* relation_copy_for_cluster)(Relation NewTable, Relation OldTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:604
void(* relation_nontransactional_truncate)(Relation rel)
Definition: tableam.h:592
void(* tuple_insert)(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:487
void(* scan_rescan)(TableScanDesc scan, struct ScanKeyData *key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:320
bool(* tuple_fetch_row_version)(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
Definition: tableam.h:450
void(* relation_fetch_toast_slice)(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: tableam.h:727
void(* relation_vacuum)(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:630
Oid(* relation_toast_am)(Relation rel)
Definition: tableam.h:720
void(* relation_set_new_filenode)(Relation rel, const RelFileNode *newrnode, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
Definition: tableam.h:578
Size(* parallelscan_estimate)(Relation rel)
Definition: tableam.h:369
void(* scan_set_tidrange)(TableScanDesc scan, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:348
void(* finish_bulk_insert)(Relation rel, int options)
Definition: tableam.h:554
bool(* scan_analyze_next_tuple)(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
Definition: tableam.h:663
TransactionId(* index_delete_tuples)(Relation rel, TM_IndexDeleteOp *delstate)
Definition: tableam.h:477
void(* index_fetch_end)(struct IndexFetchTableData *data)
Definition: tableam.h:411
bool(* index_fetch_tuple)(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
Definition: tableam.h:433
void(* tuple_insert_speculative)(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate, uint32 specToken)
Definition: tableam.h:492
TM_Result(* tuple_delete)(Relation rel, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: tableam.h:510
bool(* scan_bitmap_next_block)(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:790
NodeTag type
Definition: tableam.h:269
void(* index_validate_scan)(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, Snapshot snapshot, struct ValidateIndexState *state)
Definition: tableam.h:683
bool(* scan_getnextslot)(TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:327
bool(* tuple_satisfies_snapshot)(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Definition: tableam.h:472
void(* relation_copy_data)(Relation rel, const RelFileNode *newrnode)
Definition: tableam.h:600
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
Oid tts_tableOid
Definition: tuptable.h:131
Definition: type.h:90
Definition: regguts.h:318
Definition: c.h:622
static void table_relation_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: tableam.h:1888
PGDLLIMPORT char * default_table_access_method
Definition: tableam.c:48
ScanOptions
Definition: tableam.h:46
@ SO_ALLOW_STRAT
Definition: tableam.h:57
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:52
@ SO_TYPE_ANALYZE
Definition: tableam.h:53
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64
@ SO_TYPE_TIDSCAN
Definition: tableam.h:51
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_ALLOW_SYNC
Definition: tableam.h:59
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48
@ SO_TYPE_BITMAPSCAN
Definition: tableam.h:49
static void table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:1080
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:885
static bool table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
Definition: tableam.h:1692
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:993
TableScanDesc table_beginscan_parallel(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:175
static bool table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
Definition: tableam.h:1710
bool table_index_fetch_tuple_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead)
Definition: tableam.c:219
PGDLLIMPORT bool synchronize_seqscans
Definition: tableam.c:49
Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:399
struct TM_IndexDelete TM_IndexDelete
static IndexFetchTableData * table_index_fetch_begin(Relation rel)
Definition: tableam.h:1163
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:930
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:1648
static void table_index_fetch_reset(struct IndexFetchTableData *scan)
Definition: tableam.h:1173
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1840
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_WouldBlock
Definition: tableam.h:102
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, struct TBMIterateResult *tbmres, TupleTableSlot *slot)
Definition: tableam.h:1957
static TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:946
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:1002
static TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
Definition: tableam.h:1551
void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
Definition: tableam.c:287
static bool table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:1285
static void table_index_validate_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, Snapshot snapshot, struct ValidateIndexState *state)
Definition: tableam.h:1811
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1780
void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanWorker pbscanwork, ParallelBlockTableScanDesc pbscan)
Definition: tableam.c:432
static bool table_relation_needs_toast_table(Relation rel)
Definition: tableam.h:1849
bool check_default_table_access_method(char **newval, void **extra, GucSource source)
Definition: tableamapi.c:111
struct TM_IndexStatus TM_IndexStatus
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:909
static void table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot, uint32 specToken, bool succeeded)
Definition: tableam.h:1406
static TableScanDesc table_beginscan_tidrange(Relation rel, Snapshot snapshot, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:1059
static void table_index_fetch_end(struct IndexFetchTableData *scan)
Definition: tableam.h:1182
static TableScanDesc table_beginscan_analyze(Relation rel)
Definition: tableam.h:982
static TM_Result table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: tableam.h:1462
void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid)
Definition: tableam.c:246
static bool table_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
Definition: tableam.h:1212
static void table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:1017
static void table_relation_vacuum(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:1677
void simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
Definition: tableam.c:301
const TableAmRoutine * GetTableAmRoutine(Oid amhandler)
Definition: tableamapi.c:34
const TableAmRoutine * GetHeapamTableAmRoutine(void)
static void table_relation_set_new_filenode(Relation rel, const RelFileNode *newrnode, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
Definition: tableam.h:1592
struct TM_FailureData TM_FailureData
static void table_finish_bulk_insert(Relation rel, int options)
Definition: tableam.h:1566
const TupleTableSlotOps * table_slot_callbacks(Relation rel)
Definition: tableam.c:58
void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:417
void(* IndexBuildCallback)(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: tableam.h:248
TableScanDesc table_beginscan_catalog(Relation rel, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:112
uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.c:627
static void table_relation_copy_data(Relation rel, const RelFileNode *newrnode)
Definition: tableam.h:1621
static void table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:1428
static bool table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1096
static Oid table_relation_toast_am(Relation rel)
Definition: tableam.h:1859
static void table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:1373
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:140
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1747
static TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, bool *update_indexes)
Definition: tableam.h:1506
static bool table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:1984
struct TM_IndexDeleteOp TM_IndexDeleteOp
Size table_block_parallelscan_estimate(Relation rel)
Definition: tableam.c:393
static void table_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: tableam.h:1909
struct TableAmRoutine TableAmRoutine
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1034
static void table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate, uint32 specToken)
Definition: tableam.h:1392
static bool table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Definition: tableam.h:1306
static TransactionId table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
Definition: tableam.h:1327
static bool table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:2006
static void table_relation_nontransactional_truncate(Relation rel)
Definition: tableam.h:1609
void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, bool *update_indexes)
Definition: tableam.c:346
static bool table_scan_bitmap_next_block(TableScanDesc scan, struct TBMIterateResult *tbmres)
Definition: tableam.h:1933
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition: tableam.c:155
TupleTableSlot * table_slot_create(Relation rel, List **reglist)
Definition: tableam.c:91
static bool table_tuple_fetch_row_version(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
Definition: tableam.h:1259
static void table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:1145
static TableScanDesc table_beginscan_tid(Relation rel, Snapshot snapshot)
Definition: tableam.h:969
void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
Definition: tableam.c:124
BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanWorker pbscanwork, ParallelBlockTableScanDesc pbscan)
Definition: tableam.c:502
void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac, Size overhead_bytes_per_tuple, Size usable_bytes_per_page)
Definition: tableam.c:664
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool bsysscan
Definition: xact.c:99
TransactionId CheckXidAlive
Definition: xact.c:98