PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
tableam.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * tableam.h
4 * POSTGRES table access method definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/access/tableam.h
11 *
12 * NOTES
13 * See tableam.sgml for higher level documentation.
14 *
15 *-------------------------------------------------------------------------
16 */
17#ifndef TABLEAM_H
18#define TABLEAM_H
19
20#include "access/relscan.h"
21#include "access/sdir.h"
22#include "access/xact.h"
23#include "executor/tuptable.h"
24#include "storage/read_stream.h"
25#include "utils/rel.h"
26#include "utils/snapshot.h"
27
28
/*
 * Table access method used when none is specified explicitly
 * (e.g. CREATE TABLE without a USING clause).
 */
#define DEFAULT_TABLE_ACCESS_METHOD "heap"
30
31/* GUCs */
34
35
37struct IndexInfo;
38struct SampleScanState;
39struct VacuumParams;
41
42/*
43 * Bitmask values for the flags argument to the scan_begin callback.
44 */
45typedef enum ScanOptions
46{
47 /* one of SO_TYPE_* may be specified */
54
55 /* several of SO_ALLOW_* may be specified */
56 /* allow or disallow use of access strategy */
58 /* report location to syncscan logic? */
59 SO_ALLOW_SYNC = 1 << 7,
60 /* verify visibility page-at-a-time? */
62
63 /* unregister snapshot at scan end? */
66
67/*
68 * Result codes for table_{update,delete,lock_tuple}, and for visibility
69 * routines inside table AMs.
70 */
71typedef enum TM_Result
72{
73 /*
74 * Signals that the action succeeded (i.e. update/delete performed, lock
75 * was acquired)
76 */
78
79 /* The affected tuple wasn't visible to the relevant snapshot */
81
82 /* The affected tuple was already modified by the calling backend */
84
85 /*
86 * The affected tuple was updated by another transaction. This includes
87 * the case where tuple was moved to another partition.
88 */
90
91 /* The affected tuple was deleted by another transaction */
93
94 /*
95 * The affected tuple is currently being modified by another session. This
96 * will only be returned if table_(update/delete/lock_tuple) are
97 * instructed not to wait.
98 */
100
101 /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
104
105/*
106 * Result codes for table_update(..., update_indexes*..).
107 * Used to determine which indexes to update.
108 */
110{
111 /* No indexed columns were updated (incl. TID addressing of tuple) */
113
114 /* A non-summarizing indexed column was updated, or the TID has changed */
116
117 /* Only summarized columns were updated, TID is unchanged */
120
121/*
122 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
123 * because the target tuple is already outdated, they fill in this struct to
124 * provide information to the caller about what happened.
125 *
126 * ctid is the target's ctid link: it is the same as the target's TID if the
127 * target was deleted, or the location of the replacement tuple if the target
128 * was updated.
129 *
130 * xmax is the outdating transaction's XID. If the caller wants to visit the
131 * replacement tuple, it must check that this matches before believing the
132 * replacement is really a match. This is InvalidTransactionId if the target
133 * was !LP_NORMAL (expected only for a TID retrieved from syscache).
134 *
135 * cmax is the outdating command's CID, but only when the failure code is
136 * TM_SelfModified (i.e., something in the current transaction outdated the
137 * tuple); otherwise cmax is zero. (We make this restriction because
138 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
139 * transactions.)
140 */
141typedef struct TM_FailureData
142{
148
149/*
150 * State used when calling table_index_delete_tuples().
151 *
152 * Represents the status of table tuples, referenced by table TID and taken by
153 * index AM from index tuples. State consists of high level parameters of the
154 * deletion operation, plus two mutable palloc()'d arrays for information
155 * about the status of individual table tuples. These are conceptually one
156 * single array. Using two arrays keeps the TM_IndexDelete struct small,
157 * which makes sorting the first array (the deltids array) fast.
158 *
159 * Some index AM callers perform simple index tuple deletion (by specifying
160 * bottomup = false), and include only known-dead deltids. These known-dead
161 * entries are all marked knowndeletable = true directly (typically these are
162 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
163 *
164 * Callers that specify bottomup = true are "bottom-up index deletion"
165 * callers. The considerations for the tableam are more subtle with these
166 * callers because they ask the tableam to perform highly speculative work,
167 * and might only expect the tableam to check a small fraction of all entries.
168 * Caller is not allowed to specify knowndeletable = true for any entry
169 * because everything is highly speculative. Bottom-up caller provides
170 * context and hints to tableam -- see comments below for details on how index
171 * AMs and tableams should coordinate during bottom-up index deletion.
172 *
173 * Simple index deletion callers may ask the tableam to perform speculative
174 * work, too. This is a little like bottom-up deletion, but not too much.
175 * The tableam will only perform speculative work when it's practically free
176 * to do so in passing for simple deletion caller (while always performing
177 * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
178 * be deleted within index AM). This is the real reason why it's possible for
179 * simple index deletion caller to specify knowndeletable = false up front
180 * (this means "check if it's possible for me to delete corresponding index
181 * tuple when it's cheap to do so in passing"). The index AM should only
182 * include "extra" entries for index tuples whose TIDs point to a table block
183 * that tableam is expected to have to visit anyway (in the event of a block
184 * orientated tableam). The tableam isn't strictly obligated to check these
185 * "extra" TIDs, but a block-based AM should always manage to do so in
186 * practice.
187 *
188 * The final contents of the deltids/status arrays are interesting to callers
189 * that ask tableam to perform speculative work (i.e. when _any_ items have
190 * knowndeletable set to false up front). These index AM callers will
191 * naturally need to consult final state to determine which index tuples are
192 * in fact deletable.
193 *
194 * The index AM can keep track of which index tuple relates to which deltid by
195 * setting idxoffnum (and/or relying on each entry being uniquely identifiable
196 * using tid), which is important when the final contents of the array will
197 * need to be interpreted -- the array can shrink from initial size after
198 * tableam processing and/or have entries in a new order (tableam may sort
199 * deltids array for its own reasons). Bottom-up callers may find that final
200 * ndeltids is 0 on return from call to tableam, in which case no index tuple
201 * deletions are possible. Simple deletion callers can rely on any entries
202 * they know to be deletable appearing in the final array as deletable.
203 */
204typedef struct TM_IndexDelete
205{
206 ItemPointerData tid; /* table TID from index tuple */
207 int16 id; /* Offset into TM_IndexStatus array */
209
210typedef struct TM_IndexStatus
211{
212 OffsetNumber idxoffnum; /* Index am page offset number */
213 bool knowndeletable; /* Currently known to be deletable? */
214
215 /* Bottom-up index deletion specific fields follow */
216 bool promising; /* Promising (duplicate) index tuple? */
217 int16 freespace; /* Space freed in index if deleted */
219
220/*
221 * Index AM/tableam coordination is central to the design of bottom-up index
222 * deletion. The index AM provides hints about where to look to the tableam
223 * by marking some entries as "promising". Index AM does this with duplicate
224 * index tuples that are strongly suspected to be old versions left behind by
225 * UPDATEs that did not logically modify indexed values. Index AM may find it
226 * helpful to only mark entries as promising when they're thought to have been
227 * affected by such an UPDATE in the recent past.
228 *
229 * Bottom-up index deletion casts a wide net at first, usually by including
230 * all TIDs on a target index page. It is up to the tableam to worry about
231 * the cost of checking transaction status information. The tableam is in
232 * control, but needs careful guidance from the index AM. Index AM requests
233 * that bottomupfreespace target be met, while tableam measures progress
234 * towards that goal by tallying the per-entry freespace value for known
235 * deletable entries. (All !bottomup callers can just set these space related
236 * fields to zero.)
237 */
238typedef struct TM_IndexDeleteOp
239{
240 Relation irel; /* Target index relation */
241 BlockNumber iblknum; /* Index block number (for error reports) */
242 bool bottomup; /* Bottom-up (not simple) deletion? */
243 int bottomupfreespace; /* Bottom-up space target */
244
245 /* Mutable per-TID information follows (index AM initializes entries) */
246 int ndeltids; /* Current # of deltids/status elements */
250
/* "options" flag bits for table_tuple_insert */
/* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
#define TABLE_INSERT_SKIP_FSM 0x0002	/* don't consult FSM when finding space */
#define TABLE_INSERT_FROZEN 0x0004	/* insert tuples in an already-frozen state */
#define TABLE_INSERT_NO_LOGICAL 0x0008	/* don't emit data for logical decoding */
256
257/* flag bits for table_tuple_lock */
258/* Follow tuples whose update is in progress if lock modes don't conflict */
259#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
260/* Follow update chain and lock latest version of tuple */
261#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
262
263
264/* Typedef for callback function for table_index_build_scan */
266 ItemPointer tid,
267 Datum *values,
268 bool *isnull,
269 bool tupleIsAlive,
270 void *state);
271
272/*
273 * API struct for a table AM. Note this must be allocated in a
274 * server-lifetime manner, typically as a static const struct, which then gets
275 * returned by FormData_pg_am.amhandler.
276 *
277 * In most cases it's not appropriate to call the callbacks directly, use the
278 * table_* wrapper functions instead.
279 *
280 * GetTableAmRoutine() asserts that required callbacks are filled in, remember
281 * to update when adding a callback.
282 */
283typedef struct TableAmRoutine
284{
285 /* this must be set to T_TableAmRoutine */
287
288
289 /* ------------------------------------------------------------------------
290 * Slot related callbacks.
291 * ------------------------------------------------------------------------
292 */
293
294 /*
295 * Return slot implementation suitable for storing a tuple of this AM.
296 */
297 const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
298
299
300 /* ------------------------------------------------------------------------
301 * Table scan callbacks.
302 * ------------------------------------------------------------------------
303 */
304
305 /*
306 * Start a scan of `rel`. The callback has to return a TableScanDesc,
307 * which will typically be embedded in a larger, AM specific, struct.
308 *
309 * If nkeys != 0, the results need to be filtered by those scan keys.
310 *
311 * pscan, if not NULL, will have already been initialized with
312 * parallelscan_initialize(), and has to be for the same relation. Will
313 * only be set coming from table_beginscan_parallel().
314 *
315 * `flags` is a bitmask indicating the type of scan (ScanOptions's
316 * SO_TYPE_*, currently only one may be specified), options controlling
317 * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
318 * specified, an AM may ignore unsupported ones) and whether the snapshot
319 * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
320 */
322 Snapshot snapshot,
323 int nkeys, struct ScanKeyData *key,
325 uint32 flags);
326
327 /*
328 * Release resources and deallocate scan. If TableScanDesc.temp_snap,
329 * TableScanDesc.rs_snapshot needs to be unregistered.
330 */
331 void (*scan_end) (TableScanDesc scan);
332
333 /*
334 * Restart relation scan. If set_params is set to true, allow_{strat,
335 * sync, pagemode} (see scan_begin) changes should be taken into account.
336 */
337 void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
338 bool set_params, bool allow_strat,
339 bool allow_sync, bool allow_pagemode);
340
341 /*
342 * Return next tuple from `scan`, store in slot.
343 */
345 ScanDirection direction,
346 TupleTableSlot *slot);
347
348 /*-----------
349 * Optional functions to provide scanning for ranges of ItemPointers.
350 * Implementations must either provide both of these functions, or neither
351 * of them.
352 *
353 * Implementations of scan_set_tidrange must themselves handle
354 * ItemPointers of any value. i.e, they must handle each of the following:
355 *
356 * 1) mintid or maxtid is beyond the end of the table; and
357 * 2) mintid is above maxtid; and
358 * 3) item offset for mintid or maxtid is beyond the maximum offset
359 * allowed by the AM.
360 *
361 * Implementations can assume that scan_set_tidrange is always called
362 * before scan_getnextslot_tidrange or after scan_rescan and before any
363 * further calls to scan_getnextslot_tidrange.
364 */
366 ItemPointer mintid,
367 ItemPointer maxtid);
368
369 /*
370 * Return next tuple from `scan` that's in the range of TIDs defined by
371 * scan_set_tidrange.
372 */
374 ScanDirection direction,
375 TupleTableSlot *slot);
376
377 /* ------------------------------------------------------------------------
378 * Parallel table scan related functions.
379 * ------------------------------------------------------------------------
380 */
381
382 /*
383 * Estimate the size of shared memory needed for a parallel scan of this
384 * relation. The snapshot does not need to be accounted for.
385 */
387
388 /*
389 * Initialize ParallelTableScanDesc for a parallel scan of this relation.
390 * `pscan` will be sized according to parallelscan_estimate() for the same
391 * relation.
392 */
395
396 /*
397 * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
398 * when `pscan` was initialized by parallelscan_initialize.
399 */
402
403
404 /* ------------------------------------------------------------------------
405 * Index Scan Callbacks
406 * ------------------------------------------------------------------------
407 */
408
409 /*
410 * Prepare to fetch tuples from the relation, as needed when fetching
411 * tuples for an index scan. The callback has to return an
412 * IndexFetchTableData, which the AM will typically embed in a larger
413 * structure with additional information.
414 *
415 * Tuples for an index scan can then be fetched via index_fetch_tuple.
416 */
417 struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
418
419 /*
420 * Reset index fetch. Typically this will release cross index fetch
421 * resources held in IndexFetchTableData.
422 */
424
425 /*
426 * Release resources and deallocate index fetch.
427 */
429
430 /*
431 * Fetch tuple at `tid` into `slot`, after doing a visibility test
432 * according to `snapshot`. If a tuple was found and passed the visibility
433 * test, return true, false otherwise.
434 *
435 * Note that AMs that do not necessarily update indexes when indexed
436 * columns do not change, need to return the current/correct version of
437 * the tuple that is visible to the snapshot, even if the tid points to an
438 * older version of the tuple.
439 *
440 * *call_again is false on the first call to index_fetch_tuple for a tid.
441 * If there potentially is another tuple matching the tid, *call_again
442 * needs to be set to true by index_fetch_tuple, signaling to the caller
443 * that index_fetch_tuple should be called again for the same tid.
444 *
445 * *all_dead, if all_dead is not NULL, should be set to true by
446 * index_fetch_tuple iff it is guaranteed that no backend needs to see
447 * that tuple. Index AMs can use that to avoid returning that tid in
448 * future searches.
449 */
451 ItemPointer tid,
452 Snapshot snapshot,
453 TupleTableSlot *slot,
454 bool *call_again, bool *all_dead);
455
456
457 /* ------------------------------------------------------------------------
458 * Callbacks for non-modifying operations on individual tuples
459 * ------------------------------------------------------------------------
460 */
461
462 /*
463 * Fetch tuple at `tid` into `slot`, after doing a visibility test
464 * according to `snapshot`. If a tuple was found and passed the visibility
465 * test, returns true, false otherwise.
466 */
468 ItemPointer tid,
469 Snapshot snapshot,
470 TupleTableSlot *slot);
471
472 /*
473 * Is tid valid for a scan of this relation.
474 */
476 ItemPointer tid);
477
478 /*
479 * Return the latest version of the tuple at `tid`, by updating `tid` to
480 * point at the newest version.
481 */
483 ItemPointer tid);
484
485 /*
486 * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
487 * the appropriate type for the AM.
488 */
490 TupleTableSlot *slot,
491 Snapshot snapshot);
492
493 /* see table_index_delete_tuples() */
495 TM_IndexDeleteOp *delstate);
496
497
498 /* ------------------------------------------------------------------------
499 * Manipulations of physical tuples.
500 * ------------------------------------------------------------------------
501 */
502
503 /* see table_tuple_insert() for reference about parameters */
505 CommandId cid, int options,
506 struct BulkInsertStateData *bistate);
507
508 /* see table_tuple_insert_speculative() for reference about parameters */
510 TupleTableSlot *slot,
511 CommandId cid,
512 int options,
513 struct BulkInsertStateData *bistate,
514 uint32 specToken);
515
516 /* see table_tuple_complete_speculative() for reference about parameters */
518 TupleTableSlot *slot,
519 uint32 specToken,
520 bool succeeded);
521
522 /* see table_multi_insert() for reference about parameters */
523 void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
524 CommandId cid, int options, struct BulkInsertStateData *bistate);
525
526 /* see table_tuple_delete() for reference about parameters */
528 ItemPointer tid,
529 CommandId cid,
530 Snapshot snapshot,
531 Snapshot crosscheck,
532 bool wait,
533 TM_FailureData *tmfd,
534 bool changingPart);
535
536 /* see table_tuple_update() for reference about parameters */
538 ItemPointer otid,
539 TupleTableSlot *slot,
540 CommandId cid,
541 Snapshot snapshot,
542 Snapshot crosscheck,
543 bool wait,
544 TM_FailureData *tmfd,
545 LockTupleMode *lockmode,
546 TU_UpdateIndexes *update_indexes);
547
548 /* see table_tuple_lock() for reference about parameters */
550 ItemPointer tid,
551 Snapshot snapshot,
552 TupleTableSlot *slot,
553 CommandId cid,
555 LockWaitPolicy wait_policy,
556 uint8 flags,
557 TM_FailureData *tmfd);
558
559 /*
560 * Perform operations necessary to complete insertions made via
561 * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
562 * access methods ceased to use this.
563 *
564 * Typically callers of tuple_insert and multi_insert will just pass all
565 * the flags that apply to them, and each AM has to decide which of them
566 * make sense for it, and then only take actions in finish_bulk_insert for
567 * those flags, and ignore others.
568 *
569 * Optional callback.
570 */
572
573
574 /* ------------------------------------------------------------------------
575 * DDL related functionality.
576 * ------------------------------------------------------------------------
577 */
578
579 /*
580 * This callback needs to create new relation storage for `rel`, with
581 * appropriate durability behaviour for `persistence`.
582 *
583 * Note that only the subset of the relcache filled by
584 * RelationBuildLocalRelation() can be relied upon and that the relation's
585 * catalog entries will either not yet exist (new relation), or will still
586 * reference the old relfilelocator.
587 *
588 * As output *freezeXid, *minmulti must be set to the values appropriate
589 * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
590 * fields to be filled they can be set to InvalidTransactionId and
591 * InvalidMultiXactId, respectively.
592 *
593 * See also table_relation_set_new_filelocator().
594 */
596 const RelFileLocator *newrlocator,
597 char persistence,
598 TransactionId *freezeXid,
599 MultiXactId *minmulti);
600
601 /*
602 * This callback needs to remove all contents from `rel`'s current
603 * relfilelocator. No provisions for transactional behaviour need to be
604 * made. Often this can be implemented by truncating the underlying
605 * storage to its minimal size.
606 *
607 * See also table_relation_nontransactional_truncate().
608 */
610
611 /*
612 * See table_relation_copy_data().
613 *
614 * This can typically be implemented by directly copying the underlying
615 * storage, unless it contains references to the tablespace internally.
616 */
618 const RelFileLocator *newrlocator);
619
620 /* See table_relation_copy_for_cluster() */
622 Relation NewTable,
623 Relation OldIndex,
624 bool use_sort,
625 TransactionId OldestXmin,
626 TransactionId *xid_cutoff,
627 MultiXactId *multi_cutoff,
628 double *num_tuples,
629 double *tups_vacuumed,
630 double *tups_recently_dead);
631
632 /*
633 * React to VACUUM command on the relation. The VACUUM can be triggered by
634 * a user or by autovacuum. The specific actions performed by the AM will
635 * depend heavily on the individual AM.
636 *
637 * On entry a transaction is already established, and the relation is
638 * locked with a ShareUpdateExclusive lock.
639 *
640 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
641 * this routine, even if (for ANALYZE) it is part of the same VACUUM
642 * command.
643 *
644 * There probably, in the future, needs to be a separate callback to
645 * integrate with autovacuum's scheduling.
646 */
648 struct VacuumParams *params,
649 BufferAccessStrategy bstrategy);
650
651 /*
652 * Prepare to analyze block `blockno` of `scan`. The scan has been started
653 * with table_beginscan_analyze(). See also
654 * table_scan_analyze_next_block().
655 *
656 * The callback may acquire resources like locks that are held until
657 * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
658 * to hold a lock until all tuples on a block have been analyzed by
659 * scan_analyze_next_tuple.
660 *
661 * The callback can return false if the block is not suitable for
662 * sampling, e.g. because it's a metapage that could never contain tuples.
663 *
664 * XXX: This obviously is primarily suited for block-based AMs. It's not
665 * clear what a good interface for non block based AMs would be, so there
666 * isn't one yet.
667 */
669 ReadStream *stream);
670
671 /*
672 * See table_scan_analyze_next_tuple().
673 *
674 * Not every AM might have a meaningful concept of dead rows, in which
675 * case it's OK to not increment *deadrows - but note that that may
676 * influence autovacuum scheduling (see comment for relation_vacuum
677 * callback).
678 */
680 TransactionId OldestXmin,
681 double *liverows,
682 double *deadrows,
683 TupleTableSlot *slot);
684
685 /* see table_index_build_range_scan for reference about parameters */
686 double (*index_build_range_scan) (Relation table_rel,
687 Relation index_rel,
688 struct IndexInfo *index_info,
689 bool allow_sync,
690 bool anyvisible,
691 bool progress,
692 BlockNumber start_blockno,
693 BlockNumber numblocks,
695 void *callback_state,
696 TableScanDesc scan);
697
698 /* see table_index_validate_scan for reference about parameters */
699 void (*index_validate_scan) (Relation table_rel,
700 Relation index_rel,
701 struct IndexInfo *index_info,
702 Snapshot snapshot,
703 struct ValidateIndexState *state);
704
705
706 /* ------------------------------------------------------------------------
707 * Miscellaneous functions.
708 * ------------------------------------------------------------------------
709 */
710
711 /*
712 * See table_relation_size().
713 *
714 * Note that currently a few callers use the MAIN_FORKNUM size to figure
715 * out the range of potentially interesting blocks (brin, analyze). It's
716 * probable that we'll need to revise the interface for those at some
717 * point.
718 */
720
721
722 /*
723 * This callback should return true if the relation requires a TOAST table
724 * and false if it does not. It may wish to examine the relation's tuple
725 * descriptor before making a decision, but if it uses some other method
726 * of storing large values (or if it does not support them) it can simply
727 * return false.
728 */
730
731 /*
732 * This callback should return the OID of the table AM that implements
733 * TOAST tables for this AM. If the relation_needs_toast_table callback
734 * always returns false, this callback is not required.
735 */
737
738 /*
739 * This callback is invoked when detoasting a value stored in a toast
740 * table implemented by this AM. See table_relation_fetch_toast_slice()
741 * for more details.
742 */
743 void (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
744 int32 attrsize,
745 int32 sliceoffset,
746 int32 slicelength,
747 struct varlena *result);
748
749
750 /* ------------------------------------------------------------------------
751 * Planner related functions.
752 * ------------------------------------------------------------------------
753 */
754
755 /*
756 * See table_relation_estimate_size().
757 *
758 * While block oriented, it shouldn't be too hard for an AM that doesn't
759 * internally use blocks to convert into a usable representation.
760 *
761 * This differs from the relation_size callback by returning size
762 * estimates (both relation size and tuple count) for planning purposes,
763 * rather than returning a currently correct estimate.
764 */
765 void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
766 BlockNumber *pages, double *tuples,
767 double *allvisfrac);
768
769
770 /* ------------------------------------------------------------------------
771 * Executor related functions.
772 * ------------------------------------------------------------------------
773 */
774
775 /*
776 * Fetch the next tuple of a bitmap table scan into `slot` and return true
777 * if a visible tuple was found, false otherwise.
778 *
779 * `lossy_pages` is incremented if the bitmap is lossy for the selected
780 * page; otherwise, `exact_pages` is incremented. These are tracked for
781 * display in EXPLAIN ANALYZE output.
782 *
783 * Prefetching additional data from the bitmap is left to the table AM.
784 *
785 * This is an optional callback.
786 */
788 TupleTableSlot *slot,
789 bool *recheck,
790 uint64 *lossy_pages,
791 uint64 *exact_pages);
792
793 /*
794 * Prepare to fetch tuples from the next block in a sample scan. Return
795 * false if the sample scan is finished, true otherwise. `scan` was
796 * started via table_beginscan_sampling().
797 *
798 * Typically this will first determine the target block by calling the
799 * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
800 * perform a sequential scan over all blocks. The determined block is
801 * then typically read and pinned.
802 *
803 * As the TsmRoutine interface is block based, a block needs to be passed
804 * to NextSampleBlock(). If that's not appropriate for an AM, it
805 * internally needs to perform mapping between the internal and a block
806 * based representation.
807 *
808 * Note that it's not acceptable to hold deadlock prone resources such as
809 * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
810 * block - the tuple is likely to be returned to an upper query node, and
811 * the next call could be off a long while. Holding buffer pins and such
812 * is obviously OK.
813 *
814 * Currently it is required to implement this interface, as there's no
815 * alternative way (contrary e.g. to bitmap scans) to implement sample
816 * scans. If infeasible to implement, the AM may raise an error.
817 */
819 struct SampleScanState *scanstate);
820
821 /*
822 * This callback, only called after scan_sample_next_block has returned
823 * true, should determine the next tuple to be returned from the selected
824 * block using the TsmRoutine's NextSampleTuple() callback.
825 *
826 * The callback needs to perform visibility checks, and only return
827 * visible tuples. That obviously can mean calling NextSampleTuple()
828 * multiple times.
829 *
830 * The TsmRoutine interface assumes that there's a maximum offset on a
831 * given page, so if that doesn't apply to an AM, it needs to emulate that
832 * assumption somehow.
833 */
835 struct SampleScanState *scanstate,
836 TupleTableSlot *slot);
837
839
840
841/* ----------------------------------------------------------------------------
842 * Slot functions.
843 * ----------------------------------------------------------------------------
844 */
845
846/*
847 * Returns slot callbacks suitable for holding tuples of the appropriate type
848 * for the relation. Works for tables, views, foreign tables and partitioned
849 * tables.
850 */
851extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);
852
853/*
854 * Returns slot using the callbacks returned by table_slot_callbacks(), and
855 * registers it on *reglist.
856 */
857extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
858
859
860/* ----------------------------------------------------------------------------
861 * Table scan functions.
862 * ----------------------------------------------------------------------------
863 */
864
865/*
866 * Start a scan of `rel`. Returned tuples pass a visibility test of
867 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
868 */
869static inline TableScanDesc
871 int nkeys, struct ScanKeyData *key)
872{
873 uint32 flags = SO_TYPE_SEQSCAN |
875
876 return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
877}
878
879/*
880 * Like table_beginscan(), but for scanning catalog. It'll automatically use a
881 * snapshot appropriate for scanning catalog relations.
882 */
883extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
884 struct ScanKeyData *key);
885
886/*
887 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
888 * that lets the caller control whether a nondefault buffer access strategy
889 * can be used, and whether syncscan can be chosen (possibly resulting in the
890 * scan not starting from block zero). Both of these default to true with
891 * plain table_beginscan.
892 */
893static inline TableScanDesc
895 int nkeys, struct ScanKeyData *key,
896 bool allow_strat, bool allow_sync)
897{
899
900 if (allow_strat)
901 flags |= SO_ALLOW_STRAT;
902 if (allow_sync)
903 flags |= SO_ALLOW_SYNC;
904
905 return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
906}
907
908/*
909 * table_beginscan_bm is an alternative entry point for setting up a
910 * TableScanDesc for a bitmap heap scan. Although that scan technology is
911 * really quite unlike a standard seqscan, there is just enough commonality to
912 * make it worth using the same data structure.
913 */
914static inline TableScanDesc
916 int nkeys, struct ScanKeyData *key)
917{
919
920 return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
921 NULL, flags);
922}
923
924/*
925 * table_beginscan_sampling is an alternative entry point for setting up a
926 * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
927 * using the same data structure although the behavior is rather different.
928 * In addition to the options offered by table_beginscan_strat, this call
929 * also allows control of whether page-mode visibility checking is used.
930 */
931static inline TableScanDesc
933 int nkeys, struct ScanKeyData *key,
934 bool allow_strat, bool allow_sync,
935 bool allow_pagemode)
936{
938
939 if (allow_strat)
940 flags |= SO_ALLOW_STRAT;
941 if (allow_sync)
942 flags |= SO_ALLOW_SYNC;
943 if (allow_pagemode)
944 flags |= SO_ALLOW_PAGEMODE;
945
946 return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
947}
948
949/*
950 * table_beginscan_tid is an alternative entry point for setting up a
951 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
952 * the same data structure although the behavior is rather different.
953 */
954static inline TableScanDesc
956{
957 uint32 flags = SO_TYPE_TIDSCAN;
958
959 return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
960}
961
962/*
963 * table_beginscan_analyze is an alternative entry point for setting up a
964 * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
965 * the same data structure although the behavior is rather different.
966 */
967static inline TableScanDesc
969{
970 uint32 flags = SO_TYPE_ANALYZE;
971
972 return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
973}
974
975/*
976 * End relation scan.
977 */
978static inline void
980{
981 scan->rs_rd->rd_tableam->scan_end(scan);
982}
983
984/*
985 * Restart a relation scan.
986 */
987static inline void
989 struct ScanKeyData *key)
990{
991 scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
992}
993
994/*
995 * Restart a relation scan after changing params.
996 *
997 * This call allows changing the buffer strategy, syncscan, and pagemode
998 * options before starting a fresh scan. Note that although the actual use of
999 * syncscan might change (effectively, enabling or disabling reporting), the
1000 * previously selected startblock will be kept.
1001 */
1002static inline void
1004 bool allow_strat, bool allow_sync, bool allow_pagemode)
1005{
1006 scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
1007 allow_strat, allow_sync,
1008 allow_pagemode);
1009}
1010
1011/*
1012 * Return next tuple from `scan`, store in slot.
1013 */
1014static inline bool
1016{
1017 slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
1018
1019 /* We don't expect actual scans using NoMovementScanDirection */
1020 Assert(direction == ForwardScanDirection ||
1021 direction == BackwardScanDirection);
1022
1023 /*
1024 * We don't expect direct calls to table_scan_getnextslot with valid
1025 * CheckXidAlive for catalog or regular tables. See detailed comments in
1026 * xact.c where these variables are declared.
1027 */
1029 elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");
1030
1031 return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
1032}
1033
1034/* ----------------------------------------------------------------------------
1035 * TID Range scanning related functions.
1036 * ----------------------------------------------------------------------------
1037 */
1038
1039/*
1040 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
1041 * for a TID range scan.
1042 */
1043static inline TableScanDesc
1045 ItemPointer mintid,
1046 ItemPointer maxtid)
1047{
1048 TableScanDesc sscan;
1050
1051 sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
1052
1053 /* Set the range of TIDs to scan */
1054 sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
1055
1056 return sscan;
1057}
1058
1059/*
1060 * table_rescan_tidrange resets the scan position and sets the minimum and
1061 * maximum TID range to scan for a TableScanDesc created by
1062 * table_beginscan_tidrange.
1063 */
1064static inline void
1066 ItemPointer maxtid)
1067{
1068 /* Ensure table_beginscan_tidrange() was used. */
1069 Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
1070
1071 sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
1072 sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
1073}
1074
1075/*
1076 * Fetch the next tuple from `sscan` for a TID range scan created by
1077 * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
1078 * or returns false if no more tuples exist in the range.
1079 */
1080static inline bool
1082 TupleTableSlot *slot)
1083{
1084 /* Ensure table_beginscan_tidrange() was used. */
1085 Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
1086
1087 /* We don't expect actual scans using NoMovementScanDirection */
1088 Assert(direction == ForwardScanDirection ||
1089 direction == BackwardScanDirection);
1090
1091 return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
1092 direction,
1093 slot);
1094}
1095
1096
1097/* ----------------------------------------------------------------------------
1098 * Parallel table scan related functions.
1099 * ----------------------------------------------------------------------------
1100 */
1101
1102/*
1103 * Estimate the size of shared memory needed for a parallel scan of this
1104 * relation.
1105 */
1107
1108/*
1109 * Initialize ParallelTableScanDesc for a parallel scan of this
1110 * relation. `pscan` needs to be sized according to parallelscan_estimate()
1111 * for the same relation. Call this just once in the leader process; then,
1112 * individual workers attach via table_beginscan_parallel.
1113 */
1116 Snapshot snapshot);
1117
1118/*
1119 * Begin a parallel scan. `pscan` needs to have been initialized with
1120 * table_parallelscan_initialize(), for the same relation. The initialization
1121 * does not need to have happened in this backend.
1122 *
1123 * Caller must hold a suitable lock on the relation.
1124 */
1126 ParallelTableScanDesc pscan);
1127
1128/*
1129 * Restart a parallel scan. Call this in the leader process. Caller is
1130 * responsible for making sure that all workers have finished the scan
1131 * beforehand.
1132 */
1133static inline void
1135{
1136 rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
1137}
1138
1139
1140/* ----------------------------------------------------------------------------
1141 * Index scan related functions.
1142 * ----------------------------------------------------------------------------
1143 */
1144
1145/*
1146 * Prepare to fetch tuples from the relation, as needed when fetching tuples
1147 * for an index scan.
1148 *
1149 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
1150 */
1151static inline IndexFetchTableData *
1153{
1154 return rel->rd_tableam->index_fetch_begin(rel);
1155}
1156
1157/*
1158 * Reset index fetch. Typically this will release cross index fetch resources
1159 * held in IndexFetchTableData.
1160 */
1161static inline void
1163{
1164 scan->rel->rd_tableam->index_fetch_reset(scan);
1165}
1166
1167/*
1168 * Release resources and deallocate index fetch.
1169 */
1170static inline void
1172{
1173 scan->rel->rd_tableam->index_fetch_end(scan);
1174}
1175
1176/*
1177 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
1178 * a visibility test according to `snapshot`. If a tuple was found and passed
1179 * the visibility test, returns true, false otherwise. Note that *tid may be
1180 * modified when we return true (see later remarks on multiple row versions
1181 * reachable via a single index entry).
1182 *
1183 * *call_again needs to be false on the first call to table_index_fetch_tuple() for
1184 * a tid. If there potentially is another tuple matching the tid, *call_again
1185 * will be set to true, signaling that table_index_fetch_tuple() should be called
1186 * again for the same tid.
1187 *
1188 * *all_dead, if all_dead is not NULL, will be set to true by
1189 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
1190 * that tuple. Index AMs can use that to avoid returning that tid in future
1191 * searches.
1192 *
1193 * The difference between this function and table_tuple_fetch_row_version()
1194 * is that this function returns the currently visible version of a row if
1195 * the AM supports storing multiple row versions reachable via a single index
1196 * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
1197 * evaluates the tuple exactly at `tid`. Outside of index entry ->table tuple
1198 * lookups, table_tuple_fetch_row_version() is what's usually needed.
1199 */
1200static inline bool
1202 ItemPointer tid,
1203 Snapshot snapshot,
1204 TupleTableSlot *slot,
1205 bool *call_again, bool *all_dead)
1206{
1207 /*
1208 * We don't expect direct calls to table_index_fetch_tuple with valid
1209 * CheckXidAlive for catalog or regular tables. See detailed comments in
1210 * xact.c where these variables are declared.
1211 */
1213 elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
1214
1215 return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
1216 slot, call_again,
1217 all_dead);
1218}
1219
1220/*
1221 * This is a convenience wrapper around table_index_fetch_tuple() which
1222 * returns whether there are table tuple items corresponding to an index
1223 * entry. This likely is only useful to verify if there's a conflict in a
1224 * unique index.
1225 */
1227 ItemPointer tid,
1228 Snapshot snapshot,
1229 bool *all_dead);
1230
1231
1232/* ------------------------------------------------------------------------
1233 * Functions for non-modifying operations on individual tuples
1234 * ------------------------------------------------------------------------
1235 */
1236
1237
1238/*
1239 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
1240 * `snapshot`. If a tuple was found and passed the visibility test, returns
1241 * true, false otherwise.
1242 *
1243 * See table_index_fetch_tuple's comment about what the difference between
1244 * these functions is. It is correct to use this function outside of index
1245 * entry->table tuple lookups.
1246 */
1247static inline bool
1249 ItemPointer tid,
1250 Snapshot snapshot,
1251 TupleTableSlot *slot)
1252{
1253 /*
1254 * We don't expect direct calls to table_tuple_fetch_row_version with
1255 * valid CheckXidAlive for catalog or regular tables. See detailed
1256 * comments in xact.c where these variables are declared.
1257 */
1259 elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
1260
1261 return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
1262}
1263
1264/*
1265 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
1266 * mean that the pointed to row needs to exist or be visible, but that
1267 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
1268 * table_tuple_fetch_row_version()) should not error out if called with that
1269 * tid.
1270 *
1271 * `scan` needs to have been started via table_beginscan().
1272 */
1273static inline bool
1275{
1276 return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
1277}
1278
1279/*
1280 * Return the latest version of the tuple at `tid`, by updating `tid` to
1281 * point at the newest version.
1282 */
1284
1285/*
1286 * Return true iff tuple in slot satisfies the snapshot.
1287 *
1288 * This assumes the slot's tuple is valid, and of the appropriate type for the
1289 * AM.
1290 *
1291 * Some AMs might modify the data underlying the tuple as a side-effect. If so
1292 * they ought to mark the relevant buffer dirty.
1293 */
1294static inline bool
1296 Snapshot snapshot)
1297{
1298 return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
1299}
1300
1301/*
1302 * Determine which index tuples are safe to delete based on their table TID.
1303 *
1304 * Determines which entries from index AM caller's TM_IndexDeleteOp state
1305 * point to vacuumable table tuples. Entries that are found by tableam to be
1306 * vacuumable are naturally safe for index AM to delete, and so get directly
1307 * marked as deletable. See comments above TM_IndexDelete and comments above
1308 * TM_IndexDeleteOp for full details.
1309 *
1310 * Returns a snapshotConflictHorizon transaction ID that caller places in
1311 * its index deletion WAL record. This might be used during subsequent REDO
1312 * of the WAL record when in Hot Standby mode -- a recovery conflict for the
1313 * index deletion operation might be required on the standby.
1314 */
1315static inline TransactionId
1317{
1318 return rel->rd_tableam->index_delete_tuples(rel, delstate);
1319}
1320
1321
1322/* ----------------------------------------------------------------------------
1323 * Functions for manipulations of physical tuples.
1324 * ----------------------------------------------------------------------------
1325 */
1326
1327/*
1328 * Insert a tuple from a slot into table AM routine.
1329 *
1330 * The options bitmask allows the caller to specify options that may change the
1331 * behaviour of the AM. The AM will ignore options that it does not support.
1332 *
1333 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
1334 * free space in the relation. This can save some cycles when we know the
1335 * relation is new and doesn't contain useful amounts of free space.
1336 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
1337 * RelationGetBufferForTuple. See that method for more information.
1338 *
1339 * TABLE_INSERT_FROZEN should only be specified for inserts into
1340 * relation storage created during the current subtransaction and when
1341 * there are no prior snapshots or pre-existing portals open.
1342 * This causes rows to be frozen, which is an MVCC violation and
1343 * requires explicit options chosen by user.
1344 *
1345 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
1346 * information for the tuple. This should solely be used during table rewrites
1347 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
1348 * relation.
1349 *
1350 * Note that most of these options will be applied when inserting into the
1351 * heap's TOAST table, too, if the tuple requires any out-of-line data.
1352 *
1353 * The BulkInsertState object (if any; bistate can be NULL for default
1354 * behavior) is also just passed through to RelationGetBufferForTuple. If
1355 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
1356 *
1357 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
1358 * insertion. But note that any toasting of fields within the slot is NOT
1359 * reflected in the slots contents.
1360 */
1361static inline void
1363 int options, struct BulkInsertStateData *bistate)
1364{
1365 rel->rd_tableam->tuple_insert(rel, slot, cid, options,
1366 bistate);
1367}
1368
1369/*
1370 * Perform a "speculative insertion". These can be backed out afterwards
1371 * without aborting the whole transaction. Other sessions can wait for the
1372 * speculative insertion to be confirmed, turning it into a regular tuple, or
1373 * aborted, as if it never existed. Speculatively inserted tuples behave as
1374 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
1375 *
1376 * A transaction having performed a speculative insertion has to either abort,
1377 * or finish the speculative insertion with
1378 * table_tuple_complete_speculative(succeeded = ...).
1379 */
1380static inline void
1382 CommandId cid, int options,
1383 struct BulkInsertStateData *bistate,
1384 uint32 specToken)
1385{
1386 rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
1387 bistate, specToken);
1388}
1389
1390/*
1391 * Complete "speculative insertion" started in the same transaction. If
1392 * succeeded is true, the tuple is fully inserted, if false, it's removed.
1393 */
1394static inline void
1396 uint32 specToken, bool succeeded)
1397{
1398 rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
1399 succeeded);
1400}
1401
1402/*
1403 * Insert multiple tuples into a table.
1404 *
1405 * This is like table_tuple_insert(), but inserts multiple tuples in one
1406 * operation. That's often faster than calling table_tuple_insert() in a loop,
1407 * because e.g. the AM can reduce WAL logging and page locking overhead.
1408 *
1409 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
1410 * in `slots`, the parameters for table_multi_insert() are the same as for
1411 * table_tuple_insert().
1412 *
1413 * Note: this leaks memory into the current memory context. You can create a
1414 * temporary context before calling this, if that's a problem.
1415 */
1416static inline void
1418 CommandId cid, int options, struct BulkInsertStateData *bistate)
1419{
1420 rel->rd_tableam->multi_insert(rel, slots, nslots,
1421 cid, options, bistate);
1422}
1423
1424/*
1425 * Delete a tuple.
1426 *
1427 * NB: do not call this directly unless prepared to deal with
1428 * concurrent-update conditions. Use simple_table_tuple_delete instead.
1429 *
1430 * Input parameters:
1431 * relation - table to be modified (caller must hold suitable lock)
1432 * tid - TID of tuple to be deleted
1433 * cid - delete command ID (used for visibility test, and stored into
1434 * cmax if successful)
1435 * crosscheck - if not InvalidSnapshot, also check tuple against this
1436 * wait - true if should wait for any conflicting update to commit/abort
1437 * Output parameters:
1438 * tmfd - filled in failure cases (see below)
1439 * changingPart - true iff the tuple is being moved to another partition
1440 * table due to an update of the partition key. Otherwise, false.
1441 *
1442 * Normal, successful return value is TM_Ok, which means we did actually
1443 * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
1444 * TM_BeingModified (the last only possible if wait == false).
1445 *
1446 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
1447 * t_xmax, and, if possible, t_cmax. See comments for struct
1448 * TM_FailureData for additional info.
1449 */
1450static inline TM_Result
1452 Snapshot snapshot, Snapshot crosscheck, bool wait,
1453 TM_FailureData *tmfd, bool changingPart)
1454{
1455 return rel->rd_tableam->tuple_delete(rel, tid, cid,
1456 snapshot, crosscheck,
1457 wait, tmfd, changingPart);
1458}
1459
1460/*
1461 * Update a tuple.
1462 *
1463 * NB: do not call this directly unless you are prepared to deal with
1464 * concurrent-update conditions. Use simple_table_tuple_update instead.
1465 *
1466 * Input parameters:
1467 * relation - table to be modified (caller must hold suitable lock)
1468 * otid - TID of old tuple to be replaced
1469 * slot - newly constructed tuple data to store
1470 * cid - update command ID (used for visibility test, and stored into
1471 * cmax/cmin if successful)
1472 * crosscheck - if not InvalidSnapshot, also check old tuple against this
1473 * wait - true if should wait for any conflicting update to commit/abort
1474 * Output parameters:
1475 * tmfd - filled in failure cases (see below)
1476 * lockmode - filled with lock mode acquired on tuple
1477 * update_indexes - in success cases this is set to true if new index entries
1478 * are required for this tuple
1479 *
1480 * Normal, successful return value is TM_Ok, which means we did actually
1481 * update it. Failure return codes are TM_SelfModified, TM_Updated, and
1482 * TM_BeingModified (the last only possible if wait == false).
1483 *
1484 * On success, the slot's tts_tid and tts_tableOid are updated to match the new
1485 * stored tuple; in particular, slot->tts_tid is set to the TID where the
1486 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
1487 * update was done. However, any TOAST changes in the new tuple's
1488 * data are not reflected into *newtup.
1489 *
1490 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
1491 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
1492 * for additional info.
1493 */
1494static inline TM_Result
1496 CommandId cid, Snapshot snapshot, Snapshot crosscheck,
1497 bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
1498 TU_UpdateIndexes *update_indexes)
1499{
1500 return rel->rd_tableam->tuple_update(rel, otid, slot,
1501 cid, snapshot, crosscheck,
1502 wait, tmfd,
1503 lockmode, update_indexes);
1504}
1505
1506/*
1507 * Lock a tuple in the specified mode.
1508 *
1509 * Input parameters:
1510 * relation: relation containing tuple (caller must hold suitable lock)
1511 * tid: TID of tuple to lock
1512 * snapshot: snapshot to use for visibility determinations
1513 * cid: current command ID (used for visibility test, and stored into
1514 * tuple's cmax if lock is successful)
1515 * mode: lock mode desired
1516 * wait_policy: what to do if tuple lock is not available
1517 * flags:
1518 * If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
1519 * also lock descendant tuples if lock modes don't conflict.
1520 * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
1521 * latest version.
1522 *
1523 * Output parameters:
1524 * *slot: contains the target tuple
1525 * *tmfd: filled in failure cases (see below)
1526 *
1527 * Function result may be:
1528 * TM_Ok: lock was successfully acquired
1529 * TM_Invisible: lock failed because tuple was never visible to us
1530 * TM_SelfModified: lock failed because tuple updated by self
1531 * TM_Updated: lock failed because tuple updated by other xact
1532 * TM_Deleted: lock failed because tuple deleted by other xact
1533 * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
1534 *
1535 * In the failure cases other than TM_Invisible and TM_Deleted, the routine
1536 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
1537 * comments for struct TM_FailureData for additional info.
1538 */
1539static inline TM_Result
1542 LockWaitPolicy wait_policy, uint8 flags,
1543 TM_FailureData *tmfd)
1544{
1545 return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
1546 cid, mode, wait_policy,
1547 flags, tmfd);
1548}
1549
1550/*
1551 * Perform operations necessary to complete insertions made via
1552 * tuple_insert and multi_insert with a BulkInsertState specified.
1553 */
1554static inline void
1556{
1557 /* optional callback */
1558 if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
1560}
1561
1562
1563/* ------------------------------------------------------------------------
1564 * DDL related functionality.
1565 * ------------------------------------------------------------------------
1566 */
1567
1568/*
1569 * Create storage for `rel` in `newrlocator`, with persistence set to
1570 * `persistence`.
1571 *
1572 * This is used both during relation creation and various DDL operations to
1573 * create new rel storage that can be filled from scratch. When creating
1574 * new storage for an existing relfilelocator, this should be called before the
1575 * relcache entry has been updated.
1576 *
1577 * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
1578 * that pg_class.{relfrozenxid, relminmxid} have to be set to.
1579 */
1580static inline void
1582 const RelFileLocator *newrlocator,
1583 char persistence,
1584 TransactionId *freezeXid,
1585 MultiXactId *minmulti)
1586{
1587 rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
1588 persistence, freezeXid,
1589 minmulti);
1590}
1591
1592/*
1593 * Remove all table contents from `rel`, in a non-transactional manner.
1594 * Non-transactional meaning that there's no need to support rollbacks. This
1595 * commonly only is used to perform truncations for relation storage created in
1596 * the current transaction.
1597 */
1598static inline void
1600{
1602}
1603
1604/*
1605 * Copy data from `rel` into the new relfilelocator `newrlocator`. The new
1606 * relfilelocator may not have storage associated before this function is
1607 * called. This is only supposed to be used for low level operations like
1608 * changing a relation's tablespace.
1609 */
1610static inline void
1612{
1613 rel->rd_tableam->relation_copy_data(rel, newrlocator);
1614}
1615
1616/*
1617 * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
1618 * FULL.
1619 *
1620 * Additional Input parameters:
1621 * - use_sort - if true, the table contents are sorted appropriate for
1622 * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
1623 * in that index's order; if false and OldIndex is InvalidOid, no sorting is
1624 * performed
1625 * - OldIndex - see use_sort
1626 * - OldestXmin - computed by vacuum_get_cutoffs(), even when
1627 * not needed for the relation's AM
1628 * - *xid_cutoff - ditto
1629 * - *multi_cutoff - ditto
1630 *
1631 * Output parameters:
1632 * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
1633 * - *multi_cutoff - rel's new relminmxid value, may be invalid
1634 * - *tups_vacuumed - stats, for logging, if appropriate for AM
1635 * - *tups_recently_dead - stats, for logging, if appropriate for AM
1636 */
1637static inline void
1639 Relation OldIndex,
1640 bool use_sort,
1641 TransactionId OldestXmin,
1642 TransactionId *xid_cutoff,
1643 MultiXactId *multi_cutoff,
1644 double *num_tuples,
1645 double *tups_vacuumed,
1646 double *tups_recently_dead)
1647{
1648 OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
1649 use_sort, OldestXmin,
1650 xid_cutoff, multi_cutoff,
1651 num_tuples, tups_vacuumed,
1652 tups_recently_dead);
1653}
1654
1655/*
1656 * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
1657 * autovacuum. The specific actions performed by the AM will depend heavily on
1658 * the individual AM.
1659 *
1660 * On entry a transaction needs to already been established, and the
1661 * table is locked with a ShareUpdateExclusive lock.
1662 *
1663 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this
1664 * routine, even if (for ANALYZE) it is part of the same VACUUM command.
1665 */
1666static inline void
1668 BufferAccessStrategy bstrategy)
1669{
1670 rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
1671}
1672
1673/*
1674 * Prepare to analyze the next block in the read stream. The scan needs to
1675 * have been started with table_beginscan_analyze(). Note that this routine
1676 * might acquire resources like locks that are held until
1677 * table_scan_analyze_next_tuple() returns false.
1678 *
1679 * Returns false if block is unsuitable for sampling, true otherwise.
1680 */
1681static inline bool
1683{
1684 return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
1685}
1686
1687/*
1688 * Iterate over tuples in the block selected with
1689 * table_scan_analyze_next_block() (which needs to have returned true, and
1690 * this routine may not have returned false for the same block before). If a
1691 * tuple that's suitable for sampling is found, true is returned and a tuple
1692 * is stored in `slot`.
1693 *
1694 * *liverows and *deadrows are incremented according to the encountered
1695 * tuples.
1696 */
1697static inline bool
1699 double *liverows, double *deadrows,
1700 TupleTableSlot *slot)
1701{
1702 return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
1703 liverows, deadrows,
1704 slot);
1705}
1706
1707/*
1708 * table_index_build_scan - scan the table to find tuples to be indexed
1709 *
1710 * This is called back from an access-method-specific index build procedure
1711 * after the AM has done whatever setup it needs. The parent table relation
1712 * is scanned to find tuples that should be entered into the index. Each
1713 * such tuple is passed to the AM's callback routine, which does the right
1714 * things to add it to the new index. After we return, the AM's index
1715 * build procedure does whatever cleanup it needs.
1716 *
1717 * The total count of live tuples is returned. This is for updating pg_class
1718 * statistics. (It's annoying not to be able to do that here, but we want to
1719 * merge that update with others; see index_update_stats.) Note that the
1720 * index AM itself must keep track of the number of index tuples; we don't do
1721 * so here because the AM might reject some of the tuples for its own reasons,
1722 * such as being unable to store NULLs.
1723 *
1724 * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
1725 * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
1726 *
1727 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
1728 * any potentially broken HOT chains. Currently, we set this if there are any
1729 * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
1730 * very hard to detect whether they're really incompatible with the chain tip.
1731 * This only really makes sense for heap AM, it might need to be generalized
1732 * for other AMs later.
1733 */
1734static inline double
1736 Relation index_rel,
1737 struct IndexInfo *index_info,
1738 bool allow_sync,
1739 bool progress,
1741 void *callback_state,
1742 TableScanDesc scan)
1743{
1744 return table_rel->rd_tableam->index_build_range_scan(table_rel,
1745 index_rel,
1746 index_info,
1747 allow_sync,
1748 false,
1749 progress,
1750 0,
1752 callback,
1753 callback_state,
1754 scan);
1755}
1756
1757/*
1758 * As table_index_build_scan(), except that instead of scanning the complete
1759 * table, only the given number of blocks are scanned. Scan to end-of-rel can
1760 * be signaled by passing InvalidBlockNumber as numblocks. Note that
1761 * restricting the range to scan cannot be done when requesting syncscan.
1762 *
1763 * When "anyvisible" mode is requested, all tuples visible to any transaction
1764 * are indexed and counted as live, including those inserted or deleted by
1765 * transactions that are still in progress.
1766 */
1767static inline double
1769 Relation index_rel,
1770 struct IndexInfo *index_info,
1771 bool allow_sync,
1772 bool anyvisible,
1773 bool progress,
1774 BlockNumber start_blockno,
1775 BlockNumber numblocks,
1777 void *callback_state,
1778 TableScanDesc scan)
1779{
1780 return table_rel->rd_tableam->index_build_range_scan(table_rel,
1781 index_rel,
1782 index_info,
1783 allow_sync,
1784 anyvisible,
1785 progress,
1786 start_blockno,
1787 numblocks,
1788 callback,
1789 callback_state,
1790 scan);
1791}
1792
1793/*
1794 * table_index_validate_scan - second table scan for concurrent index build
1795 *
1796 * See validate_index() for an explanation.
1797 */
1798static inline void
1800 Relation index_rel,
1801 struct IndexInfo *index_info,
1802 Snapshot snapshot,
1803 struct ValidateIndexState *state)
1804{
1805 table_rel->rd_tableam->index_validate_scan(table_rel,
1806 index_rel,
1807 index_info,
1808 snapshot,
1809 state);
1810}
1811
1812
1813/* ----------------------------------------------------------------------------
1814 * Miscellaneous functionality
1815 * ----------------------------------------------------------------------------
1816 */
1817
1818/*
1819 * Return the current size of `rel` in bytes. If `forkNumber` is
1820 * InvalidForkNumber, return the relation's overall size, otherwise the size
1821 * for the indicated fork.
1822 *
1823 * Note that the overall size might not be the equivalent of the sum of sizes
1824 * for the individual forks for some AMs, e.g. because the AMs storage does
1825 * not neatly map onto the builtin types of forks.
1826 */
1827static inline uint64
1829{
1830 return rel->rd_tableam->relation_size(rel, forkNumber);
1831}
1832
1833/*
1834 * table_relation_needs_toast_table - does this relation need a toast table?
1835 */
1836static inline bool
1838{
1839 return rel->rd_tableam->relation_needs_toast_table(rel);
1840}
1841
1842/*
1843 * Return the OID of the AM that should be used to implement the TOAST table
1844 * for this relation.
1845 */
1846static inline Oid
1848{
1849 return rel->rd_tableam->relation_toast_am(rel);
1850}
1851
1852/*
1853 * Fetch all or part of a TOAST value from a TOAST table.
1854 *
1855 * If this AM is never used to implement a TOAST table, then this callback
1856 * is not needed. But, if toasted values are ever stored in a table of this
1857 * type, then you will need this callback.
1858 *
1859 * toastrel is the relation in which the toasted value is stored.
1860 *
1861 * valueid identifies which toast value is to be fetched. For the heap,
1862 * this corresponds to the values stored in the chunk_id column.
1863 *
1864 * attrsize is the total size of the toast value to be fetched.
1865 *
1866 * sliceoffset is the offset within the toast value of the first byte that
1867 * should be fetched.
1868 *
1869 * slicelength is the number of bytes from the toast value that should be
1870 * fetched.
1871 *
1872 * result is caller-allocated space into which the fetched bytes should be
1873 * stored.
1874 */
1875static inline void
1877 int32 attrsize, int32 sliceoffset,
1878 int32 slicelength, struct varlena *result)
1879{
1880 toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
1881 attrsize,
1882 sliceoffset, slicelength,
1883 result);
1884}
1885
1886
1887/* ----------------------------------------------------------------------------
1888 * Planner related functionality
1889 * ----------------------------------------------------------------------------
1890 */
1891
1892/*
1893 * Estimate the current size of the relation, as an AM specific workhorse for
1894 * estimate_rel_size(). Look there for an explanation of the parameters.
1895 */
1896static inline void
1898 BlockNumber *pages, double *tuples,
1899 double *allvisfrac)
1900{
1901 rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1902 allvisfrac);
1903}
1904
1905
1906/* ----------------------------------------------------------------------------
1907 * Executor related functionality
1908 * ----------------------------------------------------------------------------
1909 */
1910
1911/*
1912 * Fetch / check / return tuples as part of a bitmap table scan. `scan` needs
1913 * to have been started via table_beginscan_bm(). Fetch the next tuple of a
1914 * bitmap table scan into `slot` and return true if a visible tuple was found,
1915 * false otherwise.
1916 *
1917 * `recheck` is set by the table AM to indicate whether or not the tuple in
1918 * `slot` should be rechecked. Tuples from lossy pages will always need to be
1919 * rechecked, but some non-lossy pages' tuples may also require recheck.
1920 *
1921 * `lossy_pages` is incremented if the block's representation in the bitmap is
1922 * lossy; otherwise, `exact_pages` is incremented.
1923 */
1924static inline bool
1926 TupleTableSlot *slot,
1927 bool *recheck,
1928 uint64 *lossy_pages,
1929 uint64 *exact_pages)
1930{
1931 /*
1932 * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
1933 * CheckXidAlive for catalog or regular tables. See detailed comments in
1934 * xact.c where these variables are declared.
1935 */
1937 elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
1938
1939 return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
1940 slot,
1941 recheck,
1942 lossy_pages,
1943 exact_pages);
1944}
1945
1946/*
1947 * Prepare to fetch tuples from the next block in a sample scan. Returns false
1948 * if the sample scan is finished, true otherwise. `scan` needs to have been
1949 * started via table_beginscan_sampling().
1950 *
1951 * This will call the TsmRoutine's NextSampleBlock() callback if necessary
1952 * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
1953 * underlying relation.
1954 */
1955static inline bool
1957 struct SampleScanState *scanstate)
1958{
1959 /*
1960 * We don't expect direct calls to table_scan_sample_next_block with valid
1961 * CheckXidAlive for catalog or regular tables. See detailed comments in
1962 * xact.c where these variables are declared.
1963 */
1965 elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
1966 return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
1967}
1968
1969/*
1970 * Fetch the next sample tuple into `slot` and return true if a visible tuple
1971 * was found, false otherwise. table_scan_sample_next_block() needs to
1972 * previously have selected a block (i.e. returned true), and no previous
1973 * table_scan_sample_next_tuple() for the same block may have returned false.
1974 *
1975 * This will call the TsmRoutine's NextSampleTuple() callback.
1976 */
1977static inline bool
1979 struct SampleScanState *scanstate,
1980 TupleTableSlot *slot)
1981{
1982 /*
1983 * We don't expect direct calls to table_scan_sample_next_tuple with valid
1984 * CheckXidAlive for catalog or regular tables. See detailed comments in
1985 * xact.c where these variables are declared.
1986 */
1988 elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
1989 return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
1990 slot);
1991}
1992
1993
1994/* ----------------------------------------------------------------------------
1995 * Functions to make modifications a bit simpler.
1996 * ----------------------------------------------------------------------------
1997 */
1998
1999extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
2001 Snapshot snapshot);
2002extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
2003 TupleTableSlot *slot, Snapshot snapshot,
2004 TU_UpdateIndexes *update_indexes);
2005
2006
2007/* ----------------------------------------------------------------------------
2008 * Helper functions to implement parallel scans for block oriented AMs.
2009 * ----------------------------------------------------------------------------
2010 */
2011
2014 ParallelTableScanDesc pscan);
2016 ParallelTableScanDesc pscan);
2023
2024
2025/* ----------------------------------------------------------------------------
2026 * Helper functions to implement relation sizing for block oriented AMs.
2027 * ----------------------------------------------------------------------------
2028 */
2029
2030extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
2032 int32 *attr_widths,
2033 BlockNumber *pages,
2034 double *tuples,
2035 double *allvisfrac,
2036 Size overhead_bytes_per_tuple,
2037 Size usable_bytes_per_page);
2038
2039/* ----------------------------------------------------------------------------
2040 * Functions in tableamapi.c
2041 * ----------------------------------------------------------------------------
2042 */
2043
2044extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
2045
2046/* ----------------------------------------------------------------------------
2047 * Functions in heapam_handler.c
2048 * ----------------------------------------------------------------------------
2049 */
2050
2051extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
2052
2053#endif /* TABLEAM_H */
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define PGDLLIMPORT
Definition: c.h:1291
uint8_t uint8
Definition: c.h:500
TransactionId MultiXactId
Definition: c.h:633
int16_t int16
Definition: c.h:497
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
#define unlikely(x)
Definition: c.h:347
uint32_t uint32
Definition: c.h:502
uint32 CommandId
Definition: c.h:637
uint32 TransactionId
Definition: c.h:623
size_t Size
Definition: c.h:576
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
LockWaitPolicy
Definition: lockoptions.h:37
LockTupleMode
Definition: lockoptions.h:50
NodeTag
Definition: nodes.h:27
uint16 OffsetNumber
Definition: off.h:24
static PgChecksumMode mode
Definition: pg_checksums.c:55
const void * data
static char ** options
static int progress
Definition: pgbench.c:262
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:30
#define RelationGetRelid(relation)
Definition: rel.h:516
ForkNumber
Definition: relpath.h:56
struct TableScanDescData * TableScanDesc
Definition: relscan.h:69
ScanDirection
Definition: sdir.h:25
@ BackwardScanDirection
Definition: sdir.h:26
@ ForwardScanDirection
Definition: sdir.h:28
Definition: pg_list.h:54
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool traversed
Definition: tableam.h:146
TransactionId xmax
Definition: tableam.h:144
CommandId cmax
Definition: tableam.h:145
ItemPointerData ctid
Definition: tableam.h:143
TM_IndexStatus * status
Definition: tableam.h:248
int bottomupfreespace
Definition: tableam.h:243
Relation irel
Definition: tableam.h:240
TM_IndexDelete * deltids
Definition: tableam.h:247
BlockNumber iblknum
Definition: tableam.h:241
ItemPointerData tid
Definition: tableam.h:206
bool knowndeletable
Definition: tableam.h:213
bool promising
Definition: tableam.h:216
int16 freespace
Definition: tableam.h:217
OffsetNumber idxoffnum
Definition: tableam.h:212
Size(* parallelscan_initialize)(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:393
void(* relation_copy_data)(Relation rel, const RelFileLocator *newrlocator)
Definition: tableam.h:617
bool(* scan_sample_next_tuple)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:834
void(* index_fetch_reset)(struct IndexFetchTableData *data)
Definition: tableam.h:423
void(* tuple_complete_speculative)(Relation rel, TupleTableSlot *slot, uint32 specToken, bool succeeded)
Definition: tableam.h:517
void(* parallelscan_reinitialize)(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:400
void(* tuple_get_latest_tid)(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:482
void(* relation_copy_for_cluster)(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:621
bool(* scan_bitmap_next_tuple)(TableScanDesc scan, TupleTableSlot *slot, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Definition: tableam.h:787
bool(* scan_getnextslot_tidrange)(TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:373
void(* relation_estimate_size)(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: tableam.h:765
double(* index_build_range_scan)(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:686
TableScanDesc(* scan_begin)(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, ParallelTableScanDesc pscan, uint32 flags)
Definition: tableam.h:321
bool(* relation_needs_toast_table)(Relation rel)
Definition: tableam.h:729
bool(* tuple_tid_valid)(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:475
void(* multi_insert)(Relation rel, TupleTableSlot **slots, int nslots, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:523
void(* scan_end)(TableScanDesc scan)
Definition: tableam.h:331
uint64(* relation_size)(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:719
TM_Result(* tuple_lock)(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
Definition: tableam.h:549
bool(* scan_sample_next_block)(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:818
void(* relation_nontransactional_truncate)(Relation rel)
Definition: tableam.h:609
TM_Result(* tuple_update)(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: tableam.h:537
void(* tuple_insert)(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:504
void(* scan_rescan)(TableScanDesc scan, struct ScanKeyData *key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:337
bool(* tuple_fetch_row_version)(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
Definition: tableam.h:467
void(* relation_fetch_toast_slice)(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: tableam.h:743
void(* relation_vacuum)(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:647
Oid(* relation_toast_am)(Relation rel)
Definition: tableam.h:736
bool(* scan_analyze_next_block)(TableScanDesc scan, ReadStream *stream)
Definition: tableam.h:668
Size(* parallelscan_estimate)(Relation rel)
Definition: tableam.h:386
void(* relation_set_new_filelocator)(Relation rel, const RelFileLocator *newrlocator, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
Definition: tableam.h:595
void(* scan_set_tidrange)(TableScanDesc scan, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:365
struct IndexFetchTableData *(* index_fetch_begin)(Relation rel)
Definition: tableam.h:417
void(* finish_bulk_insert)(Relation rel, int options)
Definition: tableam.h:571
bool(* scan_analyze_next_tuple)(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
Definition: tableam.h:679
TransactionId(* index_delete_tuples)(Relation rel, TM_IndexDeleteOp *delstate)
Definition: tableam.h:494
void(* index_fetch_end)(struct IndexFetchTableData *data)
Definition: tableam.h:428
bool(* index_fetch_tuple)(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
Definition: tableam.h:450
void(* tuple_insert_speculative)(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate, uint32 specToken)
Definition: tableam.h:509
TM_Result(* tuple_delete)(Relation rel, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: tableam.h:527
NodeTag type
Definition: tableam.h:286
void(* index_validate_scan)(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, Snapshot snapshot, struct ValidateIndexState *state)
Definition: tableam.h:699
bool(* scan_getnextslot)(TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:344
bool(* tuple_satisfies_snapshot)(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Definition: tableam.h:489
Relation rs_rd
Definition: relscan.h:36
uint32 rs_flags
Definition: relscan.h:64
Oid tts_tableOid
Definition: tuptable.h:130
Definition: type.h:96
Definition: regguts.h:323
Definition: c.h:658
static void table_relation_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: tableam.h:1876
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
PGDLLIMPORT char * default_table_access_method
Definition: tableam.c:49
ScanOptions
Definition: tableam.h:46
@ SO_ALLOW_STRAT
Definition: tableam.h:57
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:52
@ SO_TYPE_ANALYZE
Definition: tableam.h:53
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64
@ SO_TYPE_TIDSCAN
Definition: tableam.h:51
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_ALLOW_SYNC
Definition: tableam.h:59
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48
@ SO_TYPE_BITMAPSCAN
Definition: tableam.h:49
static void table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:1065
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:870
TU_UpdateIndexes
Definition: tableam.h:110
@ TU_Summarizing
Definition: tableam.h:118
@ TU_All
Definition: tableam.h:115
@ TU_None
Definition: tableam.h:112
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:979
void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, TU_UpdateIndexes *update_indexes)
Definition: tableam.c:336
static bool table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
Definition: tableam.h:1698
bool table_index_fetch_tuple_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead)
Definition: tableam.c:209
PGDLLIMPORT bool synchronize_seqscans
Definition: tableam.c:50
Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:389
TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
Definition: tableam.c:166
struct TM_IndexDelete TM_IndexDelete
static TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:915
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:1638
static void table_index_fetch_reset(struct IndexFetchTableData *scan)
Definition: tableam.h:1162
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1828
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_WouldBlock
Definition: tableam.h:102
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
static bool table_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Definition: tableam.h:1925
static TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:932
static void table_rescan(TableScanDesc scan, struct ScanKeyData *key)
Definition: tableam.h:988
static TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
Definition: tableam.h:1540
void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
Definition: tableam.c:277
static bool table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
Definition: tableam.h:1274
static IndexFetchTableData * table_index_fetch_begin(Relation rel)
Definition: tableam.h:1152
static void table_index_validate_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, Snapshot snapshot, struct ValidateIndexState *state)
Definition: tableam.h:1799
static double table_index_build_range_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1768
void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanWorker pbscanwork, ParallelBlockTableScanDesc pbscan)
Definition: tableam.c:422
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:113
static bool table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
Definition: tableam.h:1682
static bool table_relation_needs_toast_table(Relation rel)
Definition: tableam.h:1837
struct TM_IndexStatus TM_IndexStatus
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:894
static void table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot, uint32 specToken, bool succeeded)
Definition: tableam.h:1395
static TableScanDesc table_beginscan_tidrange(Relation rel, Snapshot snapshot, ItemPointer mintid, ItemPointer maxtid)
Definition: tableam.h:1044
static TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: tableam.h:1495
static void table_index_fetch_end(struct IndexFetchTableData *scan)
Definition: tableam.h:1171
static TableScanDesc table_beginscan_analyze(Relation rel)
Definition: tableam.h:968
const TableAmRoutine * GetTableAmRoutine(Oid amhandler)
Definition: tableamapi.c:28
static TM_Result table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: tableam.h:1451
void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid)
Definition: tableam.c:236
static bool table_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
Definition: tableam.h:1201
static void table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: tableam.h:1003
static void table_relation_vacuum(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: tableam.h:1667
const TableAmRoutine * GetHeapamTableAmRoutine(void)
void simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
Definition: tableam.c:291
struct TM_FailureData TM_FailureData
static void table_finish_bulk_insert(Relation rel, int options)
Definition: tableam.h:1555
void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:407
void(* IndexBuildCallback)(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: tableam.h:265
uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.c:617
static void table_relation_set_new_filelocator(Relation rel, const RelFileLocator *newrlocator, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
Definition: tableam.h:1581
static void table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:1417
static bool table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1081
static Oid table_relation_toast_am(Relation rel)
Definition: tableam.h:1847
static void table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate)
Definition: tableam.h:1362
Size table_parallelscan_estimate(Relation rel, Snapshot snapshot)
Definition: tableam.c:131
static double table_index_build_scan(Relation table_rel, Relation index_rel, struct IndexInfo *index_info, bool allow_sync, bool progress, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
Definition: tableam.h:1735
static void table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
Definition: tableam.h:1611
static bool table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
Definition: tableam.h:1956
struct TM_IndexDeleteOp TM_IndexDeleteOp
Size table_block_parallelscan_estimate(Relation rel)
Definition: tableam.c:383
static void table_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
Definition: tableam.h:1897
struct TableAmRoutine TableAmRoutine
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1015
static void table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate, uint32 specToken)
Definition: tableam.h:1381
static bool table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Definition: tableam.h:1295
static TransactionId table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
Definition: tableam.h:1316
static bool table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
Definition: tableam.h:1978
static void table_relation_nontransactional_truncate(Relation rel)
Definition: tableam.h:1599
void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot)
Definition: tableam.c:146
static bool table_tuple_fetch_row_version(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
Definition: tableam.h:1248
static void table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.h:1134
static TableScanDesc table_beginscan_tid(Relation rel, Snapshot snapshot)
Definition: tableam.h:955
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition: tableam.c:59
BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanWorker pbscanwork, ParallelBlockTableScanDesc pbscan)
Definition: tableam.c:492
void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac, Size overhead_bytes_per_tuple, Size usable_bytes_per_page)
Definition: tableam.c:654
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
#define TransactionIdIsValid(xid)
Definition: transam.h:41
bool bsysscan
Definition: xact.c:100
TransactionId CheckXidAlive
Definition: xact.c:99