rewriteheap_8c_source.html

/*-------------------------------------------------------------------------

 *

 * rewriteheap.c

 *    Support functions to rewrite tables.

 *

 * These functions provide a facility to completely rewrite a heap, while

 * preserving visibility information and update chains.

 *

 * INTERFACE

 *

 * The caller is responsible for creating the new heap, all catalog

 * changes, supplying the tuples to be written to the new heap, and

 * rebuilding indexes.  The caller must hold AccessExclusiveLock on the

 * target table, because we assume no one else is writing into it.

 *

 * To use the facility:

 *

 * begin_heap_rewrite

 * while (fetch next tuple)

 * {

 *     if (tuple is dead)

 *         rewrite_heap_dead_tuple

 *     else

 *     {

 *         // do any transformations here if required

 *         rewrite_heap_tuple

 *     }

 * }

 * end_heap_rewrite

 *

 * The contents of the new relation shouldn't be relied on until after

 * end_heap_rewrite is called.

 *

 *

 * IMPLEMENTATION

 *

 * This would be a fairly trivial affair, except that we need to maintain

 * the ctid chains that link versions of an updated tuple together.

 * Since the newly stored tuples will have tids different from the original

 * ones, if we just copied t_ctid fields to the new table the links would

 * be wrong.  When we are required to copy a (presumably recently-dead or

 * delete-in-progress) tuple whose ctid doesn't point to itself, we have

 * to substitute the correct ctid instead.

 *

 * For each ctid reference from A -> B, we might encounter either A first

 * or B first.  (Note that a tuple in the middle of a chain is both A and B

 * of different pairs.)

 *

 * If we encounter A first, we'll store the tuple in the unresolved_tups

 * hash table. When we later encounter B, we remove A from the hash table,

 * fix the ctid to point to the new location of B, and insert both A and B

 * to the new heap.

 *

 * If we encounter B first, we can insert B to the new heap right away.

 * We then add an entry to the old_new_tid_map hash table showing B's

 * original tid (in the old heap) and new tid (in the new heap).

 * When we later encounter A, we get the new location of B from the table,

 * and can write A immediately with the correct ctid.

 *

 * Entries in the hash tables can be removed as soon as the later tuple

 * is encountered.  That helps to keep the memory usage down.  At the end,

 * both tables are usually empty; we should have encountered both A and B

 * of each pair.  However, it's possible for A to be RECENTLY_DEAD and B

 * entirely DEAD according to HeapTupleSatisfiesVacuum, because the test

 * for deadness using OldestXmin is not exact.  In such a case we might

 * encounter B first, and skip it, and find A later.  Then A would be added

 * to unresolved_tups, and stay there until end of the rewrite.  Since

 * this case is very unusual, we don't worry about the memory usage.

 *

 * Using in-memory hash tables means that we use some memory for each live

 * update chain in the table, from the time we find one end of the

 * reference until we find the other end.  That shouldn't be a problem in

 * practice, but if you do something like an UPDATE without a where-clause

 * on a large table, and then run CLUSTER in the same transaction, you

 * could run out of memory.  It doesn't seem worthwhile to add support for

 * spill-to-disk, as there shouldn't be that many RECENTLY_DEAD tuples in a

 * table under normal circumstances.  Furthermore, in the typical scenario

 * of CLUSTERing on an unchanging key column, we'll see all the versions

 * of a given tuple together anyway, and so the peak memory usage is only

 * proportional to the number of RECENTLY_DEAD versions of a single row, not

 * in the whole table.  Note that if we do fail halfway through a CLUSTER,

 * the old table is still valid, so failure is not catastrophic.

 *

 * We can't use the normal heap_insert function to insert into the new

 * heap, because heap_insert overwrites the visibility information.

 * We use a special-purpose raw_heap_insert function instead, which

 * is optimized for bulk inserting a lot of tuples, knowing that we have

 * exclusive access to the heap.  raw_heap_insert builds new pages in

 * local storage.  When a page is full, or at the end of the process,

 * we insert it to WAL as a single record and then write it to disk with

 * the bulk smgr writer.  Note, however, that any data sent to the new

 * heap's TOAST table will go through the normal bufmgr.

 *

 *

 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group

 * Portions Copyright (c) 1994-5, Regents of the University of California

 *

 * IDENTIFICATION

 *    src/backend/access/heap/rewriteheap.c

 *

 *-------------------------------------------------------------------------

 */

#include "postgres.h"


#include <unistd.h>


#include "access/heapam.h"

#include "access/heapam_xlog.h"

#include "access/heaptoast.h"

#include "access/rewriteheap.h"

#include "access/transam.h"

#include "access/xact.h"

#include "access/xloginsert.h"

#include "common/file_utils.h"

#include "lib/ilist.h"

#include "miscadmin.h"

#include "pgstat.h"

#include "replication/slot.h"

#include "storage/bufmgr.h"

#include "storage/bulk_write.h"

#include "storage/fd.h"

#include "storage/procarray.h"

#include "utils/memutils.h"

#include "utils/rel.h"


/*

 * State associated with a rewrite operation. This is opaque to the user

 * of the rewrite facility.

 */

typedef struct RewriteStateData

{

    Relation    rs_old_rel;     /* source heap */

    Relation    rs_new_rel;     /* destination heap */

    BulkWriteState *rs_bulkstate;   /* writer for the destination */

    BulkWriteBuffer rs_buffer;  /* page currently being built */

    BlockNumber rs_blockno;     /* block where page will go */

    bool        rs_logical_rewrite; /* do we need to do logical rewriting */

    TransactionId rs_oldest_xmin;   /* oldest xmin used by caller to determine

                                     * tuple visibility */

    TransactionId rs_freeze_xid;    /* Xid that will be used as freeze cutoff

                                     * point */

    TransactionId rs_logical_xmin;  /* Xid that will be used as cutoff point

                                     * for logical rewrites */

    MultiXactId rs_cutoff_multi;    /* MultiXactId that will be used as cutoff

                                     * point for multixacts */

    MemoryContext rs_cxt;       /* for hash tables and entries and tuples in

                                 * them */

    XLogRecPtr  rs_begin_lsn;   /* XLogInsertLsn when starting the rewrite */

    HTAB       *rs_unresolved_tups; /* unmatched A tuples */

    HTAB       *rs_old_new_tid_map; /* unmatched B tuples */

    HTAB       *rs_logical_mappings;    /* logical remapping files */

    uint32      rs_num_rewrite_mappings;    /* # in memory mappings */

}           RewriteStateData;


/*

 * The lookup keys for the hash tables are tuple TID and xmin (we must check

 * both to avoid false matches from dead tuples).  Beware that there is

 * probably some padding space in this struct; it must be zeroed out for

 * correct hashtable operation.

 */

typedef struct

{

    TransactionId xmin;         /* tuple xmin */

    ItemPointerData tid;        /* tuple location in old heap */

} TidHashKey;


/*

 * Entry structures for the hash tables

 */

typedef struct

{

    TidHashKey  key;            /* expected xmin/old location of B tuple */

    ItemPointerData old_tid;    /* A's location in the old heap */

    HeapTuple   tuple;          /* A's tuple contents */

} UnresolvedTupData;


typedef UnresolvedTupData *UnresolvedTup;


typedef struct

{

    TidHashKey  key;            /* actual xmin/old location of B tuple */

    ItemPointerData new_tid;    /* where we put it in the new heap */

} OldToNewMappingData;


typedef OldToNewMappingData *OldToNewMapping;


/*

 * In-Memory data for an xid that might need logical remapping entries

 * to be logged.

 */

typedef struct RewriteMappingFile

{

    TransactionId xid;          /* xid that might need to see the row */

    int         vfd;            /* fd of mappings file */

    off_t       off;            /* how far have we written yet */

    dclist_head mappings;       /* list of in-memory mappings */

    char        path[MAXPGPATH];    /* path, for error messages */

} RewriteMappingFile;


/*

 * A single In-Memory logical rewrite mapping, hanging off

 * RewriteMappingFile->mappings.

 */

typedef struct RewriteMappingDataEntry

{

    LogicalRewriteMappingData map;  /* map between old and new location of the

                                     * tuple */

    dlist_node  node;

} RewriteMappingDataEntry;


/* prototypes for internal functions */

static void raw_heap_insert(RewriteState state, HeapTuple tup);


/* internal logical remapping prototypes */

static void logical_begin_heap_rewrite(RewriteState state);

static void logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, HeapTuple new_tuple);

static void logical_end_heap_rewrite(RewriteState state);


/*

 * Begin a rewrite of a table

 *

 * old_heap     old, locked heap relation tuples will be read from

 * new_heap     new, locked heap relation to insert tuples to

 * oldest_xmin  xid used by the caller to determine which tuples are dead

 * freeze_xid   xid before which tuples will be frozen

 * cutoff_multi multixact before which multis will be removed

 *

 * Returns an opaque RewriteState, allocated in current memory context,

 * to be used in subsequent calls to the other functions.

 */

RewriteState

begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,

                   TransactionId freeze_xid, MultiXactId cutoff_multi)

{

    RewriteState state;

    MemoryContext rw_cxt;

    MemoryContext old_cxt;

    HASHCTL     hash_ctl;


    /*

     * To ease cleanup, make a separate context that will contain the

     * RewriteState struct itself plus all subsidiary data.

     */

    rw_cxt = AllocSetContextCreate(CurrentMemoryContext,

                                   "Table rewrite",

                                   ALLOCSET_DEFAULT_SIZES);

    old_cxt = MemoryContextSwitchTo(rw_cxt);


    /* Create and fill in the state struct */

    state = palloc0(sizeof(RewriteStateData));


    state->rs_old_rel = old_heap;

    state->rs_new_rel = new_heap;

    state->rs_buffer = NULL;

    /* new_heap needn't be empty, just locked */

    state->rs_blockno = RelationGetNumberOfBlocks(new_heap);

    state->rs_oldest_xmin = oldest_xmin;

    state->rs_freeze_xid = freeze_xid;

    state->rs_cutoff_multi = cutoff_multi;

    state->rs_cxt = rw_cxt;

    state->rs_bulkstate = smgr_bulk_start_rel(new_heap, MAIN_FORKNUM);


    /* Initialize hash tables used to track update chains */

    hash_ctl.keysize = sizeof(TidHashKey);

    hash_ctl.entrysize = sizeof(UnresolvedTupData);

    hash_ctl.hcxt = state->rs_cxt;


    state->rs_unresolved_tups =

        hash_create("Rewrite / Unresolved ctids",

                    128,        /* arbitrary initial size */

                    &hash_ctl,

                    HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);


    hash_ctl.entrysize = sizeof(OldToNewMappingData);


    state->rs_old_new_tid_map =

        hash_create("Rewrite / Old to new tid map",

                    128,        /* arbitrary initial size */

                    &hash_ctl,

                    HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);


    MemoryContextSwitchTo(old_cxt);


    logical_begin_heap_rewrite(state);


    return state;

}


/*

 * End a rewrite.

 *

 * state and any other resources are freed.

 */

void

end_heap_rewrite(RewriteState state)

{

    HASH_SEQ_STATUS seq_status;

    UnresolvedTup unresolved;


    /*

     * Write any remaining tuples in the UnresolvedTups table. If we have any

     * left, they should in fact be dead, but let's err on the safe side.

     */

    hash_seq_init(&seq_status, state->rs_unresolved_tups);


    while ((unresolved = hash_seq_search(&seq_status)) != NULL)

    {

        ItemPointerSetInvalid(&unresolved->tuple->t_data->t_ctid);

        raw_heap_insert(state, unresolved->tuple);

    }


    /* Write the last page, if any */

    if (state->rs_buffer)

    {

        smgr_bulk_write(state->rs_bulkstate, state->rs_blockno, state->rs_buffer, true);

        state->rs_buffer = NULL;

    }


    smgr_bulk_finish(state->rs_bulkstate);


    logical_end_heap_rewrite(state);


    /* Deleting the context frees everything */

    MemoryContextDelete(state->rs_cxt);

}


/*

 * Add a tuple to the new heap.

 *

 * Visibility information is copied from the original tuple, except that

 * we "freeze" very-old tuples.  Note that since we scribble on new_tuple,

 * it had better be temp storage not a pointer to the original tuple.

 *

 * state        opaque state as returned by begin_heap_rewrite

 * old_tuple    original tuple in the old heap

 * new_tuple    new, rewritten tuple to be inserted to new heap

 */

void

rewrite_heap_tuple(RewriteState state,

                   HeapTuple old_tuple, HeapTuple new_tuple)

{

    MemoryContext old_cxt;

    ItemPointerData old_tid;

    TidHashKey  hashkey;

    bool        found;

    bool        free_new;


    old_cxt = MemoryContextSwitchTo(state->rs_cxt);


    /*

     * Copy the original tuple's visibility information into new_tuple.

     *

     * XXX we might later need to copy some t_infomask2 bits, too? Right now,

     * we intentionally clear the HOT status bits.

     */

    memcpy(&new_tuple->t_data->t_choice.t_heap,

           &old_tuple->t_data->t_choice.t_heap,

           sizeof(HeapTupleFields));


    new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;

    new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;

    new_tuple->t_data->t_infomask |=

        old_tuple->t_data->t_infomask & HEAP_XACT_MASK;


    /*

     * While we have our hands on the tuple, we may as well freeze any

     * eligible xmin or xmax, so that future VACUUM effort can be saved.

     */

    heap_freeze_tuple(new_tuple->t_data,

                      state->rs_old_rel->rd_rel->relfrozenxid,

                      state->rs_old_rel->rd_rel->relminmxid,

                      state->rs_freeze_xid,

                      state->rs_cutoff_multi);


    /*

     * Invalid ctid means that ctid should point to the tuple itself. We'll

     * override it later if the tuple is part of an update chain.

     */

    ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);


    /*

     * If the tuple has been updated, check the old-to-new mapping hash table.

     */

    if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||

          HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&

        !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&

        !(ItemPointerEquals(&(old_tuple->t_self),

                            &(old_tuple->t_data->t_ctid))))

    {

        OldToNewMapping mapping;


        memset(&hashkey, 0, sizeof(hashkey));

        hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);

        hashkey.tid = old_tuple->t_data->t_ctid;


        mapping = (OldToNewMapping)

            hash_search(state->rs_old_new_tid_map, &hashkey,

                        HASH_FIND, NULL);


        if (mapping != NULL)

        {

            /*

             * We've already copied the tuple that t_ctid points to, so we can

             * set the ctid of this tuple to point to the new location, and

             * insert it right away.

             */

            new_tuple->t_data->t_ctid = mapping->new_tid;


            /* We don't need the mapping entry anymore */

            hash_search(state->rs_old_new_tid_map, &hashkey,

                        HASH_REMOVE, &found);

            Assert(found);

        }

        else

        {

            /*

             * We haven't seen the tuple t_ctid points to yet. Stash this

             * tuple into unresolved_tups to be written later.

             */

            UnresolvedTup unresolved;


            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,

                                     HASH_ENTER, &found);

            Assert(!found);


            unresolved->old_tid = old_tuple->t_self;

            unresolved->tuple = heap_copytuple(new_tuple);


            /*

             * We can't do anything more now, since we don't know where the

             * tuple will be written.

             */

            MemoryContextSwitchTo(old_cxt);

            return;

        }

    }


    /*

     * Now we will write the tuple, and then check to see if it is the B tuple

     * in any new or known pair.  When we resolve a known pair, we will be

     * able to write that pair's A tuple, and then we have to check if it

     * resolves some other pair.  Hence, we need a loop here.

     */

    old_tid = old_tuple->t_self;

    free_new = false;


    for (;;)

    {

        ItemPointerData new_tid;


        /* Insert the tuple and find out where it's put in new_heap */

        raw_heap_insert(state, new_tuple);

        new_tid = new_tuple->t_self;


        logical_rewrite_heap_tuple(state, old_tid, new_tuple);


        /*

         * If the tuple is the updated version of a row, and the prior version

         * wouldn't be DEAD yet, then we need to either resolve the prior

         * version (if it's waiting in rs_unresolved_tups), or make an entry

         * in rs_old_new_tid_map (so we can resolve it when we do see it). The

         * previous tuple's xmax would equal this one's xmin, so it's

         * RECENTLY_DEAD if and only if the xmin is not before OldestXmin.

         */

        if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&

            !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),

                                   state->rs_oldest_xmin))

        {

            /*

             * Okay, this is B in an update pair.  See if we've seen A.

             */

            UnresolvedTup unresolved;


            memset(&hashkey, 0, sizeof(hashkey));

            hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);

            hashkey.tid = old_tid;


            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,

                                     HASH_FIND, NULL);


            if (unresolved != NULL)

            {

                /*

                 * We have seen and memorized the previous tuple already. Now

                 * that we know where we inserted the tuple its t_ctid points

                 * to, fix its t_ctid and insert it to the new heap.

                 */

                if (free_new)

                    heap_freetuple(new_tuple);

                new_tuple = unresolved->tuple;

                free_new = true;

                old_tid = unresolved->old_tid;

                new_tuple->t_data->t_ctid = new_tid;


                /*

                 * We don't need the hash entry anymore, but don't free its

                 * tuple just yet.

                 */

                hash_search(state->rs_unresolved_tups, &hashkey,

                            HASH_REMOVE, &found);

                Assert(found);


                /* loop back to insert the previous tuple in the chain */

                continue;

            }

            else

            {

                /*

                 * Remember the new tid of this tuple. We'll use it to set the

                 * ctid when we find the previous tuple in the chain.

                 */

                OldToNewMapping mapping;


                mapping = hash_search(state->rs_old_new_tid_map, &hashkey,

                                      HASH_ENTER, &found);

                Assert(!found);


                mapping->new_tid = new_tid;

            }

        }


        /* Done with this (chain of) tuples, for now */

        if (free_new)

            heap_freetuple(new_tuple);

        break;

    }


    MemoryContextSwitchTo(old_cxt);

}


/*

 * Register a dead tuple with an ongoing rewrite. Dead tuples are not

 * copied to the new table, but we still make note of them so that we

 * can release some resources earlier.

 *

 * Returns true if a tuple was removed from the unresolved_tups table.

 * This indicates that that tuple, previously thought to be "recently dead",

 * is now known really dead and won't be written to the output.

 */

bool

rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)

{

    /*

     * If we have already seen an earlier tuple in the update chain that

     * points to this tuple, let's forget about that earlier tuple. It's in

     * fact dead as well, our simple xmax < OldestXmin test in

     * HeapTupleSatisfiesVacuum just wasn't enough to detect it. It happens

     * when xmin of a tuple is greater than xmax, which sounds

     * counter-intuitive but is perfectly valid.

     *

     * We don't bother to try to detect the situation the other way round,

     * when we encounter the dead tuple first and then the recently dead one

     * that points to it. If that happens, we'll have some unmatched entries

     * in the UnresolvedTups hash table at the end. That can happen anyway,

     * because a vacuum might have removed the dead tuple in the chain before

     * us.

     */

    UnresolvedTup unresolved;

    TidHashKey  hashkey;

    bool        found;


    memset(&hashkey, 0, sizeof(hashkey));

    hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);

    hashkey.tid = old_tuple->t_self;


    unresolved = hash_search(state->rs_unresolved_tups, &hashkey,

                             HASH_FIND, NULL);


    if (unresolved != NULL)

    {

        /* Need to free the contained tuple as well as the hashtable entry */

        heap_freetuple(unresolved->tuple);

        hash_search(state->rs_unresolved_tups, &hashkey,

                    HASH_REMOVE, &found);

        Assert(found);

        return true;

    }


    return false;

}


/*

 * Insert a tuple to the new relation.  This has to track heap_insert

 * and its subsidiary functions!

 *

 * t_self of the tuple is set to the new TID of the tuple. If t_ctid of the

 * tuple is invalid on entry, it's replaced with the new TID as well (in

 * the inserted data only, not in the caller's copy).

 */

static void

raw_heap_insert(RewriteState state, HeapTuple tup)

{

    Page        page;

    Size        pageFreeSpace,

                saveFreeSpace;

    Size        len;

    OffsetNumber newoff;

    HeapTuple   heaptup;


    /*

     * If the new tuple is too big for storage or contains already toasted

     * out-of-line attributes from some other relation, invoke the toaster.

     *

     * Note: below this point, heaptup is the data we actually intend to store

     * into the relation; tup is the caller's original untoasted data.

     */

    if (state->rs_new_rel->rd_rel->relkind == RELKIND_TOASTVALUE)

    {

        /* toast table entries should never be recursively toasted */

        Assert(!HeapTupleHasExternal(tup));

        heaptup = tup;

    }

    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)

    {

        int         options = HEAP_INSERT_SKIP_FSM;


        /*

         * While rewriting the heap for VACUUM FULL / CLUSTER, make sure data

         * for the TOAST table are not logically decoded.  The main heap is

         * WAL-logged as XLOG FPI records, which are not logically decoded.

         */

        options |= HEAP_INSERT_NO_LOGICAL;


        heaptup = heap_toast_insert_or_update(state->rs_new_rel, tup, NULL,

                                              options);

    }

    else

        heaptup = tup;


    len = MAXALIGN(heaptup->t_len); /* be conservative */


    /*

     * If we're gonna fail for oversize tuple, do it right away

     */

    if (len > MaxHeapTupleSize)

        ereport(ERROR,

                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

                 errmsg("row is too big: size %zu, maximum size %zu",

                        len, MaxHeapTupleSize)));


    /* Compute desired extra freespace due to fillfactor option */

    saveFreeSpace = RelationGetTargetPageFreeSpace(state->rs_new_rel,

                                                   HEAP_DEFAULT_FILLFACTOR);


    /* Now we can check to see if there's enough free space already. */

    page = (Page) state->rs_buffer;

    if (page)

    {

        pageFreeSpace = PageGetHeapFreeSpace(page);


        if (len + saveFreeSpace > pageFreeSpace)

        {

            /*

             * Doesn't fit, so write out the existing page.  It always

             * contains a tuple.  Hence, unlike RelationGetBufferForTuple(),

             * enforce saveFreeSpace unconditionally.

             */

            smgr_bulk_write(state->rs_bulkstate, state->rs_blockno, state->rs_buffer, true);

            state->rs_buffer = NULL;

            page = NULL;

            state->rs_blockno++;

        }

    }


    if (!page)

    {

        /* Initialize a new empty page */

        state->rs_buffer = smgr_bulk_get_buf(state->rs_bulkstate);

        page = (Page) state->rs_buffer;

        PageInit(page, BLCKSZ, 0);

    }


    /* And now we can insert the tuple into the page */

    newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,

                         InvalidOffsetNumber, false, true);

    if (newoff == InvalidOffsetNumber)

        elog(ERROR, "failed to add tuple");


    /* Update caller's t_self to the actual position where it was stored */

    ItemPointerSet(&(tup->t_self), state->rs_blockno, newoff);


    /*

     * Insert the correct position into CTID of the stored tuple, too, if the

     * caller didn't supply a valid CTID.

     */

    if (!ItemPointerIsValid(&tup->t_data->t_ctid))

    {

        ItemId      newitemid;

        HeapTupleHeader onpage_tup;


        newitemid = PageGetItemId(page, newoff);

        onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid);


        onpage_tup->t_ctid = tup->t_self;

    }


    /* If heaptup is a private copy, release it. */

    if (heaptup != tup)

        heap_freetuple(heaptup);

}


/* ------------------------------------------------------------------------

 * Logical rewrite support

 *

 * When doing logical decoding - which relies on using cmin/cmax of catalog

 * tuples, via xl_heap_new_cid records - heap rewrites have to log enough

 * information to allow the decoding backend to update its internal mapping

 * of (relfilelocator,ctid) => (cmin, cmax) to be correct for the rewritten heap.

 *

 * For that, every time we find a tuple that's been modified in a catalog

 * relation within the xmin horizon of any decoding slot, we log a mapping

 * from the old to the new location.

 *

 * To deal with rewrites that abort the filename of a mapping file contains

 * the xid of the transaction performing the rewrite, which then can be

 * checked before being read in.

 *

 * For efficiency we don't immediately spill every single map mapping for a

 * row to disk but only do so in batches when we've collected several of them

 * in memory or when end_heap_rewrite() has been called.

 *

 * Crash-Safety: This module diverts from the usual patterns of doing WAL

 * since it cannot rely on checkpoint flushing out all buffers and thus

 * waiting for exclusive locks on buffers. Usually the XLogInsert() covering

 * buffer modifications is performed while the buffer(s) that are being

 * modified are exclusively locked guaranteeing that both the WAL record and

 * the modified heap are on either side of the checkpoint. But since the

 * mapping files we log aren't in shared_buffers that interlock doesn't work.

 *

 * Instead we simply write the mapping files out to disk, *before* the

 * XLogInsert() is performed. That guarantees that either the XLogInsert() is

 * inserted after the checkpoint's redo pointer or that the checkpoint (via

 * CheckPointLogicalRewriteHeap()) has flushed the (partial) mapping file to

 * disk. That leaves the tail end that has not yet been flushed open to

 * corruption, which is solved by including the current offset in the

 * xl_heap_rewrite_mapping records and truncating the mapping file to it

 * during replay. Every time a rewrite is finished all generated mapping files

 * are synced to disk.

 *

 * Note that if we were only concerned about crash safety we wouldn't have to

 * deal with WAL logging at all - an fsync() at the end of a rewrite would be

 * sufficient for crash safety. Any mapping that hasn't been safely flushed to

 * disk has to be by an aborted (explicitly or via a crash) transaction and is

 * ignored by virtue of the xid in its name being subject to a

 * TransactionDidCommit() check. But we want to support having standbys via

 * physical replication, both for availability and to do logical decoding

 * there.

 * ------------------------------------------------------------------------

 */


/*

 * Do preparations for logging logical mappings during a rewrite if

 * necessary. If we detect that we don't need to log anything we'll prevent

 * any further action by the various logical rewrite functions.

 */

static void

logical_begin_heap_rewrite(RewriteState state)

{

    HASHCTL     hash_ctl;

    TransactionId logical_xmin;


    /*

     * We only need to persist these mappings if the rewritten table can be

     * accessed during logical decoding, if not, we can skip doing any

     * additional work.

     */

    state->rs_logical_rewrite =

        RelationIsAccessibleInLogicalDecoding(state->rs_old_rel);


    if (!state->rs_logical_rewrite)

        return;


    ProcArrayGetReplicationSlotXmin(NULL, &logical_xmin);


    /*

     * If there are no logical slots in progress we don't need to do anything,

     * there cannot be any remappings for relevant rows yet. The relation's

     * lock protects us against races.

     */

    if (logical_xmin == InvalidTransactionId)

    {

        state->rs_logical_rewrite = false;

        return;

    }


    state->rs_logical_xmin = logical_xmin;

    state->rs_begin_lsn = GetXLogInsertRecPtr();

    state->rs_num_rewrite_mappings = 0;


    hash_ctl.keysize = sizeof(TransactionId);

    hash_ctl.entrysize = sizeof(RewriteMappingFile);

    hash_ctl.hcxt = state->rs_cxt;


    state->rs_logical_mappings =

        hash_create("Logical rewrite mapping",

                    128,        /* arbitrary initial size */

                    &hash_ctl,

                    HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

}


/*

 * Flush all logical in-memory mappings to disk, but don't fsync them yet.

 */

static void

logical_heap_rewrite_flush_mappings(RewriteState state)

{

    HASH_SEQ_STATUS seq_status;

    RewriteMappingFile *src;

    dlist_mutable_iter iter;


    Assert(state->rs_logical_rewrite);


    /* no logical rewrite in progress, no need to iterate over mappings */

    if (state->rs_num_rewrite_mappings == 0)

        return;


    elog(DEBUG1, "flushing %u logical rewrite mapping entries",

         state->rs_num_rewrite_mappings);


    hash_seq_init(&seq_status, state->rs_logical_mappings);

    while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)

    {

        char       *waldata;

        char       *waldata_start;

        xl_heap_rewrite_mapping xlrec;

        Oid         dboid;

        uint32      len;

        int         written;

        uint32      num_mappings = dclist_count(&src->mappings);


        /* this file hasn't got any new mappings */

        if (num_mappings == 0)

            continue;


        if (state->rs_old_rel->rd_rel->relisshared)

            dboid = InvalidOid;

        else

            dboid = MyDatabaseId;


        xlrec.num_mappings = num_mappings;

        xlrec.mapped_rel = RelationGetRelid(state->rs_old_rel);

        xlrec.mapped_xid = src->xid;

        xlrec.mapped_db = dboid;

        xlrec.offset = src->off;

        xlrec.start_lsn = state->rs_begin_lsn;


        /* write all mappings consecutively */

        len = num_mappings * sizeof(LogicalRewriteMappingData);

        waldata_start = waldata = palloc(len);


        /*

         * collect data we need to write out, but don't modify ondisk data yet

         */

        dclist_foreach_modify(iter, &src->mappings)

        {

            RewriteMappingDataEntry *pmap;


            pmap = dclist_container(RewriteMappingDataEntry, node, iter.cur);


            memcpy(waldata, &pmap->map, sizeof(pmap->map));

            waldata += sizeof(pmap->map);


            /* remove from the list and free */

            dclist_delete_from(&src->mappings, &pmap->node);

            pfree(pmap);


            /* update bookkeeping */

            state->rs_num_rewrite_mappings--;

        }


        Assert(dclist_count(&src->mappings) == 0);

        Assert(waldata == waldata_start + len);


        /*

         * Note that we deviate from the usual WAL coding practices here,

         * check the above "Logical rewrite support" comment for reasoning.

         */

        written = FileWrite(src->vfd, waldata_start, len, src->off,

                            WAIT_EVENT_LOGICAL_REWRITE_WRITE);

        if (written != len)

            ereport(ERROR,

                    (errcode_for_file_access(),

                     errmsg("could not write to file \"%s\", wrote %d of %d: %m", src->path,

                            written, len)));

        src->off += len;


        XLogBeginInsert();

        XLogRegisterData(&xlrec, sizeof(xlrec));

        XLogRegisterData(waldata_start, len);


        /* write xlog record */

        XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);


        pfree(waldata_start);

    }

    Assert(state->rs_num_rewrite_mappings == 0);

}


/*

 * Logical remapping part of end_heap_rewrite().

 */

static void

logical_end_heap_rewrite(RewriteState state)

{

    HASH_SEQ_STATUS seq_status;

    RewriteMappingFile *src;


    /* done, no logical rewrite in progress */

    if (!state->rs_logical_rewrite)

        return;


    /* writeout remaining in-memory entries */

    if (state->rs_num_rewrite_mappings > 0)

        logical_heap_rewrite_flush_mappings(state);


    /* Iterate over all mappings we have written and fsync the files. */

    hash_seq_init(&seq_status, state->rs_logical_mappings);

    while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)

    {

        if (FileSync(src->vfd, WAIT_EVENT_LOGICAL_REWRITE_SYNC) != 0)

            ereport(data_sync_elevel(ERROR),

                    (errcode_for_file_access(),

                     errmsg("could not fsync file \"%s\": %m", src->path)));

        FileClose(src->vfd);

    }

    /* memory context cleanup will deal with the rest */

}


/*

 * Log a single (old->new) mapping for 'xid'.

 */

static void

logical_rewrite_log_mapping(RewriteState state, TransactionId xid,

                            LogicalRewriteMappingData *map)

{

    RewriteMappingFile *src;

    RewriteMappingDataEntry *pmap;

    Oid         relid;

    bool        found;


    relid = RelationGetRelid(state->rs_old_rel);


    /* look for existing mappings for this 'mapped' xid */

    src = hash_search(state->rs_logical_mappings, &xid,

                      HASH_ENTER, &found);


    /*

     * We haven't yet had the need to map anything for this xid, create

     * per-xid data structures.

     */

    if (!found)

    {

        char        path[MAXPGPATH];

        Oid         dboid;


        if (state->rs_old_rel->rd_rel->relisshared)

            dboid = InvalidOid;

        else

            dboid = MyDatabaseId;


        snprintf(path, MAXPGPATH,

                 "%s/" LOGICAL_REWRITE_FORMAT,

                 PG_LOGICAL_MAPPINGS_DIR, dboid, relid,

                 LSN_FORMAT_ARGS(state->rs_begin_lsn),

                 xid, GetCurrentTransactionId());


        dclist_init(&src->mappings);

        src->off = 0;

        memcpy(src->path, path, sizeof(path));

        src->vfd = PathNameOpenFile(path,

                                    O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);

        if (src->vfd < 0)

            ereport(ERROR,

                    (errcode_for_file_access(),

                     errmsg("could not create file \"%s\": %m", path)));

    }


    pmap = MemoryContextAlloc(state->rs_cxt,

                              sizeof(RewriteMappingDataEntry));

    memcpy(&pmap->map, map, sizeof(LogicalRewriteMappingData));

    dclist_push_tail(&src->mappings, &pmap->node);

    state->rs_num_rewrite_mappings++;


    /*

     * Write out buffer every time we've too many in-memory entries across all

     * mapping files.

     */

    if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */ )

        logical_heap_rewrite_flush_mappings(state);

}


/*

 * Perform logical remapping for a tuple that's mapped from old_tid to

 * new_tuple->t_self by rewrite_heap_tuple() if necessary for the tuple.

 */

static void

logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,

                           HeapTuple new_tuple)

{

    ItemPointerData new_tid = new_tuple->t_self;

    TransactionId cutoff = state->rs_logical_xmin;

    TransactionId xmin;

    TransactionId xmax;

    bool        do_log_xmin = false;

    bool        do_log_xmax = false;

    LogicalRewriteMappingData map;


    /* no logical rewrite in progress, we don't need to log anything */

    if (!state->rs_logical_rewrite)

        return;


    xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);

    /* use *GetUpdateXid to correctly deal with multixacts */

    xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data);


    /*

     * Log the mapping iff the tuple has been created recently.

     */

    if (TransactionIdIsNormal(xmin) && !TransactionIdPrecedes(xmin, cutoff))

        do_log_xmin = true;


    if (!TransactionIdIsNormal(xmax))

    {

        /*

         * no xmax is set, can't have any permanent ones, so this check is

         * sufficient

         */

    }

    else if (HEAP_XMAX_IS_LOCKED_ONLY(new_tuple->t_data->t_infomask))

    {

        /* only locked, we don't care */

    }

    else if (!TransactionIdPrecedes(xmax, cutoff))

    {

        /* tuple has been deleted recently, log */

        do_log_xmax = true;

    }


    /* if neither needs to be logged, we're done */

    if (!do_log_xmin && !do_log_xmax)

        return;


    /* fill out mapping information */

    map.old_locator = state->rs_old_rel->rd_locator;

    map.old_tid = old_tid;

    map.new_locator = state->rs_new_rel->rd_locator;

    map.new_tid = new_tid;


    /* ---

     * Now persist the mapping for the individual xids that are affected. We

     * need to log for both xmin and xmax if they aren't the same transaction

     * since the mapping files are per "affected" xid.

     * We don't muster all that much effort detecting whether xmin and xmax

     * are actually the same transaction, we just check whether the xid is the

     * same disregarding subtransactions. Logging too much is relatively

     * harmless and we could never do the check fully since subtransaction

     * data is thrown away during restarts.

     * ---

     */

    if (do_log_xmin)

        logical_rewrite_log_mapping(state, xmin, &map);

    /* separately log mapping for xmax unless it'd be redundant */

    if (do_log_xmax && !TransactionIdEquals(xmin, xmax))

        logical_rewrite_log_mapping(state, xmax, &map);

}


/*

 * Replay XLOG_HEAP2_REWRITE records

 */

void

heap_xlog_logical_rewrite(XLogReaderState *r)

{

    char        path[MAXPGPATH];

    int         fd;

    xl_heap_rewrite_mapping *xlrec;

    uint32      len;

    char       *data;


    xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r);


    snprintf(path, MAXPGPATH,

             "%s/" LOGICAL_REWRITE_FORMAT,

             PG_LOGICAL_MAPPINGS_DIR, xlrec->mapped_db, xlrec->mapped_rel,

             LSN_FORMAT_ARGS(xlrec->start_lsn),

             xlrec->mapped_xid, XLogRecGetXid(r));


    fd = OpenTransientFile(path,

                           O_CREAT | O_WRONLY | PG_BINARY);

    if (fd < 0)

        ereport(ERROR,

                (errcode_for_file_access(),

                 errmsg("could not create file \"%s\": %m", path)));


    /*

     * Truncate all data that's not guaranteed to have been safely fsynced (by

     * previous record or by the last checkpoint).

     */

    pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE);

    if (ftruncate(fd, xlrec->offset) != 0)

        ereport(ERROR,

                (errcode_for_file_access(),

                 errmsg("could not truncate file \"%s\" to %u: %m",

                        path, (uint32) xlrec->offset)));

    pgstat_report_wait_end();


    data = XLogRecGetData(r) + sizeof(*xlrec);


    len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);


    /* write out tail end of mapping file (again) */

    errno = 0;

    pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);

    if (pg_pwrite(fd, data, len, xlrec->offset) != len)

    {

        /* if write didn't set errno, assume problem is no disk space */

        if (errno == 0)

            errno = ENOSPC;

        ereport(ERROR,

                (errcode_for_file_access(),

                 errmsg("could not write to file \"%s\": %m", path)));

    }

    pgstat_report_wait_end();


    /*

     * Now fsync all previously written data. We could improve things and only

     * do this for the last write to a file, but the required bookkeeping

     * doesn't seem worth the trouble.

     */

    pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC);

    if (pg_fsync(fd) != 0)

        ereport(data_sync_elevel(ERROR),

                (errcode_for_file_access(),

                 errmsg("could not fsync file \"%s\": %m", path)));

    pgstat_report_wait_end();


    if (CloseTransientFile(fd) != 0)

        ereport(ERROR,

                (errcode_for_file_access(),

                 errmsg("could not close file \"%s\": %m", path)));

}


/* ---

 * Perform a checkpoint for logical rewrite mappings

 *

 * This serves two tasks:

 * 1) Remove all mappings not needed anymore based on the logical restart LSN

 * 2) Flush all remaining mappings to disk, so that replay after a checkpoint

 *    only has to deal with the parts of a mapping that have been written out

 *    after the checkpoint started.

 * ---

 */

void

CheckPointLogicalRewriteHeap(void)

{

    XLogRecPtr  cutoff;

    XLogRecPtr  redo;

    DIR        *mappings_dir;

    struct dirent *mapping_de;

    char        path[MAXPGPATH + sizeof(PG_LOGICAL_MAPPINGS_DIR)];


    /*

     * We start of with a minimum of the last redo pointer. No new decoding

     * slot will start before that, so that's a safe upper bound for removal.

     */

    redo = GetRedoRecPtr();


    /* now check for the restart ptrs from existing slots */

    cutoff = ReplicationSlotsComputeLogicalRestartLSN();


    /* don't start earlier than the restart lsn */

    if (cutoff != InvalidXLogRecPtr && redo < cutoff)

        cutoff = redo;


    mappings_dir = AllocateDir(PG_LOGICAL_MAPPINGS_DIR);

    while ((mapping_de = ReadDir(mappings_dir, PG_LOGICAL_MAPPINGS_DIR)) != NULL)

    {

        Oid         dboid;

        Oid         relid;

        XLogRecPtr  lsn;

        TransactionId rewrite_xid;

        TransactionId create_xid;

        uint32      hi,

                    lo;

        PGFileType  de_type;


        if (strcmp(mapping_de->d_name, ".") == 0 ||

            strcmp(mapping_de->d_name, "..") == 0)

            continue;


        snprintf(path, sizeof(path), "%s/%s", PG_LOGICAL_MAPPINGS_DIR, mapping_de->d_name);

        de_type = get_dirent_type(path, mapping_de, false, DEBUG1);


        if (de_type != PGFILETYPE_ERROR && de_type != PGFILETYPE_REG)

            continue;


        /* Skip over files that cannot be ours. */

        if (strncmp(mapping_de->d_name, "map-", 4) != 0)

            continue;


        if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,

                   &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)

            elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);


        lsn = ((uint64) hi) << 32 | lo;


        if (lsn < cutoff || cutoff == InvalidXLogRecPtr)

        {

            elog(DEBUG1, "removing logical rewrite file \"%s\"", path);

            if (unlink(path) < 0)

                ereport(ERROR,

                        (errcode_for_file_access(),

                         errmsg("could not remove file \"%s\": %m", path)));

        }

        else

        {

            /* on some operating systems fsyncing a file requires O_RDWR */

            int         fd = OpenTransientFile(path, O_RDWR | PG_BINARY);


            /*

             * The file cannot vanish due to concurrency since this function

             * is the only one removing logical mappings and only one

             * checkpoint can be in progress at a time.

             */

            if (fd < 0)

                ereport(ERROR,

                        (errcode_for_file_access(),

                         errmsg("could not open file \"%s\": %m", path)));


            /*

             * We could try to avoid fsyncing files that either haven't

             * changed or have only been created since the checkpoint's start,

             * but it's currently not deemed worth the effort.

             */

            pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC);

            if (pg_fsync(fd) != 0)

                ereport(data_sync_elevel(ERROR),

                        (errcode_for_file_access(),

                         errmsg("could not fsync file \"%s\": %m", path)));

            pgstat_report_wait_end();


            if (CloseTransientFile(fd) != 0)

                ereport(ERROR,

                        (errcode_for_file_access(),

                         errmsg("could not close file \"%s\": %m", path)));

        }

    }

    FreeDir(mappings_dir);


    /* persist directory entries to disk */

    fsync_fname(PG_LOGICAL_MAPPINGS_DIR, true);

}

BlockNumber
uint32 BlockNumber
Definition: block.h:31

bufmgr.h

RelationGetNumberOfBlocks
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:283

PageGetHeapFreeSpace
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990

PageInit
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

PageGetItem
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354

PageGetItemId
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244

Page
PageData * Page
Definition: bufpage.h:82

PageAddItem
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:472

smgr_bulk_start_rel
BulkWriteState * smgr_bulk_start_rel(Relation rel, ForkNumber forknum)
Definition: bulk_write.c:87

smgr_bulk_write
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition: bulk_write.c:323

smgr_bulk_get_buf
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition: bulk_write.c:347

smgr_bulk_finish
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition: bulk_write.c:130

bulk_write.h

MAXALIGN
#define MAXALIGN(LEN)
Definition: c.h:782

PG_BINARY
#define PG_BINARY
Definition: c.h:1244

MultiXactId
TransactionId MultiXactId
Definition: c.h:633

uint64
uint64_t uint64
Definition: c.h:503

uint32
uint32_t uint32
Definition: c.h:502

TransactionId
uint32 TransactionId
Definition: c.h:623

Size
size_t Size
Definition: c.h:576

hash_search
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:956

hash_seq_search
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1421

hash_create
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352

hash_seq_init
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1386

errcode_for_file_access
int errcode_for_file_access(void)
Definition: elog.c:877

errcode
int errcode(int sqlerrcode)
Definition: elog.c:854

errmsg
int errmsg(const char *fmt,...)
Definition: elog.c:1071

DEBUG1
#define DEBUG1
Definition: elog.h:30

ERROR
#define ERROR
Definition: elog.h:39

elog
#define elog(elevel,...)
Definition: elog.h:225

ereport
#define ereport(elevel,...)
Definition: elog.h:149

FreeDir
int FreeDir(DIR *dir)
Definition: fd.c:3025

FileSync
int FileSync(File file, uint32 wait_event_info)
Definition: fd.c:2352

CloseTransientFile
int CloseTransientFile(int fd)
Definition: fd.c:2871

FileClose
void FileClose(File file)
Definition: fd.c:1982

fsync_fname
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756

data_sync_elevel
int data_sync_elevel(int elevel)
Definition: fd.c:4001

PathNameOpenFile
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1579

AllocateDir
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2907

ReadDir
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2973

pg_fsync
int pg_fsync(int fd)
Definition: fd.c:386

OpenTransientFile
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694

fd.h

FileWrite
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
Definition: fd.h:211

get_dirent_type
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:547

file_utils.h

PGFileType
PGFileType
Definition: file_utils.h:19

PGFILETYPE_REG
@ PGFILETYPE_REG
Definition: file_utils.h:22

PGFILETYPE_ERROR
@ PGFILETYPE_ERROR
Definition: file_utils.h:20

MyDatabaseId
Oid MyDatabaseId
Definition: globals.c:94

Assert
Assert(PointerIsAligned(start, uint64))

heap_freeze_tuple
bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
Definition: heapam.c:7366

heapam.h

HEAP_INSERT_SKIP_FSM
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:36

HEAP_INSERT_NO_LOGICAL
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:38

HeapTupleHeaderIsOnlyLocked
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
Definition: heapam_visibility.c:1529

heapam_xlog.h

XLOG_HEAP2_REWRITE
#define XLOG_HEAP2_REWRITE
Definition: heapam_xlog.h:59

heap_toast_insert_or_update
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96

heaptoast.h

TOAST_TUPLE_THRESHOLD
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48

heap_copytuple
HeapTuple heap_copytuple(HeapTuple tuple)
Definition: heaptuple.c:778

heap_freetuple
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435

HASH_FIND
@ HASH_FIND
Definition: hsearch.h:113

HASH_REMOVE
@ HASH_REMOVE
Definition: hsearch.h:115

HASH_ENTER
@ HASH_ENTER
Definition: hsearch.h:114

HASH_CONTEXT
#define HASH_CONTEXT
Definition: hsearch.h:102

HASH_ELEM
#define HASH_ELEM
Definition: hsearch.h:95

HASH_BLOBS
#define HASH_BLOBS
Definition: hsearch.h:97

HeapTupleHeader
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23

HEAP_XMAX_IS_LOCKED_ONLY
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
Definition: htup_details.h:226

HeapTupleHasExternal
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
Definition: htup_details.h:762

HeapTupleHeaderGetXmin
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:324

HEAP_XACT_MASK
#define HEAP_XACT_MASK
Definition: htup_details.h:215

HeapTupleHeaderIndicatesMovedPartitions
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)
Definition: htup_details.h:480

HEAP_XMAX_INVALID
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208

HeapTupleHeaderGetUpdateXid
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:397

HEAP_UPDATED
#define HEAP_UPDATED
Definition: htup_details.h:210

MaxHeapTupleSize
#define MaxHeapTupleSize
Definition: htup_details.h:610

ilist.h

dclist_container
#define dclist_container(type, membername, ptr)
Definition: ilist.h:947

dclist_push_tail
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition: ilist.h:709

dclist_count
static uint32 dclist_count(const dclist_head *head)
Definition: ilist.h:932

dclist_delete_from
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition: ilist.h:763

dclist_init
static void dclist_init(dclist_head *head)
Definition: ilist.h:671

dclist_foreach_modify
#define dclist_foreach_modify(iter, lhead)
Definition: ilist.h:973

if
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81

Item
Pointer Item
Definition: item.h:17

ItemPointerEquals
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35

ItemPointerSet
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135

ItemPointerSetInvalid
static void ItemPointerSetInvalid(ItemPointerData *pointer)
Definition: itemptr.h:184

ItemPointerIsValid
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83

MemoryContextAlloc
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1185

pfree
void pfree(void *pointer)
Definition: mcxt.c:1528

palloc0
void * palloc0(Size size)
Definition: mcxt.c:1351

palloc
void * palloc(Size size)
Definition: mcxt.c:1321

CurrentMemoryContext
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143

MemoryContextDelete
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454

memutils.h

AllocSetContextCreate
#define AllocSetContextCreate
Definition: memutils.h:129

ALLOCSET_DEFAULT_SIZES
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160

miscadmin.h

InvalidOffsetNumber
#define InvalidOffsetNumber
Definition: off.h:26

OffsetNumber
uint16 OffsetNumber
Definition: off.h:24

MemoryContextSwitchTo
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124

MAXPGPATH
#define MAXPGPATH
Definition: pg_config_manual.h:100

len
const void size_t len
Definition: pg_crc32c_sse42.c:28

data
const void * data
Definition: pg_crc32c_sse42.c:27

pgstat.h

pg_pwrite
#define pg_pwrite
Definition: port.h:227

snprintf
#define snprintf
Definition: port.h:239

postgres.h

InvalidOid
#define InvalidOid
Definition: postgres_ext.h:35

Oid
unsigned int Oid
Definition: postgres_ext.h:30

fd
static int fd(const char *x, int i)
Definition: preproc-init.c:105

ProcArrayGetReplicationSlotXmin
void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin)
Definition: procarray.c:3968

procarray.h

rel.h

RelationGetRelid
#define RelationGetRelid(relation)
Definition: rel.h:516

RelationGetTargetPageFreeSpace
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:389

RelationIsAccessibleInLogicalDecoding
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:695

HEAP_DEFAULT_FILLFACTOR
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:360

MAIN_FORKNUM
@ MAIN_FORKNUM
Definition: relpath.h:58

PG_LOGICAL_MAPPINGS_DIR
#define PG_LOGICAL_MAPPINGS_DIR
Definition: reorderbuffer.h:23

RewriteMappingDataEntry
struct RewriteMappingDataEntry RewriteMappingDataEntry

raw_heap_insert
static void raw_heap_insert(RewriteState state, HeapTuple tup)
Definition: rewriteheap.c:593

end_heap_rewrite
void end_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:297

rewrite_heap_dead_tuple
bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
Definition: rewriteheap.c:543

UnresolvedTup
UnresolvedTupData * UnresolvedTup
Definition: rewriteheap.c:177

begin_heap_rewrite
RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId cutoff_multi)
Definition: rewriteheap.c:234

logical_rewrite_heap_tuple
static void logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, HeapTuple new_tuple)
Definition: rewriteheap.c:999

logical_heap_rewrite_flush_mappings
static void logical_heap_rewrite_flush_mappings(RewriteState state)
Definition: rewriteheap.c:807

heap_xlog_logical_rewrite
void heap_xlog_logical_rewrite(XLogReaderState *r)
Definition: rewriteheap.c:1073

logical_begin_heap_rewrite
static void logical_begin_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:759

CheckPointLogicalRewriteHeap
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155

RewriteMappingFile
struct RewriteMappingFile RewriteMappingFile

logical_end_heap_rewrite
static void logical_end_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:905

OldToNewMapping
OldToNewMappingData * OldToNewMapping
Definition: rewriteheap.c:185

RewriteStateData
struct RewriteStateData RewriteStateData

rewrite_heap_tuple
void rewrite_heap_tuple(RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
Definition: rewriteheap.c:341

logical_rewrite_log_mapping
static void logical_rewrite_log_mapping(RewriteState state, TransactionId xid, LogicalRewriteMappingData *map)
Definition: rewriteheap.c:935

rewriteheap.h

LOGICAL_REWRITE_FORMAT
#define LOGICAL_REWRITE_FORMAT
Definition: rewriteheap.h:54

LogicalRewriteMappingData
struct LogicalRewriteMappingData LogicalRewriteMappingData

ReplicationSlotsComputeLogicalRestartLSN
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
Definition: slot.c:1205

slot.h

BulkWriteState
Definition: bulk_write.c:62

DIR
Definition: dirent.c:26

HASHCTL
Definition: hsearch.h:66

HASHCTL::keysize
Size keysize
Definition: hsearch.h:75

HASHCTL::entrysize
Size entrysize
Definition: hsearch.h:76

HASHCTL::hcxt
MemoryContext hcxt
Definition: hsearch.h:86

HASH_SEQ_STATUS
Definition: hsearch.h:121

HTAB
Definition: dynahash.c:220

HeapTupleData
Definition: htup.h:63

HeapTupleData::t_self
ItemPointerData t_self
Definition: htup.h:65

HeapTupleData::t_len
uint32 t_len
Definition: htup.h:64

HeapTupleData::t_data
HeapTupleHeader t_data
Definition: htup.h:68

HeapTupleFields
Definition: htup_details.h:123

HeapTupleHeaderData
Definition: htup_details.h:154

HeapTupleHeaderData::t_infomask2
uint16 t_infomask2
Definition: htup_details.h:167

HeapTupleHeaderData::t_infomask
uint16 t_infomask
Definition: htup_details.h:170

HeapTupleHeaderData::t_choice
union HeapTupleHeaderData::@47 t_choice

HeapTupleHeaderData::t_ctid
ItemPointerData t_ctid
Definition: htup_details.h:161

HeapTupleHeaderData::t_heap
HeapTupleFields t_heap
Definition: htup_details.h:157

ItemIdData
Definition: itemid.h:26

ItemPointerData
Definition: itemptr.h:37

LogicalRewriteMappingData
Definition: rewriteheap.h:36

LogicalRewriteMappingData::new_tid
ItemPointerData new_tid
Definition: rewriteheap.h:40

LogicalRewriteMappingData::old_locator
RelFileLocator old_locator
Definition: rewriteheap.h:37

LogicalRewriteMappingData::old_tid
ItemPointerData old_tid
Definition: rewriteheap.h:39

LogicalRewriteMappingData::new_locator
RelFileLocator new_locator
Definition: rewriteheap.h:38

MemoryContextData
Definition: memnodes.h:118

OldToNewMappingData
Definition: rewriteheap.c:180

OldToNewMappingData::key
TidHashKey key
Definition: rewriteheap.c:181

OldToNewMappingData::new_tid
ItemPointerData new_tid
Definition: rewriteheap.c:182

RelationData
Definition: rel.h:56

RewriteMappingDataEntry
Definition: rewriteheap.c:205

RewriteMappingDataEntry::node
dlist_node node
Definition: rewriteheap.c:208

RewriteMappingDataEntry::map
LogicalRewriteMappingData map
Definition: rewriteheap.c:206

RewriteMappingFile
Definition: rewriteheap.c:192

RewriteMappingFile::path
char path[MAXPGPATH]
Definition: rewriteheap.c:197

RewriteMappingFile::xid
TransactionId xid
Definition: rewriteheap.c:193

RewriteMappingFile::vfd
int vfd
Definition: rewriteheap.c:194

RewriteMappingFile::mappings
dclist_head mappings
Definition: rewriteheap.c:196

RewriteMappingFile::off
off_t off
Definition: rewriteheap.c:195

RewriteStateData
Definition: rewriteheap.c:131

RewriteStateData::rs_freeze_xid
TransactionId rs_freeze_xid
Definition: rewriteheap.c:140

RewriteStateData::rs_cxt
MemoryContext rs_cxt
Definition: rewriteheap.c:146

RewriteStateData::rs_logical_rewrite
bool rs_logical_rewrite
Definition: rewriteheap.c:137

RewriteStateData::rs_oldest_xmin
TransactionId rs_oldest_xmin
Definition: rewriteheap.c:138

RewriteStateData::rs_logical_mappings
HTAB * rs_logical_mappings
Definition: rewriteheap.c:151

RewriteStateData::rs_new_rel
Relation rs_new_rel
Definition: rewriteheap.c:133

RewriteStateData::rs_unresolved_tups
HTAB * rs_unresolved_tups
Definition: rewriteheap.c:149

RewriteStateData::rs_num_rewrite_mappings
uint32 rs_num_rewrite_mappings
Definition: rewriteheap.c:152

RewriteStateData::rs_old_rel
Relation rs_old_rel
Definition: rewriteheap.c:132

RewriteStateData::rs_logical_xmin
TransactionId rs_logical_xmin
Definition: rewriteheap.c:142

RewriteStateData::rs_bulkstate
BulkWriteState * rs_bulkstate
Definition: rewriteheap.c:134

RewriteStateData::rs_buffer
BulkWriteBuffer rs_buffer
Definition: rewriteheap.c:135

RewriteStateData::rs_old_new_tid_map
HTAB * rs_old_new_tid_map
Definition: rewriteheap.c:150

RewriteStateData::rs_begin_lsn
XLogRecPtr rs_begin_lsn
Definition: rewriteheap.c:148

RewriteStateData::rs_blockno
BlockNumber rs_blockno
Definition: rewriteheap.c:136

RewriteStateData::rs_cutoff_multi
MultiXactId rs_cutoff_multi
Definition: rewriteheap.c:144

TidHashKey
Definition: rewriteheap.c:162

TidHashKey::xmin
TransactionId xmin
Definition: rewriteheap.c:163

TidHashKey::tid
ItemPointerData tid
Definition: rewriteheap.c:164

UnresolvedTupData
Definition: rewriteheap.c:171

UnresolvedTupData::tuple
HeapTuple tuple
Definition: rewriteheap.c:174

UnresolvedTupData::key
TidHashKey key
Definition: rewriteheap.c:172

UnresolvedTupData::old_tid
ItemPointerData old_tid
Definition: rewriteheap.c:173

XLogReaderState
Definition: xlogreader.h:176

dclist_head
Definition: ilist.h:213

dirent
Definition: dirent.h:10

dirent::d_name
char d_name[MAX_PATH]
Definition: dirent.h:15

dlist_mutable_iter
Definition: ilist.h:199

dlist_mutable_iter::cur
dlist_node * cur
Definition: ilist.h:200

dlist_node
Definition: ilist.h:138

options
Definition: oid2name.c:30

state
Definition: regguts.h:323

xl_heap_rewrite_mapping
Definition: heapam_xlog.h:474

xl_heap_rewrite_mapping::start_lsn
XLogRecPtr start_lsn
Definition: heapam_xlog.h:480

xl_heap_rewrite_mapping::mapped_xid
TransactionId mapped_xid
Definition: heapam_xlog.h:475

xl_heap_rewrite_mapping::mapped_db
Oid mapped_db
Definition: heapam_xlog.h:476

xl_heap_rewrite_mapping::mapped_rel
Oid mapped_rel
Definition: heapam_xlog.h:477

xl_heap_rewrite_mapping::num_mappings
uint32 num_mappings
Definition: heapam_xlog.h:479

xl_heap_rewrite_mapping::offset
off_t offset
Definition: heapam_xlog.h:478

TransactionIdPrecedes
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280

transam.h

InvalidTransactionId
#define InvalidTransactionId
Definition: transam.h:31

TransactionIdEquals
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43

TransactionIdIsNormal
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

PGIOAlignedBlock
Definition: c.h:1104

unistd.h

pgstat_report_wait_start
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69

pgstat_report_wait_end
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85

GetCurrentTransactionId
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:454

xact.h

GetRedoRecPtr
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6625

GetXLogInsertRecPtr
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9612

LSN_FORMAT_ARGS
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43

XLogRecPtr
uint64 XLogRecPtr
Definition: xlogdefs.h:21

InvalidXLogRecPtr
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

XLogInsert
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474

XLogRegisterData
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364

XLogBeginInsert
void XLogBeginInsert(void)
Definition: xloginsert.c:149

xloginsert.h

XLogRecGetData
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415

XLogRecGetXid
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:412