#include "postgres.h"
#include <unistd.h>
#include "access/timeline.h"
#include "access/xlogrecovery.h"
#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "storage/fd.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/rel.h"

Include dependency graph for xlogutils.c:

Data Structures
struct	xl_invalid_page_key

struct	xl_invalid_page

struct	FakeRelCacheEntryData

Typedefs
typedef struct xl_invalid_page_key	xl_invalid_page_key

typedef struct xl_invalid_page	xl_invalid_page

typedef FakeRelCacheEntryData *	FakeRelCacheEntry

Functions
static int	read_local_xlog_page_guts (XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page, bool wait_for_wal)

static void	report_invalid_page (int elevel, RelFileLocator locator, ForkNumber forkno, BlockNumber blkno, bool present)

static void	log_invalid_page (RelFileLocator locator, ForkNumber forkno, BlockNumber blkno, bool present)

static void	forget_invalid_pages (RelFileLocator locator, ForkNumber forkno, BlockNumber minblkno)

static void	forget_invalid_pages_db (Oid dbid)

bool	XLogHaveInvalidPages (void)

void	XLogCheckInvalidPages (void)

XLogRedoAction	XLogReadBufferForRedo (XLogReaderState *record, uint8 block_id, Buffer *buf)

Buffer	XLogInitBufferForRedo (XLogReaderState *record, uint8 block_id)

XLogRedoAction	XLogReadBufferForRedoExtended (XLogReaderState *record, uint8 block_id, ReadBufferMode mode, bool get_cleanup_lock, Buffer *buf)

Buffer	XLogReadBufferExtended (RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode, Buffer recent_buffer)

Relation	CreateFakeRelcacheEntry (RelFileLocator rlocator)

void	FreeFakeRelcacheEntry (Relation fakerel)

void	XLogDropRelation (RelFileLocator rlocator, ForkNumber forknum)

void	XLogDropDatabase (Oid dbid)

void	XLogTruncateRelation (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nblocks)

void	XLogReadDetermineTimeline (XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength, TimeLineID currTLI)

void	wal_segment_open (XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)

void	wal_segment_close (XLogReaderState *state)

int	read_local_xlog_page (XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)

int	read_local_xlog_page_no_wait (XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)

void	WALReadRaiseError (WALReadError *errinfo)

Variables
bool	ignore_invalid_pages = false

bool	InRecovery = false

HotStandbyState	standbyState = STANDBY_DISABLED

static HTAB *	invalid_page_tab = NULL

Typedef Documentation

◆ FakeRelCacheEntry

typedef FakeRelCacheEntryData* FakeRelCacheEntry

Definition at line 554 of file xlogutils.c.

◆ xl_invalid_page

typedef struct xl_invalid_page xl_invalid_page

◆ xl_invalid_page_key

typedef struct xl_invalid_page_key xl_invalid_page_key

Function Documentation

◆ CreateFakeRelcacheEntry()

Relation CreateFakeRelcacheEntry ( RelFileLocator rlocator )

Definition at line 571 of file xlogutils.c.

{
    /*
     * Build a stand-in Relation for the given relfile locator and return it.
     *
     * The Relation struct and everything hanging off it come from a single
     * zeroed allocation; the Relation pointer is simply that allocation cast
     * to Relation.
     */
    FakeRelCacheEntry entry = palloc0(sizeof(FakeRelCacheEntryData));
    Relation    fakerel = (Relation) entry;

    /* Point rd_rel at the pg_class image embedded in the same allocation. */
    fakerel->rd_rel = &entry->pgc;

    /* It must be a permanent table here */
    fakerel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;

    /* The real relation name is unknown, so print the relfilenumber instead. */
    sprintf(RelationGetRelationName(fakerel), "%u", rlocator.relNumber);

    /*
     * Temp relations are never handled during recovery or while syncing
     * WAL-skipped files, hence no owning backend.
     */
    fakerel->rd_locator = rlocator;
    fakerel->rd_backend = INVALID_PROC_NUMBER;

    /*
     * Fill in lockRelId in case something tries to lock the dummy relation.
     * This is somewhat bogus because relNumber need not equal the relation's
     * OID, but it should not matter: in recovery we run alone and cannot hit
     * lock conflicts, and while syncing AccessExclusiveLock is already held.
     */
    fakerel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
    fakerel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;

    /*
     * Use a non-pinned SMgrRelation reference so that nothing has to be
     * unpinned if an error occurs.
     */
    fakerel->rd_smgr = smgropen(rlocator, INVALID_PROC_NUMBER);

    return fakerel;
}

References LockRelId::dbId, RelFileLocator::dbOid, INVALID_PROC_NUMBER, LockInfoData::lockRelId, palloc0(), FakeRelCacheEntryData::pgc, RelationData::rd_backend, RelationData::rd_locator, RelationData::rd_lockInfo, RelationData::rd_rel, RelationData::rd_smgr, RelationGetRelationName, LockRelId::relId, RelFileLocator::relNumber, smgropen(), and sprintf.

Referenced by heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), smgr_redo(), and smgrDoPendingSyncs().

◆ forget_invalid_pages()

static void forget_invalid_pages	(	RelFileLocator	locator,
		ForkNumber	forkno,
		BlockNumber	minblkno
	)

static

Definition at line 165 of file xlogutils.c.

{
    /*
     * Remove from invalid_page_tab every entry that belongs to the given
     * relation fork and has a block number of at least minblkno.
     */
    HASH_SEQ_STATUS scan;
    xl_invalid_page *entry;

    /* No table means no invalid pages have been recorded. */
    if (invalid_page_tab == NULL)
        return;

    hash_seq_init(&scan, invalid_page_tab);

    for (;;)
    {
        entry = (xl_invalid_page *) hash_seq_search(&scan);
        if (entry == NULL)
            break;

        /* Skip entries for other relations, other forks, or lower blocks. */
        if (!RelFileLocatorEquals(entry->key.locator, locator))
            continue;
        if (entry->key.forkno != forkno)
            continue;
        if (entry->key.blkno < minblkno)
            continue;

        elog(DEBUG2, "page %u of relation %s has been dropped",
             entry->key.blkno,
             relpathperm(entry->key.locator, forkno).str);

        /* The entry was just returned by the scan, so removal must succeed. */
        if (hash_search(invalid_page_tab, &entry->key,
                        HASH_REMOVE, NULL) == NULL)
            elog(ERROR, "hash table corrupted");
    }
}

References xl_invalid_page_key::blkno, DEBUG2, elog, ERROR, xl_invalid_page_key::forkno, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), invalid_page_tab, xl_invalid_page::key, xl_invalid_page_key::locator, RelFileLocatorEquals, and relpathperm.

Referenced by XLogDropRelation(), and XLogTruncateRelation().

◆ forget_invalid_pages_db()

static void forget_invalid_pages_db ( Oid dbid )

static

Definition at line 196 of file xlogutils.c.

{
    /*
     * Remove from invalid_page_tab every entry whose locator belongs to
     * database dbid.
     */
    HASH_SEQ_STATUS scan;
    xl_invalid_page *entry;

    /* No table means no invalid pages have been recorded. */
    if (invalid_page_tab == NULL)
        return;

    hash_seq_init(&scan, invalid_page_tab);

    for (;;)
    {
        entry = (xl_invalid_page *) hash_seq_search(&scan);
        if (entry == NULL)
            break;

        /* Entries of other databases are left alone. */
        if (entry->key.locator.dbOid != dbid)
            continue;

        elog(DEBUG2, "page %u of relation %s has been dropped",
             entry->key.blkno,
             relpathperm(entry->key.locator, entry->key.forkno).str);

        /* The entry was just returned by the scan, so removal must succeed. */
        if (hash_search(invalid_page_tab, &entry->key,
                        HASH_REMOVE, NULL) == NULL)
            elog(ERROR, "hash table corrupted");
    }
}

References xl_invalid_page_key::blkno, RelFileLocator::dbOid, DEBUG2, elog, ERROR, xl_invalid_page_key::forkno, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), invalid_page_tab, xl_invalid_page::key, xl_invalid_page_key::locator, and relpathperm.

Referenced by XLogDropDatabase().

◆ FreeFakeRelcacheEntry()

void FreeFakeRelcacheEntry ( Relation fakerel )

Definition at line 618 of file xlogutils.c.

{
    /*
     * Release the entry with a single pfree() of the pointer passed in;
     * nothing else is torn down here.
     */
    pfree(fakerel);
}

References pfree().

Referenced by heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), smgr_redo(), and smgrDoPendingSyncs().

◆ log_invalid_page()

static void log_invalid_page	(	RelFileLocator	locator,
		ForkNumber	forkno,
		BlockNumber	blkno,
		bool	present
	)

static

Definition at line 101 of file xlogutils.c.

{
    /*
     * Record a reference to an invalid page (relation fork + block number)
     * in invalid_page_tab.  "present" is stored with the entry and selects
     * which of the two report_invalid_page() messages is used for it.
     */
    xl_invalid_page_key lookup;
    xl_invalid_page *entry;
    bool        already_known;

    /*
     * After recovery has reached consistency the invalid-page table is
     * expected to be empty and to stay empty.  A reference seen after that
     * point is reported immediately, and is a PANIC unless
     * ignore_invalid_pages is set.  That may look harsh, but the alternative
     * is to keep the entry around and PANIC only at the end of recovery,
     * which on a standby could be much later.
     */
    if (reachedConsistency)
    {
        report_invalid_page(WARNING, locator, forkno, blkno, present);
        elog(ignore_invalid_pages ? WARNING : PANIC,
             "WAL contains references to invalid pages");
    }

    /*
     * Trace the reference at DEBUG1 so its cause can be investigated; the
     * elog context mechanism adds information about the originating XLOG
     * record.
     */
    if (message_level_is_interesting(DEBUG1))
        report_invalid_page(DEBUG1, locator, forkno, blkno, present);

    /* Build the hash table lazily, on the first invalid reference. */
    if (invalid_page_tab == NULL)
    {
        HASHCTL     hctl;

        hctl.entrysize = sizeof(xl_invalid_page);
        hctl.keysize = sizeof(xl_invalid_page_key);

        invalid_page_tab = hash_create("XLOG invalid-page table",
                                       100,
                                       &hctl,
                                       HASH_ELEM | HASH_BLOBS);
    }

    /* we currently assume xl_invalid_page_key contains no padding */
    lookup.blkno = blkno;
    lookup.forkno = forkno;
    lookup.locator = locator;

    entry = (xl_invalid_page *)
        hash_search(invalid_page_tab, &lookup, HASH_ENTER, &already_known);

    /*
     * For a new entry hash_search has already copied the key in, so only
     * "present" needs to be set.  A repeat reference keeps the value stored
     * the first time.
     */
    if (!already_known)
        entry->present = present;
}

References ctl, DEBUG1, elog, HASH_BLOBS, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), ignore_invalid_pages, invalid_page_tab, sort-test::key, message_level_is_interesting(), PANIC, xl_invalid_page::present, reachedConsistency, report_invalid_page(), and WARNING.

Referenced by XLogReadBufferExtended().

◆ read_local_xlog_page()

int read_local_xlog_page	(	XLogReaderState *	state,
		XLogRecPtr	targetPagePtr,
		int	reqLen,
		XLogRecPtr	targetRecPtr,
		char *	cur_page
	)

Definition at line 845 of file xlogutils.c.

{
    /*
     * Thin wrapper: forward every argument to read_local_xlog_page_guts()
     * with wait_for_wal = true and return its result unchanged.
     */
    return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                     targetRecPtr, cur_page, true);
}

References read_local_xlog_page_guts().

Referenced by create_logical_replication_slot(), LogicalReplicationSlotHasPendingWal(), LogicalSlotAdvanceAndCheckSnapState(), pg_logical_slot_get_changes_guts(), and XlogReadTwoPhaseData().

◆ read_local_xlog_page_guts()

static int read_local_xlog_page_guts	(	XLogReaderState *	state,
		XLogRecPtr	targetPagePtr,
		int	reqLen,
		XLogRecPtr	targetRecPtr,
		char *	cur_page,
		bool	wait_for_wal
	)

static

Definition at line 869 of file xlogutils.c.

{
    /*
     * Shared implementation behind read_local_xlog_page() and
     * read_local_xlog_page_no_wait().
     *
     * Reads at most one XLOG_BLCKSZ page starting at targetPagePtr into
     * cur_page using WALRead().  Returns the number of valid bytes placed in
     * cur_page, or -1 when fewer than reqLen bytes are available.  A WALRead()
     * failure is passed to WALReadRaiseError().
     *
     * With wait_for_wal set, the loop below polls (CHECK_FOR_INTERRUPTS plus
     * pg_usleep) until the requested range becomes readable on the current
     * timeline.  Without it, the first shortfall sets end_of_wal in
     * state->private_data (treated as ReadLocalXLogPageNoWaitPrivate) and
     * stops waiting.
     *
     * targetRecPtr is not referenced anywhere in this body.
     */
    XLogRecPtr  read_upto,
                loc;
    TimeLineID  tli;
    int         count;
    WALReadError errinfo;
    TimeLineID  currTLI;
 
    /* end of the byte range the caller asked for */
    loc = targetPagePtr + reqLen;
 
    /*
     * Loop waiting for xlog to be available if necessary
     *
     * TODO: The walsender has its own version of this function, which uses a
     * condition variable to wake up whenever WAL is flushed. We could use the
     * same infrastructure here, instead of the check/sleep/repeat style of
     * loop.
     */
    while (1)
    {
        /*
         * Determine the limit of xlog we can currently read to, and what the
         * most recent timeline is.
         */
        if (!RecoveryInProgress())
            read_upto = GetFlushRecPtr(&currTLI);
        else
            read_upto = GetXLogReplayRecPtr(&currTLI);
        tli = currTLI;
 
        /*
         * Check which timeline to get the record from.
         *
         * We have to do it each time through the loop because if we're in
         * recovery as a cascading standby, the current timeline might've
         * become historical. We can't rely on RecoveryInProgress() because in
         * a standby configuration like
         *
         * A => B => C
         *
         * if we're a logical decoding session on C, and B gets promoted, our
         * timeline will change while we remain in recovery.
         *
         * We can't just keep reading from the old timeline as the last WAL
         * archive in the timeline will get renamed to .partial by
         * StartupXLOG().
         *
         * If that happens after our caller determined the TLI but before we
         * actually read the xlog page, we might still try to read from the
         * old (now renamed) segment and fail. There's not much we can do
         * about this, but it can only happen when we're a leaf of a cascading
         * standby whose primary gets promoted while we're decoding, so a
         * one-off ERROR isn't too bad.
         */
        XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
 
        /* state->currTLI matching currTLI means we read the live timeline */
        if (state->currTLI == currTLI)
        {
 
            /* requested range is already readable: stop waiting */
            if (loc <= read_upto)
                break;
 
            /* If asked, let's not wait for future WAL. */
            if (!wait_for_wal)
            {
                ReadLocalXLogPageNoWaitPrivate *private_data;
 
                /*
                 * Inform the caller of read_local_xlog_page_no_wait that the
                 * end of WAL has been reached.
                 */
                private_data = (ReadLocalXLogPageNoWaitPrivate *)
                    state->private_data;
                private_data->end_of_wal = true;
                /* loc > read_upto here, so the size check below yields -1 */
                break;
            }
 
            /* stay interruptible while polling, then retry after a pause */
            CHECK_FOR_INTERRUPTS();
            pg_usleep(1000L);
        }
        else
        {
            /*
             * We're on a historical timeline, so limit reading to the switch
             * point where we moved to the next timeline.
             *
             * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
             * about the new timeline, so we must've received past the end of
             * it.
             */
            read_upto = state->currTLIValidUntil;
 
            /*
             * Setting tli to our wanted record's TLI is slightly wrong; the
             * page might begin on an older timeline if it contains a timeline
             * switch, since its xlog segment will have been copied from the
             * prior timeline. This is pretty harmless though, as nothing
             * cares so long as the timeline doesn't go backwards.  We should
             * read the page header instead; FIXME someday.
             */
            tli = state->currTLI;
 
            /* No need to wait on a historical timeline */
            break;
        }
    }
 
    /* Decide how many bytes of the page can be handed back. */
    if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
    {
        /*
         * more than one block available; read only that block, have caller
         * come back if they need more.
         */
        count = XLOG_BLCKSZ;
    }
    else if (targetPagePtr + reqLen > read_upto)
    {
        /* not enough data there */
        return -1;
    }
    else
    {
        /* enough bytes available to satisfy the request */
        count = read_upto - targetPagePtr;
    }
 
    /* WALRead() reports failure through errinfo; turn that into an error */
    if (!WALRead(state, cur_page, targetPagePtr, count, tli,
                 &errinfo))
        WALReadRaiseError(&errinfo);
 
    /* number of valid bytes in the buffer */
    return count;
}

References CHECK_FOR_INTERRUPTS, ReadLocalXLogPageNoWaitPrivate::end_of_wal, GetFlushRecPtr(), GetXLogReplayRecPtr(), pg_usleep(), RecoveryInProgress(), WALRead(), WALReadRaiseError(), and XLogReadDetermineTimeline().

Referenced by read_local_xlog_page(), and read_local_xlog_page_no_wait().

◆ read_local_xlog_page_no_wait()

int read_local_xlog_page_no_wait	(	XLogReaderState *	state,
		XLogRecPtr	targetPagePtr,
		int	reqLen,
		XLogRecPtr	targetRecPtr,
		char *	cur_page
	)

Definition at line 857 of file xlogutils.c.

{
    /*
     * Thin wrapper: forward every argument to read_local_xlog_page_guts()
     * with wait_for_wal = false and return its result unchanged.
     */
    return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                     targetRecPtr, cur_page, false);
}

References read_local_xlog_page_guts().

Referenced by InitXLogReaderState().

◆ report_invalid_page()

static void report_invalid_page	(	int	elevel,
		RelFileLocator	locator,
		ForkNumber	forkno,
		BlockNumber	blkno,
		bool	present
	)

static

Definition at line 86 of file xlogutils.c.

{
    /*
     * Emit one message at level elevel about an invalid page.  "present"
     * picks the wording: true reports the page as uninitialized, false
     * reports that it does not exist.
     */
    RelPathStr  relpath = relpathperm(locator, forkno);

    if (!present)
    {
        elog(elevel, "page %u of relation %s does not exist",
             blkno, relpath.str);
        return;
    }

    elog(elevel, "page %u of relation %s is uninitialized",
         blkno, relpath.str);
}

References elog, relpathperm, and RelPathStr::str.

Referenced by log_invalid_page(), and XLogCheckInvalidPages().

◆ wal_segment_close()

void wal_segment_close ( XLogReaderState * state )

Definition at line 831 of file xlogutils.c.

{
    /*
     * Close the segment file descriptor held in state->seg and mark it as
     * no longer open.  The return value of close() is not examined.
     */
    close(state->seg.ws_file);
    /* need to check errno? */
    /* -1 marks "no file open" (wal_segment_open treats fds >= 0 as open) */
    state->seg.ws_file = -1;
}

References close.

Referenced by create_logical_replication_slot(), CreateReplicationSlot(), InitWalRecovery(), InitXLogReaderState(), LogicalReplicationSlotHasPendingWal(), LogicalSlotAdvanceAndCheckSnapState(), pg_logical_slot_get_changes_guts(), StartLogicalReplication(), StartReplication(), SummarizeWAL(), WalSndErrorCleanup(), XlogReadTwoPhaseData(), and XLogSendPhysical().

◆ wal_segment_open()

void wal_segment_open	(	XLogReaderState *	state,
		XLogSegNo	nextSegNo,
		TimeLineID *	tli_p
	)

Definition at line 806 of file xlogutils.c.

{
    /*
     * Open WAL segment nextSegNo of timeline *tli_p read-only and store the
     * descriptor in state->seg.ws_file.  Returns normally only on success;
     * any failure to open is reported with ereport(ERROR).
     */
    char        segpath[MAXPGPATH];

    XLogFilePath(segpath, *tli_p, nextSegNo, state->segcxt.ws_segsize);

    state->seg.ws_file = BasicOpenFile(segpath, O_RDONLY | PG_BINARY);

    if (state->seg.ws_file < 0)
    {
        /*
         * errno is still the one left by the failed open.  A missing file
         * gets its own message; everything else is a generic open failure.
         */
        if (errno != ENOENT)
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not open file \"%s\": %m",
                            segpath)));
        else
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("requested WAL segment %s has already been removed",
                            segpath)));
    }
}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, MAXPGPATH, PG_BINARY, and XLogFilePath().

Referenced by create_logical_replication_slot(), InitXLogReaderState(), LogicalReplicationSlotHasPendingWal(), LogicalSlotAdvanceAndCheckSnapState(), pg_logical_slot_get_changes_guts(), SummarizeWAL(), and XlogReadTwoPhaseData().

◆ WALReadRaiseError()

void WALReadRaiseError ( WALReadError * errinfo )

Definition at line 1011 of file xlogutils.c.

{
    /*
     * Turn the failure details in *errinfo into an ereport(ERROR).
     *
     * wre_read < 0 is reported as a file-access error using the saved
     * wre_errno; wre_read == 0 is reported as data corruption with the
     * read/requested byte counts.  For a positive wre_read neither branch
     * applies and the function returns without raising anything.
     */
    char        segname[MAXFNAMELEN];

    XLogFileName(segname, errinfo->wre_seg.ws_tli, errinfo->wre_seg.ws_segno,
                 wal_segment_size);

    if (errinfo->wre_read == 0)
    {
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("could not read from WAL segment %s, offset %d: read %d of %d",
                        segname, errinfo->wre_off, errinfo->wre_read,
                        errinfo->wre_req)));
    }
    else if (errinfo->wre_read < 0)
    {
        /* restore the errno captured at read time so that %m prints it */
        errno = errinfo->wre_errno;
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read from WAL segment %s, offset %d: %m",
                        segname, errinfo->wre_off)));
    }
}

References ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, MAXFNAMELEN, wal_segment_size, WALReadError::wre_errno, WALReadError::wre_off, WALReadError::wre_read, WALReadError::wre_req, WALReadError::wre_seg, WALOpenSegment::ws_segno, WALOpenSegment::ws_tli, and XLogFileName().

Referenced by logical_read_xlog_page(), read_local_xlog_page_guts(), summarizer_read_local_xlog_page(), and XLogSendPhysical().

◆ XLogCheckInvalidPages()

void XLogCheckInvalidPages ( void )

Definition at line 234 of file xlogutils.c.

{
    /*
     * Report every entry still present in invalid_page_tab, complain if
     * there was at least one, then discard the table.
     */
    HASH_SEQ_STATUS scan;
    xl_invalid_page *entry;
    bool        any_left = false;

    /* No table means no invalid pages have been recorded. */
    if (invalid_page_tab == NULL)
        return;

    hash_seq_init(&scan, invalid_page_tab);

    /*
     * Dump everything at WARNING first, so that all available information is
     * out before the final message, which may be a PANIC.
     */
    for (entry = (xl_invalid_page *) hash_seq_search(&scan);
         entry != NULL;
         entry = (xl_invalid_page *) hash_seq_search(&scan))
    {
        report_invalid_page(WARNING, entry->key.locator, entry->key.forkno,
                            entry->key.blkno, entry->present);
        any_left = true;
    }

    if (any_left)
        elog(ignore_invalid_pages ? WARNING : PANIC,
             "WAL contains references to invalid pages");

    /* Drop the table; it is rebuilt on demand by log_invalid_page(). */
    hash_destroy(invalid_page_tab);
    invalid_page_tab = NULL;
}

References xl_invalid_page_key::blkno, elog, xl_invalid_page_key::forkno, hash_destroy(), hash_seq_init(), hash_seq_search(), ignore_invalid_pages, invalid_page_tab, xl_invalid_page::key, xl_invalid_page_key::locator, PANIC, xl_invalid_page::present, report_invalid_page(), and WARNING.

Referenced by CheckRecoveryConsistency().

◆ XLogDropDatabase()

void XLogDropDatabase ( Oid dbid )

Definition at line 641 of file xlogutils.c.

{
    /*
     * Clean up after a dropped database: destroy all SMgrRelation objects
     * and forget the invalid-page entries recorded for database dbid.
     */

    /*
     * This is unnecessarily heavy-handed, as it will close SMgrRelation
     * objects for other databases as well. DROP DATABASE occurs seldom enough
     * that it's not worth introducing a variant of smgrdestroy for just this
     * purpose.
     */
    smgrdestroyall();
 
    /* remove invalid_page_tab entries whose locator has this dbOid */
    forget_invalid_pages_db(dbid);
}

References forget_invalid_pages_db(), and smgrdestroyall().

Referenced by dbase_redo().

◆ XLogDropRelation()

void XLogDropRelation	(	RelFileLocator	rlocator,
		ForkNumber	forknum
	)

Definition at line 630 of file xlogutils.c.

{
    /*
     * Forget the invalid-page entries of this relation fork.  The minimum
     * block number passed is 0, i.e. no lower bound is applied.
     */
    forget_invalid_pages(rlocator, forknum, 0);
}

References forget_invalid_pages().

Referenced by DropRelationFiles().

◆ XLogHaveInvalidPages()

bool XLogHaveInvalidPages ( void )

Definition at line 224 of file xlogutils.c.

{
    /*
     * True exactly when the invalid-page table exists and currently holds at
     * least one entry.
     */
    return invalid_page_tab != NULL &&
        hash_get_num_entries(invalid_page_tab) > 0;
}

References hash_get_num_entries(), and invalid_page_tab.

Referenced by RecoveryRestartPoint().

◆ XLogInitBufferForRedo()

Buffer XLogInitBufferForRedo	(	XLogReaderState *	record,
		uint8	block_id
	)

Definition at line 315 of file xlogutils.c.

{
    /*
     * Convenience wrapper around XLogReadBufferForRedoExtended() using mode
     * RBM_ZERO_AND_LOCK and get_cleanup_lock = false.  Only the buffer is
     * returned; the XLogRedoAction result of the call is discarded.
     */
    Buffer      buf;
 
    XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
                                  &buf);
    return buf;
}

References buf, RBM_ZERO_AND_LOCK, and XLogReadBufferForRedoExtended().

Referenced by _bt_restore_meta(), brin_xlog_createidx(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), gistRedoPageSplitRecord(), hash_xlog_add_ovfl_page(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), heap_xlog_insert(), heap_xlog_multi_insert(), heap_xlog_update(), seq_redo(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), and spgRedoSplitTuple().

◆ XLogReadBufferExtended()

Buffer XLogReadBufferExtended	(	RelFileLocator	rlocator,
		ForkNumber	forknum,
		BlockNumber	blkno,
		ReadBufferMode	mode,
		Buffer	recent_buffer
	)

Definition at line 460 of file xlogutils.c.

{
    /*
     * Return a buffer for block blkno of the given relation fork, or
     * InvalidBuffer.
     *
     * - RBM_NORMAL: a block beyond the end of the file, or a page for which
     *   PageIsNew() holds, is recorded with log_invalid_page() and
     *   InvalidBuffer is returned.
     * - RBM_NORMAL_NO_LOG: a block beyond the end of the file returns
     *   InvalidBuffer without being recorded.
     * - any other mode: the fork is extended through blkno with
     *   ExtendBufferedRelTo().
     *
     * recent_buffer is only a hint; it is tried via ReadRecentBuffer() in
     * RBM_NORMAL mode before the relation is opened at smgr level.
     */
    BlockNumber lastblock;
    Buffer      buffer;
    SMgrRelation smgr;
 
    Assert(blkno != P_NEW);
 
    /* Do we have a clue where the buffer might be already? */
    if (BufferIsValid(recent_buffer) &&
        mode == RBM_NORMAL &&
        ReadRecentBuffer(rlocator, forknum, blkno, recent_buffer))
    {
        buffer = recent_buffer;
        /* skip the smgr work, but still run the PageIsNew check below */
        goto recent_buffer_fast_path;
    }
 
    /* Open the relation at smgr level */
    smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
 
    /*
     * Create the target file if it doesn't already exist.  This lets us cope
     * if the replay sequence contains writes to a relation that is later
     * deleted.  (The original coding of this routine would instead suppress
     * the writes, but that seems like it risks losing valuable data if the
     * filesystem loses an inode during a crash.  Better to write the data
     * until we are actually told to delete the file.)
     */
    smgrcreate(smgr, forknum, true);
 
    /* current length of the fork, in blocks */
    lastblock = smgrnblocks(smgr, forknum);
 
    if (blkno < lastblock)
    {
        /* page exists in file */
        buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno,
                                           mode, NULL, true);
    }
    else
    {
        /* hm, page doesn't exist in file */
        if (mode == RBM_NORMAL)
        {
            /* "present" = false: the page does not exist at all */
            log_invalid_page(rlocator, forknum, blkno, false);
            return InvalidBuffer;
        }
        if (mode == RBM_NORMAL_NO_LOG)
            return InvalidBuffer;
        /* OK to extend the file */
        /* we do this in recovery only - no rel-extension lock needed */
        Assert(InRecovery);
        /* extend so that the fork has blkno + 1 blocks, i.e. includes blkno */
        buffer = ExtendBufferedRelTo(BMR_SMGR(smgr, RELPERSISTENCE_PERMANENT),
                                     forknum,
                                     NULL,
                                     EB_PERFORMING_RECOVERY |
                                     EB_SKIP_EXTENSION_LOCK,
                                     blkno + 1,
                                     mode);
    }
 
recent_buffer_fast_path:
    if (mode == RBM_NORMAL)
    {
        /* check that page has been initialized */
        Page        page = BufferGetPage(buffer);
 
        /*
         * We assume that PageIsNew is safe without a lock. During recovery,
         * there should be no other backends that could modify the buffer at
         * the same time.
         */
        if (PageIsNew(page))
        {
            ReleaseBuffer(buffer);
            /* "present" = true: the page exists but is uninitialized */
            log_invalid_page(rlocator, forknum, blkno, true);
            return InvalidBuffer;
        }
    }
 
    return buffer;
}

References Assert(), BMR_SMGR, BufferGetPage(), BufferIsValid(), EB_PERFORMING_RECOVERY, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRelTo(), InRecovery, INVALID_PROC_NUMBER, InvalidBuffer, log_invalid_page(), mode, P_NEW, PageIsNew(), RBM_NORMAL, RBM_NORMAL_NO_LOG, ReadBufferWithoutRelcache(), ReadRecentBuffer(), ReleaseBuffer(), smgrcreate(), smgrnblocks(), and smgropen().

Referenced by verifyBackupPageConsistency(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

◆ XLogReadBufferForRedo()

XLogRedoAction XLogReadBufferForRedo	(	XLogReaderState *	record,
		uint8	block_id,
		Buffer *	buf
	)

Definition at line 303 of file xlogutils.c.

{
    /*
     * Convenience wrapper: call XLogReadBufferForRedoExtended() with mode
     * RBM_NORMAL and get_cleanup_lock = false, returning its result as is.
     */
    return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
                                         false, buf);
}

References buf, RBM_NORMAL, and XLogReadBufferForRedoExtended().

Referenced by _bt_clear_incomplete_split(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_split(), btree_xlog_unlink_page(), generic_redo(), ginRedoClearIncompleteSplit(), ginRedoDeletePage(), ginRedoInsert(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageUpdateRecord(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), and xlog_redo().

◆ XLogReadBufferForRedoExtended()

XLogRedoAction XLogReadBufferForRedoExtended	(	XLogReaderState *	record,
		uint8	block_id,
		ReadBufferMode	mode,
		bool	get_cleanup_lock,
		Buffer *	buf
	)

Definition at line 340 of file xlogutils.c.

{
    /*
     * Fetch the buffer for block reference block_id of the WAL record and
     * tell the caller what to do with it.  The buffer is returned in *buf.
     *
     * Return value:
     *   BLK_RESTORED    a full-page image was applied to the page
     *   BLK_DONE        page LSN is already >= the record's end LSN
     *   BLK_NEEDS_REDO  page LSN is older than the record's end LSN
     *   BLK_NOTFOUND    XLogReadBufferExtended() returned no valid buffer
     *
     * PANICs if block_id cannot be located in the record, or if the mode and
     * the block's WILL_INIT flag disagree.
     */
    XLogRecPtr  lsn = record->EndRecPtr;
    RelFileLocator rlocator;
    ForkNumber  forknum;
    BlockNumber blkno;
    Buffer      prefetch_buffer;
    Page        page;
    bool        zeromode;
    bool        willinit;
 
    if (!XLogRecGetBlockTagExtended(record, block_id, &rlocator, &forknum, &blkno,
                                    &prefetch_buffer))
    {
        /* Caller specified a bogus block_id */
        elog(PANIC, "failed to locate backup block with ID %d in WAL record",
             block_id);
    }
 
    /*
     * Make sure that if the block is marked with WILL_INIT, the caller is
     * going to initialize it. And vice versa.
     */
    zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
    willinit = (XLogRecGetBlock(record, block_id)->flags & BKPBLOCK_WILL_INIT) != 0;
    if (willinit && !zeromode)
        elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
    if (!willinit && zeromode)
        elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
 
    /* If it has a full-page image and it should be restored, do it. */
    if (XLogRecBlockImageApply(record, block_id))
    {
        Assert(XLogRecHasBlockImage(record, block_id));
        /* the caller's mode is not used here; a zeroing mode is always chosen */
        *buf = XLogReadBufferExtended(rlocator, forknum, blkno,
                                      get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK,
                                      prefetch_buffer);
        page = BufferGetPage(*buf);
        if (!RestoreBlockImage(record, block_id, page))
            ereport(ERROR,
                    (errcode(ERRCODE_INTERNAL_ERROR),
                     errmsg_internal("%s", record->errormsg_buf)));
 
        /*
         * The page may be uninitialized. If so, we can't set the LSN because
         * that would corrupt the page.
         */
        if (!PageIsNew(page))
        {
            PageSetLSN(page, lsn);
        }
 
        MarkBufferDirty(*buf);
 
        /*
         * At the end of crash recovery the init forks of unlogged relations
         * are copied, without going through shared buffers. So we need to
         * force the on-disk state of init forks to always be in sync with the
         * state in shared buffers.
         */
        if (forknum == INIT_FORKNUM)
            FlushOneBuffer(*buf);
 
        return BLK_RESTORED;
    }
    else
    {
        *buf = XLogReadBufferExtended(rlocator, forknum, blkno, mode, prefetch_buffer);
        if (BufferIsValid(*buf))
        {
            /*
             * Lock explicitly only for the non-zeroing modes.
             * NOTE(review): presumably the two RBM_ZERO_AND_* modes hand the
             * buffer back already locked -- confirm against the buffer
             * manager.
             */
            if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
            {
                if (get_cleanup_lock)
                    LockBufferForCleanup(*buf);
                else
                    LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
            }
            /* page already carries this record's changes if its LSN is >= lsn */
            if (lsn <= PageGetLSN(BufferGetPage(*buf)))
                return BLK_DONE;
            else
                return BLK_NEEDS_REDO;
        }
        else
            return BLK_NOTFOUND;
    }
}

References Assert(), BKPBLOCK_WILL_INIT, BLK_DONE, BLK_NEEDS_REDO, BLK_NOTFOUND, BLK_RESTORED, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsValid(), elog, XLogReaderState::EndRecPtr, ereport, errcode(), errmsg_internal(), ERROR, XLogReaderState::errormsg_buf, FlushOneBuffer(), INIT_FORKNUM, LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), mode, PageGetLSN(), PageIsNew(), PageSetLSN(), PANIC, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RestoreBlockImage(), XLogReadBufferExtended(), XLogRecBlockImageApply, XLogRecGetBlock, XLogRecGetBlockTagExtended(), and XLogRecHasBlockImage.

Referenced by btree_xlog_vacuum(), hash_xlog_delete(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_squeeze_page(), hash_xlog_vacuum_one_page(), heap_xlog_prune_freeze(), heap_xlog_visible(), XLogInitBufferForRedo(), and XLogReadBufferForRedo().

◆ XLogReadDetermineTimeline()

void XLogReadDetermineTimeline	(	XLogReaderState *	state,
		XLogRecPtr	wantPage,
		uint32	wantLength,
		TimeLineID	currTLI
	)

Definition at line 707 of file xlogutils.c.

{
    /*
     * Decide which timeline the WAL page starting at wantPage should be read
     * from, recording the answer in state->currTLI, state->currTLIValidUntil
     * and state->nextTLI.
     *
     *  state      - reader state; its seg, segcxt, segoff and readLen fields
     *               are consulted, and its currTLI, currTLIValidUntil and
     *               nextTLI fields are overwritten on the slow path.
     *  wantPage   - start of the wanted page; must be valid and a multiple
     *               of XLOG_BLCKSZ (asserted below).
     *  wantLength - number of bytes wanted; at most XLOG_BLCKSZ (asserted).
     *  currTLI    - the caller's current timeline; must be nonzero
     *               (asserted).  Its history is what gets re-read below.
     *
     * Nothing is returned.  Three fast-path checks return early without
     * touching state; only when all of them fail is the timeline history
     * re-read and the state fields recomputed.
     */

    /*
     * Position computed from the reader's segment number, segment size and
     * in-segment offset; the checks below treat it as the start of the last
     * page read.
     */
    const XLogRecPtr lastReadPage = (state->seg.ws_segno *
                                     state->segcxt.ws_segsize + state->segoff);
 
    /* Sanity checks on the arguments and on the reader's current state. */
    Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
    Assert(wantLength <= XLOG_BLCKSZ);
    Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
    Assert(currTLI != 0);
 
    /*
     * If the desired page is currently read in and valid, we have nothing to
     * do.
     *
     * The caller should've ensured that it didn't previously advance readOff
     * past the valid limit of this timeline, so it doesn't matter if the
     * current TLI has since become historical.
     */
    if (lastReadPage == wantPage &&
        state->readLen != 0 &&
        lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
        return;
 
    /*
     * If we're reading from the current timeline, it hasn't become historical
     * and the page we're reading is after the last page read, we can again
     * just carry on. (Seeking backwards requires a check to make sure the
     * older page isn't on a prior timeline).
     *
     * currTLI might've become historical since the caller obtained the value,
     * but the caller is required not to read past the flush limit it saw at
     * the time it looked up the timeline. There's nothing we can do about it
     * if StartupXLOG() renames it to .partial concurrently.
     */
    if (state->currTLI == currTLI && wantPage >= lastReadPage)
    {
        Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
        return;
    }
 
    /*
     * If we're just reading pages from a previously validated historical
     * timeline and the timeline we're reading from is valid until the end of
     * the current segment we can just keep reading.
     *
     * The comparison is done on segment numbers: the segment holding the end
     * of the request must be strictly before the segment holding the switch
     * point.
     */
    if (state->currTLIValidUntil != InvalidXLogRecPtr &&
        state->currTLI != currTLI &&
        state->currTLI != 0 &&
        ((wantPage + wantLength) / state->segcxt.ws_segsize) <
        (state->currTLIValidUntil / state->segcxt.ws_segsize))
        return;
 
    /*
     * If we reach this point we're either looking up a page for random
     * access, the current timeline just became historical, or we're reading
     * from a new segment containing a timeline switch. In all cases we need
     * to determine the newest timeline on the segment.
     *
     * If it's the current timeline we can just keep reading from here unless
     * we detect a timeline switch that makes the current timeline historical.
     * If it's a historical timeline we can read all the segment on the newest
     * timeline because it contains all the old timelines' data too. So only
     * one switch check is required.
     */
    {
        /*
         * We need to re-read the timeline history in case it's been changed
         * by a promotion or replay from a cascaded replica.
         */
        List       *timelineHistory = readTimeLineHistory(currTLI);
        XLogRecPtr  endOfSegment;
 
        /* Last byte position of the segment that contains wantPage. */
        endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
            state->segcxt.ws_segsize - 1;
        Assert(wantPage / state->segcxt.ws_segsize ==
               endOfSegment / state->segcxt.ws_segsize);
 
        /*
         * Find the timeline of the last LSN on the segment containing
         * wantPage.
         *
         * state->nextTLI is filled in by tliSwitchPoint() through the
         * pointer passed as its last argument.  NOTE(review): presumably
         * that is the timeline following state->currTLI -- confirm against
         * tliSwitchPoint().
         */
        state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
        state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
                                                  &state->nextTLI);
 
        /*
         * Either no switch point was reported, or the whole request must lie
         * strictly before it.
         */
        Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
               wantPage + wantLength < state->currTLIValidUntil);
 
        /* The history list was only needed for the two lookups above. */
        list_free_deep(timelineHistory);
 
        elog(DEBUG3, "switched to timeline %u valid until %X/%08X",
             state->currTLI,
             LSN_FORMAT_ARGS(state->currTLIValidUntil));
    }
}

References Assert(), DEBUG3, elog, InvalidXLogRecPtr, list_free_deep(), LSN_FORMAT_ARGS, Min, readTimeLineHistory(), tliOfPointInHistory(), and tliSwitchPoint().

Referenced by logical_read_xlog_page(), and read_local_xlog_page_guts().

◆ XLogTruncateRelation()

void XLogTruncateRelation	(	RelFileLocator	rlocator,
		ForkNumber	forkNum,
		BlockNumber	nblocks
	)

Definition at line 660 of file xlogutils.c.

{
    /*
     * Thin wrapper: forward the truncation to the invalid-page tracking
     * code, passing nblocks as forget_invalid_pages()'s minblkno argument
     * for the given relation fork.
     *
     * NOTE(review): presumably this discards remembered invalid-page entries
     * for blocks at or beyond nblocks -- confirm against
     * forget_invalid_pages().
     */
    forget_invalid_pages(rlocator, forkNum, nblocks);
}

References forget_invalid_pages().

Referenced by smgr_redo().

Variable Documentation

◆ ignore_invalid_pages

bool ignore_invalid_pages = false

Definition at line 34 of file xlogutils.c.

Referenced by log_invalid_page(), and XLogCheckInvalidPages().

◆ InRecovery

◆ invalid_page_tab

HTAB* invalid_page_tab = NULL

static

Definition at line 78 of file xlogutils.c.

Referenced by forget_invalid_pages(), forget_invalid_pages_db(), log_invalid_page(), XLogCheckInvalidPages(), and XLogHaveInvalidPages().

◆ standbyState

HotStandbyState standbyState = STANDBY_DISABLED

Definition at line 53 of file xlogutils.c.

Referenced by ApplyWalRecord(), CheckRecoveryConsistency(), ExpireTreeKnownAssignedTransactionIds(), InitRecoveryTransactionEnvironment(), ProcArrayApplyRecoveryInfo(), ProcArrayApplyXidAssignment(), ProcArrayInitRecovery(), RecordKnownAssignedTransactionIds(), standby_redo(), StartupProcExit(), StartupXLOG(), xact_redo(), xact_redo_abort(), xact_redo_commit(), and xlog_redo().

Data Structures

Typedefs

Functions

Variables

Typedef Documentation

◆ FakeRelCacheEntry

◆ xl_invalid_page

◆ xl_invalid_page_key

Function Documentation

◆ CreateFakeRelcacheEntry()

◆ forget_invalid_pages()

◆ forget_invalid_pages_db()

◆ FreeFakeRelcacheEntry()

◆ log_invalid_page()

◆ read_local_xlog_page()

◆ read_local_xlog_page_guts()

◆ read_local_xlog_page_no_wait()

◆ report_invalid_page()

◆ wal_segment_close()

◆ wal_segment_open()

◆ WALReadRaiseError()

◆ XLogCheckInvalidPages()

◆ XLogDropDatabase()

◆ XLogDropRelation()

◆ XLogHaveInvalidPages()

◆ XLogInitBufferForRedo()

◆ XLogReadBufferExtended()

◆ XLogReadBufferForRedo()

◆ XLogReadBufferForRedoExtended()

◆ XLogReadDetermineTimeline()

◆ XLogTruncateRelation()

Variable Documentation

◆ ignore_invalid_pages

◆ InRecovery

◆ invalid_page_tab

◆ standbyState