PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "access/xlogwait.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/datachecksum_state.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/subsystems.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
#include "utils/wait_event.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void XLOGShmemRequest (void *arg)
 
static void XLOGShmemInit (void *arg)
 
static void XLOGShmemAttach (void *arg)
 
static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
static void XLogChecksums (uint32 new_type)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, uint64 fpi_bytes, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsNeedWrite (void)
 
bool DataChecksumsOff (void)
 
bool DataChecksumsOn (void)
 
bool DataChecksumsInProgressOn (void)
 
bool DataChecksumsNeedVerify (void)
 
void SetDataChecksumsOnInProgress (void)
 
void SetDataChecksumsOn (void)
 
void SetDataChecksumsOff (void)
 
void InitLocalDataChecksumState (void)
 
void SetLocalDataChecksumState (uint32 data_checksum_version)
 
const char * show_data_checksums (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
const char * show_effective_wal_level (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static const char * CheckpointFlagsString (int flags)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint, int flags)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
XLogRecPtr XLogAssignLSN (void)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void xlog2_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogInsertEndRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
void ResetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * LocalControlFile = NULL
 
static ControlFileData * ControlFile = NULL
 
const ShmemCallbacks XLOGShmemCallbacks
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static ChecksumStateType LocalDataChecksumState = 0
 
int data_checksums = 0
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 118 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 623 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 600 of file xlog.c.

601 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
static int fb(int x)

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 604 of file xlog.c.

605 : ((idx) + 1))
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:263

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 157 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
do { \
} while (0)
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
pg_atomic_uint64 logWriteResult
Definition xlog.c:478
pg_atomic_uint64 logFlushResult
Definition xlog.c:479
static XLogCtlData * XLogCtl
Definition xlog.c:575

Definition at line 640 of file xlog.c.

641 { \
645 } while (0)

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 617 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 611 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

◆ XLogCtlData

◆ XLogCtlInsert

◆ XLogwrtResult

◆ XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 568 of file xlog.c.

569{
WalInsertClass
Definition xlog.c:569
@ WALINSERT_SPECIAL_SWITCH
Definition xlog.c:571
@ WALINSERT_NORMAL
Definition xlog.c:570
@ WALINSERT_SPECIAL_CHECKPOINT
Definition xlog.c:572

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 2026 of file xlog.c.

2027{
2028 int nextidx;
2034 int npages pg_attribute_unused() = 0;
2035
2037
2038 /*
2039 * Now that we have the lock, check if someone initialized the page
2040 * already.
2041 */
2043 {
2045
2046 /*
2047 * Get ending-offset of the buffer page we need to replace (this may
2048 * be zero if the buffer hasn't been used yet). Fall through if it's
2049 * already written out.
2050 */
2053 {
2054 /*
2055 * Nope, got work to do. If we just want to pre-initialize as much
2056 * as we can without flushing, give up now.
2057 */
2058 if (opportunistic)
2059 break;
2060
2061 /* Advance shared memory write request position */
2066
2067 /*
2068 * Acquire an up-to-date LogwrtResult value and see if we still
2069 * need to write it or if someone else already did.
2070 */
2073 {
2074 /*
2075 * Must acquire write lock. Release WALBufMappingLock first,
2076 * to make sure that all insertions that we need to wait for
2077 * can finish (up to this same position). Otherwise we risk
2078 * deadlock.
2079 */
2081
2083
2085
2088 {
2089 /* OK, someone wrote it already */
2091 }
2092 else
2093 {
2094 /* Have to write it ourselves */
2096 WriteRqst.Write = OldPageRqstPtr;
2098 XLogWrite(WriteRqst, tli, false);
2102
2103 /*
2104 * Required for the flush of pending stats WAL data, per
2105 * update of pgWalUsage.
2106 */
2107 pgstat_report_fixed = true;
2108 }
2109 /* Re-acquire WALBufMappingLock and retry */
2111 continue;
2112 }
2113 }
2114
2115 /*
2116 * Now the next buffer slot is free and we can set it up to be the
2117 * next output page.
2118 */
2121
2123
2125
2126 /*
2127 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2128 * before initializing. Otherwise, the old page may be partially
2129 * zeroed but look valid.
2130 */
2133
2134 /*
2135 * Be sure to re-zero the buffer so that bytes beyond what we've
2136 * written will look like zeroes and not valid XLOG records...
2137 */
2139
2140 /*
2141 * Fill the new page's header
2142 */
2143 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2144
2145 /* NewPage->xlp_info = 0; */ /* done by memset */
2146 NewPage->xlp_tli = tli;
2147 NewPage->xlp_pageaddr = NewPageBeginPtr;
2148
2149 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2150
2151 /*
2152 * If first page of an XLOG segment file, make it a long header.
2153 */
2154 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2155 {
2157
2159 NewLongPage->xlp_seg_size = wal_segment_size;
2160 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2161 NewPage->xlp_info |= XLP_LONG_HEADER;
2162 }
2163
2164 /*
2165 * Make sure the initialization of the page becomes visible to others
2166 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2167 * holding a lock.
2168 */
2170
2173
2174 npages++;
2175 }
2177
2178#ifdef WAL_DEBUG
2179 if (XLOG_DEBUG && npages > 0)
2180 {
2181 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2183 }
2184#endif
2185}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
#define pg_write_barrier()
Definition atomics.h:155
#define pg_attribute_unused()
Definition c.h:149
#define Assert(condition)
Definition c.h:943
#define MemSet(start, val, len)
Definition c.h:1107
size_t Size
Definition c.h:689
#define DEBUG1
Definition elog.h:31
#define elog(elevel,...)
Definition elog.h:228
WalUsage pgWalUsage
Definition instrument.c:27
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_EXCLUSIVE
Definition lwlock.h:104
bool pgstat_report_fixed
Definition pgstat.c:219
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
uint64 system_identifier
Definition pg_control.h:118
int64 wal_buffers_full
Definition instrument.h:57
XLogwrtRqst LogwrtRqst
Definition xlog.c:462
slock_t info_lck
Definition xlog.c:562
XLogRecPtr InitializedUpTo
Definition xlog.c:491
char * pages
Definition xlog.c:498
pg_atomic_uint64 * xlblocks
Definition xlog.c:499
XLogRecPtr Write
Definition xlog.c:334
XLogRecPtr Write
Definition xlog.c:328
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition xlog.c:1545
#define RefreshXLogWriteResult(_target)
Definition xlog.c:640
int wal_segment_size
Definition xlog.c:150
static XLogwrtResult LogwrtResult
Definition xlog.c:632
#define XLogRecPtrToBufIdx(recptr)
Definition xlog.c:611
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition xlog.c:2325
static ControlFileData * ControlFile
Definition xlog.c:584
XLogLongPageHeaderData * XLogLongPageHeader
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
#define XLP_LONG_HEADER
#define XLOG_PAGE_MAGIC
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, fb(), XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_LONG_HEADER, and XLogLongPageHeaderData::xlp_sysid.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void * extra 
)

Definition at line 2228 of file xlog.c.

2229{
2232}
double CheckPointCompletionTarget
#define newval
static void CalculateCheckpointSegments(void)
Definition xlog.c:2192

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void * extra 
)

Definition at line 2221 of file xlog.c.

2222{
2225}
int max_wal_size_mb
Definition xlog.c:121

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char * newval,
void * extra 
)

Definition at line 5163 of file xlog.c.

5164{
5165 /*
5166 * If some checks were deferred, it's possible that the checks will fail
5167 * later during InitializeWalConsistencyChecking(). But in that case, the
5168 * postmaster will exit anyway, so it's safe to proceed with the
5169 * assignment.
5170 *
5171 * Any built-in resource managers specified are assigned immediately,
5172 * which affects WAL created before shared_preload_libraries are
5173 * processed. Any custom resource managers specified won't be assigned
5174 * until after shared_preload_libraries are processed, but that's OK
5175 * because WAL for a custom resource manager can't be written before the
5176 * module is loaded anyway.
5177 */
5179}
bool * wal_consistency_checking
Definition xlog.c:133

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void * extra 
)

Definition at line 9316 of file xlog.c.

9317{
9319 {
9320 /*
9321 * To ensure that no blocks escape unsynced, force an fsync on the
9322 * currently open log segment (if any). Also, if the open flag is
9323 * changing, close the log file so it will be reopened (with new flag
9324 * bit) at next use.
9325 */
9326 if (openLogFile >= 0)
9327 {
9329 if (pg_fsync(openLogFile) != 0)
9330 {
9331 char xlogfname[MAXFNAMELEN];
9332 int save_errno;
9333
9334 save_errno = errno;
9337 errno = save_errno;
9338 ereport(PANIC,
9340 errmsg("could not fsync file \"%s\": %m", xlogfname)));
9341 }
9342
9345 XLogFileClose();
9346 }
9347 }
9348}
int errcode_for_file_access(void)
Definition elog.c:898
#define PANIC
Definition elog.h:44
#define ereport(elevel,...)
Definition elog.h:152
int pg_fsync(int fd)
Definition fd.c:390
static char * errmsg
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:67
static void pgstat_report_wait_end(void)
Definition wait_event.h:83
static int openLogFile
Definition xlog.c:655
static int get_sync_bit(int method)
Definition xlog.c:9268
int wal_sync_method
Definition xlog.c:137
static TimeLineID openLogTLI
Definition xlog.c:657
static void XLogFileClose(void)
Definition xlog.c:3690
static XLogSegNo openLogSegNo
Definition xlog.c:656
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg, fb(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5455 of file xlog.c.

5456{
5457 CheckPoint checkPoint;
5458 PGAlignedXLogBlock buffer;
5459 XLogPageHeader page;
5461 XLogRecord *record;
5462 char *recptr;
5463 uint64 sysidentifier;
5464 struct timeval tv;
5465 pg_crc32c crc;
5466
5467 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5469
5470 /*
5471 * Select a hopefully-unique system identifier code for this installation.
5472 * We use the result of gettimeofday(), including the fractional seconds
5473 * field, as being about as unique as we can easily get. (Think not to
5474 * use random(), since it hasn't been seeded and there's no portable way
5475 * to seed it other than the system clock value...) The upper half of the
5476 * uint64 value is just the tv_sec part, while the lower half contains the
5477 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5478 * PID for a little extra uniqueness. A person knowing this encoding can
5479 * determine the initialization time of the installation, which could
5480 * perhaps be useful sometimes.
5481 */
5482 gettimeofday(&tv, NULL);
5483 sysidentifier = ((uint64) tv.tv_sec) << 32;
5484 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5485 sysidentifier |= getpid() & 0xFFF;
5486
5487 memset(&buffer, 0, sizeof buffer);
5488 page = (XLogPageHeader) &buffer;
5489
5490 /*
5491 * Set up information for the initial checkpoint record
5492 *
5493 * The initial checkpoint record is written to the beginning of the WAL
5494 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5495 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5496 */
5500 checkPoint.fullPageWrites = fullPageWrites;
5502 checkPoint.wal_level = wal_level;
5503 checkPoint.nextXid =
5505 checkPoint.nextOid = FirstGenbkiObjectId;
5506 checkPoint.nextMulti = FirstMultiXactId;
5507 checkPoint.nextMultiOffset = 1;
5509 checkPoint.oldestXidDB = Template1DbOid;
5510 checkPoint.oldestMulti = FirstMultiXactId;
5511 checkPoint.oldestMultiDB = Template1DbOid;
5514 checkPoint.time = (pg_time_t) time(NULL);
5516 checkPoint.dataChecksumState = data_checksum_version;
5517
5518 TransamVariables->nextXid = checkPoint.nextXid;
5519 TransamVariables->nextOid = checkPoint.nextOid;
5521 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5522 AdvanceOldestClogXid(checkPoint.oldestXid);
5523 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5524 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5526
5527 /* Set up the XLOG page header */
5528 page->xlp_magic = XLOG_PAGE_MAGIC;
5529 page->xlp_info = XLP_LONG_HEADER;
5533 longpage->xlp_sysid = sysidentifier;
5534 longpage->xlp_seg_size = wal_segment_size;
5535 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5536
5537 /* Insert the initial checkpoint record */
5538 recptr = ((char *) page + SizeOfXLogLongPHD);
5539 record = (XLogRecord *) recptr;
5540 record->xl_prev = InvalidXLogRecPtr;
5541 record->xl_xid = InvalidTransactionId;
5542 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5544 record->xl_rmid = RM_XLOG_ID;
5546 /* fill the XLogRecordDataHeaderShort struct */
5547 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5548 *(recptr++) = sizeof(checkPoint);
5549 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5550 recptr += sizeof(checkPoint);
5551 Assert(recptr - (char *) record == record->xl_tot_len);
5552
5554 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5555 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5556 FIN_CRC32C(crc);
5557 record->xl_crc = crc;
5558
5559 /* Create first XLOG segment file */
5562
5563 /*
5564 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5565 * close the file again in a moment.
5566 */
5567
5568 /* Write the first page with the initial record */
5569 errno = 0;
5571 if (write(openLogFile, &buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5572 {
5573 /* if write didn't set errno, assume problem is no disk space */
5574 if (errno == 0)
5575 errno = ENOSPC;
5576 ereport(PANIC,
5578 errmsg("could not write bootstrap write-ahead log file: %m")));
5579 }
5581
5583 if (pg_fsync(openLogFile) != 0)
5584 ereport(PANIC,
5586 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5588
5589 if (close(openLogFile) != 0)
5590 ereport(PANIC,
5592 errmsg("could not close bootstrap write-ahead log file: %m")));
5593
5594 openLogFile = -1;
5595
5596 /* Now create pg_control */
5597 InitControlFile(sysidentifier, data_checksum_version);
5598 ControlFile->time = checkPoint.time;
5599 ControlFile->checkPoint = checkPoint.redo;
5600 ControlFile->checkPointCopy = checkPoint;
5601
5602 /* some additional ControlFile fields are set in WriteControlFile() */
5604
5605 /* Bootstrap the commit log, too */
5606 BootStrapCLOG();
5610
5611 /*
5612 * Force control file to be read - in contrast to normal processing we'd
5613 * otherwise never run the checks and GUC related initializations therein.
5614 */
5616}
uint64_t uint64
Definition c.h:625
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void BootStrapCLOG(void)
Definition clog.c:851
void BootStrapCommitTs(void)
Definition commit_ts.c:599
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition commit_ts.c:892
#define close(a)
Definition win32.h:12
#define write(a, b, c)
Definition win32.h:14
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:2063
void BootStrapMultiXact(void)
Definition multixact.c:1863
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2085
#define FirstMultiXactId
Definition multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:72
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:177
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:182
return crc
int64 pg_time_t
Definition pgtime.h:23
Oid oldestMultiDB
Definition pg_control.h:52
MultiXactId oldestMulti
Definition pg_control.h:51
MultiXactOffset nextMultiOffset
Definition pg_control.h:48
TransactionId newestCommitTsXid
Definition pg_control.h:56
TransactionId oldestXid
Definition pg_control.h:49
TimeLineID PrevTimeLineID
Definition pg_control.h:40
TimeLineID ThisTimeLineID
Definition pg_control.h:39
TransactionId oldestActiveXid
Definition pg_control.h:65
bool fullPageWrites
Definition pg_control.h:42
MultiXactId nextMulti
Definition pg_control.h:47
FullTransactionId nextXid
Definition pg_control.h:45
TransactionId oldestCommitTsXid
Definition pg_control.h:54
pg_time_t time
Definition pg_control.h:53
int wal_level
Definition pg_control.h:43
bool logicalDecodingEnabled
Definition pg_control.h:44
uint32 dataChecksumState
Definition pg_control.h:68
XLogRecPtr redo
Definition pg_control.h:37
Oid oldestXidDB
Definition pg_control.h:50
CheckPoint checkPointCopy
Definition pg_control.h:143
pg_time_t time
Definition pg_control.h:140
XLogRecPtr checkPoint
Definition pg_control.h:141
FullTransactionId nextXid
Definition transam.h:220
XLogRecPtr xlp_pageaddr
XLogRecPtr xl_prev
Definition xlogrecord.h:45
uint8 xl_info
Definition xlogrecord.h:46
uint32 xl_tot_len
Definition xlogrecord.h:43
TransactionId xl_xid
Definition xlogrecord.h:44
RmgrId xl_rmid
Definition xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition subtrans.c:288
#define InvalidTransactionId
Definition transam.h:31
#define FirstGenbkiObjectId
Definition transam.h:195
#define FirstNormalTransactionId
Definition transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition varsup.c:367
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition varsup.c:350
TransamVariablesData * TransamVariables
Definition varsup.c:37
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition xlog.c:3431
bool fullPageWrites
Definition xlog.c:129
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition xlog.c:4255
void SetInstallXLogFileSegmentActive(void)
Definition xlog.c:10156
int wal_level
Definition xlog.c:138
static void WriteControlFile(void)
Definition xlog.c:4296
#define BootstrapTimeLineID
Definition xlog.c:118
static void ReadControlFile(void)
Definition xlog.c:4406
@ WAL_LEVEL_LOGICAL
Definition xlog.h:78
#define SizeOfXLogLongPHD
#define SizeOfXLogRecordDataHeaderShort
Definition xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition xlogrecord.h:241
#define SizeOfXLogRecord
Definition xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, CheckPoint::dataChecksumState, ereport, errcode_for_file_access(), errmsg, fb(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, InvalidXLogRecPtr, CheckPoint::logicalDecodingEnabled, memcpy(), MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, wal_level, CheckPoint::wal_level, WAL_LEVEL_LOGICAL, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogPageHeaderData::xlp_tli, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2192 of file xlog.c.

2193{
2194 double target;
2195
2196 /*-------
2197 * Calculate the distance at which to trigger a checkpoint, to avoid
2198 * exceeding max_wal_size_mb. This is based on two assumptions:
2199 *
2200 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2201 * WAL for two checkpoint cycles to allow us to recover from the
2202 * secondary checkpoint if the first checkpoint failed, though we
2203 * only did this on the primary anyway, not on standby. Keeping just
2204 * one checkpoint simplifies processing and reduces disk space in
2205 * many smaller databases.)
2206 * b) during checkpoint, we consume checkpoint_completion_target *
2207 * number of segments consumed between checkpoints.
2208 *-------
2209 */
2212
2213 /* round down */
2214 CheckPointSegments = (int) target;
2215
2216 if (CheckPointSegments < 1)
2218}
#define ConvertToXSegs(x, segsize)
Definition xlog.c:623
int CheckPointSegments
Definition xlog.c:163

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, fb(), max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

/*
 * check_wal_buffers
 *
 * GUC-style check hook for the WAL buffer count (XLOGbuffers).
 *
 * newval: in/out proposed value; may be rewritten by this function.
 * extra, source: not used here.
 *
 * Always returns true.  A value of -1 requests auto-tuning; any other value
 * below 4 is silently raised to 4.
 *
 * Defined at line 5041 of xlog.c.
 *
 * NOTE(review): the auto-tune assignment was missing from the extracted
 * listing, and the prototype had lost the '*' on newval; both restored from
 * the visible code and reference list.  Verify against xlog.c.
 */
bool
check_wal_buffers(int *newval, void **extra, GucSource source)
{
	/*
	 * -1 indicates a request for auto-tune.
	 */
	if (*newval == -1)
	{
		/*
		 * If we haven't yet changed the boot_val default of -1, just let it
		 * be.  We'll fix it when XLOGShmemRequest is called.
		 */
		if (XLOGbuffers == -1)
			return true;

		/* Otherwise, substitute the auto-tune value */
		*newval = XLOGChooseNumBuffers();
	}

	/*
	 * We clamp manually-set values to at least 4 blocks.  Prior to PostgreSQL
	 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
	 * the case, we just silently treat such values as a request for the
	 * minimum.  (We could throw an error instead, but that doesn't seem very
	 * helpful.)
	 */
	if (*newval < 4)
		*newval = 4;

	return true;
}
static int XLOGChooseNumBuffers(void)
Definition xlog.c:5025
int XLOGbuffers
Definition xlog.c:124

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

/*
 * check_wal_consistency_checking
 *
 * GUC-style check hook that parses a comma-separated list of resource
 * manager names (or "all") into a per-rmgr boolean array.
 *
 * newval: the proposed string value (a private copy is parsed, the original
 *		   is not modified).
 * extra:  on success receives a guc_malloc'd array of RM_MAX_ID + 1 bools,
 *		   true for every resource manager that was selected.
 * source: not used here.
 *
 * Returns false on list syntax errors, on an unrecognized name once
 * process_shared_preload_libraries_done is set, or if guc_malloc fails.
 * Only resource managers that exist and provide an rm_mask callback can be
 * selected.
 *
 * Defined at line 5076 of xlog.c.
 *
 * NOTE(review): the pstrdup/SplitIdentifierString calls, the
 * pfree/list_free cleanup pairs and the deferred-check branch were missing
 * from the extracted listing and have been restored from the entry's
 * reference list; verify against xlog.c.
 */
bool
check_wal_consistency_checking(char **newval, void **extra, GucSource source)
{
	char	   *rawstring;
	List	   *elemlist;
	ListCell   *l;
	bool		newwalconsistency[RM_MAX_ID + 1];

	/* Initialize the array */
	MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));

	/* Need a modifiable copy of string */
	rawstring = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawstring, ',', &elemlist))
	{
		/* syntax error in list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawstring);
		list_free(elemlist);
		return false;
	}

	foreach(l, elemlist)
	{
		char	   *tok = (char *) lfirst(l);
		int			rmid;

		/* Check for 'all'. */
		if (pg_strcasecmp(tok, "all") == 0)
		{
			for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
				if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
					newwalconsistency[rmid] = true;
		}
		else
		{
			/* Check if the token matches any known resource manager. */
			bool		found = false;

			for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
			{
				if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
					pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
				{
					newwalconsistency[rmid] = true;
					found = true;
					break;
				}
			}
			if (!found)
			{
				/*
				 * During startup, it might be a not-yet-loaded custom
				 * resource manager.  Defer checking until
				 * InitializeWalConsistencyChecking().
				 */
				if (!process_shared_preload_libraries_done)
				{
					check_wal_consistency_checking_deferred = true;
				}
				else
				{
					GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
					pfree(rawstring);
					list_free(elemlist);
					return false;
				}
			}
		}
	}

	/* the parsed list and its backing string are no longer needed */
	pfree(rawstring);
	list_free(elemlist);

	/* assign new value */
	*extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
	if (!*extra)
		return false;
	memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
	return true;
}
#define LOG
Definition elog.h:32
void * guc_malloc(int elevel, size_t size)
Definition guc.c:637
#define GUC_check_errdetail
Definition guc.h:507
void list_free(List *list)
Definition list.c:1546
char * pstrdup(const char *in)
Definition mcxt.c:1910
void pfree(void *pointer)
Definition mcxt.c:1619
bool process_shared_preload_libraries_done
Definition miscinit.c:1789
#define lfirst(lc)
Definition pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
#define RM_MAX_ID
Definition rmgr.h:33
Definition pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition varlena.c:2870
static bool check_wal_consistency_checking_deferred
Definition xlog.c:173
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, fb(), GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, memcpy(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

/*
 * check_wal_segment_size
 *
 * GUC-style check hook: accept the proposed WAL segment size only if
 * IsValidWalSegSize() approves it.
 *
 * newval: the proposed value (read only).
 * extra, source: not used here.
 *
 * Returns true if the value is acceptable; otherwise sets an error detail
 * message and returns false.
 *
 * Defined at line 2235 of xlog.c.
 *
 * NOTE(review): the guarding condition was missing from the extracted
 * listing (which would reject every value) and the prototype had lost the
 * '*' on newval; restored from the reference list.  Verify against xlog.c.
 */
bool
check_wal_segment_size(int *newval, void **extra, GucSource source)
{
	if (!IsValidWalSegSize(*newval))
	{
		GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
		return false;
	}

	return true;
}
#define IsValidWalSegSize(size)

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckpointFlagsString()

static const char * CheckpointFlagsString ( int  flags)
static

Definition at line 7148 of file xlog.c.

7149{
7150 static char buf[128];
7151
7152 snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
7153 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
7154 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
7155 (flags & CHECKPOINT_FAST) ? " fast" : "",
7156 (flags & CHECKPOINT_FORCE) ? " force" : "",
7157 (flags & CHECKPOINT_WAIT) ? " wait" : "",
7158 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
7159 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
7160 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "");
7161
7162 return buf;
7163}
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define snprintf
Definition port.h:261
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:155
#define CHECKPOINT_CAUSE_XLOG
Definition xlog.h:160
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:152
#define CHECKPOINT_CAUSE_TIME
Definition xlog.h:161
#define CHECKPOINT_FORCE
Definition xlog.h:154
#define CHECKPOINT_WAIT
Definition xlog.h:157
#define CHECKPOINT_FAST
Definition xlog.h:153
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:151

References buf, CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, and snprintf.

Referenced by LogCheckpointEnd(), and LogCheckpointStart().

◆ CheckPointGuts()

/*
 * CheckPointGuts
 *
 * Run the per-subsystem checkpoint routines on behalf of CreateCheckPoint()
 * and CreateRestartPoint(), recording write/sync timestamps in
 * CheckpointStats along the way.
 *
 * checkPointRedo: passed through to CheckPointTwoPhase().
 * flags:		   checkpoint flag bits; passed to CheckPointBuffers(), and the
 *				   CHECKPOINT_IS_SHUTDOWN bit is passed to
 *				   CheckPointReplicationSlots().
 *
 * Defined at line 8046 of xlog.c.
 *
 * NOTE(review): every statement except CheckPointBuffers(flags) was missing
 * from the extracted listing.  The calls below come from the entry's
 * reference list and the three surviving comments; their relative order and
 * the TRACE_POSTGRESQL_* probe lines are reconstructed and must be verified
 * against xlog.c.
 */
static void
CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
{
	CheckPointRelationMap();
	CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
	CheckPointSnapBuild();
	CheckPointLogicalRewriteHeap();
	CheckPointReplicationOrigin();

	/* Write out all dirty data in SLRUs and the main buffer pool */
	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
	CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
	CheckPointCLOG();
	CheckPointCommitTs();
	CheckPointSUBTRANS();
	CheckPointMultiXact();
	CheckPointPredicate();
	CheckPointBuffers(flags);

	/* Perform all queued up fsyncs */
	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
	CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
	ProcessSyncRequests();
	CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();

	/* We deliberately delay 2PC checkpointing as long as possible */
	CheckPointTwoPhase(checkPointRedo);
}
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1649
void CheckPointBuffers(int flags)
Definition bufmgr.c:4432
void CheckPointCLOG(void)
Definition clog.c:922
void CheckPointCommitTs(void)
Definition commit_ts.c:799
void CheckPointMultiXact(void)
Definition multixact.c:2039
void CheckPointReplicationOrigin(void)
Definition origin.c:614
void CheckPointPredicate(void)
Definition predicate.c:1022
void CheckPointRelationMap(void)
Definition relmapper.c:612
void CheckPointLogicalRewriteHeap(void)
void CheckPointReplicationSlots(bool is_shutdown)
Definition slot.c:2324
void CheckPointSnapBuild(void)
Definition snapbuild.c:2030
TimestampTz ckpt_write_t
Definition xlog.h:174
TimestampTz ckpt_sync_end_t
Definition xlog.h:176
TimestampTz ckpt_sync_t
Definition xlog.h:175
void CheckPointSUBTRANS(void)
Definition subtrans.c:348
void ProcessSyncRequests(void)
Definition sync.c:287
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1828
CheckpointStatsData CheckpointStats
Definition xlog.c:216

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, fb(), GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5803 of file xlog.c.

5804{
5805 /*
5806 * For archive recovery, the WAL must be generated with at least 'replica'
5807 * wal_level.
5808 */
5810 {
5811 ereport(FATAL,
5813 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5814 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5815 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5816 }
5817
5818 /*
5819 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5820 * must have at least as many backend slots as the primary.
5821 */
5823 {
5824 /* We ignore autovacuum_worker_slots when we make this test. */
5825 RecoveryRequiresIntParameter("max_connections",
5828 RecoveryRequiresIntParameter("max_worker_processes",
5831 RecoveryRequiresIntParameter("max_wal_senders",
5834 RecoveryRequiresIntParameter("max_prepared_transactions",
5837 RecoveryRequiresIntParameter("max_locks_per_transaction",
5840 }
5841}
int errcode(int sqlerrcode)
Definition elog.c:875
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define FATAL
Definition elog.h:42
int MaxConnections
Definition globals.c:145
int max_worker_processes
Definition globals.c:146
int max_locks_per_xact
Definition lock.c:56
int max_prepared_xacts
Definition twophase.c:118
int max_wal_senders
Definition walsender.c:141
bool EnableHotStandby
Definition xlog.c:128
@ WAL_LEVEL_MINIMAL
Definition xlog.h:76
bool ArchiveRecoveryRequested
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg, FATAL, fb(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

/*
 * CheckXLogRemoved
 *
 * Raise an ERROR if WAL segment "segno" is at or before
 * XLogCtl->lastRemovedSegNo, i.e. it has already been removed.
 *
 * segno: segment number the caller wants to read.
 * tli:	  timeline used only to build the file name for the error message.
 *
 * errno is saved on entry and restored before both the error report and a
 * normal return, so the caller's errno is left unchanged.
 *
 * Defined at line 3778 of xlog.c.
 *
 * NOTE(review): the spinlock acquire/release around the shared read, the
 * XLogFileName() call and the errcode line were missing from the extracted
 * listing and have been restored from the entry's reference list; verify
 * against xlog.c.
 */
void
CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
{
	int			save_errno = errno;
	XLogSegNo	lastRemovedSegNo;

	/* take a consistent snapshot of the shared value */
	SpinLockAcquire(&XLogCtl->info_lck);
	lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
	SpinLockRelease(&XLogCtl->info_lck);

	if (segno <= lastRemovedSegNo)
	{
		char		filename[MAXFNAMELEN];

		XLogFileName(filename, tli, segno, wal_segment_size);
		/* restore errno so errcode_for_file_access() sees the caller's value */
		errno = save_errno;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("requested WAL segment %s has already been removed",
						filename)));
	}
	errno = save_errno;
}
#define ERROR
Definition elog.h:40
static char * filename
Definition pg_dumpall.c:133
XLogSegNo lastRemovedSegNo
Definition xlog.c:467
uint64 XLogSegNo
Definition xlogdefs.h:52

References ereport, errcode_for_file_access(), errmsg, ERROR, fb(), filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

/*
 * CleanupAfterArchiveRecovery
 *
 * Housekeeping performed by StartupXLOG() after archive recovery has
 * switched to a new timeline: run recovery_end_command, remove segments of
 * the old timeline that are not part of the new timeline's history, and
 * rename the last partial segment of the old timeline to "<name>.partial"
 * so that it can be archived.
 *
 * EndOfLogTLI: timeline of the last segment that was replayed.
 * EndOfLog:	end-of-WAL position where the timeline switch happened.
 * newTLI:		the newly selected timeline.
 *
 * Defined at line 5707 of xlog.c.
 *
 * NOTE(review): the recovery_end_command guard, RemoveNonParentXlogFiles(),
 * the partial-segment condition, two local declarations and the
 * rename/notify calls were missing from the extracted listing and have been
 * restored from the entry's reference list; verify against xlog.c.
 */
static void
CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
							TimeLineID newTLI)
{
	/*
	 * Execute the recovery_end_command, if any.
	 */
	if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
		ExecuteRecoveryCommand(recoveryEndCommand,
							   "recovery_end_command",
							   true,
							   WAIT_EVENT_RECOVERY_END_COMMAND);

	/*
	 * We switched to a new timeline. Clean up segments on the old timeline.
	 *
	 * If there are any higher-numbered segments on the old timeline, remove
	 * them. They might contain valid WAL, but they might also be
	 * pre-allocated files containing garbage. In any case, they are not part
	 * of the new timeline's history so we don't need them.
	 */
	RemoveNonParentXlogFiles(EndOfLog, newTLI);

	/*
	 * If the switch happened in the middle of a segment, what to do with the
	 * last, partial segment on the old timeline? If we don't archive it, and
	 * the server that created the WAL never archives it either (e.g. because
	 * it was hit by a meteor), it will never make it to the archive. That's
	 * OK from our point of view, because the new segment that we created with
	 * the new TLI contains all the WAL from the old timeline up to the switch
	 * point. But if you later try to do PITR to the "missing" WAL on the old
	 * timeline, recovery won't find it in the archive. It's physically
	 * present in the new file with new TLI, but recovery won't look there
	 * when it's recovering to the older timeline. On the other hand, if we
	 * archive the partial segment, and the original server on that timeline
	 * is still running and archives the completed version of the same segment
	 * later, it will fail. (We used to do that in 9.4 and below, and it
	 * caused such problems).
	 *
	 * As a compromise, we rename the last segment with the .partial suffix,
	 * and archive it. Archive recovery will never try to read .partial
	 * segments, so they will normally go unused. But in the odd PITR case,
	 * the administrator can copy them manually to the pg_wal directory
	 * (removing the suffix). They can be useful in debugging, too.
	 *
	 * If a .done or .ready file already exists for the old timeline, however,
	 * we had already determined that the segment is complete, so we can let
	 * it be archived normally. (In particular, if it was restored from the
	 * archive to begin with, it's expected to have a .done file).
	 */
	if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
		XLogArchivingActive())
	{
		char		origfname[MAXFNAMELEN];
		XLogSegNo	endLogSegNo;

		XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
		XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);

		if (!XLogArchiveIsReadyOrDone(origfname))
		{
			char		origpath[MAXPGPATH];
			char		partialfname[MAXFNAMELEN];
			char		partialpath[MAXPGPATH];

			/*
			 * If we're summarizing WAL, we can't rename the partial file
			 * until the summarizer finishes with it, else it will fail.
			 */
			if (summarize_wal)
				WaitForWalSummarization(EndOfLog);

			XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
			snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
			snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);

			/*
			 * Make sure there's no .done or .ready file for the .partial
			 * file.
			 */
			XLogArchiveCleanup(partialfname);

			durable_rename(origpath, partialpath, ERROR);
			XLogArchiveNotify(partialfname);
		}
	}
}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
#define MAXPGPATH
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition xlog.c:3991
#define XLogArchivingActive()
Definition xlog.h:102
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
void XLogArchiveNotify(const char *xlog)
void XLogArchiveCleanup(const char *xlog)
char * recoveryEndCommand

References durable_rename(), ERROR, ExecuteRecoveryCommand(), fb(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4212 of file xlog.c.

4213{
4214 DIR *xldir;
4215 struct dirent *xlde;
4216 char path[MAXPGPATH + sizeof(XLOGDIR)];
4217
4219
4220 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4221 {
4222 if (IsBackupHistoryFileName(xlde->d_name))
4223 {
4224 if (XLogArchiveCheckDone(xlde->d_name))
4225 {
4226 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4227 xlde->d_name);
4228 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4229 unlink(path);
4230 XLogArchiveCleanup(xlde->d_name);
4231 }
4232 }
4233 }
4234
4235 FreeDir(xldir);
4236}
#define DEBUG2
Definition elog.h:30
int FreeDir(DIR *dir)
Definition fd.c:3009
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
Definition dirent.c:26
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

/*
 * CopyXLogRecordToWAL
 *
 * Copy the chain of record fragments "rdata" into the WAL buffers, starting
 * at StartPos, continuing across page boundaries as needed.
 *
 * write_len:	total number of record bytes expected to be copied.
 * isLogSwitch: true if the record is an xlog-switch record, in which case
 *				the rest of the segment is consumed as well.
 * rdata:		linked list of data fragments (data/len/next).
 * StartPos:	position at which the record starts.
 * EndPos:		position at which the reserved space ends.
 * tli:			timeline, passed through to GetXLogBuffer().
 *
 * PANICs if the position reached after copying does not equal EndPos.
 *
 * Called from XLogInsertRecord().
 *
 * Defined at line 1266 of xlog.c.
 *
 * NOTE(review): the CurrPos/pagehdr declarations, the long/short page
 * header test, the CurrPos advances past the page header, three Asserts,
 * the log-switch condition, the per-page advance, the MAXALIGN64 alignment
 * and the errcode line were missing from the extracted listing and have
 * been restored from the entry's reference list; verify against xlog.c.
 */
static void
CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
					XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
{
	char	   *currpos;
	int			freespace;
	int			written;
	XLogRecPtr	CurrPos;
	XLogPageHeader pagehdr;

	/*
	 * Get a pointer to the right place in the right WAL buffer to start
	 * inserting to.
	 */
	CurrPos = StartPos;
	currpos = GetXLogBuffer(CurrPos, tli);
	freespace = INSERT_FREESPACE(CurrPos);

	/*
	 * there should be enough space for at least the first field (xl_tot_len)
	 * on this page.
	 */
	Assert(freespace >= sizeof(uint32));

	/* Copy record data */
	written = 0;
	while (rdata != NULL)
	{
		const char *rdata_data = rdata->data;
		int			rdata_len = rdata->len;

		while (rdata_len > freespace)
		{
			/*
			 * Write what fits on this page, and continue on the next page.
			 */
			Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
			memcpy(currpos, rdata_data, freespace);
			rdata_data += freespace;
			rdata_len -= freespace;
			written += freespace;
			CurrPos += freespace;

			/*
			 * Get pointer to beginning of next page, and set the xlp_rem_len
			 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
			 *
			 * It's safe to set the contrecord flag and xlp_rem_len without a
			 * lock on the page. All the other flags were already set when the
			 * page was initialized, in AdvanceXLInsertBuffer, and we're the
			 * only backend that needs to set the contrecord flag.
			 */
			currpos = GetXLogBuffer(CurrPos, tli);
			pagehdr = (XLogPageHeader) currpos;
			pagehdr->xlp_rem_len = write_len - written;
			pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;

			/* skip over the page header */
			if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
			{
				/* first page of a segment carries the long header */
				CurrPos += SizeOfXLogLongPHD;
				currpos += SizeOfXLogLongPHD;
			}
			else
			{
				CurrPos += SizeOfXLogShortPHD;
				currpos += SizeOfXLogShortPHD;
			}
			freespace = INSERT_FREESPACE(CurrPos);
		}

		Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
		memcpy(currpos, rdata_data, rdata_len);
		currpos += rdata_len;
		CurrPos += rdata_len;
		freespace -= rdata_len;
		written += rdata_len;

		rdata = rdata->next;
	}
	Assert(written == write_len);

	/*
	 * If this was an xlog-switch, it's not enough to write the switch record,
	 * we also have to consume all the remaining space in the WAL segment.  We
	 * have already reserved that space, but we need to actually fill it.
	 */
	if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
	{
		/* An xlog-switch record doesn't contain any data besides the header */
		Assert(write_len == SizeOfXLogRecord);

		/* Assert that we did reserve the right amount of space */
		Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);

		/* Use up all the remaining space on the current page */
		CurrPos += freespace;

		/*
		 * Cause all remaining pages in the segment to be flushed, leaving the
		 * XLog position where it should be, at the start of the next segment.
		 * We do this one page at a time, to make sure we don't deadlock
		 * against ourselves if wal_buffers < wal_segment_size.
		 */
		while (CurrPos < EndPos)
		{
			/*
			 * The minimal action to flush the page would be to call
			 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
			 * AdvanceXLInsertBuffer(...).  The page would be left initialized
			 * mostly to zeros, except for the page header (always the short
			 * variant, as this is never a segment's first page).
			 *
			 * The large vistas of zeros are good for compressibility, but the
			 * headers interrupting them every XLOG_BLCKSZ (with values that
			 * differ from page to page) are not.  The effect varies with
			 * compression tool, but bzip2 for instance compresses about an
			 * order of magnitude worse if those headers are left in place.
			 *
			 * Rather than complicating AdvanceXLInsertBuffer itself (which is
			 * called in heavily-loaded circumstances as well as this lightly-
			 * loaded one) with variant behavior, we just use GetXLogBuffer
			 * (which itself calls the two methods we need) to get the pointer
			 * and zero most of the page.  Then we just zero the page header.
			 */
			currpos = GetXLogBuffer(CurrPos, tli);
			MemSet(currpos, 0, SizeOfXLogShortPHD);

			CurrPos += XLOG_BLCKSZ;
		}
	}
	else
	{
		/* Align the end position, so that the next record starts aligned */
		CurrPos = MAXALIGN64(CurrPos);
	}

	if (CurrPos != EndPos)
		ereport(PANIC,
				errcode(ERRCODE_DATA_CORRUPTED),
				errmsg_internal("space reserved for WAL record does not match what was written"));
}
uint32_t uint32
Definition c.h:624
#define MAXALIGN64(LEN)
Definition c.h:921
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define ERRCODE_DATA_CORRUPTED
#define INSERT_FREESPACE(endptr)
Definition xlog.c:600
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition xlog.c:1673
#define XLP_FIRST_IS_CONTRECORD
#define SizeOfXLogShortPHD

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), fb(), GetXLogBuffer(), INSERT_FREESPACE, MAXALIGN64, memcpy(), MemSet, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, and XLP_FIRST_IS_CONTRECORD.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 7397 of file xlog.c.

7398{
/*
 * Perform a checkpoint.
 *
 * flags is a bitmask; CHECKPOINT_END_OF_RECOVERY and CHECKPOINT_FORCE are
 * tested in the lines shown here.
 *
 * Returns false if the checkpoint was skipped because the system is idle,
 * and true once the checkpoint has been completed. Raises ERROR if called
 * while recovery is in progress without CHECKPOINT_END_OF_RECOVERY.
 *
 * NOTE(review): this listing is an extract; statements on source lines
 * whose numbers are skipped below are not shown here.
 */
7399 bool shutdown;
7400 CheckPoint checkPoint;
7404 uint32 freespace;
7408 int nvxids;
7409 int oldXLogAllowed = 0;
7410
7411 /*
7412 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
7413 * issued at a different time.
7414 */
7416 shutdown = true;
7417 else
7418 shutdown = false;
7419
7420 /* sanity check */
7421 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
7422 elog(ERROR, "can't create a checkpoint during recovery");
7423
7424 /*
7425 * Prepare to accumulate statistics.
7426 *
7427 * Note: because it is possible for log_checkpoints to change while a
7428 * checkpoint proceeds, we always accumulate stats, even if
7429 * log_checkpoints is currently off.
7430 */
7433
7434 /*
7435 * Let smgr prepare for checkpoint; this has to happen outside the
7436 * critical section and before we determine the REDO pointer. Note that
7437 * smgr must not do anything that'd have to be undone if we decide no
7438 * checkpoint is needed.
7439 */
7441
7442 /* Run these points outside the critical section. */
7443 INJECTION_POINT("create-checkpoint-initial", NULL);
7444 INJECTION_POINT_LOAD("create-checkpoint-run");
7445
7446 /*
7447 * Use a critical section to force system panic if we have trouble.
7448 */
7450
7451 if (shutdown)
7452 {
7457 }
7458
7459 /* Begin filling in the checkpoint WAL record */
7460 MemSet(&checkPoint, 0, sizeof(checkPoint));
7461 checkPoint.time = (pg_time_t) time(NULL);
7462
7463 /*
7464 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7465 * pointer. This allows us to begin accumulating changes to assemble our
7466 * starting snapshot of locks and transactions.
7467 */
7469 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7470 else
7472
7473 /*
7474 * Get location of last important record before acquiring insert locks (as
7475 * GetLastImportantRecPtr() also locks WAL locks).
7476 */
7478
7479 /*
7480 * If this isn't a shutdown or forced checkpoint, and if there has been no
7481 * WAL activity requiring a checkpoint, skip it. The idea here is to
7482 * avoid inserting duplicate checkpoints when the system is idle.
7483 */
7485 CHECKPOINT_FORCE)) == 0)
7486 {
7488 {
7491 (errmsg_internal("checkpoint skipped because system is idle")));
7492 return false;
7493 }
7494 }
7495
7496 /*
7497 * An end-of-recovery checkpoint is created before anyone is allowed to
7498 * write WAL. To allow us to write the checkpoint record, temporarily
7499 * enable XLogInsertAllowed.
7500 */
7501 if (flags & CHECKPOINT_END_OF_RECOVERY)
7503
7505 if (flags & CHECKPOINT_END_OF_RECOVERY)
7507 else
7508 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7509
7510 /*
7511 * We must block concurrent insertions while examining insert state.
7512 */
7514
7515 checkPoint.fullPageWrites = Insert->fullPageWrites;
7516 checkPoint.wal_level = wal_level;
7517
7518 /*
7519 * Get the current data_checksum_version value from xlogctl, valid at the
7520 * time of the checkpoint.
7521 */
7525
7526 if (shutdown)
7527 {
7529
7530 /*
7531 * Compute new REDO record ptr = location of next XLOG record.
7532 *
7533 * Since this is a shutdown checkpoint, there can't be any concurrent
7534 * WAL insertion.
7535 */
7536 freespace = INSERT_FREESPACE(curInsert);
7537 if (freespace == 0)
7538 {
7541 else
7543 }
7544 checkPoint.redo = curInsert;
7545
7546 /*
7547 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7548 * this must be done while holding all the insertion locks.
7549 *
7550 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7551 * left pointing past where it really needs to point. This is okay;
7552 * the only consequence is that XLogInsert might back up whole buffers
7553 * that it didn't really need to. We can't postpone advancing
7554 * RedoRecPtr because XLogInserts that happen while we are dumping
7555 * buffers must assume that their buffer changes are not included in
7556 * the checkpoint.
7557 */
7558 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7559 }
7560
7561 /*
7562 * Now we can release the WAL insertion locks, allowing other xacts to
7563 * proceed while we are flushing disk buffers.
7564 */
7566
7567 /*
7568 * If this is an online checkpoint, we have not yet determined the redo
7569 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7570 * record; the LSN at which it starts becomes the new redo pointer. We
7571 * don't do this for a shutdown checkpoint, because in that case no WAL
7572 * can be written between the redo point and the insertion of the
7573 * checkpoint record itself, so the checkpoint record itself serves to
7574 * mark the redo point.
7575 */
7576 if (!shutdown)
7577 {
7579
7581 redo_rec.wal_level = wal_level;
7583 redo_rec.data_checksum_version = XLogCtl->data_checksum_version;
7586
7587 /* Include WAL level in record for WAL summarizer's benefit. */
7591
7592 /*
7593 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7594 * shared memory and RedoRecPtr in backend-local memory, but we need
7595 * to copy that into the record that will be inserted when the
7596 * checkpoint is complete.
7597 */
7598 checkPoint.redo = RedoRecPtr;
7599 }
7600
7601 /* Update the info_lck-protected copy of RedoRecPtr as well */
7603 XLogCtl->RedoRecPtr = checkPoint.redo;
7605
7606 /*
7607 * If enabled, log checkpoint start. We postpone this until now so as not
7608 * to log anything if we decided to skip the checkpoint.
7609 */
7610 if (log_checkpoints)
7611 LogCheckpointStart(flags, false);
7612
7613 INJECTION_POINT_CACHED("create-checkpoint-run", NULL);
7614
7615 /* Update the process title */
7616 update_checkpoint_display(flags, false, false);
7617
7619
7620 /*
7621 * Get the other info we need for the checkpoint record.
7622 *
7623 * We don't need to save oldestClogXid in the checkpoint, it only matters
7624 * for the short period in which clog is being truncated, and if we crash
7625 * during that we'll redo the clog truncation and fix up oldestClogXid
7626 * there.
7627 */
7629 checkPoint.nextXid = TransamVariables->nextXid;
7630 checkPoint.oldestXid = TransamVariables->oldestXid;
7633
7638
7640 checkPoint.nextOid = TransamVariables->nextOid;
7641 if (!shutdown)
7642 checkPoint.nextOid += TransamVariables->oidCount;
7644
7646
7648 &checkPoint.nextMulti,
7649 &checkPoint.nextMultiOffset,
7650 &checkPoint.oldestMulti,
7651 &checkPoint.oldestMultiDB);
7652
7653 /*
7654 * Having constructed the checkpoint record, ensure all shmem disk buffers
7655 * and commit-log buffers are flushed to disk.
7656 *
7657 * This I/O could fail for various reasons. If so, we will fail to
7658 * complete the checkpoint, but there is no reason to force a system
7659 * panic. Accordingly, exit critical section while doing it.
7660 */
7662
7663 /*
7664 * In some cases there are groups of actions that must all occur on one
7665 * side or the other of a checkpoint record. Before flushing the
7666 * checkpoint record we must explicitly wait for any backend currently
7667 * performing those groups of actions.
7668 *
7669 * One example is end of transaction, so we must wait for any transactions
7670 * that are currently in commit critical sections. If an xact inserted
7671 * its commit record into XLOG just before the REDO point, then a crash
7672 * restart from the REDO point would not replay that record, which means
7673 * that our flushing had better include the xact's update of pg_xact. So
7674 * we wait until it is out of its commit critical section before proceeding.
7675 * See notes in RecordTransactionCommit().
7676 *
7677 * Because we've already released the insertion locks, this test is a bit
7678 * fuzzy: it is possible that we will wait for xacts we didn't really need
7679 * to wait for. But the delay should be short and it seems better to make
7680 * checkpoint take a bit longer than to hold off insertions longer than
7681 * necessary. (In fact, the whole reason we have this issue is that xact.c
7682 * does commit record XLOG insertion and clog update as two separate steps
7683 * protected by different locks, but again that seems best on grounds of
7684 * minimizing lock contention.)
7685 *
7686 * A transaction that has not yet set delayChkptFlags when we look cannot
7687 * be at risk, since it has not inserted its commit record yet; and one
7688 * that's already cleared it is not at risk either, since it's done fixing
7689 * clog and we will correctly flush the update below. So we cannot miss
7690 * any xacts we need to wait for.
7691 */
7693 if (nvxids > 0)
7694 {
7695 do
7696 {
7697 /*
7698 * Keep absorbing fsync requests while we wait. There could even
7699 * be a deadlock if we don't, if the process that prevents the
7700 * checkpoint is trying to add a request to the queue.
7701 */
7703
7705 pg_usleep(10000L); /* wait for 10 msec */
7709 }
7710 pfree(vxids);
7711
7712 CheckPointGuts(checkPoint.redo, flags);
7713
/*
 * NOTE(review): a second wait loop of the same shape as the one before
 * CheckPointGuts(). The call that fills vxids is not shown in this
 * extract; presumably it uses DELAY_CHKPT_COMPLETE (listed in the
 * References) -- confirm against the full source.
 */
7715 if (nvxids > 0)
7716 {
7717 do
7718 {
7720
7722 pg_usleep(10000L); /* wait for 10 msec */
7726 }
7727 pfree(vxids);
7728
7729 /*
7730 * Take a snapshot of running transactions and write this to WAL. This
7731 * allows us to reconstruct the state of running transactions during
7732 * archive recovery, if required. Skip if this info is disabled.
7733 *
7734 * If we are shutting down, or Startup process is completing crash
7735 * recovery we don't need to write running xact data.
7736 */
7739
7741
7742 /*
7743 * Now insert the checkpoint record into XLOG.
7744 */
7746 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7750
7752
7753 /*
7754 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7755 * overwritten at next startup. No-one should even try; this just allows
7756 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7757 * to just temporarily disable writing until the system has exited
7758 * recovery.
7759 */
7760 if (shutdown)
7761 {
7762 if (flags & CHECKPOINT_END_OF_RECOVERY)
7764 else
7765 LocalXLogInsertAllowed = 0; /* never again write WAL */
7766 }
7767
7768 /*
7769 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7770 * = end of actual checkpoint record.
7771 */
7772 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7773 ereport(PANIC,
7774 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7775
7776 /*
7777 * Remember the prior checkpoint's redo ptr for
7778 * UpdateCheckPointDistanceEstimate()
7779 */
7781
7782 /*
7783 * Update the control file.
7784 */
7786 if (shutdown)
7789 ControlFile->checkPointCopy = checkPoint;
7790 /* crash recovery should always recover to the end of WAL */
7793
7794 /*
7795 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7796 * unused on non-shutdown checkpoints, but seems useful to store it always
7797 * for debugging purposes.
7798 */
7800
7803
7804 /*
7805 * We are now done with critical updates; no need for system panic if we
7806 * have trouble while fooling with old log segments.
7807 */
7809
7810 /*
7811 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7812 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7813 * where (a) we're not inside of a critical section and (b) we can be
7814 * certain that the relevant record has been flushed to disk, which must
7815 * happen before it can be summarized.
7816 *
7817 * If this is a shutdown checkpoint, then this happens reasonably
7818 * promptly: we've only just inserted and flushed the
7819 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7820 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7821 * record was written before we began flushing data to disk, and that
7822 * could be many minutes ago at this point. However, we don't XLogFlush()
7823 * after inserting that record, so we're not guaranteed that it's on disk
7824 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7825 * record.
7826 */
7828
7829 /*
7830 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7831 */
7833
7834 /*
7835 * Update the average distance between checkpoints if the prior checkpoint
7836 * exists.
7837 */
7840
7841 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7842
7843 /*
7844 * Delete old log files that are no longer needed for the last checkpoint,
7845 * to prevent the disk holding the xlog from growing full.
7846 */
7852 {
7853 /*
7854 * Some slots have been invalidated; recalculate the old-segment
7855 * horizon, starting again from RedoRecPtr.
7856 */
7859 }
7860 _logSegNo--;
7862 checkPoint.ThisTimeLineID);
7863
7864 /*
7865 * Make more log segments if needed. (Do this after recycling old log
7866 * segments, since that may supply some of the needed files.)
7867 */
7868 if (!shutdown)
7870
7871 /*
7872 * Truncate pg_subtrans if possible. We can throw away all data before
7873 * the oldest XMIN of any running transaction. No future transaction will
7874 * attempt to reference any pg_subtrans entry older than that (see Asserts
7875 * in subtrans.c). During recovery, though, we mustn't do this because
7876 * StartupSUBTRANS hasn't been called yet.
7877 */
7878 if (!RecoveryInProgress())
7880
7881 /* Real work is done; log and update stats. */
7882 LogCheckpointEnd(false, flags);
7883
7884 /* Reset the process title */
7885 update_checkpoint_display(flags, false, true);
7886
7888 NBuffers,
7892
7893 return true;
7894}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:476
void AbsorbSyncRequests(void)
static void Insert(File file)
Definition fd.c:1301
int NBuffers
Definition globals.c:144
#define INJECTION_POINT(name, arg)
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:202
@ LW_SHARED
Definition lwlock.h:105
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:2017
#define XLOG_CHECKPOINT_REDO
Definition pg_control.h:86
@ DB_SHUTDOWNING
Definition pg_control.h:102
@ DB_SHUTDOWNED
Definition pg_control.h:100
#define XLOG_CHECKPOINT_ONLINE
Definition pg_control.h:73
#define InvalidOid
#define DELAY_CHKPT_START
Definition proc.h:139
#define DELAY_CHKPT_COMPLETE
Definition proc.h:140
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition procarray.c:1973
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition procarray.c:3062
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition procarray.c:2845
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition procarray.c:3017
void pg_usleep(long microsec)
Definition signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition slot.c:2220
@ RS_INVAL_WAL_REMOVED
Definition slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition slot.h:68
XLogRecPtr LogStandbySnapshot(Oid dbid)
Definition standby.c:1303
TimestampTz ckpt_start_t
Definition xlog.h:173
int ckpt_segs_removed
Definition xlog.h:183
int ckpt_bufs_written
Definition xlog.h:179
int ckpt_segs_recycled
Definition xlog.h:184
XLogRecPtr minRecoveryPoint
Definition pg_control.h:176
XLogRecPtr unloggedLSN
Definition pg_control.h:145
TimeLineID minRecoveryPointTLI
Definition pg_control.h:177
TransactionId oldestCommitTsXid
Definition transam.h:232
TransactionId newestCommitTsXid
Definition transam.h:233
TransactionId oldestXid
Definition transam.h:222
uint32 data_checksum_version
Definition xlog.c:560
TimeLineID InsertTimeLineID
Definition xlog.c:515
XLogRecPtr RedoRecPtr
Definition xlog.c:463
XLogCtlInsert Insert
Definition xlog.c:459
TimeLineID PrevTimeLineID
Definition xlog.c:516
pg_atomic_uint64 unloggedLSN
Definition xlog.c:470
XLogRecPtr RedoRecPtr
Definition xlog.c:437
void TruncateSUBTRANS(TransactionId oldestXact)
Definition subtrans.c:404
void SyncPreCheckpoint(void)
Definition sync.c:178
void SyncPostCheckpoint(void)
Definition sync.c:203
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition xlog.c:260
bool RecoveryInProgress(void)
Definition xlog.c:6832
static void WALInsertLockRelease(void)
Definition xlog.c:1486
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition xlog.c:1899
static void WALInsertLockAcquireExclusive(void)
Definition xlog.c:1457
static void UpdateControlFile(void)
Definition xlog.c:4634
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition xlog.c:3916
static void LogCheckpointStart(int flags, bool restartpoint)
Definition xlog.c:7169
static XLogRecPtr RedoRecPtr
Definition xlog.c:280
static void LogCheckpointEnd(bool restartpoint, int flags)
Definition xlog.c:7187
static void WALInsertLockAcquire(void)
Definition xlog.c:1412
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition xlog.c:3741
bool log_checkpoints
Definition xlog.c:136
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition xlog.c:8495
static int LocalSetXLogInsertAllowed(void)
Definition xlog.c:6920
XLogRecPtr GetLastImportantRecPtr(void)
Definition xlog.c:7054
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition xlog.c:7294
static int LocalXLogInsertAllowed
Definition xlog.c:243
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition xlog.c:8046
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition xlog.c:7332
#define XLogStandbyInfoActive()
Definition xlog.h:126
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogBeginInsert(void)
Definition xloginsert.c:153

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, XLogCtlData::data_checksum_version, CheckPoint::dataChecksumState, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg, errmsg_internal(), ERROR, fb(), CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, INJECTION_POINT_CACHED, INJECTION_POINT_LOAD, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsLogicalDecodingEnabled(), KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), CheckPoint::logicalDecodingEnabled, LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, 
CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRecPtrIsValid, XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7905 of file xlog.c.

7906{
/*
 * Build an end-of-recovery record and update the control file to match.
 * Raises ERROR unless recovery is still in progress.
 *
 * NOTE(review): this listing is an extract; statements on source lines
 * whose numbers are skipped below are not shown here. Presumably those
 * lines insert and flush an XLOG_END_OF_RECOVERY record (XLogBeginInsert,
 * XLogRegisterData, XLogInsert and XLogFlush appear in the References
 * list) -- confirm against the full source.
 */
7909
7910 /* sanity check */
7911 if (!RecoveryInProgress())
7912 elog(ERROR, "can only be used to end recovery");
7913
/*
 * Fill in the record: the current time, the configured wal_level, and the
 * insert/previous timeline IDs read from XLogCtl.
 */
7914 xlrec.end_time = GetCurrentTimestamp();
7915 xlrec.wal_level = wal_level;
7916
7918 xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7919 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7921
7923
7927
7929
7930 /*
7931 * Update the control file so that crash recovery can follow the timeline
7932 * changes to this point.
7933 */
7936 ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7937
7938 /* start with the latest checksum version (as of the end of recovery) */
7942
7945
7947}
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:81
uint32 data_checksum_version
Definition pg_control.h:232

References ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7976 of file xlog.c.

7978{
/*
 * Insert an XLOG_OVERWRITE_CONTRECORD record as the first record on the
 * WAL page that starts at pagePtr, recording aborted_lsn as the LSN being
 * overwritten. Returns recptr.
 *
 * Raises ERROR if recovery is not in progress, if pagePtr is not a
 * multiple of XLOG_BLCKSZ, or if the insert position or the position of
 * the inserted record is not the expected startPos.
 *
 * NOTE(review): this listing is an extract; statements on source lines
 * whose numbers are skipped below (including the assignments to recptr)
 * are not shown here.
 */
7983
7984 /* sanity checks */
7985 if (!RecoveryInProgress())
7986 elog(ERROR, "can only be used at end of recovery");
/* pagePtr must point at the start of a WAL page */
7987 if (pagePtr % XLOG_BLCKSZ != 0)
7988 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7990
7991 /* The current WAL insert position should be right after the page header */
7992 startPos = pagePtr;
7995 else
7998 if (recptr != startPos)
7999 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
8001
8003
8004 /*
8005 * Initialize the XLOG page header (by GetXLogBuffer), and set the
8006 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
8007 *
8008 * No other backend is allowed to write WAL yet, so acquiring the WAL
8009 * insertion lock is just pro forma.
8010 */
8015
8016 /*
8017 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
8018 * page. We know it becomes the first record, because no other backend is
8019 * allowed to write WAL yet.
8020 */
/* The record carries the overwritten LSN and the time of the overwrite */
8022 xlrec.overwritten_lsn = aborted_lsn;
8023 xlrec.overwrite_time = GetCurrentTimestamp();
8026
8027 /* check that the record was inserted to the right place */
8028 if (ProcLastRecPtr != startPos)
8029 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
8031
8033
8035
8036 return recptr;
8037}
#define XLOG_OVERWRITE_CONTRECORD
Definition pg_control.h:85
XLogRecPtr GetXLogInsertRecPtr(void)
Definition xlog.c:10092
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD

References elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLP_FIRST_IS_OVERWRITE_CONTRECORD.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 8126 of file xlog.c.

8127{
/*
 * Establish a restartpoint based on the last checkpoint record, which is
 * copied out of XLogCtl below.
 *
 * Returns false without performing a restartpoint if recovery has already
 * ended, if no valid checkpoint record is available, or if that
 * checkpoint's redo pointer is not past the one already stored in the
 * control file. Returns true otherwise.
 *
 * NOTE(review): this listing is an extract; statements on source lines
 * whose numbers are skipped below are not shown here.
 */
8128 XLogRecPtr lastCheckPointRecPtr;
8129 XLogRecPtr lastCheckPointEndPtr;
8130 CheckPoint lastCheckPoint;
8134 TimeLineID replayTLI;
8135 XLogRecPtr endptr;
8138
8139 /* Concurrent checkpoint/restartpoint cannot happen */
8141
8142 /* Get a local copy of the last safe checkpoint record. */
8144 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
8145 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
8146 lastCheckPoint = XLogCtl->lastCheckPoint;
8148
8149 /*
8150 * Check that we're still in recovery mode. It's ok if we exit recovery
8151 * mode after this check; the restart point is valid anyway.
8152 */
8153 if (!RecoveryInProgress())
8154 {
8156 (errmsg_internal("skipping restartpoint, recovery has already ended")));
8157 return false;
8158 }
8159
8160 /*
8161 * If the last checkpoint record we've replayed is already our last
8162 * restartpoint, we can't perform a new restart point. We still update
8163 * minRecoveryPoint in that case, so that if this is a shutdown restart
8164 * point, we won't start up earlier than before. That's not strictly
8165 * necessary, but when hot standby is enabled, it would be rather weird if
8166 * the database opened up for read-only connections at a point-in-time
8167 * before the last shutdown. Such time travel is still possible in case of
8168 * immediate shutdown, though.
8169 *
8170 * We don't explicitly advance minRecoveryPoint when we do create a
8171 * restartpoint. It's assumed that flushing the buffers will do that as a
8172 * side-effect.
8173 */
8174 if (!XLogRecPtrIsValid(lastCheckPointRecPtr) ||
8175 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
8176 {
8178 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
8179 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
8180
8182 if (flags & CHECKPOINT_IS_SHUTDOWN)
8183 {
8188 }
8189 return false;
8190 }
8191
8192 /*
8193 * Update the shared RedoRecPtr so that the startup process can calculate
8194 * the number of segments replayed since last restartpoint, and request a
8195 * restartpoint if it exceeds CheckPointSegments.
8196 *
8197 * Like in CreateCheckPoint(), hold off insertions to update it, although
8198 * during recovery this is just pro forma, because no WAL insertions are
8199 * happening.
8200 */
8202 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
8204
8205 /* Also update the info_lck-protected copy */
8207 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
8209
8210 /*
8211 * Prepare to accumulate statistics.
8212 *
8213 * Note: because it is possible for log_checkpoints to change while a
8214 * checkpoint proceeds, we always accumulate stats, even if
8215 * log_checkpoints is currently off.
8216 */
8219
8220 if (log_checkpoints)
8221 LogCheckpointStart(flags, true);
8222
8223 /* Update the process title */
8224 update_checkpoint_display(flags, true, false);
8225
8226 CheckPointGuts(lastCheckPoint.redo, flags);
8227
8228 /*
8229 * This location needs to be after CheckPointGuts() to ensure that some
8230 * work has already happened during this checkpoint.
8231 */
8232 INJECTION_POINT("create-restart-point", NULL);
8233
8234 /*
8235 * Remember the prior checkpoint's redo ptr for
8236 * UpdateCheckPointDistanceEstimate()
8237 */
8239
8240 /*
8241 * Update pg_control, using current time. Check that it still shows an
8242 * older checkpoint, else do nothing; this is a quick hack to make sure
8243 * nothing really bad happens if somehow we get here after the
8244 * end-of-recovery checkpoint.
8245 */
8247 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
8248 {
8249 /*
8250 * Update the checkpoint information. We do this even if the cluster
8251 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
8252 * segments recycled below.
8253 */
8254 ControlFile->checkPoint = lastCheckPointRecPtr;
8255 ControlFile->checkPointCopy = lastCheckPoint;
8256
8257 /*
8258 * Ensure minRecoveryPoint is past the checkpoint record and update it
8259 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
8260 * this will have happened already while writing out dirty buffers,
8261 * but not necessarily - e.g. because no buffers were dirtied. We do
8262 * this because a backup performed in recovery uses minRecoveryPoint
8263 * to determine which WAL files must be included in the backup, and
8264 * the file (or files) containing the checkpoint record must be
8265 * included, at a minimum. Note that for an ordinary restart of
8266 * recovery there's no value in having the minimum recovery point any
8267 * earlier than this anyway, because redo will begin just after the
8268 * checkpoint record.
8269 */
8271 {
8272 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
8273 {
8274 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
8276
8277 /* update local copy */
8280 }
8281 if (flags & CHECKPOINT_IS_SHUTDOWN)
8283 }
8284
8285 /* we shall start with the latest checksum version */
8287
8289 }
8291
8292 /*
8293 * Update the average distance between checkpoints/restartpoints if the
8294 * prior checkpoint exists.
8295 */
8298
8299 /*
8300 * Delete old log files that are no longer needed for the last
8301 * restartpoint, to prevent the disk holding the xlog from growing full.
8302 */
8304
8305 /*
8306 * Retreat _logSegNo using the current end of xlog replayed or received,
8307 * whichever is later.
8308 */
8310 replayPtr = GetXLogReplayRecPtr(&replayTLI);
8311 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
8312 KeepLogSeg(endptr, &_logSegNo);
8313
8314 INJECTION_POINT("restartpoint-before-slot-invalidation", NULL);
8315
8319 {
8320 /*
8321 * Some slots have been invalidated; recalculate the old-segment
8322 * horizon, starting again from RedoRecPtr.
8323 */
8325 KeepLogSeg(endptr, &_logSegNo);
8326 }
8327 _logSegNo--;
8328
8329 /*
8330 * Try to recycle segments on a useful timeline. If we've been promoted
8331 * since the beginning of this restartpoint, use the new timeline chosen
8332 * at end of recovery. If we're still in recovery, use the timeline we're
8333 * currently replaying.
8334 *
8335 * There is no guarantee that the WAL segments will be useful on the
8336 * current timeline; if recovery proceeds to a new timeline right after
8337 * this, the pre-allocated WAL segments on this timeline will not be used,
8338 * and will go wasted until recycled on the next restartpoint. We'll live
8339 * with that.
8340 */
8341 if (!RecoveryInProgress())
8342 replayTLI = XLogCtl->InsertTimeLineID;
8343
8344 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
8345
8346 /*
8347 * Make more log segments if needed. (Do this after recycling old log
8348 * segments, since that may supply some of the needed files.)
8349 */
8350 PreallocXlogFiles(endptr, replayTLI);
8351
8352 /*
8353 * Truncate pg_subtrans if possible. We can throw away all data before
8354 * the oldest XMIN of any running transaction. No future transaction will
8355 * attempt to reference any pg_subtrans entry older than that (see Asserts
8356 * in subtrans.c). When hot standby is disabled, though, we mustn't do
8357 * this because StartupSUBTRANS hasn't been called yet.
8358 */
8359 if (EnableHotStandby)
8361
8362 /* Real work is done; log and update stats. */
8363 LogCheckpointEnd(true, flags);
8364
8365 /* Reset the process title */
8366 update_checkpoint_display(flags, true, true);
8367
8370 errmsg("recovery restart point at %X/%08X",
8371 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
8372 xtime ? errdetail("Last completed transaction was at log time %s.",
8374
8375 /*
8376 * Finally, execute archive_cleanup_command, if any.
8377 */
8380 "archive_cleanup_command",
8381 false,
8383
8384 return true;
8385}
const char * timestamptz_to_str(TimestampTz t)
Definition timestamp.c:1870
int64 TimestampTz
Definition timestamp.h:39
bool IsUnderPostmaster
Definition globals.c:122
@ B_CHECKPOINTER
Definition miscadmin.h:375
BackendType MyBackendType
Definition miscinit.c:65
@ DB_IN_ARCHIVE_RECOVERY
Definition pg_control.h:104
@ DB_SHUTDOWNED_IN_RECOVERY
Definition pg_control.h:101
CheckPoint lastCheckPoint
Definition xlog.c:551
XLogRecPtr lastCheckPointRecPtr
Definition xlog.c:549
XLogRecPtr lastCheckPointEndPtr
Definition xlog.c:550
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition xlog.c:2721
static XLogRecPtr LocalMinRecoveryPoint
Definition xlog.c:666
static TimeLineID LocalMinRecoveryPointTLI
Definition xlog.c:667
uint32 TimeLineID
Definition xlogdefs.h:63
char * archiveCleanupCommand
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, ControlFileData::data_checksum_version, CheckPoint::dataChecksumState, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg, errmsg_internal(), ExecuteRecoveryCommand(), fb(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire(), SpinLockRelease(), ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsValid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsInProgressOn()

bool DataChecksumsInProgressOn ( void  )

Definition at line 4707 of file xlog.c.

4708{
4709 bool ret;
4710
4714
4715 return ret;
4716}
@ PG_DATA_CHECKSUM_INPROGRESS_ON
Definition checksum.h:31

References XLogCtlData::data_checksum_version, XLogCtlData::info_lck, PG_DATA_CHECKSUM_INPROGRESS_ON, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by createdb(), and launcher_exit().

◆ DataChecksumsNeedVerify()

bool DataChecksumsNeedVerify ( void  )

Definition at line 4733 of file xlog.c.

4734{
4736}
@ PG_DATA_CHECKSUM_VERSION
Definition checksum.h:29
static ChecksumStateType LocalDataChecksumState
Definition xlog.c:677

References LocalDataChecksumState, and PG_DATA_CHECKSUM_VERSION.

Referenced by DataChecksumsWorkerLauncherMain(), PageIsVerified(), read_file_data_into_buffer(), sendFile(), and verify_page_checksum().

◆ DataChecksumsNeedWrite()

◆ DataChecksumsOff()

bool DataChecksumsOff ( void  )

Definition at line 4683 of file xlog.c.

4684{
4685 bool ret;
4686
4690
4691 return ret;
4692}
@ PG_DATA_CHECKSUM_OFF
Definition checksum.h:28

References XLogCtlData::data_checksum_version, XLogCtlData::info_lck, PG_DATA_CHECKSUM_OFF, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by StartDataChecksumsWorkerLauncher().

◆ DataChecksumsOn()

bool DataChecksumsOn ( void  )

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 10051 of file xlog.c.

10052{
10054
10055 /* If called during backup start, there shouldn't be one already running */
10057
10059 {
10063
10066
10069 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
10070 }
10071}
Datum arg
Definition elog.c:1323
#define WARNING
Definition elog.h:37
static bool DatumGetBool(Datum X)
Definition postgres.h:100
int runningBackups
Definition xlog.c:445
static SessionBackupState sessionBackupState
Definition xlog.c:398
@ SESSION_BACKUP_NONE
Definition xlog.h:319

References arg, Assert, DatumGetBool(), ereport, errmsg, fb(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 9455 of file xlog.c.

9457{
9459
9460 Assert(state != NULL);
9462
9463 /*
9464 * During recovery, we don't need to check WAL level. Because, if WAL
9465 * level is not sufficient, it's impossible to get here during recovery.
9466 */
9468 ereport(ERROR,
9470 errmsg("WAL level not sufficient for making an online backup"),
9471 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9472
9474 ereport(ERROR,
9476 errmsg("backup label too long (max %d bytes)",
9477 MAXPGPATH)));
9478
9479 strlcpy(state->name, backupidstr, sizeof(state->name));
9480
9481 /*
9482 * Mark backup active in shared memory. We must do full-page WAL writes
9483 * during an on-line backup even if not doing so at other times, because
9484 * it's quite possible for the backup dump to obtain a "torn" (partially
9485 * written) copy of a database page if it reads the page concurrently with
9486 * our write to the same page. This can be fixed as long as the first
9487 * write to the page in the WAL sequence is a full-page write. Hence, we
9488 * increment runningBackups then force a CHECKPOINT, to ensure there are
9489 * no dirty pages in shared memory that might get dumped while the backup
9490 * is in progress without having a corresponding WAL record. (Once the
9491 * backup is complete, we need not force full-page writes anymore, since
9492 * we expect that any pages not modified during the backup interval must
9493 * have been correctly captured by the backup.)
9494 *
9495 * Note that forcing full-page writes has no effect during an online
9496 * backup from the standby.
9497 *
9498 * We must hold all the insertion locks to change the value of
9499 * runningBackups, to ensure adequate interlocking against
9500 * XLogInsertRecord().
9501 */
9505
9506 /*
9507 * Ensure we decrement runningBackups if we fail below. NB -- for this to
9508 * work correctly, it is critical that sessionBackupState is only updated
9509 * after this block is over.
9510 */
9512 {
9513 bool gotUniqueStartpoint = false;
9514 DIR *tblspcdir;
9515 struct dirent *de;
9517 int datadirpathlen;
9518
9519 /*
9520 * Force an XLOG file switch before the checkpoint, to ensure that the
9521 * WAL segment the checkpoint is written to doesn't contain pages with
9522 * old timeline IDs. That would otherwise happen if you called
9523 * pg_backup_start() right after restoring from a PITR archive: the
9524 * first WAL segment containing the startup checkpoint has pages in
9525 * the beginning with the old timeline ID. That can cause trouble at
9526 * recovery: we won't have a history file covering the old timeline if
9527 * pg_wal directory was not included in the base backup and the WAL
9528 * archive was cleared too before starting the backup.
9529 *
9530 * During recovery, we skip forcing XLOG file switch, which means that
9531 * the backup taken during recovery is not available for the special
9532 * recovery case described above.
9533 */
9535 RequestXLogSwitch(false);
9536
9537 do
9538 {
9539 bool checkpointfpw;
9540
9541 /*
9542 * Force a CHECKPOINT. Aside from being necessary to prevent torn
9543 * page problems, this guarantees that two successive backup runs
9544 * will have different checkpoint positions and hence different
9545 * history file names, even if nothing happened in between.
9546 *
9547 * During recovery, establish a restartpoint if possible. We use
9548 * the last restartpoint as the backup starting checkpoint. This
9549 * means that two successive backup runs can have same checkpoint
9550 * positions.
9551 *
9552 * Since the fact that we are executing do_pg_backup_start()
9553 * during recovery means that checkpointer is running, we can use
9554 * RequestCheckpoint() to establish a restartpoint.
9555 *
9556 * We use CHECKPOINT_FAST only if requested by user (via passing
9557 * fast = true). Otherwise this can take a while.
9558 */
9560 (fast ? CHECKPOINT_FAST : 0));
9561
9562 /*
9563 * Now we need to fetch the checkpoint record location, and also
9564 * its REDO pointer. The oldest point in WAL that would be needed
9565 * to restore starting from the checkpoint is precisely the REDO
9566 * pointer.
9567 */
9569 state->checkpointloc = ControlFile->checkPoint;
9570 state->startpoint = ControlFile->checkPointCopy.redo;
9574
9576 {
9578
9579 /*
9580 * Check to see if all WAL replayed during online backup
9581 * (i.e., since last restartpoint used as backup starting
9582 * checkpoint) contain full-page writes.
9583 */
9587
9588 if (!checkpointfpw || state->startpoint <= recptr)
9589 ereport(ERROR,
9591 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9592 "since last restartpoint"),
9593 errhint("This means that the backup being taken on the standby "
9594 "is corrupt and should not be used. "
9595 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9596 "and then try an online backup again.")));
9597
9598 /*
9599 * During recovery, since we don't use the end-of-backup WAL
9600 * record and don't write the backup history file, the
9601 * starting WAL location doesn't need to be unique. This means
9602 * that two base backups started at the same time might use
9603 * the same checkpoint as starting locations.
9604 */
9605 gotUniqueStartpoint = true;
9606 }
9607
9608 /*
9609 * If two base backups are started at the same time (in WAL sender
9610 * processes), we need to make sure that they use different
9611 * checkpoints as starting locations, because we use the starting
9612 * WAL location as a unique identifier for the base backup in the
9613 * end-of-backup WAL record and when we write the backup history
9614 * file. Perhaps it would be better to generate a separate unique ID
9615 * for each backup instead of forcing another checkpoint, but
9616 * taking a checkpoint right after another is not that expensive
9617 * either because only a few buffers have been dirtied yet.
9618 */
9620 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9621 {
9622 XLogCtl->Insert.lastBackupStart = state->startpoint;
9623 gotUniqueStartpoint = true;
9624 }
9626 } while (!gotUniqueStartpoint);
9627
9628 /*
9629 * Construct tablespace_map file.
9630 */
9632
9633 /* Collect information about all tablespaces */
9635 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9636 {
9637 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9638 char linkpath[MAXPGPATH];
9639 char *relpath = NULL;
9640 char *s;
9642 char *badp;
9643 Oid tsoid;
9644
9645 /*
9646 * Try to parse the directory name as an unsigned integer.
9647 *
9648 * Tablespace directories should be positive integers that can be
9649 * represented in 32 bits, with no leading zeroes or trailing
9650 * garbage. If we come across a name that doesn't meet those
9651 * criteria, skip it.
9652 */
9653 if (de->d_name[0] < '1' || de->d_name[1] > '9')
9654 continue;
9655 errno = 0;
9656 tsoid = strtoul(de->d_name, &badp, 10);
9657 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9658 continue;
9659
9660 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9661
9662 de_type = get_dirent_type(fullpath, de, false, ERROR);
9663
9664 if (de_type == PGFILETYPE_LNK)
9665 {
9667 int rllen;
9668
9669 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9670 if (rllen < 0)
9671 {
9673 (errmsg("could not read symbolic link \"%s\": %m",
9674 fullpath)));
9675 continue;
9676 }
9677 else if (rllen >= sizeof(linkpath))
9678 {
9680 (errmsg("symbolic link \"%s\" target is too long",
9681 fullpath)));
9682 continue;
9683 }
9684 linkpath[rllen] = '\0';
9685
9686 /*
9687 * Relpath holds the relative path of the tablespace directory
9688 * when it's located within PGDATA, or NULL if it's located
9689 * elsewhere.
9690 */
9691 if (rllen > datadirpathlen &&
9695
9696 /*
9697 * Add a backslash-escaped version of the link path to the
9698 * tablespace map file.
9699 */
9701 for (s = linkpath; *s; s++)
9702 {
9703 if (*s == '\n' || *s == '\r' || *s == '\\')
9706 }
9708 de->d_name, escapedpath.data);
9709 pfree(escapedpath.data);
9710 }
9711 else if (de_type == PGFILETYPE_DIR)
9712 {
9713 /*
9714 * It's possible to use allow_in_place_tablespaces to create
9715 * directories directly under pg_tblspc, for testing purposes
9716 * only.
9717 *
9718 * In this case, we store a relative path rather than an
9719 * absolute path into the tablespaceinfo.
9720 */
9721 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9722 PG_TBLSPC_DIR, de->d_name);
9724 }
9725 else
9726 {
9727 /* Skip any other file type that appears here. */
9728 continue;
9729 }
9730
9732 ti->oid = tsoid;
9733 ti->path = pstrdup(linkpath);
9734 ti->rpath = relpath;
9735 ti->size = -1;
9736
9737 if (tablespaces)
9738 *tablespaces = lappend(*tablespaces, ti);
9739 }
9741
9742 state->starttime = (pg_time_t) time(NULL);
9743 }
9745
9746 state->started_in_recovery = backup_started_in_recovery;
9747
9748 /*
9749 * Mark that the start phase has correctly finished for the backup.
9750 */
9752}
static bool backup_started_in_recovery
Definition basebackup.c:129
void RequestCheckpoint(int flags)
#define palloc_object(type)
Definition fe_memutils.h:89
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition file_utils.c:547
PGFileType
Definition file_utils.h:19
@ PGFILETYPE_LNK
Definition file_utils.h:24
@ PGFILETYPE_DIR
Definition file_utils.h:23
char * DataDir
Definition globals.c:73
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
List * lappend(List *list, void *datum)
Definition list.c:339
#define IS_DIR_SEP(ch)
Definition port.h:104
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
unsigned int Oid
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define PG_TBLSPC_DIR
Definition relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition xlog.c:557
XLogRecPtr lastBackupStart
Definition xlog.c:446
#define readlink(path, buf, size)
Definition win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition xlog.c:8604
void do_pg_abort_backup(int code, Datum arg)
Definition xlog.c:10051
@ SESSION_BACKUP_RUNNING
Definition xlog.h:320
#define XLogIsNeeded()
Definition xlog.h:112

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, palloc_object, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, snprintf, SpinLockAcquire(), SpinLockRelease(), strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9777 of file xlog.c.

9778{
9779 bool backup_stopped_in_recovery = false;
9780 char histfilepath[MAXPGPATH];
9784 FILE *fp;
9786 int waits = 0;
9787 bool reported_waiting = false;
9788
9789 Assert(state != NULL);
9790
9792
9793 /*
9794 * During recovery, we don't need to check WAL level. Because, if WAL
9795 * level is not sufficient, it's impossible to get here during recovery.
9796 */
9798 ereport(ERROR,
9800 errmsg("WAL level not sufficient for making an online backup"),
9801 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9802
9803 /*
9804 * OK to update backup counter and session-level lock.
9805 *
9806 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9807 * otherwise they can be updated inconsistently, which might cause
9808 * do_pg_abort_backup() to fail.
9809 */
9811
9812 /*
9813 * It is expected that each do_pg_backup_start() call is matched by
9814 * exactly one do_pg_backup_stop() call.
9815 */
9818
9819 /*
9820 * Clean up session-level lock.
9821 *
9822 * You might think that WALInsertLockRelease() can be called before
9823 * cleaning up session-level lock because session-level lock doesn't need
9824 * to be protected with WAL insertion lock. But since
9825 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9826 * cleaned up before it.
9827 */
9829
9831
9832 /*
9833 * If we are taking an online backup from the standby, we confirm that the
9834 * standby has not been promoted during the backup.
9835 */
9836 if (state->started_in_recovery && !backup_stopped_in_recovery)
9837 ereport(ERROR,
9839 errmsg("the standby was promoted during online backup"),
9840 errhint("This means that the backup being taken is corrupt "
9841 "and should not be used. "
9842 "Try taking another online backup.")));
9843
9844 /*
9845 * During recovery, we don't write an end-of-backup record. We assume that
9846 * pg_control was backed up last and its minimum recovery point can be
9847 * available as the backup end location. Since we don't have an
9848 * end-of-backup record, we use the pg_control value to check whether
9849 * we've reached the end of backup when starting recovery from this
9850 * backup. We have no way of checking if pg_control wasn't backed up last
9851 * however.
9852 *
9853 * We don't force a switch to new WAL file but it is still possible to
9854 * wait for all the required files to be archived if waitforarchive is
9855 * true. This is okay if we use the backup to start a standby and fetch
9856 * the missing WAL using streaming replication. But in the case of an
9857 * archive recovery, a user should set waitforarchive to true and wait for
9858 * them to be archived to ensure that all the required files are
9859 * available.
9860 *
9861 * We return the current minimum recovery point as the backup end
9862 * location. Note that it can be greater than the exact backup end
9863 * location if the minimum recovery point is updated after the backup of
9864 * pg_control. This is harmless for current uses.
9865 *
9866 * XXX currently a backup history file is for informational and debug
9867 * purposes only. It's not essential for an online backup. Furthermore,
9868 * even if it's created, it will not be archived during recovery because
9869 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9870 * backup history file during recovery.
9871 */
9873 {
9875
9876 /*
9877 * Check to see if all WAL replayed during online backup contain
9878 * full-page writes.
9879 */
9883
9884 if (state->startpoint <= recptr)
9885 ereport(ERROR,
9887 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9888 "during online backup"),
9889 errhint("This means that the backup being taken on the standby "
9890 "is corrupt and should not be used. "
9891 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9892 "and then try an online backup again.")));
9893
9894
9896 state->stoppoint = ControlFile->minRecoveryPoint;
9899 }
9900 else
9901 {
9902 char *history_file;
9903
9904 /*
9905 * Write the backup-end xlog record
9906 */
9908 XLogRegisterData(&state->startpoint,
9909 sizeof(state->startpoint));
9911
9912 /*
9913 * Given that we're not in recovery, InsertTimeLineID is set and can't
9914 * change, so we can read it without a lock.
9915 */
9916 state->stoptli = XLogCtl->InsertTimeLineID;
9917
9918 /*
9919 * Force a switch to a new xlog segment file, so that the backup is
9920 * valid as soon as archiver moves out the current segment file.
9921 */
9922 RequestXLogSwitch(false);
9923
9924 state->stoptime = (pg_time_t) time(NULL);
9925
9926 /*
9927 * Write the backup history file
9928 */
9931 state->startpoint, wal_segment_size);
9932 fp = AllocateFile(histfilepath, "w");
9933 if (!fp)
9934 ereport(ERROR,
9936 errmsg("could not create file \"%s\": %m",
9937 histfilepath)));
9938
9939 /* Build and save the contents of the backup history file */
9941 fprintf(fp, "%s", history_file);
9943
9944 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9945 ereport(ERROR,
9947 errmsg("could not write file \"%s\": %m",
9948 histfilepath)));
9949
9950 /*
9951 * Clean out any no-longer-needed history files. As a side effect,
9952 * this will post a .ready file for the newly created history file,
9953 * notifying the archiver that history file may be archived
9954 * immediately.
9955 */
9957 }
9958
9959 /*
9960 * If archiving is enabled, wait for all the required WAL files to be
9961 * archived before returning. If archiving isn't enabled, the required WAL
9962 * needs to be transported via streaming replication (hopefully with
9963 * wal_keep_size set high enough), or some more exotic mechanism like
9964 * polling and copying files from pg_wal with a script. We have no knowledge
9965 * of those mechanisms, so it's up to the user to ensure that he gets all
9966 * the required WAL.
9967 *
9968 * We wait until both the last WAL file filled during backup and the
9969 * history file have been archived, and assume that the alphabetic sorting
9970 * property of the WAL files ensures any earlier WAL files are safely
9971 * archived as well.
9972 *
9973 * We wait forever, since archive_command is supposed to work and we
9974 * assume the admin wanted his backup to work completely. If you don't
9975 * wish to wait, then either waitforarchive should be passed in as false,
9976 * or you can set statement_timeout. Also, some notices are issued to
9977 * clue in anyone who might be doing this interactively.
9978 */
9979
9980 if (waitforarchive &&
9983 {
9987
9990 state->startpoint, wal_segment_size);
9991
9993 waits = 0;
9994
9997 {
9999
10000 if (!reported_waiting && waits > 5)
10001 {
10003 (errmsg("base backup done, waiting for required WAL segments to be archived")));
10004 reported_waiting = true;
10005 }
10006
10009 1000L,
10012
10013 if (++waits >= seconds_before_warning)
10014 {
10015 seconds_before_warning *= 2; /* This wraps in >10 years... */
10017 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
10018 waits),
10019 errhint("Check that your \"archive_command\" is executing properly. "
10020 "You can safely cancel this backup, "
10021 "but the database backup will not be usable without all the WAL segments.")));
10022 }
10023 }
10024
10026 (errmsg("all required WAL segments have been archived")));
10027 }
10028 else if (waitforarchive)
10030 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
10031}
#define fprintf(file, fmt, msg)
Definition cubescan.l:21
#define NOTICE
Definition elog.h:36
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
struct Latch * MyLatch
Definition globals.c:65
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define XLOG_BACKUP_END
Definition pg_control.h:77
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static void CleanupBackupHistory(void)
Definition xlog.c:4212
#define XLogArchivingAlways()
Definition xlog.h:105
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg, ERROR, fb(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire(), SpinLockRelease(), WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9758 of file xlog.c.

9759{
9760 return sessionBackupState;
9761}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 9268 of file xlog.c.

9269{
9270 int o_direct_flag = 0;
9271
9272 /*
9273 * Use O_DIRECT if requested, except in walreceiver process. The WAL
9274 * written by walreceiver is normally read by the startup process soon
9275 * after it's written. Also, walreceiver performs unaligned writes, which
9276 * don't work with O_DIRECT, so it is required for correctness too.
9277 */
9280
9281 /* If fsync is disabled, never open in sync mode */
9282 if (!enableFsync)
9283 return o_direct_flag;
9284
9285 switch (method)
9286 {
9287 /*
9288 * enum values for all sync options are defined even if they are
9289 * not supported on the current platform. But if not, they are
9290 * not included in the enum option array, and therefore will never
9291 * be seen here.
9292 */
9296 return o_direct_flag;
9297#ifdef O_SYNC
9299 return O_SYNC | o_direct_flag;
9300#endif
9301#ifdef O_DSYNC
9303 return O_DSYNC | o_direct_flag;
9304#endif
9305 default:
9306 /* can't happen (unless we are out of sync with option array) */
9307 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
9308 return 0; /* silence warning */
9309 }
9310}
int io_direct_flags
Definition fd.c:172
#define IO_DIRECT_WAL
Definition fd.h:55
#define PG_O_DIRECT
Definition fd.h:123
bool enableFsync
Definition globals.c:131
#define AmWalReceiverProcess()
Definition miscadmin.h:406
#define O_DSYNC
Definition win32_port.h:346
@ WAL_SYNC_METHOD_OPEN
Definition xlog.h:27
@ WAL_SYNC_METHOD_FDATASYNC
Definition xlog.h:26
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition xlog.h:28
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition xlog.h:29
@ WAL_SYNC_METHOD_FSYNC
Definition xlog.h:25

References AmWalReceiverProcess, elog, enableFsync, ERROR, fb(), io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 5286 of file xlog.c.

5287{
5288 return ControlFile->wal_level;
5289}

References ControlFile, and ControlFileData::wal_level.

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4993 of file xlog.c.

4994{
4996}
bool default_char_signedness
Definition pg_control.h:238

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE(), and trigram_qsort().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 5008 of file xlog.c.

5009{
5011}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition atomics.h:532

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by XLogGetFakeLSN().

◆ GetFlushRecPtr()

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6965 of file xlog.c.

6966{
6969}
static bool doPageWrites
Definition xlog.c:293

References doPageWrites, fb(), and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 7054 of file xlog.c.

7055{
7057 int i;
7058
7059 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
7060 {
7062
7063 /*
7064 * Need to take a lock to prevent torn reads of the LSN, which are
7065 * possible on some of the supported platforms. WAL insert locks only
7066 * support exclusive mode, so we have to use that.
7067 */
7070 LWLockRelease(&WALInsertLocks[i].l.lock);
7071
7072 if (res < last_important)
7073 res = last_important;
7074 }
7075
7076 return res;
7077}
int i
Definition isn.c:77
XLogRecPtr lastImportantAt
Definition xlog.c:378
WALInsertLock l
Definition xlog.c:390
static WALInsertLockPadded * WALInsertLocks
Definition xlog.c:578
#define NUM_XLOGINSERT_LOCKS
Definition xlog.c:157

References fb(), i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 7083 of file xlog.c.

7084{
7086
7087 /* Need WALWriteLock, but shared lock is sufficient */
7092
7093 return result;
7094}
uint32 result
pg_time_t lastSegSwitchTime
Definition xlog.c:473
XLogRecPtr lastSegSwitchLSN
Definition xlog.c:474

References fb(), XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), result, and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4653 of file xlog.c.

4654{
4657}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition pg_control.h:245

References Assert, ControlFile, fb(), and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6868 of file xlog.c.

6869{
6870 RecoveryState retval;
6871
6873 retval = XLogCtl->SharedRecoveryState;
6875
6876 return retval;
6877}
RecoveryState
Definition xlog.h:92

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6935 of file xlog.c.

6936{
6937 XLogRecPtr ptr;
6938
6939 /*
6940 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6941 * grabbed a WAL insertion lock to read the authoritative value in
6942 * Insert->RedoRecPtr, someone might update it just after we've released
6943 * the lock.
6944 */
6946 ptr = XLogCtl->RedoRecPtr;
6948
6949 if (RedoRecPtr < ptr)
6950 RedoRecPtr = ptr;
6951
6952 return RedoRecPtr;
6953}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), reserve_wal_for_local_slot(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 8411 of file xlog.c.

8412{
8413 XLogRecPtr currpos; /* current write LSN */
8414 XLogSegNo currSeg; /* segid of currpos */
8415 XLogSegNo targetSeg; /* segid of targetLSN */
8416 XLogSegNo oldestSeg; /* actual oldest segid */
8417 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
8418 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
8420
8421 /*
8422 * The slot does not reserve WAL: it is either deactivated or has never been active.
8423 */
8425 return WALAVAIL_INVALID_LSN;
8426
8427 /*
8428 * Calculate the oldest segment currently reserved by all slots,
8429 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
8430 * oldestSlotSeg to the current segment.
8431 */
8432 currpos = GetXLogWriteRecPtr();
8434 KeepLogSeg(currpos, &oldestSlotSeg);
8435
8436 /*
8437 * Find the oldest extant segment file. We get 1 until a checkpoint removes
8438 * the first WAL segment file since startup, which can make the status
8439 * wrong under certain abnormal conditions, but that does no actual harm.
8440 */
8442
8443 /* calculate oldest segment by max_wal_size */
8446
8447 if (currSeg > keepSegs)
8449 else
8451
8452 /* the segment we care about */
8454
8455 /*
8456 * No point in returning reserved or extended status values if the
8457 * targetSeg is known to be lost.
8458 */
8459 if (targetSeg >= oldestSlotSeg)
8460 {
8461 /* show "reserved" when targetSeg is within max_wal_size */
8463 return WALAVAIL_RESERVED;
8464
8465 /* being retained by slots exceeding max_wal_size */
8466 return WALAVAIL_EXTENDED;
8467 }
8468
8469 /* WAL segments are no longer retained but haven't been removed yet */
8470 if (targetSeg >= oldestSeg)
8471 return WALAVAIL_UNRESERVED;
8472
8473 /* Definitely lost */
8474 return WALAVAIL_REMOVED;
8475}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3809
XLogRecPtr GetXLogWriteRecPtr(void)
Definition xlog.c:10124
@ WALAVAIL_REMOVED
Definition xlog.h:206
@ WALAVAIL_RESERVED
Definition xlog.h:202
@ WALAVAIL_UNRESERVED
Definition xlog.h:205
@ WALAVAIL_EXTENDED
Definition xlog.h:203
@ WALAVAIL_INVALID_LSN
Definition xlog.h:201

References ConvertToXSegs, fb(), GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsValid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 7018 of file xlog.c.

7019{
7021
7022 /* Since the value can't be changing, no lock is required. */
7023 return XLogCtl->InsertTimeLineID;
7024}

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1673 of file xlog.c.

1674{
1675 int idx;
1676 XLogRecPtr endptr;
1677 static uint64 cachedPage = 0;
1678 static char *cachedPos = NULL;
1680
1681 /*
1682 * Fast path for the common case that we need to access again the same
1683 * page as last time.
1684 */
1685 if (ptr / XLOG_BLCKSZ == cachedPage)
1686 {
1688 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1689 return cachedPos + ptr % XLOG_BLCKSZ;
1690 }
1691
1692 /*
1693 * The XLog buffer cache is organized so that a page is always loaded to a
1694 * particular buffer. That way we can easily calculate the buffer a given
1695 * page must be loaded into, from the XLogRecPtr alone.
1696 */
1697 idx = XLogRecPtrToBufIdx(ptr);
1698
1699 /*
1700 * See what page is loaded in the buffer at the moment. It could be the
1701 * page we're looking for, or something older. It can't be anything newer
1702 * - that would imply the page we're looking for has already been written
1703 * out to disk and evicted, and the caller is responsible for making sure
1704 * that doesn't happen.
1705 *
1706 * We don't hold a lock while we read the value. If someone is just about
1707 * to initialize or has just initialized the page, it's possible that we
1708 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1709 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1710 * we're looking for.
1711 */
1712 expectedEndPtr = ptr;
1714
1716 if (expectedEndPtr != endptr)
1717 {
1719
1720 /*
1721 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1722 * know how far we're finished with inserting the record.
1723 *
1724 * NB: If 'ptr' points to just after the page header, advertise a
1725 * position at the beginning of the page rather than 'ptr' itself. If
1726 * there are no other insertions running, someone might try to flush
1727 * up to our advertised location. If we advertised a position after
1728 * the page header, someone might try to flush the page header, even
1729 * though the page might not actually be initialized yet. As the first
1730 * inserter on the page, we are effectively responsible for making
1731 * sure that it's initialized, before we let insertingAt move past
1732 * the page header.
1733 */
1734 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1737 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1740 else
1741 initializedUpto = ptr;
1742
1744
1745 AdvanceXLInsertBuffer(ptr, tli, false);
1747
1748 if (expectedEndPtr != endptr)
1749 elog(PANIC, "could not find WAL buffer for %X/%08X",
1750 LSN_FORMAT_ARGS(ptr));
1751 }
1752 else
1753 {
1754 /*
1755 * Make sure the initialization of the page is visible to us, and
1756 * won't arrive later to overwrite the WAL data we write on the page.
1757 */
1759 }
1760
1761 /*
1762 * Found the buffer holding this page. Return a pointer to the right
1763 * offset within the page.
1764 */
1765 cachedPage = ptr / XLOG_BLCKSZ;
1767
1769 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1770
1771 return cachedPos + ptr % XLOG_BLCKSZ;
1772}
#define pg_memory_barrier()
Definition atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition xlog.c:1512
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition xlog.c:2026

References AdvanceXLInsertBuffer(), Assert, elog, fb(), idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertEndRecPtr()

XLogRecPtr GetXLogInsertEndRecPtr ( void  )

Definition at line 10108 of file xlog.c.

10109{
10112
10113 SpinLockAcquire(&Insert->insertpos_lck);
10114 current_bytepos = Insert->CurrBytePos;
10115 SpinLockRelease(&Insert->insertpos_lck);
10116
10118}
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition xlog.c:1939

References fb(), XLogCtlData::Insert, Insert(), SpinLockAcquire(), SpinLockRelease(), XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by rebuild_relation_finish_concurrent(), WalSndWaitForWal(), and XLogGetFakeLSN().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4255 of file xlog.c.

4256{
4258
4259 /*
4260 * Generate a random nonce. This is used for authentication requests that
4261 * will fail because the user does not exist. The nonce is used to create
4262 * a genuine-looking password challenge for the non-existent user, in lieu
4263 * of an actual stored password.
4264 */
4266 ereport(PANIC,
4268 errmsg("could not generate secret authorization token")));
4269
4270 memset(ControlFile, 0, sizeof(ControlFileData));
4271 /* Initialize pg_control status fields */
4272 ControlFile->system_identifier = sysidentifier;
4276
4277 /* Set important parameter values for use when replaying WAL */
4286 ControlFile->data_checksum_version = data_checksum_version;
4287
4288 /*
4289 * Set the data_checksum_version value into XLogCtl, which is where all
4290 * processes get the current value from.
4291 */
4292 XLogCtl->data_checksum_version = data_checksum_version;
4293}
bool track_commit_timestamp
Definition commit_ts.c:121
#define MOCK_AUTH_NONCE_LEN
Definition pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition pg_control.h:193
bool wal_log_hints
Definition xlog.c:130
#define FirstNormalUnloggedLSN
Definition xlogdefs.h:37

References ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg, fb(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, memcpy(), MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, and XLogCtl.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 5190 of file xlog.c.

5191{
5193
5195 {
5196 struct config_generic *guc;
5197
5198 guc = find_option("wal_consistency_checking", false, false, ERROR);
5199
5201
5202 set_config_option_ext("wal_consistency_checking",
5204 guc->scontext, guc->source, guc->srole,
5205 GUC_ACTION_SET, true, ERROR, false);
5206
5207 /* checking should not be deferred again */
5209 }
5210}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition guc.c:3288
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition guc.c:1114
@ GUC_ACTION_SET
Definition guc.h:203
char * wal_consistency_checking_string
Definition xlog.c:132

References Assert, check_wal_consistency_checking_deferred, ERROR, fb(), find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, set_config_option_ext(), and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InitLocalDataChecksumState()

void InitLocalDataChecksumState ( void  )

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3614 of file xlog.c.

3616{
3617 char path[MAXPGPATH];
3618 struct stat stat_buf;
3619
3620 Assert(tli != 0);
3621
3622 XLogFilePath(path, tli, *segno, wal_segment_size);
3623
3626 {
3628 return false;
3629 }
3630
3631 if (!find_free)
3632 {
3633 /* Force installation: get rid of any pre-existing segment file */
3634 durable_unlink(path, DEBUG1);
3635 }
3636 else
3637 {
3638 /* Find a free slot to put it in */
3639 while (stat(path, &stat_buf) == 0)
3640 {
3641 if ((*segno) >= max_segno)
3642 {
3643 /* Failed to find a free slot within specified range */
3645 return false;
3646 }
3647 (*segno)++;
3648 XLogFilePath(path, tli, *segno, wal_segment_size);
3649 }
3650 }
3651
3652 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3653 if (durable_rename(tmppath, path, LOG) != 0)
3654 {
3656 /* durable_rename already emitted log message */
3657 return false;
3658 }
3659
3661
3662 return true;
3663}
int durable_unlink(const char *fname, int elevel)
Definition fd.c:873
short access
bool InstallXLogFileSegmentActive
Definition xlog.c:532
#define stat
Definition win32_port.h:74

References Assert, DEBUG1, durable_rename(), durable_unlink(), fb(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 10173 of file xlog.c.

10174{
10175 bool result;
10176
10180
10181 return result;
10182}

References fb(), XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), result, and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 9358 of file xlog.c.

9359{
9360 char *msg = NULL;
9362
9363 Assert(tli != 0);
9364
9365 /*
9366 * Quick exit if fsync is disabled or write() has already synced the WAL
9367 * file.
9368 */
9369 if (!enableFsync ||
9372 return;
9373
9374 /*
9375 * Measure I/O timing to sync the WAL file for pg_stat_io.
9376 */
9378
9380 switch (wal_sync_method)
9381 {
9383 if (pg_fsync_no_writethrough(fd) != 0)
9384 msg = _("could not fsync file \"%s\": %m");
9385 break;
9386#ifdef HAVE_FSYNC_WRITETHROUGH
9388 if (pg_fsync_writethrough(fd) != 0)
9389 msg = _("could not fsync write-through file \"%s\": %m");
9390 break;
9391#endif
9393 if (pg_fdatasync(fd) != 0)
9394 msg = _("could not fdatasync file \"%s\": %m");
9395 break;
9398 /* not reachable */
9399 Assert(false);
9400 break;
9401 default:
9402 ereport(PANIC,
9404 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
9405 break;
9406 }
9407
9408 /* PANIC if failed to fsync */
9409 if (msg)
9410 {
9411 char xlogfname[MAXFNAMELEN];
9412 int save_errno = errno;
9413
9415 errno = save_errno;
9416 ereport(PANIC,
9418 errmsg(msg, xlogfname)));
9419 }
9420
9422
9424 start, 1, 0);
9425}
#define _(x)
Definition elog.c:96
int pg_fsync_no_writethrough(int fd)
Definition fd.c:442
int pg_fdatasync(int fd)
Definition fd.c:481
int pg_fsync_writethrough(int fd)
Definition fd.c:462
return str start
@ IOOBJECT_WAL
Definition pgstat.h:283
@ IOCONTEXT_NORMAL
Definition pgstat.h:293
@ IOOP_FSYNC
Definition pgstat.h:312
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
static int fd(const char *x, int i)
bool track_wal_io_timing
Definition xlog.c:144

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg, errmsg_internal(), fb(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 8495 of file xlog.c.

8496{
8498 XLogSegNo segno;
8500
8502 segno = currSegNo;
8503
8504 /* Calculate how many segments are kept by slots. */
8507 {
8509
8510 /*
8511 * Account for max_slot_wal_keep_size to avoid keeping more than
8512 * configured. However, don't do that during a binary upgrade: if
8513 * slots were to be invalidated because of this, it would not be
8514 * possible to preserve logical ones during the upgrade.
8515 */
8517 {
8519
8522
8523 if (currSegNo - segno > slot_keep_segs)
8524 segno = currSegNo - slot_keep_segs;
8525 }
8526 }
8527
8528 /*
8529 * If WAL summarization is in use, don't remove WAL that has yet to be
8530 * summarized.
8531 */
8534 {
8536
8538 if (unsummarized_segno < segno)
8539 segno = unsummarized_segno;
8540 }
8541
8542 /* but, keep at least wal_keep_size if that's set */
8543 if (wal_keep_size_mb > 0)
8544 {
8546
8548 if (currSegNo - segno < keep_segs)
8549 {
8550 /* avoid underflow, don't go below 1 */
8551 if (currSegNo <= keep_segs)
8552 segno = 1;
8553 else
8554 segno = currSegNo - keep_segs;
8555 }
8556 }
8557
8558 /* don't delete WAL segments newer than the calculated segment */
8559 if (segno < *logSegNo)
8560 *logSegNo = segno;
8561}
bool IsBinaryUpgrade
Definition globals.c:123
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition xlog.c:123
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2700
int max_slot_wal_keep_size_mb
Definition xlog.c:142

References ConvertToXSegs, fb(), GetOldestUnsummarizedLSN(), IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6920 of file xlog.c.

6921{
6923
6925
6926 return oldXLogAllowed;
6927}

References fb(), and LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint,
int  flags 
)
static

Definition at line 7187 of file xlog.c.

7188{
7189 long write_msecs,
7190 sync_msecs,
7195
7197
7200
7203
7204 /* Accumulate checkpoint timing summary data, in milliseconds. */
7207
7208 /*
7209 * All of the published timing statistics are accounted for. Only
7210 * continue if a log message is to be written.
7211 */
7212 if (!log_checkpoints)
7213 return;
7214
7217
7218 /*
7219 * Timing values returned from CheckpointStats are in microseconds.
7220 * Convert to milliseconds for consistent printing.
7221 */
7223
7228 average_msecs = (long) ((average_sync_time + 999) / 1000);
7229
7230 /*
7231 * ControlFileLock is not required to see ControlFile->checkPoint and
7232 * ->checkPointCopy here as we are the only updater of those variables at
7233 * this moment.
7234 */
7235 if (restartpoint)
7236 ereport(LOG,
7237 (errmsg("restartpoint complete:%s: wrote %d buffers (%.1f%%), "
7238 "wrote %d SLRU buffers; %d WAL file(s) added, "
7239 "%d removed, %d recycled; write=%ld.%03d s, "
7240 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
7241 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
7242 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
7243 CheckpointFlagsString(flags),
7250 write_msecs / 1000, (int) (write_msecs % 1000),
7251 sync_msecs / 1000, (int) (sync_msecs % 1000),
7252 total_msecs / 1000, (int) (total_msecs % 1000),
7254 longest_msecs / 1000, (int) (longest_msecs % 1000),
7255 average_msecs / 1000, (int) (average_msecs % 1000),
7256 (int) (PrevCheckPointDistance / 1024.0),
7257 (int) (CheckPointDistanceEstimate / 1024.0),
7260 else
7261 ereport(LOG,
7262 (errmsg("checkpoint complete:%s: wrote %d buffers (%.1f%%), "
7263 "wrote %d SLRU buffers; %d WAL file(s) added, "
7264 "%d removed, %d recycled; write=%ld.%03d s, "
7265 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
7266 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
7267 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
7268 CheckpointFlagsString(flags),
7275 write_msecs / 1000, (int) (write_msecs % 1000),
7276 sync_msecs / 1000, (int) (sync_msecs % 1000),
7277 total_msecs / 1000, (int) (total_msecs % 1000),
7279 longest_msecs / 1000, (int) (longest_msecs % 1000),
7280 average_msecs / 1000, (int) (average_msecs % 1000),
7281 (int) (PrevCheckPointDistance / 1024.0),
7282 (int) (CheckPointDistanceEstimate / 1024.0),
7285}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition timestamp.c:1765
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition xlog.h:188
uint64 ckpt_longest_sync
Definition xlog.h:187
TimestampTz ckpt_end_t
Definition xlog.h:177
int ckpt_slru_written
Definition xlog.h:180
PgStat_Counter sync_time
Definition pgstat.h:269
PgStat_Counter write_time
Definition pgstat.h:268
static const char * CheckpointFlagsString(int flags)
Definition xlog.c:7148
static double CheckPointDistanceEstimate
Definition xlog.c:166
static double PrevCheckPointDistance
Definition xlog.c:167

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointFlagsString(), CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg, fb(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 7169 of file xlog.c.

7170{
7171 if (restartpoint)
7172 ereport(LOG,
7173 /* translator: the placeholder shows checkpoint options */
7174 (errmsg("restartpoint starting:%s",
7175 CheckpointFlagsString(flags))));
7176 else
7177 ereport(LOG,
7178 /* translator: the placeholder shows checkpoint options */
7179 (errmsg("checkpoint starting:%s",
7180 CheckpointFlagsString(flags))));
7181}

References CheckpointFlagsString(), ereport, errmsg, fb(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6782 of file xlog.c.

6783{
6784 bool promoted = false;
6785
6786 /*
6787 * Perform a checkpoint to update all our recovery activity to disk.
6788 *
6789 * Note that we write a shutdown checkpoint rather than an on-line one.
6790 * This is not particularly critical, but since we may be assigning a new
6791 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6792 * only changes in shutdown checkpoints, which allows some extra error
6793 * checking in xlog_redo.
6794 *
6795 * In promotion, only create a lightweight end-of-recovery record instead
6796 * of a full checkpoint. A checkpoint is requested later, after we're
6797 * fully out of recovery mode and already accepting queries.
6798 */
6801 {
6802 promoted = true;
6803
6804 /*
6805 * Insert a special WAL record to mark the end of recovery, since we
6806 * aren't doing a checkpoint. That means that the checkpointer process
6807 * may well be in the middle of a time-smoothed restartpoint and
6808 * could continue to be for minutes after this. That sounds strange,
6809 * but the effect is roughly the same and it would be stranger to try
6810 * to come out of the restartpoint and then checkpoint. We request a
6811 * checkpoint later anyway, just for safety.
6812 */
6814 }
6815 else
6816 {
6820 }
6821
6822 return promoted;
6823}
static void CreateEndOfRecoveryRecord(void)
Definition xlog.c:7905
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), fb(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3741 of file xlog.c.

3742{
3744 int lf;
3745 bool added;
3746 char path[MAXPGPATH];
3747 uint64 offset;
3748
3750 return; /* unlocked check says no */
3751
3753 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3754 if (offset >= (uint32) (0.75 * wal_segment_size))
3755 {
3756 _logSegNo++;
3757 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3758 if (lf >= 0)
3759 close(lf);
3760 if (added)
3762 }
3763}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition xlog.c:3243

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, fb(), XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6745 of file xlog.c.

6746{
6747 /*
6748 * We have reached the end of base backup, as indicated by pg_control. The
6749 * data on disk is now consistent (unless minRecoveryPoint is further
6750 * ahead, which can happen if we crashed during previous recovery). Reset
6751 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6752 * make sure we don't allow starting up at an earlier point even if
6753 * recovery is stopped and restarted soon after this.
6754 */
6756
6757 if (ControlFile->minRecoveryPoint < EndRecPtr)
6758 {
6759 ControlFile->minRecoveryPoint = EndRecPtr;
6761 }
6762
6767
6769}
XLogRecPtr backupStartPoint
Definition pg_control.h:178
XLogRecPtr backupEndPoint
Definition pg_control.h:179

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, fb(), InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4406 of file xlog.c.

4407{
4408 pg_crc32c crc;
4409 int fd;
4410 char wal_segsz_str[20];
4411 int r;
4412
4413 /*
4414 * Read data...
4415 */
4417 O_RDWR | PG_BINARY);
4418 if (fd < 0)
4419 ereport(PANIC,
4421 errmsg("could not open file \"%s\": %m",
4423
4425 r = read(fd, ControlFile, sizeof(ControlFileData));
4426 if (r != sizeof(ControlFileData))
4427 {
4428 if (r < 0)
4429 ereport(PANIC,
4431 errmsg("could not read file \"%s\": %m",
4433 else
4434 ereport(PANIC,
4436 errmsg("could not read file \"%s\": read %d of %zu",
4437 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4438 }
4440
4441 close(fd);
4442
4443 /*
4444 * Check for expected pg_control format version. If this is wrong, the
4445 * CRC check will likely fail because we'll be checking the wrong number
4446 * of bytes. Complaining about wrong version will probably be more
4447 * enlightening than complaining about wrong CRC.
4448 */
4449
4451 ereport(FATAL,
4453 errmsg("database files are incompatible with server"),
4454 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4455 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4458 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4459
4461 ereport(FATAL,
4463 errmsg("database files are incompatible with server"),
4464 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4465 " but the server was compiled with PG_CONTROL_VERSION %d.",
4467 errhint("It looks like you need to initdb.")));
4468
4469 /* Now check the CRC. */
4474 FIN_CRC32C(crc);
4475
4476 if (!EQ_CRC32C(crc, ControlFile->crc))
4477 ereport(FATAL,
4479 errmsg("incorrect checksum in control file")));
4480
4481 /*
4482 * Do compatibility checking immediately. If the database isn't
4483 * compatible with the backend executable, we want to abort before we can
4484 * possibly do any damage.
4485 */
4487 ereport(FATAL,
4489 errmsg("database files are incompatible with server"),
4490 /* translator: %s is a variable name and %d is its value */
4491 errdetail("The database cluster was initialized with %s %d,"
4492 " but the server was compiled with %s %d.",
4493 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4494 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4495 errhint("It looks like you need to initdb.")));
4497 ereport(FATAL,
4499 errmsg("database files are incompatible with server"),
4500 /* translator: %s is a variable name and %d is its value */
4501 errdetail("The database cluster was initialized with %s %d,"
4502 " but the server was compiled with %s %d.",
4503 "MAXALIGN", ControlFile->maxAlign,
4504 "MAXALIGN", MAXIMUM_ALIGNOF),
4505 errhint("It looks like you need to initdb.")));
4507 ereport(FATAL,
4509 errmsg("database files are incompatible with server"),
4510 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4511 errhint("It looks like you need to initdb.")));
4512 if (ControlFile->blcksz != BLCKSZ)
4513 ereport(FATAL,
4515 errmsg("database files are incompatible with server"),
4516 /* translator: %s is a variable name and %d is its value */
4517 errdetail("The database cluster was initialized with %s %d,"
4518 " but the server was compiled with %s %d.",
4519 "BLCKSZ", ControlFile->blcksz,
4520 "BLCKSZ", BLCKSZ),
4521 errhint("It looks like you need to recompile or initdb.")));
4523 ereport(FATAL,
4525 errmsg("database files are incompatible with server"),
4526 /* translator: %s is a variable name and %d is its value */
4527 errdetail("The database cluster was initialized with %s %d,"
4528 " but the server was compiled with %s %d.",
4529 "RELSEG_SIZE", ControlFile->relseg_size,
4530 "RELSEG_SIZE", RELSEG_SIZE),
4531 errhint("It looks like you need to recompile or initdb.")));
4533 ereport(FATAL,
4535 errmsg("database files are incompatible with server"),
4536 /* translator: %s is a variable name and %d is its value */
4537 errdetail("The database cluster was initialized with %s %d,"
4538 " but the server was compiled with %s %d.",
4539 "SLRU_PAGES_PER_SEGMENT", ControlFile->slru_pages_per_segment,
4540 "SLRU_PAGES_PER_SEGMENT", SLRU_PAGES_PER_SEGMENT),
4541 errhint("It looks like you need to recompile or initdb.")));
4543 ereport(FATAL,
4545 errmsg("database files are incompatible with server"),
4546 /* translator: %s is a variable name and %d is its value */
4547 errdetail("The database cluster was initialized with %s %d,"
4548 " but the server was compiled with %s %d.",
4549 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4550 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4551 errhint("It looks like you need to recompile or initdb.")));
4553 ereport(FATAL,
4555 errmsg("database files are incompatible with server"),
4556 /* translator: %s is a variable name and %d is its value */
4557 errdetail("The database cluster was initialized with %s %d,"
4558 " but the server was compiled with %s %d.",
4559 "NAMEDATALEN", ControlFile->nameDataLen,
4560 "NAMEDATALEN", NAMEDATALEN),
4561 errhint("It looks like you need to recompile or initdb.")));
4563 ereport(FATAL,
4565 errmsg("database files are incompatible with server"),
4566 /* translator: %s is a variable name and %d is its value */
4567 errdetail("The database cluster was initialized with %s %d,"
4568 " but the server was compiled with %s %d.",
4569 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4570 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4571 errhint("It looks like you need to recompile or initdb.")));
4573 ereport(FATAL,
4575 errmsg("database files are incompatible with server"),
4576 /* translator: %s is a variable name and %d is its value */
4577 errdetail("The database cluster was initialized with %s %d,"
4578 " but the server was compiled with %s %d.",
4579 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4580 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4581 errhint("It looks like you need to recompile or initdb.")));
4583 ereport(FATAL,
4585 errmsg("database files are incompatible with server"),
4586 /* translator: %s is a variable name and %d is its value */
4587 errdetail("The database cluster was initialized with %s %d,"
4588 " but the server was compiled with %s %d.",
4589 "LOBLKSIZE", ControlFile->loblksize,
4590 "LOBLKSIZE", (int) LOBLKSIZE),
4591 errhint("It looks like you need to recompile or initdb.")));
4592
4593 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
4594
4596
4599 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4600 "invalid WAL segment size in control file (%d bytes)",
4603 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4604
4606 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4608
4609 /* check and update variables dependent on wal_segment_size */
4612 /* translator: both %s are GUC names */
4613 errmsg("\"%s\" must be at least twice \"%s\"",
4614 "min_wal_size", "wal_segment_size")));
4615
4618 /* translator: both %s are GUC names */
4619 errmsg("\"%s\" must be at least twice \"%s\"",
4620 "max_wal_size", "wal_segment_size")));
4621
4625
4627}
#define PG_BINARY
Definition c.h:1374
#define CATALOG_VERSION_NO
Definition catversion.h:60
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
int BasicOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1090
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4234
@ PGC_S_DYNAMIC_DEFAULT
Definition guc.h:114
@ PGC_INTERNAL
Definition guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition heaptoast.h:84
#define read(a, b, c)
Definition win32.h:13
#define LOBLKSIZE
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define SLRU_PAGES_PER_SEGMENT
#define FLOATFORMAT_VALUE
Definition pg_control.h:209
#define PG_CONTROL_VERSION
Definition pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
uint32 pg_control_version
Definition pg_control.h:133
uint32 xlog_seg_size
Definition pg_control.h:221
uint32 slru_pages_per_segment
Definition pg_control.h:218
uint32 indexMaxKeys
Definition pg_control.h:224
uint32 catalog_version_no
Definition pg_control.h:134
pg_crc32c crc
Definition pg_control.h:248
uint32 toast_max_chunk_size
Definition pg_control.h:226
#define UsableBytesInPage
Definition xlog.c:617
static int UsableBytesInSegment
Definition xlog.c:626
int min_wal_size_mb
Definition xlog.c:122
#define XLOG_CONTROL_FILE

References Assert, BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg, errmsg_plural(), ERROR, FATAL, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6832 of file xlog.c.

6833{
6834 /*
6835 * We check shared state each time only until we leave recovery mode. We
6836 * can't re-enter recovery, so there's no need to keep checking after the
6837 * shared variable has once been seen false.
6838 */
6840 return false;
6841 else
6842 {
6843 /*
6844 * use volatile pointer to make sure we make a fresh read of the
6845 * shared variable.
6846 */
6847 volatile XLogCtlData *xlogctl = XLogCtl;
6848
6849 LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6850
6851 /*
6852 * Note: We don't need a memory barrier when we're still in recovery.
6853 * We might exit recovery immediately after return, so the caller
6854 * can't rely on 'true' meaning that we're still in recovery anyway.
6855 */
6856
6858 }
6859}
static bool LocalRecoveryInProgress
Definition xlog.c:231

References fb(), LocalRecoveryInProgress, RECOVERY_STATE_DONE, and XLogCtl.

Referenced by AbsorbDataChecksumsBarrier(), amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), DisableLogicalDecoding(), DisableLogicalDecodingIfNecessary(), do_pg_backup_start(), do_pg_backup_stop(), EnableLogicalDecoding(), EnsureLogicalDecodingEnabled(), error_commit_ts_disabled(), ExecCheckpoint(), ExecWaitStmt(), extended_statistics_update(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkSharedBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_clear_extended_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_stat_get_recovery(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), 
pg_walfile_name_offset(), pgstat_report_replslotsync(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_effective_wal_level(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), verify_heapam(), WaitForLSN(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 8086 of file xlog.c.

8087{
8088 /*
8089 * Also refrain from creating a restartpoint if we have seen any
8090 * references to non-existent pages. Restarting recovery from the
8091 * restartpoint would not see the references, so we would lose the
8092 * cross-check that the pages belonged to a relation that was dropped
8093 * later.
8094 */
8096 {
8097 elog(DEBUG2,
8098 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
8099 LSN_FORMAT_ARGS(checkPoint->redo));
8100 return;
8101 }
8102
8103 /*
8104 * Copy the checkpoint record to shared memory, so that checkpointer can
8105 * work out the next time it wants to perform a restartpoint.
8106 */
8110 XLogCtl->lastCheckPoint = *checkPoint;
8112}
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
bool XLogHaveInvalidPages(void)
Definition xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire(), SpinLockRelease(), XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 10078 of file xlog.c.

10079{
10080 static bool already_done = false;
10081
10082 if (already_done)
10083 return;
10085 already_done = true;
10086}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344

References before_shmem_exit(), BoolGetDatum(), do_pg_abort_backup(), and fb().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3991 of file xlog.c.

3992{
3993 DIR *xldir;
3994 struct dirent *xlde;
3995 char switchseg[MAXFNAMELEN];
3999
4000 /*
4001 * Initialize info about where to begin the work. This will recycle,
4002 * somewhat arbitrarily, 10 future segments.
4003 */
4007
4008 /*
4009 * Construct a filename of the last segment to be kept.
4010 */
4012
4013 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
4014 switchseg);
4015
4017
4018 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4019 {
4020 /* Ignore files that are not XLOG segments */
4021 if (!IsXLogFileName(xlde->d_name))
4022 continue;
4023
4024 /*
4025 * Remove files that are on a timeline older than the new one we're
4026 * switching to, but with a segment number >= the first segment on the
4027 * new timeline.
4028 */
4029 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4030 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4031 {
4032 /*
4033 * If the file has already been marked as .ready, however, don't
4034 * remove it yet. It should be OK to remove it - files that are
4035 * not part of our timeline history are not required for recovery
4036 * - but it seems safer to let them be archived and removed later.
4037 */
4038 if (!XLogArchiveIsReady(xlde->d_name))
4040 }
4041 }
4042
4043 FreeDir(xldir);
4044}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition xlog.c:4060
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3916 of file xlog.c.

3918{
3919 DIR *xldir;
3920 struct dirent *xlde;
3921 char lastoff[MAXFNAMELEN];
3924
3925 /* Initialize info about where to try to recycle to */
3928
3929 /*
3930 * Construct a filename of the last segment to be kept. The timeline ID
3931 * doesn't matter, we ignore that in the comparison. (During recovery,
3932 * InsertTimeLineID isn't set, so we can't use that.)
3933 */
3935
3936 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3937 lastoff);
3938
3940
3941 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3942 {
3943 /* Ignore files that are not XLOG segments */
3944 if (!IsXLogFileName(xlde->d_name) &&
3945 !IsPartialXLogFileName(xlde->d_name))
3946 continue;
3947
3948 /*
3949 * We ignore the timeline part of the XLOG segment identifiers in
3950 * deciding whether a segment is still needed. This ensures that we
3951 * won't prematurely remove a segment from a parent timeline. We could
3952 * probably be a little more proactive about removing segments of
3953 * non-parent timelines, but that would be a whole lot more
3954 * complicated.
3955 *
3956 * We use the alphanumeric sorting property of the filenames to decide
3957 * which ones are earlier than the lastoff segment.
3958 */
3959 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3960 {
3961 if (XLogArchiveCheckDone(xlde->d_name))
3962 {
3963 /* Update the last removed location in shared memory first */
3964 UpdateLastRemovedPtr(xlde->d_name);
3965
3967 }
3968 }
3969 }
3970
3971 FreeDir(xldir);
3972}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition xlog.c:2251
static void UpdateLastRemovedPtr(char *filename)
Definition xlog.c:3863
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3883 of file xlog.c.

3884{
3885 DIR *xldir;
3886 struct dirent *xlde;
3887
3888 elog(DEBUG2, "removing all temporary WAL segments");
3889
3891 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3892 {
3893 char path[MAXPGPATH];
3894
3895 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3896 continue;
3897
3898 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3899 unlink(path);
3900 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3901 }
3902 FreeDir(xldir);
3903}

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4060 of file xlog.c.

4063{
4064 char path[MAXPGPATH];
4065#ifdef WIN32
4066 char newpath[MAXPGPATH];
4067#endif
4068 const char *segname = segment_de->d_name;
4069
4070 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4071
4072 /*
4073 * Before deleting the file, see if it can be recycled as a future log
4074 * segment. Only recycle normal files, because we don't want to recycle
4075 * symbolic links pointing to a separate archive directory.
4076 */
4077 if (wal_recycle &&
4079 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4080 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4082 true, recycleSegNo, insertTLI))
4083 {
4085 (errmsg_internal("recycled write-ahead log file \"%s\"",
4086 segname)));
4088 /* Needn't recheck that slot on future iterations */
4089 (*endlogSegNo)++;
4090 }
4091 else
4092 {
4093 /* No need for any more future segments, or recycling failed ... */
4094 int rc;
4095
4097 (errmsg_internal("removing write-ahead log file \"%s\"",
4098 segname)));
4099
4100#ifdef WIN32
4101
4102 /*
4103 * On Windows, if another process (e.g. another backend) holds the file
4104 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4105 * will still show up in directory listing until the last handle is
4106 * closed. To avoid confusing the lingering deleted file for a live
4107 * WAL file that needs to be archived, rename it before deleting it.
4108 *
4109 * If another process holds the file open without FILE_SHARE_DELETE
4110 * flag, rename will fail. We'll try again at the next checkpoint.
4111 */
4112 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4113 if (rename(path, newpath) != 0)
4114 {
4115 ereport(LOG,
4117 errmsg("could not rename file \"%s\": %m",
4118 path)));
4119 return;
4120 }
4121 rc = durable_unlink(newpath, LOG);
4122#else
4123 rc = durable_unlink(path, LOG);
4124#endif
4125 if (rc != 0)
4126 {
4127 /* Message already logged by durable_unlink() */
4128 return;
4129 }
4131 }
4132
4134}
@ PGFILETYPE_REG
Definition file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition xlog.c:3614
bool wal_recycle
Definition xlog.c:135

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg, errmsg_internal(), fb(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8604 of file xlog.c.

8605{
8607
8608 /* XLOG SWITCH has no data */
8610
8611 if (mark_unimportant)
8614
8615 return RecPtr;
8616}
#define XLOG_SWITCH
Definition pg_control.h:76
#define XLOG_MARK_UNIMPORTANT
Definition xlog.h:167
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:464

References fb(), XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1149 of file xlog.c.

1151{
1156
1157 size = MAXALIGN(size);
1158
1159 /* All (non xlog-switch) records should contain data. */
1160 Assert(size > SizeOfXLogRecord);
1161
1162 /*
1163 * The duration the spinlock needs to be held is minimized by minimizing
1164 * the calculations that have to be done while holding the lock. The
1165 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1166 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1167 * page headers. The mapping between "usable" byte positions and physical
1168 * positions (XLogRecPtrs) can be done outside the locked region, and
1169 * because the usable byte position doesn't include any headers, reserving
1170 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1171 */
1172 SpinLockAcquire(&Insert->insertpos_lck);
1173
1174 startbytepos = Insert->CurrBytePos;
1175 endbytepos = startbytepos + size;
1176 prevbytepos = Insert->PrevBytePos;
1177 Insert->CurrBytePos = endbytepos;
1178 Insert->PrevBytePos = startbytepos;
1179
1180 SpinLockRelease(&Insert->insertpos_lck);
1181
1185
1186 /*
1187 * Check that the conversions between "usable byte positions" and
1188 * XLogRecPtrs work consistently in both directions.
1189 */
1193}
#define MAXALIGN(LEN)
Definition c.h:896
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition xlog.c:1982

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire(), SpinLockRelease(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1205 of file xlog.c.

1206{
1212 XLogRecPtr ptr;
1214
1215 /*
1216 * These calculations are a bit heavy-weight to be done while holding a
1217 * spinlock, but since we're holding all the WAL insertion locks, there
1218 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1219 * compete for it, but that's not called very frequently.
1220 */
1221 SpinLockAcquire(&Insert->insertpos_lck);
1222
1223 startbytepos = Insert->CurrBytePos;
1224
1226 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1227 {
1228 SpinLockRelease(&Insert->insertpos_lck);
1229 *EndPos = *StartPos = ptr;
1230 return false;
1231 }
1232
1233 endbytepos = startbytepos + size;
1234 prevbytepos = Insert->PrevBytePos;
1235
1238
1241 {
1242 /* consume the rest of the segment */
1243 *EndPos += segleft;
1245 }
1246 Insert->CurrBytePos = endbytepos;
1247 Insert->PrevBytePos = startbytepos;
1248
1249 SpinLockRelease(&Insert->insertpos_lck);
1250
1252
1257
1258 return true;
1259}

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ ResetInstallXLogFileSegmentActive()

◆ SetDataChecksumsOff()

void SetDataChecksumsOff ( void  )

Definition at line 4867 of file xlog.c.

4868{
4870
4872
4873 /* If data checksums are already disabled there is nothing to do */
4875 {
4877 return;
4878 }
4879
4880 /*
4881 * If data checksums are currently enabled, or in the process of being
4882 * enabled, we first transition to the "inprogress-off" state during which
4883 * backends continue to write checksums without verifying them. When all
4884 * backends are in "inprogress-off" the next transition to "off" can be
4885 * performed, after which all data checksum processing is disabled.
4886 */
4889 {
4891
4894
4896
4900
4905
4907
4910
4913
4914 /*
4915 * At this point we know that no backends are verifying data checksums
4916 * during reading. Next, we can safely move to state "off" to also
4917 * stop writing checksums.
4918 */
4919 }
4920 else
4921 {
4922 /*
4923 * Ending up here implies that the checksums state is "inprogress-off"
4924 * and we can transition directly to "off" from there.
4925 */
4927 }
4928
4930 /* Ensure that we don't incur a checkpoint during disabling checksums */
4932
4934
4938
4943
4945
4948
4951}
static THREAD_BARRIER_T barrier
Definition pgbench.c:488
void WaitForProcSignalBarrier(uint64 generation)
Definition procsignal.c:428
uint64 EmitProcSignalBarrier(ProcSignalBarrierType type)
Definition procsignal.c:360
@ PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF
Definition procsignal.h:55
@ PROCSIGNAL_BARRIER_CHECKSUM_OFF
Definition procsignal.h:53
PGPROC * MyProc
Definition proc.c:71
int delayChkptFlags
Definition proc.h:260
static void XLogChecksums(uint32 new_type)
Definition xlog.c:8730

References barrier, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_OFF, PG_DATA_CHECKSUM_VERSION, PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF, PROCSIGNAL_BARRIER_CHECKSUM_OFF, RequestCheckpoint(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain(), launcher_exit(), ProcessAllDatabases(), and SetDataChecksumsOn().

◆ SetDataChecksumsOn()

void SetDataChecksumsOn ( void  )

Definition at line 4802 of file xlog.c.

4803{
4805
4807
4808 /*
4809 * The only allowed state transition to "on" is from "inprogress-on" since
4810 * that state ensures that all pages will have data checksums written. No
4811 * other transition to "on" exists; if one is attempted anyway, it is most
4812 * likely due to a programmer error.
4813 */
4815 {
4817 elog(WARNING,
4818 "cannot set data checksums to \"on\", current state is not \"inprogress-on\", disabling");
4820 return;
4821 }
4822
4824
4825 INJECTION_POINT("datachecksums-enable-checksums-delay", NULL);
4828
4830
4834
4835 /*
4836 * Update the controlfile before waiting since if we have an immediate
4837 * shutdown while waiting we want to come back up with checksums enabled.
4838 */
4843
4845
4848
4851}
@ PROCSIGNAL_BARRIER_CHECKSUM_ON
Definition procsignal.h:56
void SetDataChecksumsOff(void)
Definition xlog.c:4867

References barrier, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, INJECTION_POINT, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_VERSION, PROCSIGNAL_BARRIER_CHECKSUM_ON, RequestCheckpoint(), SetDataChecksumsOff(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), WARNING, XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain().

◆ SetDataChecksumsOnInProgress()

void SetDataChecksumsOnInProgress ( void  )

Definition at line 4749 of file xlog.c.

4750{
4752
4753 /*
4754 * The state transition is performed in a critical section with
4755 * checkpoints held off to provide crash safety.
4756 */
4759
4761
4765
4770
4772
4775
4777}
@ PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON
Definition procsignal.h:54

References barrier, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_ON, PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain().

◆ SetInstallXLogFileSegmentActive()

◆ SetLocalDataChecksumState()

void SetLocalDataChecksumState ( uint32  data_checksum_version)

Definition at line 4971 of file xlog.c.

4972{
4973 LocalDataChecksumState = data_checksum_version;
4974
4975 data_checksums = data_checksum_version;
4976}
int data_checksums
Definition xlog.c:683

References data_checksums, and LocalDataChecksumState.

Referenced by AbsorbDataChecksumsBarrier(), InitLocalDataChecksumState(), LocalProcessControlFile(), StartupXLOG(), xlog_redo(), and XLOGShmemInit().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 5216 of file xlog.c.

5217{
5218 if (XLogArchivingActive())
5219 return XLogArchiveCommand;
5220 else
5221 return "(disabled)";
5222}
char * XLogArchiveCommand
Definition xlog.c:127

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_data_checksums()

const char * show_data_checksums ( void  )

Definition at line 4980 of file xlog.c.

4981{
4983}
const char * get_checksum_state_string(uint32 state)
Definition xlogdesc.c:59

References get_checksum_state_string(), and LocalDataChecksumState.

◆ show_effective_wal_level()

const char * show_effective_wal_level ( void  )

Definition at line 5243 of file xlog.c.

5244{
5246 return "minimal";
5247
5248 /*
5249 * During recovery, effective_wal_level reflects the primary's
5250 * configuration rather than the local wal_level value.
5251 */
5252 if (RecoveryInProgress())
5253 return IsXLogLogicalInfoEnabled() ? "logical" : "replica";
5254
5255 return XLogLogicalInfoActive() ? "logical" : "replica";
5256}
bool IsXLogLogicalInfoEnabled(void)
Definition logicalctl.c:218
#define XLogLogicalInfoActive()
Definition xlog.h:137

References IsXLogLogicalInfoEnabled(), RecoveryInProgress(), wal_level, WAL_LEVEL_MINIMAL, and XLogLogicalInfoActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 5228 of file xlog.c.

5229{
5230 /*
5231 * We display the actual state based on shared memory, so that this GUC
5232 * reports up-to-date state if examined intra-query. The underlying
5233 * variable (in_hot_standby_guc) changes only when we transmit a new value
5234 * to the client.
5235 */
5236 return RecoveryInProgress() ? "on" : "off";
5237}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 7100 of file xlog.c.

7101{
7102 /*
7103 * We should have an aux process resource owner to use, and we should not
7104 * be in a transaction that's installed some other resowner.
7105 */
7110
7111 /* Don't be chatty in standalone mode */
7113 (errmsg("shutting down")));
7114
7115 /*
7116 * Signal walsenders to move to stopping state.
7117 */
7119
7120 /*
7121 * Wait for WAL senders to be in stopping state. This prevents commands
7122 * from writing new WAL.
7123 */
7125
7126 if (RecoveryInProgress())
7128 else
7129 {
7130 /*
7131 * If archiving is enabled, rotate the last XLOG file so that all the
7132 * remaining records are archived (postmaster wakes up the archiver
7133 * process one more time at the end of shutdown). The checkpoint
7134 * record will go to the next XLOG file and won't be archived (yet).
7135 */
7136 if (XLogArchivingActive())
7137 RequestXLogSwitch(false);
7138
7140 }
7141}
bool IsPostmasterEnvironment
Definition globals.c:121
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition resowner.c:176
void WalSndInitStopping(void)
Definition walsender.c:4091
void WalSndWaitStopping(void)
Definition walsender.c:4117
bool CreateRestartPoint(int flags)
Definition xlog.c:8126
bool CreateCheckPoint(int flags)
Definition xlog.c:7397

References Assert, AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg, fb(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5847 of file xlog.c.

5848{
5850 CheckPoint checkPoint;
5851 bool wasShutdown;
5852 bool didCrash;
5853 bool haveTblspcMap;
5854 bool haveBackupLabel;
5863 bool promoted = false;
5864 char timebuf[128];
5865
5866 /*
5867 * We should have an aux process resource owner to use, and we should not
5868 * be in a transaction that's installed some other resowner.
5869 */
5874
5875 /*
5876 * Check that contents look valid.
5877 */
5879 ereport(FATAL,
5881 errmsg("control file contains invalid checkpoint location")));
5882
5883 switch (ControlFile->state)
5884 {
5885 case DB_SHUTDOWNED:
5886
5887 /*
5888 * This is the expected case, so don't be chatty in standalone
5889 * mode
5890 */
5892 (errmsg("database system was shut down at %s",
5893 str_time(ControlFile->time,
5894 timebuf, sizeof(timebuf)))));
5895 break;
5896
5898 ereport(LOG,
5899 (errmsg("database system was shut down in recovery at %s",
5901 timebuf, sizeof(timebuf)))));
5902 break;
5903
5904 case DB_SHUTDOWNING:
5905 ereport(LOG,
5906 (errmsg("database system shutdown was interrupted; last known up at %s",
5908 timebuf, sizeof(timebuf)))));
5909 break;
5910
5912 ereport(LOG,
5913 (errmsg("database system was interrupted while in recovery at %s",
5915 timebuf, sizeof(timebuf))),
5916 errhint("This probably means that some data is corrupted and"
5917 " you will have to use the last backup for recovery.")));
5918 break;
5919
5921 ereport(LOG,
5922 (errmsg("database system was interrupted while in recovery at log time %s",
5924 timebuf, sizeof(timebuf))),
5925 errhint("If this has occurred more than once some data might be corrupted"
5926 " and you might need to choose an earlier recovery target.")));
5927 break;
5928
5929 case DB_IN_PRODUCTION:
5930 ereport(LOG,
5931 (errmsg("database system was interrupted; last known up at %s",
5933 timebuf, sizeof(timebuf)))));
5934 break;
5935
5936 default:
5937 ereport(FATAL,
5939 errmsg("control file contains invalid database cluster state")));
5940 }
5941
5942 /* This is just to allow attaching to startup process with a debugger */
5943#ifdef XLOG_REPLAY_DELAY
5945 pg_usleep(60000000L);
5946#endif
5947
5948 /*
5949 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5950 * In cases where someone has performed a copy for PITR, these directories
5951 * may have been excluded and need to be re-created.
5952 */
5954
5955 /* Set up timeout handler needed to report startup progress. */
5959
5960 /*----------
5961 * If we previously crashed, perform a couple of actions:
5962 *
5963 * - The pg_wal directory may still include some temporary WAL segments
5964 * used when creating a new segment, so perform some clean up to not
5965 * bloat this path. This is done first as there is no point to sync
5966 * this temporary data.
5967 *
5968 * - There might be data which we had written, intending to fsync it, but
5969 * which we had not actually fsync'd yet. Therefore, a power failure in
5970 * the near future might cause earlier unflushed writes to be lost, even
5971 * though more recent data written to disk from here on would be
5972 * persisted. To avoid that, fsync the entire data directory.
5973 */
5976 {
5979 didCrash = true;
5980 }
5981 else
5982 didCrash = false;
5983
5984 /*
5985 * Prepare for WAL recovery if needed.
5986 *
5987 * InitWalRecovery analyzes the control file and the backup label file, if
5988 * any. It updates the in-memory ControlFile buffer according to the
5989 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5990 * It also applies the tablespace map file, if any.
5991 */
5994 checkPoint = ControlFile->checkPointCopy;
5995
5996 /* initialize shared memory variables from the checkpoint record */
5997 TransamVariables->nextXid = checkPoint.nextXid;
5998 TransamVariables->nextOid = checkPoint.nextOid;
6000 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
6001 AdvanceOldestClogXid(checkPoint.oldestXid);
6002 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
6003 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
6005 checkPoint.newestCommitTsXid);
6006
6007 /*
6008 * Clear out any old relcache cache files. This is *necessary* if we do
6009 * any WAL replay, since that would probably result in the cache files
6010 * being out of sync with database reality. In theory we could leave them
6011 * in place if the database had been cleanly shut down, but it seems
6012 * safest to just remove them always and let them be rebuilt during the
6013 * first backend startup. These files need to be removed from all
6014 * directories including pg_tblspc; however, the symlinks are created only
6015 * after reading the tablespace_map file in case of archive recovery from
6016 * backup, so we need to clear old relcache files here, after creating
6017 * the symlinks.
6018 */
6020
6021 /*
6022 * Initialize replication slots, before there's a chance to remove
6023 * required resources.
6024 */
6026
6027 /*
6028 * Startup the logical decoding status with the last status stored in the
6029 * checkpoint record.
6030 */
6032
6033 /*
6034 * Startup logical state, needs to be setup now so we have proper data
6035 * during crash recovery.
6036 */
6038
6039 /*
6040 * Startup CLOG. This must be done after TransamVariables->nextXid has
6041 * been initialized and before we accept connections or begin WAL replay.
6042 */
6043 StartupCLOG();
6044
6045 /*
6046 * Startup MultiXact. We need to do this early to be able to replay
6047 * truncations.
6048 */
6050
6051 /*
6052 * Ditto for commit timestamps. Activate the facility if the setting is
6053 * enabled in the control file, as there should be no tracking of commit
6054 * timestamps done when the setting was disabled. This facility can be
6055 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
6056 */
6059
6060 /*
6061 * Recover knowledge about replay progress of known replication partners.
6062 */
6064
6065 /*
6066 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
6067 * control file. On recovery, all unlogged relations are blown away, so
6068 * the unlogged LSN counter can be reset too.
6069 */
6073 else
6076
6077 /*
6078 * Copy any missing timeline history files between 'now' and the recovery
6079 * target timeline from archive to pg_wal. While we don't need those files
6080 * ourselves - the history file of the recovery target timeline covers all
6081 * the previous timelines in the history too - a cascading standby server
6082 * might be interested in them. Or, if you archive the WAL from this
6083 * server to a different archive than the primary, it'd be good for all
6084 * the history files to get archived there after failover, so that you can
6085 * use one of the old timelines as a PITR target. Timeline history files
6086 * are small, so it's better to copy them unnecessarily than not copy them
6087 * and regret later.
6088 */
6090
6091 /*
6092 * Before running in recovery, scan pg_twophase and fill in its status to
6093 * be able to work on entries generated by redo. Doing a scan before
6094 * taking any recovery action has the merit to discard any 2PC files that
6095 * are newer than the first record to replay, saving from any conflicts at
6096 * replay. This avoids as well any subsequent scans when doing recovery
6097 * of the on-disk two-phase data.
6098 */
6100
6101 /*
6102 * When starting with crash recovery, reset pgstat data - it might not be
6103 * valid. Otherwise restore pgstat data. It's safe to do this here,
6104 * because postmaster will not yet have started any other processes.
6105 *
6106 * NB: Restoring replication slot stats relies on slot state to have
6107 * already been restored from disk.
6108 *
6109 * TODO: With a bit of extra work we could just start with a pgstat file
6110 * associated with the checkpoint redo location we're starting from.
6111 */
6112 if (didCrash)
6114 else
6116
6118
6121
6122 /* REDO */
6123 if (InRecovery)
6124 {
6125 /* Initialize state for RecoveryInProgress() */
6129 else
6132
6133 /*
6134 * Update pg_control to show that we are recovering and to show the
6135 * selected checkpoint as the place we are starting from. We also mark
6136 * pg_control with any minimum recovery stop point obtained from a
6137 * backup history file.
6138 *
6139 * No need to hold ControlFileLock yet, we aren't up far enough.
6140 */
6142
6143 /*
6144 * If there was a backup label file, it's done its job and the info
6145 * has now been propagated into pg_control. We must get rid of the
6146 * label file so that if we crash during recovery, we'll pick up at
6147 * the latest recovery restartpoint instead of going all the way back
6148 * to the backup start point. It seems prudent though to just rename
6149 * the file out of the way rather than delete it completely.
6150 */
6151 if (haveBackupLabel)
6152 {
6155 }
6156
6157 /*
6158 * If there was a tablespace_map file, it's done its job and the
6159 * symlinks have been created. We must get rid of the map file so
6160 * that if we crash during recovery, we don't create symlinks again.
6161 * It seems prudent though to just rename the file out of the way
6162 * rather than delete it completely.
6163 */
6164 if (haveTblspcMap)
6165 {
6168 }
6169
6170 /*
6171 * Initialize our local copy of minRecoveryPoint. When doing crash
6172 * recovery we want to replay up to the end of WAL. Particularly, in
6173 * the case of a promoted standby minRecoveryPoint value in the
6174 * control file is only updated after the first checkpoint. However,
6175 * if the instance crashes before the first post-recovery checkpoint
6176 * is completed then recovery will use a stale location causing the
6177 * startup process to think that there are still invalid page
6178 * references when checking for data consistency.
6179 */
6181 {
6184 }
6185 else
6186 {
6189 }
6190
6191 /* Check that the GUCs used to generate the WAL allow recovery */
6193
6194 /*
6195 * We're in recovery, so unlogged relations may be trashed and must be
6196 * reset. This should be done BEFORE allowing Hot Standby
6197 * connections, so that read-only backends don't try to read whatever
6198 * garbage is left over from before.
6199 */
6201
6202 /*
6203 * Likewise, delete any saved transaction snapshot files that got left
6204 * behind by crashed backends.
6205 */
6207
6208 /*
6209 * Initialize for Hot Standby, if enabled. We won't let backends in
6210 * yet, not until we've reached the min recovery point specified in
6211 * control file and we've established a recovery snapshot from a
6212 * running-xacts WAL record.
6213 */
6215 {
6216 TransactionId *xids;
6217 int nxids;
6218
6220 (errmsg_internal("initializing for hot standby")));
6221
6223
6224 if (wasShutdown)
6226 else
6227 oldestActiveXID = checkPoint.oldestActiveXid;
6229
6230 /* Tell procarray about the range of xids it has to deal with */
6232
6233 /*
6234 * Startup subtrans only. CLOG, MultiXact and commit timestamp
6235 * have already been started up and other SLRUs are not maintained
6236 * during recovery and need not be started yet.
6237 */
6239
6240 /*
6241 * If we're beginning at a shutdown checkpoint, we know that
6242 * nothing was running on the primary at this point. So fake-up an
6243 * empty running-xacts record and use that here and now. Recover
6244 * additional standby state for prepared transactions.
6245 */
6246 if (wasShutdown)
6247 {
6249 TransactionId latestCompletedXid;
6250
6251 /* Update pg_subtrans entries for any prepared transactions */
6253
6254 /*
6255 * Construct a RunningTransactions snapshot representing a
6256 * shut down server, with only prepared transactions still
6257 * alive. We're never overflowed at this point because all
6258 * subxids are listed with their parent prepared transactions.
6259 */
6260 running.xcnt = nxids;
6261 running.subxcnt = 0;
6263 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
6265 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
6266 TransactionIdRetreat(latestCompletedXid);
6267 Assert(TransactionIdIsNormal(latestCompletedXid));
6268 running.latestCompletedXid = latestCompletedXid;
6269 running.xids = xids;
6270
6272 }
6273 }
6274
6275 /*
6276 * We're all set for replaying the WAL now. Do it.
6277 */
6279 performedWalRecovery = true;
6280 }
6281 else
6282 performedWalRecovery = false;
6283
6284 /*
6285 * Finish WAL recovery.
6286 */
6288 EndOfLog = endOfRecoveryInfo->endOfLog;
6289 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
6290 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
6291 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
6292
6293 /*
6294 * Reset ps status display, so that no information related to recovery
6295 * shows up.
6296 */
6297 set_ps_display("");
6298
6299 /*
6300 * When recovering from a backup (we are in recovery, and archive recovery
6301 * was requested), complain if we did not roll forward far enough to reach
6302 * the point where the database is consistent. For regular online
6303 * backup-from-primary, that means reaching the end-of-backup WAL record
6304 * (at which point we reset backupStartPoint to be Invalid), for
6305 * backup-from-replica (which can't inject records into the WAL stream),
6306 * that point is when we reach the minRecoveryPoint in pg_control (which
6307 * we purposefully copy last when backing up from a replica). For
6308 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
6309 * or snapshot-style backups (which don't), backupEndRequired will be set
6310 * to false.
6311 *
6312 * Note: it is indeed okay to look at the local variable
6313 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
6314 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
6315 * been advanced beyond the WAL we processed.
6316 */
6317 if (InRecovery &&
6320 {
6321 /*
6322 * Ran off end of WAL before reaching end-of-backup WAL record, or
6323 * minRecoveryPoint. That's a bad sign, indicating that you tried to
6324 * recover from an online backup but never called pg_backup_stop(), or
6325 * you didn't archive all the WAL needed.
6326 */
6328 {
6330 ereport(FATAL,
6332 errmsg("WAL ends before end of online backup"),
6333 errhint("All WAL generated while online backup was taken must be available at recovery.")));
6334 else
6335 ereport(FATAL,
6337 errmsg("WAL ends before consistent recovery point")));
6338 }
6339 }
6340
6341 /*
6342 * Reset unlogged relations to the contents of their INIT fork. This is
6343 * done AFTER recovery is complete so as to include any unlogged relations
6344 * created during recovery, but BEFORE recovery is marked as having
6345 * completed successfully. Otherwise we'd not retry if any of the post
6346 * end-of-recovery steps fail.
6347 */
6348 if (InRecovery)
6350
6351 /*
6352 * Pre-scan prepared transactions to find out the range of XIDs present.
6353 * This information is not quite needed yet, but it is positioned here so
6354 * that potential problems are detected before any on-disk change is done.
6355 */
6357
6358 /*
6359 * Allow ordinary WAL segment creation before possibly switching to a new
6360 * timeline, which creates a new segment, and after the last ReadRecord().
6361 */
6363
6364 /*
6365 * Consider whether we need to assign a new timeline ID.
6366 *
6367 * If we did archive recovery, we always assign a new ID. This handles a
6368 * couple of issues. If we stopped short of the end of WAL during
6369 * recovery, then we are clearly generating a new timeline and must assign
6370 * it a unique new ID. Even if we ran to the end, modifying the current
6371 * last segment is problematic because it may result in trying to
6372 * overwrite an already-archived copy of that segment, and we encourage
6373 * DBAs to make their archive_commands reject that. We can dodge the
6374 * problem by making the new active segment have a new timeline ID.
6375 *
6376 * In a normal crash recovery, we can just extend the timeline we were in.
6377 */
6378 newTLI = endOfRecoveryInfo->lastRecTLI;
6380 {
6382 ereport(LOG,
6383 (errmsg("selected new timeline ID: %u", newTLI)));
6384
6385 /*
6386 * Make a writable copy of the last WAL segment. (Note that we also
6387 * have a copy of the last block of the old WAL in
6388 * endOfRecovery->lastPage; we will use that below.)
6389 */
6391
6392 /*
6393 * Remove the signal files out of the way, so that we don't
6394 * accidentally re-enter archive recovery mode in a subsequent crash.
6395 */
6396 if (endOfRecoveryInfo->standby_signal_file_found)
6398
6399 if (endOfRecoveryInfo->recovery_signal_file_found)
6401
6402 /*
6403 * Write the timeline history file, and have it archived. After this
6404 * point (or rather, as soon as the file is archived), the timeline
6405 * will appear as "taken" in the WAL archive and to any standby
6406 * servers. If we crash before actually switching to the new
6407 * timeline, standby servers will nevertheless think that we switched
6408 * to the new timeline, and will try to connect to the new timeline.
6409 * To minimize the window for that, try to do as little as possible
6410 * between here and writing the end-of-recovery record.
6411 */
6413 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6414
6415 ereport(LOG,
6416 (errmsg("archive recovery complete")));
6417 }
6418
6419 /* Save the selected TimeLineID in shared memory, too */
6424
6425 /*
6426 * Actually, if WAL ended in an incomplete record, skip the parts that
6427 * made it through and start writing after the portion that persisted.
6428 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6429 * we'll do as soon as we're open for writing new WAL.)
6430 */
6432 {
6433 /*
6434 * We should only have a missingContrecPtr if we're not switching to a
6435 * new timeline. When a timeline switch occurs, WAL is copied from the
6436 * old timeline to the new only up to the end of the last complete
6437 * record, so there can't be an incomplete WAL record that we need to
6438 * disregard.
6439 */
6440 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6443 }
6444
6445 /*
6446 * Prepare to write WAL starting at EndOfLog location, and init xlog
6447 * buffer cache using the block containing the last record from the
6448 * previous incarnation.
6449 */
6450 Insert = &XLogCtl->Insert;
6452 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6453
6454 /*
6455 * Tricky point here: lastPage contains the *last* block that the LastRec
6456 * record spans, not the one it starts in. The last block is indeed the
6457 * one we want to use.
6458 */
6459 if (EndOfLog % XLOG_BLCKSZ != 0)
6460 {
6461 char *page;
6462 int len;
6463 int firstIdx;
6464
6466 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6468
6469 /* Copy the valid part of the last block, and zero the rest */
6470 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6471 memcpy(page, endOfRecoveryInfo->lastPage, len);
6472 memset(page + len, 0, XLOG_BLCKSZ - len);
6473
6476 }
6477 else
6478 {
6479 /*
6480 * There is no partial block to copy. Just set InitializedUpTo, and
6481 * let the first attempt to insert a log record to initialize the next
6482 * buffer.
6483 */
6485 }
6486
6487 /*
6488 * Update local and shared status. This is OK to do without any locks
6489 * because no other process can be reading or writing WAL yet.
6490 */
6497
6498 /*
6499 * Preallocate additional log files, if wanted.
6500 */
6502
6503 /*
6504 * Okay, we're officially UP.
6505 */
6506 InRecovery = false;
6507
6508 /* start the archive_timeout timer and LSN running */
6511
6512 /* also initialize latestCompletedXid, to nextXid - 1 */
6517
6518 /*
6519 * Start up subtrans, if not already done for hot standby. (commit
6520 * timestamps are started below, if necessary.)
6521 */
6524
6525 /*
6526 * Perform end of recovery actions for any SLRUs that need it.
6527 */
6528 TrimCLOG();
6529 TrimMultiXact();
6530
6531 /*
6532 * Reload shared-memory state for prepared transactions. This needs to
6533 * happen before renaming the last partial segment of the old timeline as
6534 * it may be possible that we have to recover some transactions from it.
6535 */
6537
6538 /* Shut down xlogreader */
6540
6541 /* Enable WAL writes for this backend only. */
6543
6544 /* If necessary, write overwrite-contrecord before doing anything else */
6546 {
6549 }
6550
6551 /*
6552 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6553 * record before resource manager writes cleanup WAL records or checkpoint
6554 * record is written.
6555 */
6556 Insert->fullPageWrites = lastFullPageWrites;
6558
6559 /*
6560 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6561 */
6564
6565 /*
6566 * If any of the critical GUCs have changed, log them before we allow
6567 * backends to write WAL.
6568 */
6570
6571 /* If this is archive recovery, perform post-recovery cleanup actions. */
6574
6575 /*
6576 * Local WAL inserts enabled, so it's time to finish initialization of
6577 * commit timestamp.
6578 */
6580
6581 /*
6582 * Update logical decoding status in shared memory and write an
6583 * XLOG_LOGICAL_DECODING_STATUS_CHANGE, if necessary.
6584 */
6586
6587 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6588 if (endOfRecoveryInfo->lastPage)
6589 pfree(endOfRecoveryInfo->lastPage);
6590 pfree(endOfRecoveryInfo->recoveryStopReason);
6592
6593 /*
6594 * If we reach this point with checksums in the state inprogress-on, it
6595 * means that data checksums were in the process of being enabled when the
6596 * cluster shut down. Since processing didn't finish, the operation will
6597 * have to be restarted from scratch since there is no capability to
6598 * continue where it was when the cluster shut down. Thus, revert the
6599 * state back to off, and inform the user with a warning message. Being
6600 * able to restart processing is a TODO, but it wouldn't be possible to
6601 * restart here since we cannot launch a dynamic background worker
6602 * directly from here (it has to be from a regular backend).
6603 */
6605 {
6607
6612
6614 errmsg("enabling data checksums was interrupted"),
6615 errhint("Data checksum processing must be manually restarted for checksums to be enabled"));
6616 }
6617
6618 /*
6619 * If data checksums were being disabled when the cluster was shut down,
6620 * we know that we have a state where all backends have stopped validating
6621 * checksums and we can move to off instead of prompting the user to
6622 * perform any action.
6623 */
6625 {
6627
6632 }
6633
6634 /*
6635 * All done with end-of-recovery actions.
6636 *
6637 * Now allow backends to write WAL and update the control file status in
6638 * consequence. SharedRecoveryState, that controls if backends can write
6639 * WAL, is updated while holding ControlFileLock to prevent other backends
6640 * from looking at an inconsistent state of the control file in shared memory.
6641 * There is still a small window during which backends can write WAL and
6642 * the control file is still referring to a system not in DB_IN_PRODUCTION
6643 * state while looking at the on-disk control file.
6644 *
6645 * Also, we use info_lck to update SharedRecoveryState to ensure that
6646 * there are no race conditions concerning visibility of other recent
6647 * updates to shared memory.
6648 */
6651
6656
6659
6660 /*
6661 * Wake up the checkpointer process as there might be a request to disable
6662 * logical decoding by concurrent slot drop.
6663 */
6665
6666 /*
6667 * Wake up all waiters. They need to report an error that recovery was
6668 * ended before reaching the target LSN.
6669 */
6673
6674 /*
6675 * Shutdown the recovery environment. This must occur after
6676 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6677 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6678 * any session building a snapshot will not rely on KnownAssignedXids, as
6679 * RecoveryInProgress() would return false at this stage. This is
6680 * particularly critical for prepared 2PC transactions, that would still
6681 * need to be included in snapshots once recovery has ended.
6682 */
6685
6686 /*
6687 * If there were cascading standby servers connected to us, nudge any wal
6688 * sender processes to notice that we've been promoted.
6689 */
6690 WalSndWakeup(true, true);
6691
6692 /*
6693 * If this was a promotion, request an (online) checkpoint now. This isn't
6694 * required for consistency, but the last restartpoint might be far back,
6695 * and in case of a crash, recovering from it might take longer than is
6696 * appropriate now that we're not in standby mode anymore.
6697 */
6698 if (promoted)
6700}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:504
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition timeline.c:265
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition timeline.c:51
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition timeline.c:305
void startup_progress_timeout_handler(void)
Definition startup.c:302
uint32 TransactionId
Definition c.h:736
void WakeupCheckpointer(void)
void StartupCLOG(void)
Definition clog.c:862
void TrimCLOG(void)
Definition clog.c:877
void StartupCommitTs(void)
Definition commit_ts.c:613
void CompleteCommitTsInitialization(void)
Definition commit_ts.c:623
void SyncDataDirectory(void)
Definition fd.c:3594
void UpdateLogicalDecodingStatusEndOfRecovery(void)
Definition logicalctl.c:551
void StartupLogicalDecodingStatus(bool last_status)
Definition logicalctl.c:144
#define IsBootstrapProcessingMode()
Definition miscadmin.h:495
void TrimMultiXact(void)
Definition multixact.c:1904
void StartupMultiXact(void)
Definition multixact.c:1879
void StartupReplicationOrigin(void)
Definition origin.c:740
@ DB_IN_PRODUCTION
Definition pg_control.h:105
@ DB_IN_CRASH_RECOVERY
Definition pg_control.h:103
const void size_t len
void pgstat_restore_stats(void)
Definition pgstat.c:525
void pgstat_discard_stats(void)
Definition pgstat.c:537
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition procarray.c:1045
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition procarray.c:1014
static void set_ps_display(const char *activity)
Definition ps_status.h:40
void ResetUnloggedRelations(int op)
Definition reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition reinit.h:27
void RelationCacheInitFileRemove(void)
Definition relcache.c:6926
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition slot.c:2402
void DeleteAllExportedSnapshotFiles(void)
Definition snapmgr.c:1587
void InitRecoveryTransactionEnvironment(void)
Definition standby.c:96
void ShutdownRecoveryTransactionEnvironment(void)
Definition standby.c:162
@ SUBXIDS_IN_SUBTRANS
Definition standby.h:123
TransactionId oldestRunningXid
Definition standby.h:134
TransactionId nextXid
Definition standby.h:133
TransactionId latestCompletedXid
Definition standby.h:137
subxids_array_status subxid_status
Definition standby.h:132
TransactionId * xids
Definition standby.h:139
FullTransactionId latestCompletedXid
Definition transam.h:238
pg_atomic_uint64 logInsertResult
Definition xlog.c:477
uint64 PrevBytePos
Definition xlog.c:415
XLogRecPtr Flush
Definition xlog.c:329
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition subtrans.c:302
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition timeout.h:38
#define TransactionIdRetreat(dest)
Definition transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition transam.h:103
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
void RecoverPreparedTransactions(void)
Definition twophase.c:2089
void restoreTwoPhaseData(void)
Definition twophase.c:1910
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1972
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2051
void WalSndWakeup(bool physical, bool logical)
Definition walsender.c:4012
void UpdateFullPageWrites(void)
Definition xlog.c:8752
static void ValidateXLOGDirectoryStructure(void)
Definition xlog.c:4150
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition xlog.c:7976
static void XLogReportParameters(void)
Definition xlog.c:8671
static bool PerformRecoveryXLogAction(void)
Definition xlog.c:6782
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition xlog.c:5707
static bool lastFullPageWrites
Definition xlog.c:224
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition xlog.c:5632
static void CheckRequiredParameterValues(void)
Definition xlog.c:5803
static void RemoveTempXlogFiles(void)
Definition xlog.c:3883
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition xlog.c:5619
#define TABLESPACE_MAP_OLD
Definition xlog.h:338
#define TABLESPACE_MAP
Definition xlog.h:337
#define STANDBY_SIGNAL_FILE
Definition xlog.h:333
#define BACKUP_LABEL_OLD
Definition xlog.h:335
#define BACKUP_LABEL_FILE
Definition xlog.h:334
#define RECOVERY_SIGNAL_FILE
Definition xlog.h:332
@ RECOVERY_STATE_CRASH
Definition xlog.h:93
@ RECOVERY_STATE_ARCHIVE
Definition xlog.h:94
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
static XLogRecPtr abortedRecPtr
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
TimeLineID recoveryTargetTLI
HotStandbyState standbyState
Definition xlogutils.c:53
bool InRecovery
Definition xlogutils.c:50
@ STANDBY_DISABLED
Definition xlogutils.h:52
void WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
Definition xlogwait.c:344
@ WAIT_LSN_TYPE_STANDBY_REPLAY
Definition xlogwait.h:39
@ WAIT_LSN_TYPE_STANDBY_FLUSH
Definition xlogwait.h:41
@ WAIT_LSN_TYPE_STANDBY_WRITE
Definition xlogwait.h:40

References abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg, errmsg_internal(), FATAL, fb(), findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, CheckPoint::logicalDecodingEnabled, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), memcpy(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, 
TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_OFF, pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetLocalDataChecksumState(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire(), SpinLockRelease(), STANDBY_DISABLED, STANDBY_SIGNAL_FILE, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupLogicalDecodingStatus(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, 
SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), ValidateXLOGDirectoryStructure(), WAIT_LSN_TYPE_STANDBY_FLUSH, WAIT_LSN_TYPE_STANDBY_REPLAY, WAIT_LSN_TYPE_STANDBY_WRITE, WaitLSNWakeup(), WakeupCheckpointer(), WalSndWakeup(), WARNING, XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogChecksums(), XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsValid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char * buf,
size_t  bufsize 
)
static

Definition at line 5619 of file xlog.c.

5620{
5622 "%Y-%m-%d %H:%M:%S %Z",
5624
5625 return buf;
5626}
#define bufsize
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition strftime.c:135
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition localtime.c:1347
PGDLLIMPORT pg_tz * log_timezone
Definition pgtz.c:31

References buf, bufsize, fb(), log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6707 of file xlog.c.

6708{
6709 /* initialize minRecoveryPoint to this record */
6712 if (ControlFile->minRecoveryPoint < EndRecPtr)
6713 {
6714 ControlFile->minRecoveryPoint = EndRecPtr;
6715 ControlFile->minRecoveryPointTLI = replayTLI;
6716 }
6717 /* update local copy */
6720
6721 /*
6722 * The startup process can update its local copy of minRecoveryPoint from
6723 * this point.
6724 */
6726
6728
6729 /*
6730 * We update SharedRecoveryState while holding the lock on ControlFileLock
6731 * so both states are consistent in shared memory.
6732 */
6736
6738}
static bool updateMinRecoveryPoint
Definition xlog.c:668

References ControlFile, DB_IN_ARCHIVE_RECOVERY, fb(), XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire(), SpinLockRelease(), ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 7332 of file xlog.c.

7333{
7334 /*
7335 * The status is reported only for end-of-recovery and shutdown
7336 * checkpoints or shutdown restartpoints. Updating the ps display is
7337 * useful in those situations as it may not be possible to rely on
7338 * pg_stat_activity to see the status of the checkpointer or the startup
7339 * process.
7340 */
7342 return;
7343
7344 if (reset)
7345 set_ps_display("");
7346 else
7347 {
7348 char activitymsg[128];
7349
7350 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
7351 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
7352 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
7353 restartpoint ? "restartpoint" : "checkpoint");
7355 }
7356}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, fb(), reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 7294 of file xlog.c.

7295{
7296 /*
7297 * To estimate the number of segments consumed between checkpoints, keep a
7298 * moving average of the amount of WAL generated in previous checkpoint
7299 * cycles. However, if the load is bursty, with quiet periods and busy
7300 * periods, we want to cater for the peak load. So instead of a plain
7301 * moving average, let the average decline slowly if the previous cycle
7302 * used less WAL than estimated, but bump it up immediately if it used
7303 * more.
7304 *
7305 * When checkpoints are triggered by max_wal_size, this should converge to
7306 * CheckpointSegments * wal_segment_size.
7307 *
7308 * Note: This doesn't pay any attention to what caused the checkpoint.
7309 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
7310 * starting a base backup, are counted the same as those created
7311 * automatically. The slow-decline will largely mask them out, if they are
7312 * not frequent. If they are frequent, it seems reasonable to count them
7313 * in as any others; if you issue a manual checkpoint every 5 minutes and
7314 * never let a timed checkpoint happen, it makes sense to base the
7315 * preallocation on that 5 minute interval rather than whatever
7316 * checkpoint_timeout is set to.
7317 */
7318 PrevCheckPointDistance = nbytes;
7319 if (CheckPointDistanceEstimate < nbytes)
7321 else
7323 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
7324}

References CheckPointDistanceEstimate, fb(), and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8752 of file xlog.c.

8753{
8755 bool recoveryInProgress;
8756
8757 /*
8758 * Do nothing if full_page_writes has not been changed.
8759 *
8760 * It's safe to check the shared full_page_writes without the lock,
8761 * because we assume that there is no concurrently running process which
8762 * can update it.
8763 */
8764 if (fullPageWrites == Insert->fullPageWrites)
8765 return;
8766
8767 /*
8768 * Perform this outside critical section so that the WAL insert
8769 * initialization done by RecoveryInProgress() doesn't trigger an
8770 * assertion failure.
8771 */
8773
8775
8776 /*
8777 * It's always safe to take full page images, even when not strictly
8778 * required, but not the other way round. So if we're setting full_page_writes
8779 * to true, first set it true and then write the WAL record. If we're
8780 * setting it to false, first write the WAL record and then set the global
8781 * flag.
8782 */
8783 if (fullPageWrites)
8784 {
8786 Insert->fullPageWrites = true;
8788 }
8789
8790 /*
8791 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8792 * full_page_writes during archive recovery, if required.
8793 */
8795 {
8797 XLogRegisterData(&fullPageWrites, sizeof(bool));
8798
8800 }
8801
8802 if (!fullPageWrites)
8803 {
8805 Insert->fullPageWrites = false;
8807 }
8809}
#define XLOG_FPW_CHANGE
Definition pg_control.h:80

References END_CRIT_SECTION, fb(), fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char * filename)
static

Definition at line 3863 of file xlog.c.

3864{
3865 uint32 tli;
3866 XLogSegNo segno;
3867
3869
3871 if (segno > XLogCtl->lastRemovedSegNo)
3872 XLogCtl->lastRemovedSegNo = segno;
3874}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2721 of file xlog.c.

2722{
2723 /* Quick check using our local copy of the variable */
2724 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2725 return;
2726
2727 /*
2728 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2729 * i.e., we're doing crash recovery. We never modify the control file's
2730 * value in that case, so we can short-circuit future checks here too. The
2731 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2732 * updated until crash recovery finishes. We only do this for the startup
2733 * process as it should not update its own reference of minRecoveryPoint
2734 * until it has finished crash recovery to make sure that all WAL
2735 * available is replayed in this case. This also saves from extra locks
2736 * taken on the control file from the startup process.
2737 */
2739 {
2740 updateMinRecoveryPoint = false;
2741 return;
2742 }
2743
2745
2746 /* update local copy */
2749
2751 updateMinRecoveryPoint = false;
2752 else if (force || LocalMinRecoveryPoint < lsn)
2753 {
2756
2757 /*
2758 * To avoid having to update the control file too often, we update it
2759 * all the way to the last record being replayed, even though 'lsn'
2760 * would suffice for correctness. This also allows the 'force' case
2761 * to not need a valid 'lsn' value.
2762 *
2763 * Another important reason for doing it this way is that the passed
2764 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2765 * the caller got it from a corrupted heap page. Accepting such a
2766 * value as the min recovery point would prevent us from coming up at
2767 * all. Instead, we just log a warning and continue with recovery.
2768 * (See also the comments about corrupt LSNs in XLogFlush.)
2769 */
2771 if (!force && newMinRecoveryPoint < lsn)
2772 elog(WARNING,
2773 "xlog min recovery request %X/%08X is past current point %X/%08X",
2775
2776 /* update control file */
2778 {
2784
2786 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2789 }
2790 }
2792}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), fb(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsValid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4150 of file xlog.c.

4151{
4152 char path[MAXPGPATH];
4153 struct stat stat_buf;
4154
4155 /* Check for pg_wal; if it doesn't exist, error out */
4156 if (stat(XLOGDIR, &stat_buf) != 0 ||
4157 !S_ISDIR(stat_buf.st_mode))
4158 ereport(FATAL,
4160 errmsg("required WAL directory \"%s\" does not exist",
4161 XLOGDIR)));
4162
4163 /* Check for archive_status */
4164 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4165 if (stat(path, &stat_buf) == 0)
4166 {
4167 /* Check for weird cases where it exists but isn't a directory */
4168 if (!S_ISDIR(stat_buf.st_mode))
4169 ereport(FATAL,
4171 errmsg("required WAL directory \"%s\" does not exist",
4172 path)));
4173 }
4174 else
4175 {
4176 ereport(LOG,
4177 (errmsg("creating missing WAL directory \"%s\"", path)));
4178 if (MakePGDirectory(path) < 0)
4179 ereport(FATAL,
4181 errmsg("could not create missing directory \"%s\": %m",
4182 path)));
4183 }
4184
4185 /* Check for summaries */
4186 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4187 if (stat(path, &stat_buf) == 0)
4188 {
4189 /* Check for weird cases where it exists but isn't a directory */
4190 if (!S_ISDIR(stat_buf.st_mode))
4191 ereport(FATAL,
4192 (errmsg("required WAL directory \"%s\" does not exist",
4193 path)));
4194 }
4195 else
4196 {
4197 ereport(LOG,
4198 (errmsg("creating missing WAL directory \"%s\"", path)));
4199 if (MakePGDirectory(path) < 0)
4200 ereport(FATAL,
4201 (errmsg("could not create missing directory \"%s\": %m",
4202 path)));
4203 }
4204}
int MakePGDirectory(const char *directoryName)
Definition fd.c:3963
#define S_ISDIR(m)
Definition win32_port.h:315

References ereport, errcode_for_file_access(), errmsg, FATAL, fb(), LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1545 of file xlog.c.

1546{
1552 int i;
1553
1554 if (MyProc == NULL)
1555 elog(PANIC, "cannot wait without a PGPROC structure");
1556
1557 /*
1558 * Check if there's any work to do. Use a barrier to ensure we get the
1559 * freshest value.
1560 */
1562 if (upto <= inserted)
1563 return inserted;
1564
1565 /* Read the current insert position */
1566 SpinLockAcquire(&Insert->insertpos_lck);
1567 bytepos = Insert->CurrBytePos;
1568 SpinLockRelease(&Insert->insertpos_lck);
1570
1571 /*
1572 * No-one should request to flush a piece of WAL that hasn't even been
1573 * reserved yet. However, it can happen if there is a block with a bogus
1574 * LSN on disk, for example. XLogFlush checks for that situation and
1575 * complains, but only after the flush. Here we just assume that to mean
1576 * that all WAL that has been reserved needs to be finished. In this
1577 * corner-case, the return value can be smaller than 'upto' argument.
1578 */
1579 if (upto > reservedUpto)
1580 {
1581 ereport(LOG,
1582 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1585 }
1586
1587 /*
1588 * Loop through all the locks, sleeping on any in-progress insert older
1589 * than 'upto'.
1590 *
1591 * finishedUpto is our return value, indicating the point upto which all
1592 * the WAL insertions have been finished. Initialize it to the head of
1593 * reserved WAL, and as we iterate through the insertion locks, back it
1594 * out for any insertion that's still in progress.
1595 */
1597 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1598 {
1600
1601 do
1602 {
1603 /*
1604 * See if this insertion is in progress. LWLockWaitForVar will
1605 * wait for the lock to be released, or for the 'value' to be set
1606 * by a LWLockUpdateVar call. When a lock is initially acquired,
1607 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1608 * know where it's inserting yet. We will have to wait for it. If
1609 * it's a small insertion, the record will most likely fit on the
1610 * same page and the inserter will release the lock without ever
1611 * calling LWLockUpdateVar. But if it has to sleep, it will
1612 * advertise the insertion point with LWLockUpdateVar before
1613 * sleeping.
1614 *
1615 * In this loop we are only waiting for insertions that started
1616 * before WaitXLogInsertionsToFinish was called. The lack of
1617 * memory barriers in the loop means that we might see locks as
1618 * "unused" that have since become used. This is fine because
1619 * they only can be used for later insertions that we would not
1620 * want to wait on anyway. Not taking a lock to acquire the
1621 * current insertingAt value means that we might see older
1622 * insertingAt values. This is also fine, because if we read a
1623 * value too old, we will add ourselves to the wait queue, which
1624 * contains atomic operations.
1625 */
1626 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1629 {
1630 /* the lock was free, so no insertion in progress */
1632 break;
1633 }
1634
1635 /*
1636 * This insertion is still in progress. Have to wait, unless the
1637 * inserter has proceeded past 'upto'.
1638 */
1639 } while (insertingat < upto);
1640
1643 }
1644
1645 /*
1646 * Advance the limit we know to have been inserted and return the freshest
1647 * value we know of, which might be beyond what we requested if somebody
1648 * is concurrently doing this with an 'upto' pointer ahead of us.
1649 */
1651 finishedUpto);
1652
1653 return finishedUpto;
1654}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition atomics.h:595
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition lwlock.c:1566
pg_atomic_uint64 insertingAt
Definition xlog.c:377

References elog, ereport, errmsg, fb(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire(), SpinLockRelease(), WALInsertLocks, XLogBytePosToEndRecPtr(), XLogCtl, and XLogRecPtrIsValid.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1412 of file xlog.c.

1413{
1414 bool immed;
1415
1416 /*
1417 * It doesn't matter which of the WAL insertion locks we acquire, so try
1418 * the one we used last time. If the system isn't particularly busy, it's
1419 * a good bet that it's still available, and it's good to have some
1420 * affinity to a particular lock so that you don't unnecessarily bounce
1421 * cache lines between processes when there's no contention.
1422 *
1423 * If this is the first time through in this backend, pick a lock
1424 * (semi-)randomly. This allows the locks to be used evenly if you have a
1425 * lot of very short connections.
1426 */
1427 static int lockToTry = -1;
1428
1429 if (lockToTry == -1)
1432
1433 /*
1434 * The insertingAt value is initially set to 0, as we don't know our
1435 * insert location yet.
1436 */
1438 if (!immed)
1439 {
1440 /*
1441 * If we couldn't get the lock immediately, try another lock next
1442 * time. On a system with more insertion locks than concurrent
1443 * inserters, this causes all the inserters to eventually migrate to a
1444 * lock that no-one else is using. On a system with more inserters
1445 * than locks, it still helps to distribute the inserters evenly
1446 * across the locks.
1447 */
1449 }
1450}
ProcNumber MyProcNumber
Definition globals.c:92
static int MyLockNo
Definition xlog.c:686

References fb(), LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1457 of file xlog.c.

1458{
1459 int i;
1460
1461 /*
1462 * When holding all the locks, all but the last lock's insertingAt
1463 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1464 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1465 */
1466 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1467 {
1472 }
1473 /* Variable value reset to 0 at release */
1475
1476 holdingAllLocks = true;
1477}
#define PG_UINT64_MAX
Definition c.h:677
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1702
static bool holdingAllLocks
Definition xlog.c:687

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1486 of file xlog.c.

1487{
1488 if (holdingAllLocks)
1489 {
1490 int i;
1491
1492 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1495 0);
1496
1497 holdingAllLocks = false;
1498 }
1499 else
1500 {
1503 0);
1504 }
1505}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1840

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1512 of file xlog.c.

1513{
1514 if (holdingAllLocks)
1515 {
1516 /*
1517 * We use the last lock to mark our actual position, see comments in
1518 * WALInsertLockAcquireExclusive.
1519 */
1522 insertingAt);
1523 }
1524 else
1527 insertingAt);
1528}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char * dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1789 of file xlog.c.

1791{
1792 char *pdst = dstbuf;
1793 XLogRecPtr recptr = startptr;
1795 Size nbytes = count;
1796
1798 return 0;
1799
1800 Assert(XLogRecPtrIsValid(startptr));
1801
1802 /*
1803 * Caller should ensure that the requested data has been inserted into WAL
1804 * buffers before we try to read it.
1805 */
1807 if (startptr + count > inserted)
1808 ereport(ERROR,
1809 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1810 LSN_FORMAT_ARGS(startptr + count),
1812
1813 /*
1814 * Loop through the buffers without a lock. For each buffer, atomically
1815 * read and verify the end pointer, then copy the data out, and finally
1816 * re-read and re-verify the end pointer.
1817 *
1818 * Once a page is evicted, it never returns to the WAL buffers, so if the
1819 * end pointer matches the expected end pointer before and after we copy
1820 * the data, then the right page must have been present during the data
1821 * copy. Read barriers are necessary to ensure that the data copy actually
1822 * happens between the two verification steps.
1823 *
1824 * If either verification fails, we simply terminate the loop and return
1825 * with the data that had been already copied out successfully.
1826 */
1827 while (nbytes > 0)
1828 {
1829 uint32 offset = recptr % XLOG_BLCKSZ;
1832 XLogRecPtr endptr;
1833 const char *page;
1834 const char *psrc;
1836
1837 /*
1838 * Calculate the end pointer we expect in the xlblocks array if the
1839 * correct page is present.
1840 */
1841 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1842
1843 /*
1844 * First verification step: check that the correct page is present in
1845 * the WAL buffers.
1846 */
1848 if (expectedEndPtr != endptr)
1849 break;
1850
1851 /*
1852 * The correct page is present (or was at the time the endptr was
1853 * read; must re-verify later). Calculate pointer to source data and
1854 * determine how much data to read from this page.
1855 */
1856 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1857 psrc = page + offset;
1858 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1859
1860 /*
1861 * Ensure that the data copy and the first verification step are not
1862 * reordered.
1863 */
1865
1866 /* data copy */
1868
1869 /*
1870 * Ensure that the data copy and the second verification step are not
1871 * reordered.
1872 */
1874
1875 /*
1876 * Second verification step: check that the page we read from wasn't
1877 * evicted while we were copying the data.
1878 */
1880 if (expectedEndPtr != endptr)
1881 break;
1882
1883 pdst += npagebytes;
1884 recptr += npagebytes;
1885 nbytes -= npagebytes;
1886 }
1887
1888 Assert(pdst - dstbuf <= count);
1889
1890 return pdst - dstbuf;
1891}
#define pg_read_barrier()
Definition atomics.h:154
#define Min(x, y)
Definition c.h:1091
TimeLineID GetWALInsertionTimeLine(void)
Definition xlog.c:7018

References Assert, ereport, errmsg, ERROR, fb(), GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, memcpy(), Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsValid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4296 of file xlog.c.

4297{
4298 int fd;
4299 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4300
4301 /*
4302 * Initialize version and compatibility-check fields
4303 */
4306
4309
4315
4318
4321
4322 ControlFile->float8ByVal = true; /* vestigial */
4323
4324 /*
4325 * Initialize the default 'char' signedness.
4326 *
4327 * The signedness of the char type is implementation-defined. For instance
4328 * on x86 architecture CPUs, the char data type is typically treated as
4329 * signed by default, whereas on aarch architecture CPUs, it is typically
4330 * treated as unsigned by default. In v17 or earlier, we accidentally let
4331 * C implementation signedness affect persistent data. This led to
4332 * inconsistent results when comparing char data across different
4333 * platforms.
4334 *
4335 * This flag can be used as a hint to ensure consistent behavior for
4336 * pre-v18 data files that store data sorted by the 'char' type on disk,
4337 * especially in cross-platform replication scenarios.
4338 *
4339 * Newly created database clusters unconditionally set the default char
4340 * signedness to true. pg_upgrade changes this flag for clusters that were
4341 * initialized on signedness=false platforms. As a result,
4342 * signedness=false setting will become rare over time. If we had known
4343 * about this problem during the last development cycle that forced initdb
4344 * (v8.3), we would have made all clusters signed or all clusters
4345 * unsigned. Making pg_upgrade the only source of signedness=false will
4346 * cause the population of database clusters to converge toward that
4347 * retrospective ideal.
4348 */
4350
4351 /* Contents are protected with a CRC */
4357
4358 /*
4359 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4360 * the excess over sizeof(ControlFileData). This reduces the odds of
4361 * premature-EOF errors when reading pg_control. We'll still fail when we
4362 * check the contents of the file, but hopefully with a more specific
4363 * error than "couldn't read pg_control".
4364 */
4365 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4366 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4367
4370 if (fd < 0)
4371 ereport(PANIC,
4373 errmsg("could not create file \"%s\": %m",
4375
4376 errno = 0;
4379 {
4380 /* if write didn't set errno, assume problem is no disk space */
4381 if (errno == 0)
4382 errno = ENOSPC;
4383 ereport(PANIC,
4385 errmsg("could not write to file \"%s\": %m",
4387 }
4389
4391 if (pg_fsync(fd) != 0)
4392 ereport(PANIC,
4394 errmsg("could not fsync file \"%s\": %m",
4397
4398 if (close(fd) != 0)
4399 ereport(PANIC,
4401 errmsg("could not close file \"%s\": %m",
4403}
#define PG_CONTROL_FILE_SIZE
Definition pg_control.h:266

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, memcpy(), ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog2_redo()

void xlog2_redo ( XLogReaderState * record)

Definition at line 9235 of file xlog.c.

9236{
9237 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
9238
9239 if (info == XLOG2_CHECKSUMS)
9240 {
9242
9243 memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state));
9244
9246 XLogCtl->data_checksum_version = state.new_checksum_state;
9248
9250 ControlFile->data_checksum_version = state.new_checksum_state;
9253
9254 /*
9255 * Block on a procsignalbarrier to await all processes having seen the
9256 * change to checksum status. Once the barrier has been passed we can
9257 * initiate the corresponding processing.
9258 */
9259 EmitAndWaitDataChecksumsBarrier(state.new_checksum_state);
9260 }
9261}
uint8_t uint8
Definition c.h:622
void EmitAndWaitDataChecksumsBarrier(uint32 state)
#define XLOG2_CHECKSUMS
Definition pg_control.h:90
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:410
#define XLogRecGetData(decoder)
Definition xlogreader.h:415

References ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, EmitAndWaitDataChecksumsBarrier(), fb(), XLogCtlData::info_lck, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), memcpy(), SpinLockAcquire(), SpinLockRelease(), UpdateControlFile(), XLOG2_CHECKSUMS, XLogCtl, XLogRecGetData, and XLogRecGetInfo.

◆ xlog_redo()

void xlog_redo ( XLogReaderState * record)

Definition at line 8821 of file xlog.c.

8822{
8823 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8824 XLogRecPtr lsn = record->EndRecPtr;
8825
8826 /*
8827 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8828 * XLOG_FPI_FOR_HINT records.
8829 */
8830 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8831 !XLogRecHasAnyBlockRefs(record));
8832
8833 if (info == XLOG_NEXTOID)
8834 {
8835 Oid nextOid;
8836
8837 /*
8838 * We used to try to take the maximum of TransamVariables->nextOid and
8839 * the recorded nextOid, but that fails if the OID counter wraps
8840 * around. Since no OID allocation should be happening during replay
8841 * anyway, better to just believe the record exactly. We still take
8842 * OidGenLock while setting the variable, just in case.
8843 */
8844 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8846 TransamVariables->nextOid = nextOid;
8849 }
8850 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8851 {
8852 CheckPoint checkPoint;
8853 TimeLineID replayTLI;
8854
8855 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8856 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8858 TransamVariables->nextXid = checkPoint.nextXid;
8861 TransamVariables->nextOid = checkPoint.nextOid;
8865 checkPoint.nextMultiOffset);
8866
8868 checkPoint.oldestMultiDB);
8869
8870 /*
8871 * No need to set oldestClogXid here as well; it'll be set when we
8872 * redo an xl_clog_truncate if it changed since initialization.
8873 */
8874 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8875
8876 /*
8877 * If we see a shutdown checkpoint while waiting for an end-of-backup
8878 * record, the backup was canceled and the end-of-backup record will
8879 * never arrive.
8880 */
8884 ereport(PANIC,
8885 (errmsg("online backup was canceled, recovery cannot continue")));
8886
8887 /*
8888 * If we see a shutdown checkpoint, we know that nothing was running
8889 * on the primary at this point. So fake-up an empty running-xacts
8890 * record and use that here and now. Recover additional standby state
8891 * for prepared transactions.
8892 */
8894 {
8895 TransactionId *xids;
8896 int nxids;
8898 TransactionId latestCompletedXid;
8900
8902
8903 /* Update pg_subtrans entries for any prepared transactions */
8905
8906 /*
8907 * Construct a RunningTransactions snapshot representing a shut
8908 * down server, with only prepared transactions still alive. We're
8909 * never overflowed at this point because all subxids are listed
8910 * with their parent prepared transactions.
8911 */
8912 running.xcnt = nxids;
8913 running.subxcnt = 0;
8915 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8917 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8918 TransactionIdRetreat(latestCompletedXid);
8919 Assert(TransactionIdIsNormal(latestCompletedXid));
8920 running.latestCompletedXid = latestCompletedXid;
8921 running.xids = xids;
8922
8924 }
8925
8926 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8930
8933
8934 /*
8935 * We should've already switched to the new TLI before replaying this
8936 * record.
8937 */
8938 (void) GetCurrentReplayRecPtr(&replayTLI);
8939 if (checkPoint.ThisTimeLineID != replayTLI)
8940 ereport(PANIC,
8941 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8942 checkPoint.ThisTimeLineID, replayTLI)));
8943
8944 RecoveryRestartPoint(&checkPoint, record);
8945
8946 /*
8947 * After replaying a checkpoint record, free all smgr objects.
8948 * Otherwise we would never do so for dropped relations, as the
8949 * startup does not process shared invalidation messages or call
8950 * AtEOXact_SMgr().
8951 */
8953 }
8954 else if (info == XLOG_CHECKPOINT_ONLINE)
8955 {
8956 CheckPoint checkPoint;
8957 TimeLineID replayTLI;
8958
8959 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8960 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8963 checkPoint.nextXid))
8964 TransamVariables->nextXid = checkPoint.nextXid;
8966
8967 /*
8968 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8969 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8970 * counter is from the start of the checkpoint and might well be stale
8971 * compared to later XLOG_NEXTOID records. We could try to take the
8972 * maximum of the nextOid counter and our latest value, but since
8973 * there's no particular guarantee about the speed with which the OID
8974 * counter wraps around, that's a risky thing to do. In any case,
8975 * users of the nextOid counter are required to avoid assignment of
8976 * duplicates, so that a somewhat out-of-date value should be safe.
8977 */
8978
8979 /* Handle multixact */
8981 checkPoint.nextMultiOffset);
8982
8983 /*
8984 * NB: This may perform multixact truncation when replaying WAL
8985 * generated by an older primary.
8986 */
8988 checkPoint.oldestMultiDB);
8990 checkPoint.oldestXid))
8992 checkPoint.oldestXidDB);
8993 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8997
8998 /* TLI should not change in an on-line checkpoint */
8999 (void) GetCurrentReplayRecPtr(&replayTLI);
9000 if (checkPoint.ThisTimeLineID != replayTLI)
9001 ereport(PANIC,
9002 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
9003 checkPoint.ThisTimeLineID, replayTLI)));
9004
9005 RecoveryRestartPoint(&checkPoint, record);
9006
9007 /*
9008 * After replaying a checkpoint record, free all smgr objects.
9009 * Otherwise we would never do so for dropped relations, as the
9010 * startup does not process shared invalidation messages or call
9011 * AtEOXact_SMgr().
9012 */
9014 }
9015 else if (info == XLOG_OVERWRITE_CONTRECORD)
9016 {
9017 /* nothing to do here, handled in xlogrecovery_redo() */
9018 }
9019 else if (info == XLOG_END_OF_RECOVERY)
9020 {
9022 TimeLineID replayTLI;
9023
9024 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
9025
9026 /*
9027 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
9028 * but this case is rarer and harder to test, so the benefit doesn't
9029 * outweigh the potential extra cost of maintenance.
9030 */
9031
9032 /*
9033 * We should've already switched to the new TLI before replaying this
9034 * record.
9035 */
9036 (void) GetCurrentReplayRecPtr(&replayTLI);
9037 if (xlrec.ThisTimeLineID != replayTLI)
9038 ereport(PANIC,
9039 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
9040 xlrec.ThisTimeLineID, replayTLI)));
9041 }
9042 else if (info == XLOG_NOOP)
9043 {
9044 /* nothing to do here */
9045 }
9046 else if (info == XLOG_SWITCH)
9047 {
9048 /* nothing to do here */
9049 }
9050 else if (info == XLOG_RESTORE_POINT)
9051 {
9052 /* nothing to do here, handled in xlogrecovery.c */
9053 }
9054 else if (info == XLOG_ASSIGN_LSN)
9055 {
9056 /* nothing to do here, see XLogGetFakeLSN() */
9057 }
9058 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
9059 {
9060 /*
9061 * XLOG_FPI records contain nothing else but one or more block
9062 * references. Every block reference must include a full-page image
9063 * even if full_page_writes was disabled when the record was generated
9064 * - otherwise there would be no point in this record.
9065 *
9066 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
9067 * WAL-logged because of a hint bit update. They are only generated
9068 * when checksums and/or wal_log_hints are enabled. They may include
9069 * no full-page images if full_page_writes was disabled when they were
9070 * generated. In this case there is nothing to do here.
9071 *
9072 * No recovery conflicts are generated by these generic records - if a
9073 * resource manager needs to generate conflicts, it has to define a
9074 * separate WAL record type and redo routine.
9075 */
9076 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
9077 {
9078 Buffer buffer;
9079
9080 if (!XLogRecHasBlockImage(record, block_id))
9081 {
9082 if (info == XLOG_FPI)
9083 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
9084 continue;
9085 }
9086
9087 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
9088 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
9089 UnlockReleaseBuffer(buffer);
9090 }
9091 }
9092 else if (info == XLOG_BACKUP_END)
9093 {
9094 /* nothing to do here, handled in xlogrecovery_redo() */
9095 }
9096 else if (info == XLOG_PARAMETER_CHANGE)
9097 {
9099
9100 /* Update our copy of the parameters in pg_control */
9101 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
9102
9104 ControlFile->MaxConnections = xlrec.MaxConnections;
9105 ControlFile->max_worker_processes = xlrec.max_worker_processes;
9106 ControlFile->max_wal_senders = xlrec.max_wal_senders;
9107 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
9108 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
9109 ControlFile->wal_level = xlrec.wal_level;
9110 ControlFile->wal_log_hints = xlrec.wal_log_hints;
9111
9112 /*
9113 * Update minRecoveryPoint to ensure that if recovery is aborted, we
9114 * recover back up to this point before allowing hot standby again.
9115 * This is important if the max_* settings are decreased, to ensure
9116 * you don't run queries against the WAL preceding the change. The
9117 * local copies cannot be updated as long as crash recovery is
9118 * happening and we expect all the WAL to be replayed.
9119 */
9121 {
9124 }
9126 {
9127 TimeLineID replayTLI;
9128
9129 (void) GetCurrentReplayRecPtr(&replayTLI);
9131 ControlFile->minRecoveryPointTLI = replayTLI;
9132 }
9133
9134 CommitTsParameterChange(xlrec.track_commit_timestamp,
9136 ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
9137
9140
9141 /* Check to see if any parameter change gives a problem on recovery */
9143 }
9144 else if (info == XLOG_FPW_CHANGE)
9145 {
9146 bool fpw;
9147
9148 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
9149
9150 /*
9151 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
9152 * do_pg_backup_start() and do_pg_backup_stop() can check whether
9153 * full_page_writes has been disabled during online backup.
9154 */
9155 if (!fpw)
9156 {
9161 }
9162
9163 /* Keep track of full_page_writes */
9165 }
9166 else if (info == XLOG_CHECKPOINT_REDO)
9167 {
9169 bool new_state = false;
9170
9172
9174 XLogCtl->data_checksum_version = redo_rec.data_checksum_version;
9175 SetLocalDataChecksumState(redo_rec.data_checksum_version);
9176 if (redo_rec.data_checksum_version != ControlFile->data_checksum_version)
9177 new_state = true;
9179
9180 if (new_state)
9181 EmitAndWaitDataChecksumsBarrier(redo_rec.data_checksum_version);
9182 }
9183 else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE)
9184 {
9185 bool status;
9186
9187 memcpy(&status, XLogRecGetData(record), sizeof(bool));
9188
9189 /*
9190 * We need to toggle the logical decoding status and update the
9191 * XLogLogicalInfo cache of processes synchronously because
9192 * XLogLogicalInfoActive() is used even during read-only queries
9193 * (e.g., via RelationIsAccessibleInLogicalDecoding()). In the
9194 * 'disable' case, it is safe to invalidate existing slots after
9195 * disabling logical decoding because logical decoding cannot process
9196 * subsequent WAL records, which may not contain logical information.
9197 */
9198 if (status)
9200 else
9202
9203 elog(DEBUG1, "update logical decoding status to %d during recovery",
9204 status);
9205
9206 if (InRecovery && InHotStandby)
9207 {
9208 if (!status)
9209 {
9210 /*
9211 * Invalidate logical slots if we are in hot standby and the
9212 * primary disabled logical decoding.
9213 */
9215 0, InvalidOid,
9217 }
9218 else if (sync_replication_slots)
9219 {
9220 /*
9221 * Signal the postmaster to launch the slotsync worker.
9222 *
9223 * XXX: For simplicity, we keep the slotsync worker running
9224 * even after logical decoding is disabled. A future
9225 * improvement can consider starting and stopping the worker
9226 * based on logical decoding status change.
9227 */
9229 }
9230 }
9231 }
9232}
int Buffer
Definition buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5603
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition commit_ts.c:645
pid_t PostmasterPid
Definition globals.c:108
void DisableLogicalDecoding(void)
Definition logicalctl.c:489
void EnableLogicalDecoding(void)
Definition logicalctl.c:338
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2266
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2239
#define XLOG_RESTORE_POINT
Definition pg_control.h:79
#define XLOG_ASSIGN_LSN
Definition pg_control.h:84
#define XLOG_FPI
Definition pg_control.h:83
#define XLOG_FPI_FOR_HINT
Definition pg_control.h:82
#define XLOG_NEXTOID
Definition pg_control.h:75
#define XLOG_NOOP
Definition pg_control.h:74
#define XLOG_PARAMETER_CHANGE
Definition pg_control.h:78
#define XLOG_LOGICAL_DECODING_STATUS_CHANGE
Definition pg_control.h:87
@ RS_INVAL_WAL_LEVEL
Definition slot.h:66
bool sync_replication_slots
Definition slotsync.c:132
void smgrdestroyall(void)
Definition smgr.c:386
#define FullTransactionIdPrecedes(a, b)
Definition transam.h:51
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition xlog.c:8086
#define XLogRecMaxBlockId(decoder)
Definition xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:417
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition xlogutils.c:303
@ STANDBY_INITIALIZED
Definition xlogutils.h:53
#define InHotStandby
Definition xlogutils.h:60
@ BLK_RESTORED
Definition xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, CheckPoint::dataChecksumState, DEBUG1, DisableLogicalDecoding(), elog, EmitAndWaitDataChecksumsBarrier(), EnableLogicalDecoding(), XLogReaderState::EndRecPtr, ereport, errmsg, ERROR, fb(), FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, kill, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::max_locks_per_xact, ControlFileData::max_prepared_xacts, ControlFileData::max_wal_senders, ControlFileData::max_worker_processes, ControlFileData::MaxConnections, memcpy(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PostmasterPid, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetLocalDataChecksumState(), SetTransactionIdLimit(), SIGUSR1, smgrdestroyall(), SpinLockAcquire(), SpinLockRelease(), STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, 
RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, sync_replication_slots, CheckPoint::ThisTimeLineID, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), ControlFileData::wal_level, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_ASSIGN_LSN, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_LOGICAL_DECODING_STATUS_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsValid.

◆ XLogAssignLSN()

XLogRecPtr XLogAssignLSN ( void  )

Definition at line 8652 of file xlog.c.

8653{
8654 int dummy = 0;
8655
8656 /*
8657 * Records other than XLOG_SWITCH must have content. We use an integer 0
8658 * to satisfy this restriction.
8659 */
8662 XLogRegisterData(&dummy, sizeof(dummy));
8664}

References fb(), XLOG_ASSIGN_LSN, XLOG_MARK_UNIMPORTANT, XLogBeginInsert(), XLogInsert(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by XLogGetFakeLSN().

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 3004 of file xlog.c.

3005{
3007 bool flexible = true;
3008 static TimestampTz lastflush;
3010 int flushblocks;
3012
3013 /* XLOG doesn't need flushing during recovery */
3014 if (RecoveryInProgress())
3015 return false;
3016
3017 /*
3018 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3019 * so we can read it without a lock.
3020 */
3022
3023 /* read updated LogwrtRqst */
3027
3028 /* back off to last completed page boundary */
3029 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3030
3031 /* if we have already flushed that far, consider async commit records */
3033 if (WriteRqst.Write <= LogwrtResult.Flush)
3034 {
3038 flexible = false; /* ensure it all gets written */
3039 }
3040
3041 /*
3042 * If already known flushed, we're done. Just need to check if we are
3043 * holding an open file handle to a logfile that's no longer in use,
3044 * preventing the file from being deleted.
3045 */
3046 if (WriteRqst.Write <= LogwrtResult.Flush)
3047 {
3048 if (openLogFile >= 0)
3049 {
3052 {
3053 XLogFileClose();
3054 }
3055 }
3056 return false;
3057 }
3058
3059 /*
3060 * Determine how far to flush WAL, based on the wal_writer_delay and
3061 * wal_writer_flush_after GUCs.
3062 *
3063 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3064 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3065 * logic is the same in both places if you change this.
3066 */
3068 flushblocks =
3070
3071 if (WalWriterFlushAfter == 0 || lastflush == 0)
3072 {
3073 /* first call, or block based limits disabled */
3074 WriteRqst.Flush = WriteRqst.Write;
3075 lastflush = now;
3076 }
3078 {
3079 /*
3080 * Flush the writes at least every WalWriterDelay ms. This is
3081 * important to bound the amount of time it takes for an asynchronous
3082 * commit to hit disk.
3083 */
3084 WriteRqst.Flush = WriteRqst.Write;
3085 lastflush = now;
3086 }
3087 else if (flushblocks >= WalWriterFlushAfter)
3088 {
3089 /* exceeded wal_writer_flush_after blocks, flush */
3090 WriteRqst.Flush = WriteRqst.Write;
3091 lastflush = now;
3092 }
3093 else
3094 {
3095 /* no flushing, this time round */
3097 }
3098
3099#ifdef WAL_DEBUG
3100 if (XLOG_DEBUG)
3101 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3106#endif
3107
3109
3110 /* now wait for any in-progress insertions to finish and get write lock */
3114 if (WriteRqst.Write > LogwrtResult.Write ||
3116 {
3118 }
3120
3122
3123 /* wake up walsenders now that we've released heavily contended locks */
3125
3126 /*
3127 * Wake up processes waiting for primary flush LSN to reach current flush
3128 * position.
3129 */
3131
3132 /*
3133 * Great, done. To take some work off the critical path, try to initialize
3134 * as many of the no-longer-needed WAL buffers for future use as we can.
3135 */
3137
3138 /*
3139 * If we determined that we need to write data, but somebody else
3140 * wrote/flushed already, it should be considered as being active, to
3141 * avoid hibernating too early.
3142 */
3143 return true;
3144}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1789
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1613
XLogRecPtr asyncXactLSN
Definition xlog.c:464
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition walsender.h:64
int WalWriterFlushAfter
Definition walwriter.c:72
int WalWriterDelay
Definition walwriter.c:71
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, fb(), XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, TimestampDifferenceExceeds(), WAIT_LSN_TYPE_PRIMARY_FLUSH, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1939 of file xlog.c.

1940{
1946
1949
1951 {
1952 /* fits on first page of segment */
1953 if (bytesleft == 0)
1954 seg_offset = 0;
1955 else
1957 }
1958 else
1959 {
1960 /* account for the first page on segment with long header */
1963
1966
1967 if (bytesleft == 0)
1969 else
1971 }
1972
1974
1975 return result;
1976}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by GetXLogInsertEndRecPtr(), ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1899 of file xlog.c.

1900{
1906
1909
1911 {
1912 /* fits on first page of segment */
1914 }
1915 else
1916 {
1917 /* account for the first page on segment with long header */
1920
1923
1925 }
1926
1928
1929 return result;
1930}

References fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2301 of file xlog.c.

2302{
2304
2306
2308 return true;
2309 return false;
2310}

References CheckPointSegments, fb(), RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLogChecksums()

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 5025 of file xlog.c.

5026{
5027 int xbuffers;
5028
5029 xbuffers = NBuffers / 32;
5032 if (xbuffers < 8)
5033 xbuffers = 8;
5034 return xbuffers;
5035}

References fb(), NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemRequest().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3690 of file xlog.c.

3691{
3692 Assert(openLogFile >= 0);
3693
3694 /*
3695 * WAL segment files will not be re-read in normal operation, so we advise
3696 * the OS to release any cached pages. But do not do so if WAL archiving
3697 * or streaming is active, because archiver and walsender process could
3698 * use the cache to read the WAL segment.
3699 */
3700#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3701 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3703#endif
3704
3705 if (close(openLogFile) != 0)
3706 {
3707 char xlogfname[MAXFNAMELEN];
3708 int save_errno = errno;
3709
3711 errno = save_errno;
3712 ereport(PANIC,
3714 errmsg("could not close file \"%s\": %m", xlogfname)));
3715 }
3716
3717 openLogFile = -1;
3719}
void ReleaseExternalFD(void)
Definition fd.c:1225

References Assert, close, ereport, errcode_for_file_access(), errmsg, fb(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3469 of file xlog.c.

3472{
3473 char path[MAXPGPATH];
3474 char tmppath[MAXPGPATH];
3475 PGAlignedXLogBlock buffer;
3476 int srcfd;
3477 int fd;
3478 int nbytes;
3479
3480 /*
3481 * Open the source file
3482 */
3485 if (srcfd < 0)
3486 ereport(ERROR,
3488 errmsg("could not open file \"%s\": %m", path)));
3489
3490 /*
3491 * Copy into a temp file name.
3492 */
3493 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3494
3495 unlink(tmppath);
3496
3497 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3499 if (fd < 0)
3500 ereport(ERROR,
3502 errmsg("could not create file \"%s\": %m", tmppath)));
3503
3504 /*
3505 * Do the data copying.
3506 */
3507 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3508 {
3509 int nread;
3510
3511 nread = upto - nbytes;
3512
3513 /*
3514 * The part that is not read from the source file is filled with
3515 * zeros.
3516 */
3517 if (nread < sizeof(buffer))
3518 memset(buffer.data, 0, sizeof(buffer));
3519
3520 if (nread > 0)
3521 {
3522 int r;
3523
3524 if (nread > sizeof(buffer))
3525 nread = sizeof(buffer);
3527 r = read(srcfd, buffer.data, nread);
3528 if (r != nread)
3529 {
3530 if (r < 0)
3531 ereport(ERROR,
3533 errmsg("could not read file \"%s\": %m",
3534 path)));
3535 else
3536 ereport(ERROR,
3538 errmsg("could not read file \"%s\": read %d of %zu",
3539 path, r, (Size) nread)));
3540 }
3542 }
3543 errno = 0;
3545 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3546 {
3547 int save_errno = errno;
3548
3549 /*
3550 * If we fail to make the file, delete it to release disk space
3551 */
3552 unlink(tmppath);
3553 /* if write didn't set errno, assume problem is no disk space */
3555
3556 ereport(ERROR,
3558 errmsg("could not write to file \"%s\": %m", tmppath)));
3559 }
3561 }
3562
3564 if (pg_fsync(fd) != 0)
3567 errmsg("could not fsync file \"%s\": %m", tmppath)));
3569
3570 if (CloseTransientFile(fd) != 0)
3571 ereport(ERROR,
3573 errmsg("could not close file \"%s\": %m", tmppath)));
3574
3575 if (CloseTransientFile(srcfd) != 0)
3576 ereport(ERROR,
3578 errmsg("could not close file \"%s\": %m", path)));
3579
3580 /*
3581 * Now move the segment into place with its final name.
3582 */
3584 elog(ERROR, "InstallXLogFileSegment should not have failed");
3585}
int CloseTransientFile(int fd)
Definition fd.c:2855
int data_sync_elevel(int elevel)
Definition fd.c:3986
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
char data[XLOG_BLCKSZ]
Definition c.h:1231

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3431 of file xlog.c.

3432{
3433 bool ignore_added;
3434 char path[MAXPGPATH];
3435 int fd;
3436
3437 Assert(logtli != 0);
3438
3440 if (fd >= 0)
3441 return fd;
3442
3443 /* Now open original target segment (might not be file I just made) */
3446 if (fd < 0)
3447 ereport(ERROR,
3449 errmsg("could not open file \"%s\": %m", path)));
3450 return fd;
3451}
#define O_CLOEXEC
Definition win32_port.h:344

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool * added,
char * path 
)
static

Definition at line 3243 of file xlog.c.

3245{
3246 char tmppath[MAXPGPATH];
3249 int fd;
3250 int save_errno;
3253
3254 Assert(logtli != 0);
3255
3257
3258 /*
3259 * Try to use an existing file (checkpoint maker may have created it already)
3260 */
3261 *added = false;
3264 if (fd < 0)
3265 {
3266 if (errno != ENOENT)
3267 ereport(ERROR,
3269 errmsg("could not open file \"%s\": %m", path)));
3270 }
3271 else
3272 return fd;
3273
3274 /*
3275 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3276 * another process is doing the same thing. If so, we will end up
3277 * pre-creating an extra log segment. That seems OK, and better than
3278 * holding the lock throughout this lengthy process.
3279 */
3280 elog(DEBUG2, "creating and filling new WAL file");
3281
3282 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3283
3284 unlink(tmppath);
3285
3288
3289 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3291 if (fd < 0)
3292 ereport(ERROR,
3294 errmsg("could not create file \"%s\": %m", tmppath)));
3295
3296 /* Measure I/O timing when initializing segment */
3298
3300 save_errno = 0;
3301 if (wal_init_zero)
3302 {
3303 ssize_t rc;
3304
3305 /*
3306 * Zero-fill the file. With this setting, we do this the hard way to
3307 * ensure that all the file space has really been allocated. On
3308 * platforms that allow "holes" in files, just seeking to the end
3309 * doesn't allocate intermediate space. This way, we know that we
3310 * have all the space and (after the fsync below) that all the
3311 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3312 * O_DSYNC will be sufficient to sync future writes to the log file.
3313 */
3315
3316 if (rc < 0)
3317 save_errno = errno;
3318 }
3319 else
3320 {
3321 /*
3322 * Otherwise, seeking to the end and writing a solitary byte is
3323 * enough.
3324 */
3325 errno = 0;
3326 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3327 {
3328 /* if write didn't set errno, assume no disk space */
3330 }
3331 }
3333
3334 /*
3335 * A full segment worth of data is written when using wal_init_zero. One
3336 * byte is written when not using it.
3337 */
3339 io_start, 1,
3341
3342 if (save_errno)
3343 {
3344 /*
3345 * If we fail to make the file, delete it to release disk space
3346 */
3347 unlink(tmppath);
3348
3349 close(fd);
3350
3351 errno = save_errno;
3352
3353 ereport(ERROR,
3355 errmsg("could not write to file \"%s\": %m", tmppath)));
3356 }
3357
3358 /* Measure I/O timing when flushing segment */
3360
3362 if (pg_fsync(fd) != 0)
3363 {
3364 save_errno = errno;
3365 close(fd);
3366 errno = save_errno;
3367 ereport(ERROR,
3369 errmsg("could not fsync file \"%s\": %m", tmppath)));
3370 }
3372
3374 IOOP_FSYNC, io_start, 1, 0);
3375
3376 if (close(fd) != 0)
3377 ereport(ERROR,
3379 errmsg("could not close file \"%s\": %m", tmppath)));
3380
3381 /*
3382 * Now move the segment into place with its final name. Cope with
3383 * possibility that someone else has created the file while we were
3384 * filling ours: if so, use ours to pre-create a future log segment.
3385 */
3387
3388 /*
3389 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3390 * that was a constant, but that was always a bit dubious: normally, at a
3391 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3392 * here, it was the offset from the insert location. We can't do the
3393 * normal XLOGfileslop calculation here because we don't have access to
3394 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3395 * CheckPointSegments.
3396 */
3399 logtli))
3400 {
3401 *added = true;
3402 elog(DEBUG2, "done creating and filling new WAL file");
3403 }
3404 else
3405 {
3406 /*
3407 * No need for any more future segments, or InstallXLogFileSegment()
3408 * failed to rename the file into place. If the rename failed, a
3409 * caller opening the file may fail.
3410 */
3411 unlink(tmppath);
3412 elog(DEBUG2, "abandoned new WAL file");
3413 }
3414
3415 return -1;
3416}
#define IO_DIRECT_WAL_INIT
Definition fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
Definition file_utils.c:709
@ IOCONTEXT_INIT
Definition pgstat.h:292
@ IOOP_WRITE
Definition pgstat.h:320
#define pg_pwrite
Definition port.h:249
bool wal_init_zero
Definition xlog.c:134

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3669 of file xlog.c.

3670{
3671 char path[MAXPGPATH];
3672 int fd;
3673
3674 XLogFilePath(path, tli, segno, wal_segment_size);
3675
3678 if (fd < 0)
3679 ereport(PANIC,
3681 errmsg("could not open file \"%s\": %m", path)));
3682
3683 return fd;
3684}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2251 of file xlog.c.

2252{
2255 double distance;
2257
2258 /*
2259 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2260 * correspond to. Always recycle enough segments to meet the minimum, and
2261 * remove enough segments to stay below the maximum.
2262 */
2267
2268 /*
2269 * Between those limits, recycle enough segments to get us through to the
2270 * estimated end of next checkpoint.
2271 *
2272 * To estimate where the next checkpoint will finish, assume that the
2273 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2274 * every checkpoint.
2275 */
2277 /* add 10% for good measure. */
2278 distance *= 1.10;
2279
2280 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2282
2283 if (recycleSegNo < minSegNo)
2285 if (recycleSegNo > maxSegNo)
2287
2288 return recycleSegNo;
2289}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, fb(), max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2801 of file xlog.c.

2802{
2806
2807 /*
2808 * During REDO, we are reading not writing WAL. Therefore, instead of
2809 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2810 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2811 * to act this way too, and because when it tries to write the
2812 * end-of-recovery checkpoint, it should indeed flush.
2813 */
2814 if (!XLogInsertAllowed())
2815 {
2816 UpdateMinRecoveryPoint(record, false);
2817 return;
2818 }
2819
2820 /* Quick exit if already known flushed */
2821 if (record <= LogwrtResult.Flush)
2822 return;
2823
2824#ifdef WAL_DEBUG
2825 if (XLOG_DEBUG)
2826 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
2827 LSN_FORMAT_ARGS(record),
2830#endif
2831
2833
2834 /*
2835 * Since fsync is usually a horribly expensive operation, we try to
2836 * piggyback as much data as we can on each fsync: if we see any more data
2837 * entered into the xlog buffer, we'll write and fsync that too, so that
2838 * the final value of LogwrtResult.Flush is as large as possible. This
2839 * gives us some chance of avoiding another fsync immediately after.
2840 */
2841
2842 /* initialize to given target; may increase below */
2843 WriteRqstPtr = record;
2844
2845 /*
2846 * Now wait until we get the write lock, or someone else does the flush
2847 * for us.
2848 */
2849 for (;;)
2850 {
2852
2853 /* done already? */
2855 if (record <= LogwrtResult.Flush)
2856 break;
2857
2858 /*
2859 * Before actually performing the write, wait for all in-flight
2860 * insertions to the pages we're about to write to finish.
2861 */
2863 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2867
2868 /*
2869 * Try to get the write lock. If we can't get it immediately, wait
2870 * until it's released, and recheck if we still need to do the flush
2871 * or if the backend that held the lock did it for us already. This
2872 * helps to maintain a good rate of group committing when the system
2873 * is bottlenecked by the speed of fsyncing.
2874 */
2876 {
2877 /*
2878 * The lock is now free, but we didn't acquire it yet. Before we
2879 * do, loop back to check if someone else flushed the record for
2880 * us already.
2881 */
2882 continue;
2883 }
2884
2885 /* Got the lock; recheck whether request is satisfied */
2887 if (record <= LogwrtResult.Flush)
2888 {
2890 break;
2891 }
2892
2893 /*
2894 * Sleep before flush! By adding a delay here, we may give further
2895 * backends the opportunity to join the backlog of group commit
2896 * followers; this can significantly improve transaction throughput,
2897 * at the risk of increasing transaction latency.
2898 *
2899 * We do not sleep if enableFsync is not turned on, nor if there are
2900 * fewer than CommitSiblings other backends with active transactions.
2901 */
2902 if (CommitDelay > 0 && enableFsync &&
2904 {
2908
2909 /*
2910 * Re-check how far we can now flush the WAL. It's generally not
2911 * safe to call WaitXLogInsertionsToFinish while holding
2912 * WALWriteLock, because an in-progress insertion might need to
2913 * also grab WALWriteLock to make progress. But we know that all
2914 * the insertions up to insertpos have already finished, because
2915 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2916 * We're only calling it again to allow insertpos to be moved
2917 * further forward, not to actually wait for anyone.
2918 */
2920 }
2921
2922 /* try to write/flush later additions to XLOG as well */
2923 WriteRqst.Write = insertpos;
2924 WriteRqst.Flush = insertpos;
2925
2926 XLogWrite(WriteRqst, insertTLI, false);
2927
2929 /* done */
2930 break;
2931 }
2932
2934
2935 /* wake up walsenders now that we've released heavily contended locks */
2937
2938 /*
2939 * Wake up processes waiting for primary flush LSN to reach current flush
2940 * position.
2941 */
2943
2944 /*
2945 * If we still haven't flushed to the request point then we have a
2946 * problem; most likely, the requested flush point is past end of XLOG.
2947 * This has been seen to occur when a disk page has a corrupted LSN.
2948 *
2949 * Formerly we treated this as a PANIC condition, but that hurts the
2950 * system's robustness rather than helping it: we do not want to take down
2951 * the whole system due to corruption on one data page. In particular, if
2952 * the bad page is encountered again during recovery then we would be
2953 * unable to restart the database at all! (This scenario actually
2954 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2955 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2956 * the only time we can reach here during recovery is while flushing the
2957 * end-of-recovery checkpoint record, and we don't expect that to have a
2958 * bad LSN.
2959 *
2960 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2961 * since xact.c calls this routine inside a critical section. However,
2962 * calls from bufmgr.c are not within critical sections and so we will not
2963 * force a restart for a bad LSN on a data page.
2964 */
2965 if (LogwrtResult.Flush < record)
2966 elog(ERROR,
2967 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2968 LSN_FORMAT_ARGS(record),
2970
2971 /*
2972 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2973 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2974 * remain consistent.
2975 */
2976 Assert(!XLogNeedsFlush(record));
2977}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1378
bool MinimumActiveBackends(int min)
Definition procarray.c:3589
int CommitDelay
Definition xlog.c:139
int CommitSiblings
Definition xlog.c:140
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3159
bool XLogInsertAllowed(void)
Definition xlog.c:6887

References Assert, CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, fb(), XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateMinRecoveryPoint(), WAIT_LSN_TYPE_PRIMARY_FLUSH, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FinishSyncWorker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), rebuild_relation_finish_concurrent(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), WalSndWaitForWal(), write_logical_decoding_status_update_record(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogChecksums(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3809 of file xlog.c.

3810{
3811 XLogSegNo lastRemovedSegNo;
3812
3814 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3816
3817 return lastRemovedSegNo;
3818}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3825 of file xlog.c.

3826{
3827 DIR *xldir;
3828 struct dirent *xlde;
3830
3832 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3833 {
3836
3837 /* Ignore files that are not XLOG segments. */
3838 if (!IsXLogFileName(xlde->d_name))
3839 continue;
3840
3841 /* Parse filename to get TLI and segno. */
3844
3845 /* Ignore anything that's not from the TLI of interest. */
3846 if (tli != file_tli)
3847 continue;
3848
3849 /* If it's the oldest so far, update oldest_segno. */
3850 if (oldest_segno == 0 || file_segno < oldest_segno)
3852 }
3853
3854 FreeDir(xldir);
3855 return oldest_segno;
3856}

References AllocateDir(), fb(), FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), and MaybeRemoveOldWalSummaries().

◆ XLogGetReplicationSlotMinimumLSN()

XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )

Definition at line 2700 of file xlog.c.

2701{
2702 XLogRecPtr retval;
2703
2707
2708 return retval;
2709}
XLogRecPtr replicationSlotMinLSN
Definition xlog.c:465

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by KeepLogSeg(), and reserve_wal_for_local_slot().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5632 of file xlog.c.

5633{
5634 char xlogfname[MAXFNAMELEN];
5637
5638 /* we always switch to a new timeline after archive recovery */
5639 Assert(endTLI != newTLI);
5640
5641 /*
5642 * Update min recovery point one last time.
5643 */
5645
5646 /*
5647 * Calculate the last segment on the old timeline, and the first segment
5648 * on the new timeline. If the switch happens in the middle of a segment,
5649 * they are the same, but if the switch happens exactly at a segment
5650 * boundary, startLogSegNo will be endLogSegNo + 1.
5651 */
5654
5655 /*
5656 * Initialize the starting WAL segment for the new timeline. If the switch
5657 * happens in the middle of a segment, copy data from the last WAL segment
5658 * of the old timeline up to the switch point, to the starting WAL segment
5659 * on the new timeline.
5660 */
5662 {
5663 /*
5664 * Make a copy of the file on the new timeline.
5665 *
5666 * Writing WAL isn't allowed yet, so there are no locking
5667 * considerations. But we should be just as tense as XLogFileInit to
5668 * avoid emplacing a bogus file.
5669 */
5672 }
5673 else
5674 {
5675 /*
5676 * The switch happened at a segment boundary, so just create the next
5677 * segment on the new timeline.
5678 */
5679 int fd;
5680
5682
5683 if (close(fd) != 0)
5684 {
5685 int save_errno = errno;
5686
5688 errno = save_errno;
5689 ereport(ERROR,
5691 errmsg("could not close file \"%s\": %m", xlogfname)));
5692 }
5693 }
5694
5695 /*
5696 * Let's just make real sure there are no .ready or .done flags posted
5697 * for the new segment.
5698 */
5701}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition xlog.c:3469

References Assert, close, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6887 of file xlog.c.

6888{
6889 /*
6890 * If value is "unconditionally true" or "unconditionally false", just
6891 * return it. This provides the normal fast path once recovery is known
6892 * done.
6893 */
6894 if (LocalXLogInsertAllowed >= 0)
6895 return (bool) LocalXLogInsertAllowed;
6896
6897 /*
6898 * Else, must check to see if we're still in recovery.
6899 */
6900 if (RecoveryInProgress())
6901 return false;
6902
6903 /*
6904 * On exit from recovery, reset to "unconditionally true", since there is
6905 * no need to keep checking.
6906 */
6908 return true;
6909}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData *rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
uint64  fpi_bytes,
bool  topxid_included 
)

Definition at line 784 of file xlog.c.

790{
793 bool inserted;
794 XLogRecord *rechdr = (XLogRecord *) rdata->data;
795 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
801
802 /* Does this record type require special handling? */
803 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
804 {
805 if (info == XLOG_SWITCH)
807 else if (info == XLOG_CHECKPOINT_REDO)
809 }
810
811 /* we assume that all of the record header is in the first chunk */
813
814 /* cross-check on whether we should be here or not */
815 if (!XLogInsertAllowed())
816 elog(ERROR, "cannot make new WAL entries during recovery");
817
818 /*
819 * Given that we're not in recovery, InsertTimeLineID is set and can't
820 * change, so we can read it without a lock.
821 */
823
824 /*----------
825 *
826 * We have now done all the preparatory work we can without holding a
827 * lock or modifying shared state. From here on, inserting the new WAL
828 * record to the shared WAL buffer cache is a two-step process:
829 *
830 * 1. Reserve the right amount of space from the WAL. The current head of
831 * reserved space is kept in Insert->CurrBytePos, and is protected by
832 * insertpos_lck.
833 *
834 * 2. Copy the record to the reserved WAL space. This involves finding the
835 * correct WAL buffer containing the reserved space, and copying the
836 * record in place. This can be done concurrently in multiple processes.
837 *
838 * To keep track of which insertions are still in-progress, each concurrent
839 * inserter acquires an insertion lock. In addition to just indicating that
840 * an insertion is in progress, the lock tells others how far the inserter
841 * has progressed. There is a small fixed number of insertion locks,
842 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
843 * boundary, it updates the value stored in the lock to the how far it has
844 * inserted, to allow the previous buffer to be flushed.
845 *
846 * Holding onto an insertion lock also protects RedoRecPtr and
847 * fullPageWrites from changing until the insertion is finished.
848 *
849 * Step 2 can usually be done completely in parallel. If the required WAL
850 * page is not initialized yet, you have to grab WALBufMappingLock to
851 * initialize it, but the WAL writer tries to do that ahead of insertions
852 * to keep that from happening in the critical path.
853 *
854 *----------
855 */
857
858 if (likely(class == WALINSERT_NORMAL))
859 {
861
862 /*
863 * Check to see if my copy of RedoRecPtr is out of date. If so, may
864 * have to go back and have the caller recompute everything. This can
865 * only happen just after a checkpoint, so it's better to be slow in
866 * this case and fast otherwise.
867 *
868 * Also check to see if fullPageWrites was just turned on or there's a
869 * running backup (which forces full-page writes); if we weren't
870 * already doing full-page writes then go back and recompute.
871 *
872 * If we aren't doing full-page writes then RedoRecPtr doesn't
873 * actually affect the contents of the XLOG record, so we'll update
874 * our local copy but not force a recomputation. (If doPageWrites was
875 * just turned off, we could recompute the record without full pages,
876 * but we choose not to bother.)
877 */
878 if (RedoRecPtr != Insert->RedoRecPtr)
879 {
881 RedoRecPtr = Insert->RedoRecPtr;
882 }
883 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
884
885 if (doPageWrites &&
888 {
889 /*
890 * Oops, some buffer now needs to be backed up that the caller
891 * didn't back up. Start over.
892 */
895 return InvalidXLogRecPtr;
896 }
897
898 /*
899 * Reserve space for the record in the WAL. This also sets the xl_prev
900 * pointer.
901 */
903 &rechdr->xl_prev);
904
905 /* Normal records are always inserted. */
906 inserted = true;
907 }
908 else if (class == WALINSERT_SPECIAL_SWITCH)
909 {
910 /*
911 * In order to insert an XLOG_SWITCH record, we need to hold all of
912 * the WAL insertion locks, not just one, so that no one else can
913 * begin inserting a record until we've figured out how much space
914 * remains in the current WAL segment and claimed all of it.
915 *
916 * Nonetheless, this case is simpler than the normal cases handled
917 * below, which must check for changes in doPageWrites and RedoRecPtr.
918 * Those checks are only needed for records that can contain buffer
919 * references, and an XLOG_SWITCH record never does.
920 */
924 }
925 else
926 {
928
929 /*
930 * We need to update both the local and shared copies of RedoRecPtr,
931 * which means that we need to hold all the WAL insertion locks.
932 * However, there can't be any buffer references, so as above, we need
933 * not check RedoRecPtr before inserting the record; we just need to
934 * update it afterwards.
935 */
939 &rechdr->xl_prev);
940 RedoRecPtr = Insert->RedoRecPtr = StartPos;
941 inserted = true;
942 }
943
944 if (inserted)
945 {
946 /*
947 * Now that xl_prev has been filled in, calculate CRC of the record
948 * header.
949 */
950 rdata_crc = rechdr->xl_crc;
953 rechdr->xl_crc = rdata_crc;
954
955 /*
956 * All the record data, including the header, is now ready to be
957 * inserted. Copy the record in the space reserved.
958 */
959 CopyXLogRecordToWAL(rechdr->xl_tot_len,
962
963 /*
964 * Unless record is flagged as not important, update LSN of last
965 * important record in the current slot. When holding all locks, just
966 * update the first one.
967 */
968 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
969 {
970 int lockno = holdingAllLocks ? 0 : MyLockNo;
971
973 }
974 }
975 else
976 {
977 /*
978 * This was an xlog-switch record, but the current insert location was
979 * already exactly at the beginning of a segment, so there was no need
980 * to do anything.
981 */
982 }
983
984 /*
985 * Done! Let others know that we're finished.
986 */
988
990
992
993 /*
994 * Mark the top transaction ID as logged (if needed) so that we do not try
995 * to log it again with the next WAL record in the current subtransaction.
996 */
997 if (topxid_included)
999
1000 /*
1001 * Update shared LogwrtRqst.Write, if we crossed page boundary.
1002 */
1004 {
1006 /* advance global request to include new block(s) */
1011 }
1012
1013 /*
1014 * If this was an XLOG_SWITCH record, flush the record and the empty
1015 * padding space that fills the rest of the segment, and perform
1016 * end-of-segment actions (eg, notifying archiver).
1017 */
1018 if (class == WALINSERT_SPECIAL_SWITCH)
1019 {
1022
1023 /*
1024 * Even though we reserved the rest of the segment for us, which is
1025 * reflected in EndPos, we return a pointer to just the end of the
1026 * xlog-switch record.
1027 */
1028 if (inserted)
1029 {
1032 {
1034
1035 if (offset == EndPos % XLOG_BLCKSZ)
1037 else
1039 }
1040 }
1041 }
1042
1043#ifdef WAL_DEBUG
1044 if (XLOG_DEBUG)
1045 {
1047 XLogRecord *record;
1051 char *errormsg = NULL;
1053
1055
1057 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1058
1059 /*
1060 * We have to piece together the WAL record data from the XLogRecData
1061 * entries, so that we can pass it to the rm_desc function as one
1062 * contiguous chunk.
1063 */
1065 for (; rdata != NULL; rdata = rdata->next)
1067
1068 /* We also need temporary space to decode the record. */
1069 record = (XLogRecord *) recordBuf.data;
1072
1073 if (!debug_reader)
1075 XL_ROUTINE(.page_read = NULL,
1076 .segment_open = NULL,
1077 .segment_close = NULL),
1078 NULL);
1079 if (!debug_reader)
1080 {
1081 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1082 }
1084 decoded,
1085 record,
1086 EndPos,
1087 &errormsg))
1088 {
1089 appendStringInfo(&buf, "error decoding record: %s",
1090 errormsg ? errormsg : "no error message");
1091 }
1092 else
1093 {
1094 appendStringInfoString(&buf, " - ");
1095
1096 debug_reader->record = decoded;
1098 debug_reader->record = NULL;
1099 }
1100 elog(LOG, "%s", buf.data);
1101
1102 pfree(decoded);
1103 pfree(buf.data);
1104 pfree(recordBuf.data);
1106 }
1107#endif
1108
1109 /*
1110 * Update our global variables
1111 */
1114
1115 /* Report WAL traffic to the instrumentation. */
1116 if (inserted)
1117 {
1118 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1122
1123 /* Required for the flush of pending stats WAL data */
1124 pgstat_report_fixed = true;
1125 }
1126
1127 return EndPos;
1128}
#define likely(x)
Definition c.h:437
#define unlikely(x)
Definition c.h:438
void * palloc(Size size)
Definition mcxt.c:1390
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:138
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
void MarkSubxactTopXidLogged(void)
Definition xact.c:593
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition xact.c:543
XLogRecPtr XactLastRecEnd
Definition xlog.c:261
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition xlog.c:1266
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1149
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1205
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:108
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, fb(), FIN_CRC32C, holdingAllLocks, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogRecPtrIsValid, and XLogSegmentOffset.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3159 of file xlog.c.

3160{
3161 /*
3162 * During recovery, we don't flush WAL but update minRecoveryPoint
3163 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3164 * would need to be updated.
3165 *
3166 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3167 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3168 * This check should be consistent with the one in XLogFlush().
3169 */
3170 if (!XLogInsertAllowed())
3171 {
3172 /* Quick exit if already known to be updated or cannot be updated */
3174 return false;
3175
3176 /*
3177 * An invalid minRecoveryPoint means that we need to recover all the
3178 * WAL, i.e., we're doing crash recovery. We never modify the control
3179 * file's value in that case, so we can short-circuit future checks
3180 * here too. This triggers a quick exit path for the startup process,
3181 * which cannot update its local copy of minRecoveryPoint as long as
3182 * it has not replayed all WAL available when doing crash recovery.
3183 */
3185 {
3186 updateMinRecoveryPoint = false;
3187 return false;
3188 }
3189
3190 /*
3191 * Update local copy of minRecoveryPoint. But if the lock is busy,
3192 * just return a conservative guess.
3193 */
3195 return true;
3199
3200 /*
3201 * Check minRecoveryPoint for any other process than the startup
3202 * process doing crash recovery, which should not update the control
3203 * file value if crash recovery is still running.
3204 */
3206 updateMinRecoveryPoint = false;
3207
3208 /* check again */
3210 return false;
3211 else
3212 return true;
3213 }
3214
3215 /* Quick exit if already known flushed */
3216 if (record <= LogwrtResult.Flush)
3217 return false;
3218
3219 /* read LogwrtResult and update local state */
3221
3222 /* check again */
3223 if (record <= LogwrtResult.Flush)
3224 return false;
3225
3226 return true;
3227}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1321

References ControlFile, fb(), XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsValid.

Referenced by GetVictimBuffer(), SetHintBitsExt(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8567 of file xlog.c.

8568{
8569 XLogBeginInsert();
8570 XLogRegisterData(&nextOid, sizeof(Oid));
8571 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8572
8573 /*
8574 * We need not flush the NEXTOID record immediately, because any of the
8575 * just-allocated OIDs could only reach disk as part of a tuple insert or
8576 * update that would have its own XLOG record that must follow the NEXTOID
8577 * record. Therefore, the standard buffer LSN interlock applied to those
8578 * records will ensure no such OID reaches disk before the NEXTOID record
8579 * does.
8580 *
8581 * Note, however, that the above statement only covers state "within" the
8582 * database. When we use a generated OID as a file or directory name, we
8583 * are in a sense violating the basic WAL rule, because that filesystem
8584 * change may reach disk before the NEXTOID WAL record does. The impact
8585 * of this is that if a database crash occurs immediately afterward, we
8586 * might after restart re-generate the same OID and find that it conflicts
8587 * with the leftover file or directory. But since for safety's sake we
8588 * always loop until finding a nonconflicting filename, this poses no real
8589 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8590 */
8591}

References fb(), XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1982 of file xlog.c.

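1983{
1984 uint64 fullsegs;
1985 uint32 fullpages;
1986 uint32 offset;
1987 uint64 result;
1988
1989 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1990
1991 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1992 offset = ptr % XLOG_BLCKSZ;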
1993
1994 if (fullpages == 0)
1995 {
1996 result = fullsegs * UsableBytesInSegment;
1997 if (offset > 0)
1998 {
1999 Assert(offset >= SizeOfXLogLongPHD);
2000 result += offset - SizeOfXLogLongPHD;
2001 }
2002 }
2003 else
2004 {
2005 result = fullsegs * UsableBytesInSegment +
2006 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
2007 (fullpages - 1) * UsableBytesInPage; /* full pages */
2008 if (offset > 0)
2009 {
2010 Assert(offset >= SizeOfXLogShortPHD);
2011 result += offset - SizeOfXLogShortPHD;
2012 }
2013 }
2014
2015 return result;
2016}

References Assert, fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8671 of file xlog.c.

8672{
8681 {
8682 /*
8683 * The change in number of backend slots doesn't need to be WAL-logged
8684 * if archiving is not enabled, as you can't start archive recovery
8685 * with wal_level=minimal anyway. We don't really care about the
8686 * values in pg_control either if wal_level=minimal, but seems better
8687 * to keep them up-to-date to avoid confusion.
8688 */
8690 {
8693
8695 xlrec.max_worker_processes = max_worker_processes;
8696 xlrec.max_wal_senders = max_wal_senders;
8697 xlrec.max_prepared_xacts = max_prepared_xacts;
8698 xlrec.max_locks_per_xact = max_locks_per_xact;
8699 xlrec.wal_level = wal_level;
8700 xlrec.wal_log_hints = wal_log_hints;
8701 xlrec.track_commit_timestamp = track_commit_timestamp;
8702
8704 XLogRegisterData(&xlrec, sizeof(xlrec));
8705
8708 }
8709
8711
8721
8723 }
8724}

References ControlFile, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2630 of file xlog.c.

2631{
2632 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2633 bool sleeping;
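2634 bool wakeup = false;
2635 XLogRecPtr prevAsyncXactLSN;
2636
2637 SpinLockAcquire(&XLogCtl->info_lck);
2638 sleeping = XLogCtl->WalWriterSleeping;
2639 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2640 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2641 XLogCtl->asyncXactLSN = asyncXactLSN;
2642 SpinLockRelease(&XLogCtl->info_lck);
2643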
2644 /*
2645 * If somebody else already called this function with a more aggressive
2646 * LSN, they will have done what we needed (and perhaps more).
2647 */
2648 if (asyncXactLSN <= prevAsyncXactLSN)
2649 return;
2650
2651 /*
2652 * If the WALWriter is sleeping, kick it to make it come out of low-power
2653 * mode, so that this async commit will reach disk within the expected
2654 * amount of time. Otherwise, determine whether it has enough WAL
2655 * available to flush, the same way that XLogBackgroundFlush() does.
2656 */
2657 if (sleeping)
2658 wakeup = true;
2659 else
2660 {
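2661 int flushblocks;
2662
2663 RefreshXLogWriteResult(LogwrtResult);
2664
2665 flushblocks =
2666 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2667
2668 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2669 wakeup = true;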
2670 }
2671
2672 if (wakeup)
2673 {
2674 volatile PROC_HDR *procglobal = ProcGlobal;
2675 ProcNumber walwriterProc = procglobal->walwriterProc;
2676
2677 if (walwriterProc != INVALID_PROC_NUMBER)
2678 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2679 }
2680}
void SetLatch(Latch *latch)
Definition latch.c:290
#define GetPGProcByNumber(n)
Definition proc.h:504
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
PROC_HDR * ProcGlobal
Definition proc.c:74
ProcNumber walwriterProc
Definition proc.h:488
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References XLogCtlData::asyncXactLSN, fb(), XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire(), SpinLockRelease(), wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemAttach()

static void XLOGShmemAttach ( void *  arg)
static

Definition at line 5445 of file xlog.c.

5446{
5447 WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
5448}
WALInsertLockPadded * WALInsertLocks
Definition xlog.c:451

References XLogCtlData::Insert, XLogCtlInsert::WALInsertLocks, WALInsertLocks, and XLogCtl.

◆ XLOGShmemInit()

static void XLOGShmemInit ( void *  arg)
static

Definition at line 5349 of file xlog.c.

5350{
5351 char *allocptr;
5352 int i;
5353
5354#ifdef WAL_DEBUG
5355
5356 /*
5357 * Create a memory context for WAL debugging that's exempt from the normal
5358 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
5359 * an allocation fails, but wal_debug is not for production use anyway.
5360 */
5361 if (walDebugCxt == NULL)
5362 {
5364 "WAL Debug",
5367 }
5368#endif
5369
5370 memset(XLogCtl, 0, sizeof(XLogCtlData));
5371
5372 /*
5373 * Already have read control file locally, unless in bootstrap mode. Move
5374 * contents into shared memory.
5375 */
5376 if (LocalControlFile)
5377 {
5381 }
5382
5383 /*
5384 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5385 * multiple of the alignment for same, so no extra alignment padding is
5386 * needed here.
5387 */
5388 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5391
5392 for (i = 0; i < XLOGbuffers; i++)
5393 {
5395 }
5396
5397 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5398 allocptr += sizeof(WALInsertLockPadded) -
5403
5404 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5405 {
5409 }
5410
5411 /*
5412 * Align the start of the page buffers to a full xlog block size boundary.
5413 * This simplifies some calculations in XLOG insertion. It is also
5414 * required for O_DIRECT.
5415 */
5419
5420 /*
5421 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5422 * in additional info.)
5423 */
5427 XLogCtl->WalWriterSleeping = false;
5428
5429 /* Use the checksum info from control file */
5432
5439}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:889
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
MemoryContext TopMemoryContext
Definition mcxt.c:167
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition mcxt.c:746
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
int XLogCacheBlck
Definition xlog.c:500
slock_t insertpos_lck
Definition xlog.c:405

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, fb(), i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LocalControlFile, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), memcpy(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, SetLocalDataChecksumState(), XLogCtlData::SharedRecoveryState, SpinLockInit(), TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, and XLogCtl.

◆ XLOGShmemRequest()

static void XLOGShmemRequest ( void *  arg)
static

Definition at line 5295 of file xlog.c.

5296{
5297 Size size;
5298
5299 /*
5300 * If the value of wal_buffers is -1, use the preferred auto-tune value.
5301 * This isn't an amazingly clean place to do this, but we must wait till
5302 * NBuffers has received its final value, and must do it before using the
5303 * value of XLOGbuffers to do anything important.
5304 *
5305 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
5306 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
5307 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
5308 * the matter with PGC_S_OVERRIDE.
5309 */
5310 if (XLOGbuffers == -1)
5311 {
5312 char buf[32];
5313
5314 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
5315 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5316 PGC_S_DYNAMIC_DEFAULT);
5317 if (XLOGbuffers == -1) /* failed to apply it? */
5318 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5319 PGC_S_OVERRIDE);
5320 }
5321 Assert(XLOGbuffers > 0);
5322
5323 /* XLogCtl */
5324 size = sizeof(XLogCtlData);
5325
5326 /* WAL insertion locks, plus alignment */
5327 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
5328 /* xlblocks array */
5329 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
5330 /* extra alignment padding for XLOG I/O buffers */
5331 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
5332 /* and the buffers themselves */
5333 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
5334
5335 ShmemRequestStruct(.name = "XLOG Ctl",
5336 .size = size,
5337 .ptr = (void **) &XLogCtl,
5338 );
5339 ShmemRequestStruct(.name = "Control File",
5340 .size = sizeof(ControlFileData),
5341 .ptr = (void **) &ControlFile,
5342 );
5343}
#define Max(x, y)
Definition c.h:1085
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_POSTMASTER
Definition guc.h:74
Size add_size(Size s1, Size s2)
Definition mcxt.c:1733
Size mul_size(Size s1, Size s2)
Definition mcxt.c:1752
#define PG_IO_ALIGN_SIZE
#define ShmemRequestStruct(...)
Definition shmem.h:176
const char * name

References add_size(), Assert, buf, ControlFile, fb(), Max, mul_size(), name, NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), ShmemRequestStruct, snprintf, XLOGbuffers, XLOGChooseNumBuffers(), and XLogCtl.

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 10146 of file xlog.c.

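10147{
10148 Assert(AmStartupProcess() || !IsUnderPostmaster);
10149
10150 ShutdownWalRcv();
10151 ResetInstallXLogFileSegmentActive();
10152}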
#define AmStartupProcess()
Definition miscadmin.h:405
void ShutdownWalRcv(void)
void ResetInstallXLogFileSegmentActive(void)
Definition xlog.c:10165

References AmStartupProcess, Assert, IsUnderPostmaster, ResetInstallXLogFileSegmentActive(), and ShutdownWalRcv().

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2325 of file xlog.c.

2326{
2327 bool ispartialpage;
2328 bool last_iteration;
2329 bool finishing_seg;
2330 int curridx;
2331 int npages;
2332 int startidx;
2334
2335 /* We should always be inside a critical section here */
2337
2338 /*
2339 * Update local LogwrtResult (caller probably did this already, but...)
2340 */
2342
2343 /*
2344 * Since successive pages in the xlog cache are consecutively allocated,
2345 * we can usually gather multiple pages together and issue just one
2346 * write() call. npages is the number of pages we have determined can be
2347 * written together; startidx is the cache block index of the first one,
2348 * and startoffset is the file offset at which it should go. The latter
2349 * two variables are only valid when npages > 0, but we must initialize
2350 * all of them to keep the compiler quiet.
2351 */
2352 npages = 0;
2353 startidx = 0;
2354 startoffset = 0;
2355
2356 /*
2357 * Within the loop, curridx is the cache block index of the page to
2358 * consider writing. Begin at the buffer containing the next unwritten
2359 * page, or last partially written page.
2360 */
2362
2363 while (LogwrtResult.Write < WriteRqst.Write)
2364 {
2365 /*
2366 * Make sure we're not ahead of the insert process. This could happen
2367 * if we're passed a bogus WriteRqst.Write that is past the end of the
2368 * last page that's been initialized by AdvanceXLInsertBuffer.
2369 */
2371
2372 if (LogwrtResult.Write >= EndPtr)
2373 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2376
2377 /* Advance LogwrtResult.Write to end of current buffer page */
2380
2383 {
2384 /*
2385 * Switch to new logfile segment. We cannot have any pending
2386 * pages here (since we dump what we have at segment end).
2387 */
2388 Assert(npages == 0);
2389 if (openLogFile >= 0)
2390 XLogFileClose();
2393 openLogTLI = tli;
2394
2395 /* create/use new log file */
2398 }
2399
2400 /* Make sure we have the current logfile open */
2401 if (openLogFile < 0)
2402 {
2405 openLogTLI = tli;
2408 }
2409
2410 /* Add current page to the set of pending pages-to-dump */
2411 if (npages == 0)
2412 {
2413 /* first of group */
2414 startidx = curridx;
2417 }
2418 npages++;
2419
2420 /*
2421 * Dump the set if this will be the last loop iteration, or if we are
2422 * at the last page of the cache area (since the next page won't be
2423 * contiguous in memory), or if we are at the end of the logfile
2424 * segment.
2425 */
2427
2430
2431 if (last_iteration ||
2434 {
2435 char *from;
2436 Size nbytes;
2437 Size nleft;
2440
2441 /* OK to write the page(s) */
2442 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2443 nbytes = npages * (Size) XLOG_BLCKSZ;
2444 nleft = nbytes;
2445 do
2446 {
2447 errno = 0;
2448
2449 /*
2450 * Measure I/O timing to write WAL data, for pg_stat_io.
2451 */
2453
2457
2459 IOOP_WRITE, start, 1, written);
2460
2461 if (written <= 0)
2462 {
2463 char xlogfname[MAXFNAMELEN];
2464 int save_errno;
2465
2466 if (errno == EINTR)
2467 continue;
2468
2469 save_errno = errno;
2472 errno = save_errno;
2473 ereport(PANIC,
2475 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2477 }
2478 nleft -= written;
2479 from += written;
2481 } while (nleft > 0);
2482
2483 npages = 0;
2484
2485 /*
2486 * If we just wrote the whole last page of a logfile segment,
2487 * fsync the segment immediately. This avoids having to go back
2488 * and re-open prior segments when an fsync request comes along
2489 * later. Doing it here ensures that one and only one backend will
2490 * perform this fsync.
2491 *
2492 * This is also the right place to notify the Archiver that the
2493 * segment is ready to copy to archival storage, and to update the
2494 * timer for archive_timeout, and to signal for a checkpoint if
2495 * too many logfile segments have been used since the last
2496 * checkpoint.
2497 */
2498 if (finishing_seg)
2499 {
2501
2502 /* signal that we need to wakeup walsenders later */
2504
2505 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2506
2507 if (XLogArchivingActive())
2509
2512
2513 /*
2514 * Request a checkpoint if we've consumed too much xlog since
2515 * the last one. For speed, we first check using the local
2516 * copy of RedoRecPtr, which might be out of date; if it looks
2517 * like a checkpoint is needed, forcibly update RedoRecPtr and
2518 * recheck.
2519 */
2521 {
2522 (void) GetRedoRecPtr();
2525 }
2526 }
2527 }
2528
2529 if (ispartialpage)
2530 {
2531 /* Only asked to write a partial page */
2533 break;
2534 }
2536
2537 /* If flexible, break out of loop as soon as we wrote something */
2538 if (flexible && npages == 0)
2539 break;
2540 }
2541
2542 Assert(npages == 0);
2543
2544 /*
2545 * If asked to flush, do so
2546 */
2547 if (LogwrtResult.Flush < WriteRqst.Flush &&
2549 {
2550 /*
2551 * Could get here without iterating above loop, in which case we might
2552 * have no open file or the wrong one. However, we do not need to
2553 * fsync more than one file.
2554 */
2557 {
2558 if (openLogFile >= 0 &&
2561 XLogFileClose();
2562 if (openLogFile < 0)
2563 {
2566 openLogTLI = tli;
2569 }
2570
2572 }
2573
2574 /* signal that we need to wakeup walsenders later */
2576
2578 }
2579
2580 /*
2581 * Update shared-memory status
2582 *
2583 * We make sure that the shared 'request' values do not fall behind the
2584 * 'result' values. This is not absolutely essential, but it saves some
2585 * code in a couple of places.
2586 */
2593
2594 /*
2595 * We write Write first, bar, then Flush. When reading, the opposite must
2596 * be done (with a matching barrier in between), so that we always see a
2597 * Flush value that trails behind the Write value seen.
2598 */
2602
2603#ifdef USE_ASSERT_CHECKING
2604 {
2608
2614
2615 /* WAL written to disk is always ahead of WAL flushed */
2616 Assert(Write >= Flush);
2617
2618 /* WAL inserted to buffers is always ahead of WAL written */
2619 Assert(Insert >= Write);
2620 }
2621#endif
2622}
void ReserveExternalFD(void)
Definition fd.c:1207
volatile uint32 CritSectionCount
Definition globals.c:45
XLogRecPtr Flush
XLogRecPtr Write
#define WalSndWakeupRequest()
Definition walsender.h:57
#define EINTR
Definition win32_port.h:361
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6935
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition xlog.c:3669
#define NextBufIdx(idx)
Definition xlog.c:604
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition xlog.c:9358
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition xlog.c:2301
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg, fb(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire(), SpinLockRelease(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition xlog.h:69
@ ARCHIVE_MODE_OFF
Definition xlog.h:67
@ ARCHIVE_MODE_ON
Definition xlog.h:68

Definition at line 198 of file xlog.c.

198 {
199 {"always", ARCHIVE_MODE_ALWAYS, false},
200 {"on", ARCHIVE_MODE_ON, false},
201 {"off", ARCHIVE_MODE_OFF, false},
202 {"true", ARCHIVE_MODE_ON, true},
203 {"false", ARCHIVE_MODE_OFF, true},
204 {"yes", ARCHIVE_MODE_ON, true},
205 {"no", ARCHIVE_MODE_OFF, true},
206 {"1", ARCHIVE_MODE_ON, true},
207 {"0", ARCHIVE_MODE_OFF, true},
208 {NULL, 0, false}
209};

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 173 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 166 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 139 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 140 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ data_checksums

int data_checksums = 0

Definition at line 683 of file xlog.c.

Referenced by AbsorbDataChecksumsBarrier(), and SetLocalDataChecksumState().

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 129 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 224 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalControlFile

ControlFileData* LocalControlFile = NULL
static

Definition at line 583 of file xlog.c.

Referenced by LocalProcessControlFile(), and XLOGShmemInit().

◆ LocalDataChecksumState

ChecksumStateType LocalDataChecksumState = 0
static

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 231 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 243 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 142 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 122 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 656 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 657 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 167 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 668 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 131 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 133 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 132 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 143 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 134 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 123 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 130 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 135 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

Definition at line 141 of file xlog.c.

Referenced by ApplyLauncherMain(), launch_sync_worker(), and WaitForWALToBecomeAvailable().

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 150 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), decode_concurrent_changes(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), repack_setup_logical_decoding(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 178 of file xlog.c.

178 {
179 {"fsync", WAL_SYNC_METHOD_FSYNC, false},
180#ifdef HAVE_FSYNC_WRITETHROUGH
181 {"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
182#endif
183 {"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
184#ifdef O_SYNC
185 {"open_sync", WAL_SYNC_METHOD_OPEN, false},
186#endif
187#ifdef O_DSYNC
188 {"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
189#endif
190 {NULL, 0, false}
191};

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 125 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 124 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemRequest().

◆ XLogCtl

◆ XLOGShmemCallbacks

const ShmemCallbacks XLOGShmemCallbacks
Initial value:
= {
.request_fn = XLOGShmemRequest,
.init_fn = XLOGShmemInit,
.attach_fn = XLOGShmemAttach,
}
static void XLOGShmemInit(void *arg)
Definition xlog.c:5349
static void XLOGShmemAttach(void *arg)
Definition xlog.c:5445
static void XLOGShmemRequest(void *arg)
Definition xlog.c:5295

Definition at line 590 of file xlog.c.

590 {
591 .request_fn = XLOGShmemRequest,
592 .init_fn = XLOGShmemInit,
593 .attach_fn = XLOGShmemAttach,
594};