PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "access/xlogwait.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/datachecksum_state.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/subsystems.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
#include "utils/wait_event.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void XLOGShmemRequest (void *arg)
 
static void XLOGShmemInit (void *arg)
 
static void XLOGShmemAttach (void *arg)
 
static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
static void XLogChecksums (uint32 new_type)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, uint64 fpi_bytes, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsNeedWrite (void)
 
bool DataChecksumsInProgressOn (void)
 
bool DataChecksumsNeedVerify (void)
 
void SetDataChecksumsOnInProgress (void)
 
void SetDataChecksumsOn (void)
 
void SetDataChecksumsOff (void)
 
void InitLocalDataChecksumState (void)
 
void SetLocalDataChecksumState (uint32 data_checksum_version)
 
const char * show_data_checksums (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
const char * show_effective_wal_level (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static const char * CheckpointFlagsString (int flags)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint, int flags)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
XLogRecPtr XLogAssignLSN (void)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void xlog2_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogInsertEndRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
void ResetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * LocalControlFile = NULL
 
static ControlFileData * ControlFile = NULL
 
const ShmemCallbacks XLOGShmemCallbacks
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static ChecksumStateType LocalDataChecksumState = 0
 
int data_checksums = 0
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 118 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 623 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 600 of file xlog.c.

601 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
static int fb(int x)

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 604 of file xlog.c.

605 : ((idx) + 1))
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 157 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
do { \
} while (0)
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
pg_atomic_uint64 logWriteResult
Definition xlog.c:478
pg_atomic_uint64 logFlushResult
Definition xlog.c:479
static XLogCtlData * XLogCtl
Definition xlog.c:575

Definition at line 640 of file xlog.c.

641 { \
645 } while (0)

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 617 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 611 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

◆ XLogCtlData

◆ XLogCtlInsert

◆ XLogwrtResult

◆ XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 568 of file xlog.c.

569{
WalInsertClass
Definition xlog.c:569
@ WALINSERT_SPECIAL_SWITCH
Definition xlog.c:571
@ WALINSERT_NORMAL
Definition xlog.c:570
@ WALINSERT_SPECIAL_CHECKPOINT
Definition xlog.c:572

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 2026 of file xlog.c.

2027{
2028 int nextidx;
2034 int npages pg_attribute_unused() = 0;
2035
2037
2038 /*
2039 * Now that we have the lock, check if someone initialized the page
2040 * already.
2041 */
2043 {
2045
2046 /*
2047 * Get ending-offset of the buffer page we need to replace (this may
2048 * be zero if the buffer hasn't been used yet). Fall through if it's
2049 * already written out.
2050 */
2053 {
2054 /*
2055 * Nope, got work to do. If we just want to pre-initialize as much
2056 * as we can without flushing, give up now.
2057 */
2058 if (opportunistic)
2059 break;
2060
2061 /* Advance shared memory write request position */
2066
2067 /*
2068 * Acquire an up-to-date LogwrtResult value and see if we still
2069 * need to write it or if someone else already did.
2070 */
2073 {
2074 /*
2075 * Must acquire write lock. Release WALBufMappingLock first,
2076 * to make sure that all insertions that we need to wait for
2077 * can finish (up to this same position). Otherwise we risk
2078 * deadlock.
2079 */
2081
2083
2085
2088 {
2089 /* OK, someone wrote it already */
2091 }
2092 else
2093 {
2094 /* Have to write it ourselves */
2096 WriteRqst.Write = OldPageRqstPtr;
2098 XLogWrite(WriteRqst, tli, false);
2102
2103 /*
2104 * Required for the flush of pending stats WAL data, per
2105 * update of pgWalUsage.
2106 */
2107 pgstat_report_fixed = true;
2108 }
2109 /* Re-acquire WALBufMappingLock and retry */
2111 continue;
2112 }
2113 }
2114
2115 /*
2116 * Now the next buffer slot is free and we can set it up to be the
2117 * next output page.
2118 */
2121
2123
2125
2126 /*
2127 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2128 * before initializing. Otherwise, the old page may be partially
2129 * zeroed but look valid.
2130 */
2133
2134 /*
2135 * Be sure to re-zero the buffer so that bytes beyond what we've
2136 * written will look like zeroes and not valid XLOG records...
2137 */
2139
2140 /*
2141 * Fill the new page's header
2142 */
2143 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2144
2145 /* NewPage->xlp_info = 0; */ /* done by memset */
2146 NewPage->xlp_tli = tli;
2147 NewPage->xlp_pageaddr = NewPageBeginPtr;
2148
2149 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2150
2151 /*
2152 * If first page of an XLOG segment file, make it a long header.
2153 */
2154 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2155 {
2157
2159 NewLongPage->xlp_seg_size = wal_segment_size;
2160 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2161 NewPage->xlp_info |= XLP_LONG_HEADER;
2162 }
2163
2164 /*
2165 * Make sure the initialization of the page becomes visible to others
2166 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2167 * holding a lock.
2168 */
2170
2173
2174 npages++;
2175 }
2177
2178#ifdef WAL_DEBUG
2179 if (XLOG_DEBUG && npages > 0)
2180 {
2181 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2183 }
2184#endif
2185}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
#define pg_write_barrier()
Definition atomics.h:155
#define pg_attribute_unused()
Definition c.h:149
#define Assert(condition)
Definition c.h:943
#define MemSet(start, val, len)
Definition c.h:1107
size_t Size
Definition c.h:689
#define DEBUG1
Definition elog.h:31
#define elog(elevel,...)
Definition elog.h:228
WalUsage pgWalUsage
Definition instrument.c:27
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_EXCLUSIVE
Definition lwlock.h:104
bool pgstat_report_fixed
Definition pgstat.c:219
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
uint64 system_identifier
Definition pg_control.h:118
int64 wal_buffers_full
Definition instrument.h:57
XLogwrtRqst LogwrtRqst
Definition xlog.c:462
slock_t info_lck
Definition xlog.c:562
XLogRecPtr InitializedUpTo
Definition xlog.c:491
char * pages
Definition xlog.c:498
pg_atomic_uint64 * xlblocks
Definition xlog.c:499
XLogRecPtr Write
Definition xlog.c:334
XLogRecPtr Write
Definition xlog.c:328
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition xlog.c:1545
#define RefreshXLogWriteResult(_target)
Definition xlog.c:640
int wal_segment_size
Definition xlog.c:150
static XLogwrtResult LogwrtResult
Definition xlog.c:632
#define XLogRecPtrToBufIdx(recptr)
Definition xlog.c:611
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition xlog.c:2325
static ControlFileData * ControlFile
Definition xlog.c:584
XLogLongPageHeaderData * XLogLongPageHeader
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
#define XLP_LONG_HEADER
#define XLOG_PAGE_MAGIC
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, fb(), XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_LONG_HEADER, and XLogLongPageHeaderData::xlp_sysid.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void * extra 
)

Definition at line 2228 of file xlog.c.

2229{
2232}
double CheckPointCompletionTarget
#define newval
static void CalculateCheckpointSegments(void)
Definition xlog.c:2192

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void * extra 
)

Definition at line 2221 of file xlog.c.

2222{
2225}
int max_wal_size_mb
Definition xlog.c:121

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char * newval,
void * extra 
)

Definition at line 5161 of file xlog.c.

5162{
5163 /*
5164 * If some checks were deferred, it's possible that the checks will fail
5165 * later during InitializeWalConsistencyChecking(). But in that case, the
5166 * postmaster will exit anyway, so it's safe to proceed with the
5167 * assignment.
5168 *
5169 * Any built-in resource managers specified are assigned immediately,
5170 * which affects WAL created before shared_preload_libraries are
5171 * processed. Any custom resource managers specified won't be assigned
5172 * until after shared_preload_libraries are processed, but that's OK
5173 * because WAL for a custom resource manager can't be written before the
5174 * module is loaded anyway.
5175 */
5177}
bool * wal_consistency_checking
Definition xlog.c:133

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void * extra 
)

Definition at line 9332 of file xlog.c.

9333{
9335 {
9336 /*
9337 * To ensure that no blocks escape unsynced, force an fsync on the
9338 * currently open log segment (if any). Also, if the open flag is
9339 * changing, close the log file so it will be reopened (with new flag
9340 * bit) at next use.
9341 */
9342 if (openLogFile >= 0)
9343 {
9345 if (pg_fsync(openLogFile) != 0)
9346 {
9347 char xlogfname[MAXFNAMELEN];
9348 int save_errno;
9349
9350 save_errno = errno;
9353 errno = save_errno;
9354 ereport(PANIC,
9356 errmsg("could not fsync file \"%s\": %m", xlogfname)));
9357 }
9358
9361 XLogFileClose();
9362 }
9363 }
9364}
int errcode_for_file_access(void)
Definition elog.c:898
#define PANIC
Definition elog.h:44
#define ereport(elevel,...)
Definition elog.h:152
int pg_fsync(int fd)
Definition fd.c:390
static char * errmsg
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:67
static void pgstat_report_wait_end(void)
Definition wait_event.h:83
static int openLogFile
Definition xlog.c:655
static int get_sync_bit(int method)
Definition xlog.c:9284
int wal_sync_method
Definition xlog.c:137
static TimeLineID openLogTLI
Definition xlog.c:657
static void XLogFileClose(void)
Definition xlog.c:3694
static XLogSegNo openLogSegNo
Definition xlog.c:656
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg, fb(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5454 of file xlog.c.

5455{
5456 CheckPoint checkPoint;
5457 PGAlignedXLogBlock buffer;
5458 XLogPageHeader page;
5460 XLogRecord *record;
5461 char *recptr;
5462 uint64 sysidentifier;
5463 struct timeval tv;
5464 pg_crc32c crc;
5465
5466 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5468
5469 /*
5470 * Select a hopefully-unique system identifier code for this installation.
5471 * We use the result of gettimeofday(), including the fractional seconds
5472 * field, as being about as unique as we can easily get. (Think not to
5473 * use random(), since it hasn't been seeded and there's no portable way
5474 * to seed it other than the system clock value...) The upper half of the
5475 * uint64 value is just the tv_sec part, while the lower half contains the
5476 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5477 * PID for a little extra uniqueness. A person knowing this encoding can
5478 * determine the initialization time of the installation, which could
5479 * perhaps be useful sometimes.
5480 */
5481 gettimeofday(&tv, NULL);
5482 sysidentifier = ((uint64) tv.tv_sec) << 32;
5483 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5484 sysidentifier |= getpid() & 0xFFF;
5485
5486 memset(&buffer, 0, sizeof buffer);
5487 page = (XLogPageHeader) &buffer;
5488
5489 /*
5490 * Set up information for the initial checkpoint record
5491 *
5492 * The initial checkpoint record is written to the beginning of the WAL
5493 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5494 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5495 */
5499 checkPoint.fullPageWrites = fullPageWrites;
5501 checkPoint.wal_level = wal_level;
5502 checkPoint.nextXid =
5504 checkPoint.nextOid = FirstGenbkiObjectId;
5505 checkPoint.nextMulti = FirstMultiXactId;
5506 checkPoint.nextMultiOffset = 1;
5508 checkPoint.oldestXidDB = Template1DbOid;
5509 checkPoint.oldestMulti = FirstMultiXactId;
5510 checkPoint.oldestMultiDB = Template1DbOid;
5513 checkPoint.time = (pg_time_t) time(NULL);
5515 checkPoint.dataChecksumState = data_checksum_version;
5516
5517 TransamVariables->nextXid = checkPoint.nextXid;
5518 TransamVariables->nextOid = checkPoint.nextOid;
5520 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5521 AdvanceOldestClogXid(checkPoint.oldestXid);
5522 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5523 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5525
5526 /* Set up the XLOG page header */
5527 page->xlp_magic = XLOG_PAGE_MAGIC;
5528 page->xlp_info = XLP_LONG_HEADER;
5532 longpage->xlp_sysid = sysidentifier;
5533 longpage->xlp_seg_size = wal_segment_size;
5534 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5535
5536 /* Insert the initial checkpoint record */
5537 recptr = ((char *) page + SizeOfXLogLongPHD);
5538 record = (XLogRecord *) recptr;
5539 record->xl_prev = InvalidXLogRecPtr;
5540 record->xl_xid = InvalidTransactionId;
5541 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5543 record->xl_rmid = RM_XLOG_ID;
5545 /* fill the XLogRecordDataHeaderShort struct */
5546 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5547 *(recptr++) = sizeof(checkPoint);
5548 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5549 recptr += sizeof(checkPoint);
5550 Assert(recptr - (char *) record == record->xl_tot_len);
5551
5553 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5554 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5555 FIN_CRC32C(crc);
5556 record->xl_crc = crc;
5557
5558 /* Create first XLOG segment file */
5561
5562 /*
5563 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5564 * close the file again in a moment.
5565 */
5566
5567 /* Write the first page with the initial record */
5568 errno = 0;
5570 if (write(openLogFile, &buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5571 {
5572 /* if write didn't set errno, assume problem is no disk space */
5573 if (errno == 0)
5574 errno = ENOSPC;
5575 ereport(PANIC,
5577 errmsg("could not write bootstrap write-ahead log file: %m")));
5578 }
5580
5582 if (pg_fsync(openLogFile) != 0)
5583 ereport(PANIC,
5585 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5587
5588 if (close(openLogFile) != 0)
5589 ereport(PANIC,
5591 errmsg("could not close bootstrap write-ahead log file: %m")));
5592
5593 openLogFile = -1;
5594
5595 /* Now create pg_control */
5596 InitControlFile(sysidentifier, data_checksum_version);
5597 ControlFile->time = checkPoint.time;
5598 ControlFile->checkPoint = checkPoint.redo;
5599 ControlFile->checkPointCopy = checkPoint;
5600
5601 /* some additional ControlFile fields are set in WriteControlFile() */
5603
5604 /* Bootstrap the commit log, too */
5605 BootStrapCLOG();
5609
5610 /*
5611 * Force control file to be read - in contrast to normal processing we'd
5612 * otherwise never run the checks and GUC related initializations therein.
5613 */
5615}
uint64_t uint64
Definition c.h:625
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void BootStrapCLOG(void)
Definition clog.c:851
void BootStrapCommitTs(void)
Definition commit_ts.c:599
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition commit_ts.c:892
#define close(a)
Definition win32.h:12
#define write(a, b, c)
Definition win32.h:14
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:2063
void BootStrapMultiXact(void)
Definition multixact.c:1863
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2085
#define FirstMultiXactId
Definition multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:72
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:173
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:178
return crc
int64 pg_time_t
Definition pgtime.h:23
Oid oldestMultiDB
Definition pg_control.h:52
MultiXactId oldestMulti
Definition pg_control.h:51
MultiXactOffset nextMultiOffset
Definition pg_control.h:48
TransactionId newestCommitTsXid
Definition pg_control.h:56
TransactionId oldestXid
Definition pg_control.h:49
TimeLineID PrevTimeLineID
Definition pg_control.h:40
TimeLineID ThisTimeLineID
Definition pg_control.h:39
TransactionId oldestActiveXid
Definition pg_control.h:65
bool fullPageWrites
Definition pg_control.h:42
MultiXactId nextMulti
Definition pg_control.h:47
FullTransactionId nextXid
Definition pg_control.h:45
TransactionId oldestCommitTsXid
Definition pg_control.h:54
pg_time_t time
Definition pg_control.h:53
int wal_level
Definition pg_control.h:43
bool logicalDecodingEnabled
Definition pg_control.h:44
uint32 dataChecksumState
Definition pg_control.h:68
XLogRecPtr redo
Definition pg_control.h:37
Oid oldestXidDB
Definition pg_control.h:50
CheckPoint checkPointCopy
Definition pg_control.h:143
pg_time_t time
Definition pg_control.h:140
XLogRecPtr checkPoint
Definition pg_control.h:141
FullTransactionId nextXid
Definition transam.h:220
XLogRecPtr xlp_pageaddr
XLogRecPtr xl_prev
Definition xlogrecord.h:45
uint8 xl_info
Definition xlogrecord.h:46
uint32 xl_tot_len
Definition xlogrecord.h:43
TransactionId xl_xid
Definition xlogrecord.h:44
RmgrId xl_rmid
Definition xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition subtrans.c:288
#define InvalidTransactionId
Definition transam.h:31
#define FirstGenbkiObjectId
Definition transam.h:195
#define FirstNormalTransactionId
Definition transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition varsup.c:367
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition varsup.c:350
TransamVariablesData * TransamVariables
Definition varsup.c:37
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition xlog.c:3435
bool fullPageWrites
Definition xlog.c:129
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition xlog.c:4259
void SetInstallXLogFileSegmentActive(void)
Definition xlog.c:10172
int wal_level
Definition xlog.c:138
static void WriteControlFile(void)
Definition xlog.c:4300
#define BootstrapTimeLineID
Definition xlog.c:118
static void ReadControlFile(void)
Definition xlog.c:4410
@ WAL_LEVEL_LOGICAL
Definition xlog.h:78
#define SizeOfXLogLongPHD
#define SizeOfXLogRecordDataHeaderShort
Definition xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition xlogrecord.h:241
#define SizeOfXLogRecord
Definition xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, CheckPoint::dataChecksumState, ereport, errcode_for_file_access(), errmsg, fb(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, InvalidXLogRecPtr, CheckPoint::logicalDecodingEnabled, memcpy(), MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, wal_level, CheckPoint::wal_level, WAL_LEVEL_LOGICAL, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogPageHeaderData::xlp_tli, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2192 of file xlog.c.

2193{
2194 double target;
2195
2196 /*-------
2197 * Calculate the distance at which to trigger a checkpoint, to avoid
2198 * exceeding max_wal_size_mb. This is based on two assumptions:
2199 *
2200 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2201 * WAL for two checkpoint cycles to allow us to recover from the
2202 * secondary checkpoint if the first checkpoint failed, though we
2203 * only did this on the primary anyway, not on standby. Keeping just
2204 * one checkpoint simplifies processing and reduces disk space in
2205 * many smaller databases.)
2206 * b) during checkpoint, we consume checkpoint_completion_target *
2207 * number of segments consumed between checkpoints.
2208 *-------
2209 */
2212
2213 /* round down */
2214 CheckPointSegments = (int) target;
2215
2216 if (CheckPointSegments < 1)
2218}
#define ConvertToXSegs(x, segsize)
Definition xlog.c:623
int CheckPointSegments
Definition xlog.c:163

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, fb(), max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

bool check_wal_buffers ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 5039 of file xlog.c.

5040{
5041 /*
5042 * -1 indicates a request for auto-tune.
5043 */
5044 if (*newval == -1)
5045 {
5046 /*
5047 * If we haven't yet changed the boot_val default of -1, just let it
5048 * be. We'll fix it when XLOGShmemRequest is called.
5049 */
5050 if (XLOGbuffers == -1)
5051 return true;
5052
5053 /* Otherwise, substitute the auto-tune value */
5055 }
5056
5057 /*
5058 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
5059 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
5060 * the case, we just silently treat such values as a request for the
5061 * minimum. (We could throw an error instead, but that doesn't seem very
5062 * helpful.)
5063 */
5064 if (*newval < 4)
5065 *newval = 4;
5066
5067 return true;
5068}
static int XLOGChooseNumBuffers(void)
Definition xlog.c:5023
int XLOGbuffers
Definition xlog.c:124

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 5074 of file xlog.c.

5075{
5076 char *rawstring;
5077 List *elemlist;
5078 ListCell *l;
5079 bool newwalconsistency[RM_MAX_ID + 1];
5080
5081 /* Initialize the array */
5082 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
5083
5084 /* Need a modifiable copy of string */
5086
5087 /* Parse string into list of identifiers */
5089 {
5090 /* syntax error in list */
5091 GUC_check_errdetail("List syntax is invalid.");
5094 return false;
5095 }
5096
5097 foreach(l, elemlist)
5098 {
5099 char *tok = (char *) lfirst(l);
5100 int rmid;
5101
5102 /* Check for 'all'. */
5103 if (pg_strcasecmp(tok, "all") == 0)
5104 {
5105 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
5106 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
5107 newwalconsistency[rmid] = true;
5108 }
5109 else
5110 {
5111 /* Check if the token matches any known resource manager. */
5112 bool found = false;
5113
5114 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
5115 {
5116 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
5117 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
5118 {
5119 newwalconsistency[rmid] = true;
5120 found = true;
5121 break;
5122 }
5123 }
5124 if (!found)
5125 {
5126 /*
5127 * During startup, it might be a not-yet-loaded custom
5128 * resource manager. Defer checking until
5129 * InitializeWalConsistencyChecking().
5130 */
5132 {
5134 }
5135 else
5136 {
5137 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
5140 return false;
5141 }
5142 }
5143 }
5144 }
5145
5148
5149 /* assign new value */
5150 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
5151 if (!*extra)
5152 return false;
5153 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
5154 return true;
5155}
#define LOG
Definition elog.h:32
void * guc_malloc(int elevel, size_t size)
Definition guc.c:637
#define GUC_check_errdetail
Definition guc.h:507
void list_free(List *list)
Definition list.c:1546
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
bool process_shared_preload_libraries_done
Definition miscinit.c:1789
#define lfirst(lc)
Definition pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
#define RM_MAX_ID
Definition rmgr.h:33
Definition pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition varlena.c:2867
static bool check_wal_consistency_checking_deferred
Definition xlog.c:173
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, fb(), GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, memcpy(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 2235 of file xlog.c.

2236{
2238 {
2239 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2240 return false;
2241 }
2242
2243 return true;
2244}
#define IsValidWalSegSize(size)

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckpointFlagsString()

static const char * CheckpointFlagsString ( int  flags)
static

Definition at line 7146 of file xlog.c.

7147{
7148 static char buf[128];
7149
7150 snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
7151 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
7152 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
7153 (flags & CHECKPOINT_FAST) ? " fast" : "",
7154 (flags & CHECKPOINT_FORCE) ? " force" : "",
7155 (flags & CHECKPOINT_WAIT) ? " wait" : "",
7156 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
7157 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
7158 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "");
7159
7160 return buf;
7161}
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define snprintf
Definition port.h:260
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:155
#define CHECKPOINT_CAUSE_XLOG
Definition xlog.h:160
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:152
#define CHECKPOINT_CAUSE_TIME
Definition xlog.h:161
#define CHECKPOINT_FORCE
Definition xlog.h:154
#define CHECKPOINT_WAIT
Definition xlog.h:157
#define CHECKPOINT_FAST
Definition xlog.h:153
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:151

References buf, CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, and snprintf.

Referenced by LogCheckpointEnd(), and LogCheckpointStart().

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 8049 of file xlog.c.

/*
 * Listing body of CheckPointGuts(checkPointRedo, flags).
 *
 * Of the statements, only the CheckPointBuffers(flags) call survives in
 * this extract; the surviving comments show the function has a write-out
 * phase, an fsync phase and a final two-phase-commit step.
 *
 * NOTE(review): listing lines 8051-8055, 8058-8064, 8068-8072 and 8075 are
 * elided.  The accompanying References list names the CheckPoint*()
 * routines, ProcessSyncRequests(), GetCurrentTimestamp() and the
 * CheckpointStats timestamp fields, so those presumably occupy the missing
 * lines -- their order cannot be determined from here; confirm against
 * upstream.
 */
8050{
8056
8057 /* Write out all dirty data in SLRUs and the main buffer pool */
/* NOTE(review): listing lines 8058-8064 are missing ahead of this call. */
8065 CheckPointBuffers(flags);
8066
8067 /* Perform all queued up fsyncs */
/* NOTE(review): listing lines 8068-8072 (the fsync phase itself) are missing. */
8073
8074 /* We deliberately delay 2PC checkpointing as long as possible */
/* NOTE(review): listing line 8075 is missing; presumably the CheckPointTwoPhase() call. */
8076}
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
void CheckPointBuffers(int flags)
Definition bufmgr.c:4432
void CheckPointCLOG(void)
Definition clog.c:922
void CheckPointCommitTs(void)
Definition commit_ts.c:799
void CheckPointMultiXact(void)
Definition multixact.c:2039
void CheckPointReplicationOrigin(void)
Definition origin.c:614
void CheckPointPredicate(void)
Definition predicate.c:1022
void CheckPointRelationMap(void)
Definition relmapper.c:612
void CheckPointLogicalRewriteHeap(void)
void CheckPointReplicationSlots(bool is_shutdown)
Definition slot.c:2324
void CheckPointSnapBuild(void)
Definition snapbuild.c:2030
TimestampTz ckpt_write_t
Definition xlog.h:174
TimestampTz ckpt_sync_end_t
Definition xlog.h:176
TimestampTz ckpt_sync_t
Definition xlog.h:175
void CheckPointSUBTRANS(void)
Definition subtrans.c:348
void ProcessSyncRequests(void)
Definition sync.c:287
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1828
CheckpointStatsData CheckpointStats
Definition xlog.c:216

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, fb(), GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5802 of file xlog.c.

/*
 * Listing body of CheckRequiredParameterValues(void).
 *
 * Two checks are visible: a FATAL report for WAL generated with
 * wal_level=minimal, and a series of RecoveryRequiresIntParameter() calls,
 * one per named integer setting.
 *
 * NOTE(review): the two guarding "if" conditions (listing lines 5808 and
 * 5821), the errcode line (5811) and the value arguments of every
 * RecoveryRequiresIntParameter() call are elided from this extract.  The
 * References list names ArchiveRecoveryRequested, EnableHotStandby,
 * WAL_LEVEL_MINIMAL and the matching ControlFileData fields, which
 * presumably appear on those lines -- confirm against upstream.
 */
5803{
5804 /*
5805 * For archive recovery, the WAL must be generated with at least 'replica'
5806 * wal_level.
5807 */
/* NOTE(review): the condition guarding this block (listing line 5808) is missing. */
5809 {
5810 ereport(FATAL,
5812 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5813 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5814 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5815 }
5816
5817 /*
5818 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5819 * must have at least as many backend slots as the primary.
5820 */
/* NOTE(review): the condition guarding this block (listing line 5821) is missing. */
5822 {
5823 /* We ignore autovacuum_worker_slots when we make this test. */
/* NOTE(review): each call below has lost its two value-argument lines. */
5824 RecoveryRequiresIntParameter("max_connections",
5827 RecoveryRequiresIntParameter("max_worker_processes",
5830 RecoveryRequiresIntParameter("max_wal_senders",
5833 RecoveryRequiresIntParameter("max_prepared_transactions",
5836 RecoveryRequiresIntParameter("max_locks_per_transaction",
5839 }
5840}
int errcode(int sqlerrcode)
Definition elog.c:875
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define FATAL
Definition elog.h:42
int MaxConnections
Definition globals.c:145
int max_worker_processes
Definition globals.c:146
int max_locks_per_xact
Definition lock.c:56
int max_prepared_xacts
Definition twophase.c:118
int max_wal_senders
Definition walsender.c:141
bool EnableHotStandby
Definition xlog.c:128
@ WAL_LEVEL_MINIMAL
Definition xlog.h:76
bool ArchiveRecoveryRequested
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg, FATAL, fb(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3782 of file xlog.c.

3783{
3784 int save_errno = errno;
3785 XLogSegNo lastRemovedSegNo;
3786
3788 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3790
3791 if (segno <= lastRemovedSegNo)
3792 {
3793 char filename[MAXFNAMELEN];
3794
3796 errno = save_errno;
3797 ereport(ERROR,
3799 errmsg("requested WAL segment %s has already been removed",
3800 filename)));
3801 }
3802 errno = save_errno;
3803}
#define ERROR
Definition elog.h:40
static char * filename
Definition pg_dumpall.c:133
XLogSegNo lastRemovedSegNo
Definition xlog.c:467
uint64 XLogSegNo
Definition xlogdefs.h:52

References ereport, errcode_for_file_access(), errmsg, ERROR, fb(), filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5706 of file xlog.c.

/*
 * Listing body of CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI).
 *
 * Per the surviving comments, the function performs three steps after a
 * switch to a new timeline: it executes recovery_end_command, removes
 * higher-numbered segments of the old timeline, and renames the last
 * partial old-timeline segment with a ".partial" suffix so that it can be
 * archived.
 *
 * NOTE(review): most executable lines are elided from this extract (for
 * example listing lines 5712-5713, 5716, 5726, 5755-5756, 5759, 5761-5762,
 * 5764, 5767, 5775, 5777, 5785 and 5787-5788).  The References list names
 * ExecuteRecoveryCommand(), RemoveNonParentXlogFiles(), XLogArchivingActive,
 * XLByteToPrevSeg, XLogFileName(), XLogFilePath(),
 * XLogArchiveIsReadyOrDone(), WaitForWalSummarization(),
 * XLogArchiveCleanup(), durable_rename() and XLogArchiveNotify(), which
 * presumably occupy those lines -- confirm against upstream.
 */
5708{
5709 /*
5710 * Execute the recovery_end_command, if any.
5711 */
/* NOTE(review): the call itself is truncated; only two of its arguments survive. */
5714 "recovery_end_command",
5715 true,
5717
5718 /*
5719 * We switched to a new timeline. Clean up segments on the old timeline.
5720 *
5721 * If there are any higher-numbered segments on the old timeline, remove
5722 * them. They might contain valid WAL, but they might also be
5723 * pre-allocated files containing garbage. In any case, they are not part
5724 * of the new timeline's history so we don't need them.
5725 */
/* NOTE(review): the removal call (listing line 5726) is missing here. */
5727
5728 /*
5729 * If the switch happened in the middle of a segment, what to do with the
5730 * last, partial segment on the old timeline? If we don't archive it, and
5731 * the server that created the WAL never archives it either (e.g. because
5732 * it was hit by a meteor), it will never make it to the archive. That's
5733 * OK from our point of view, because the new segment that we created with
5734 * the new TLI contains all the WAL from the old timeline up to the switch
5735 * point. But if you later try to do PITR to the "missing" WAL on the old
5736 * timeline, recovery won't find it in the archive. It's physically
5737 * present in the new file with new TLI, but recovery won't look there
5738 * when it's recovering to the older timeline. On the other hand, if we
5739 * archive the partial segment, and the original server on that timeline
5740 * is still running and archives the completed version of the same segment
5741 * later, it will fail. (We used to do that in 9.4 and below, and it
5742 * caused such problems).
5743 *
5744 * As a compromise, we rename the last segment with the .partial suffix,
5745 * and archive it. Archive recovery will never try to read .partial
5746 * segments, so they will normally go unused. But in the odd PITR case,
5747 * the administrator can copy them manually to the pg_wal directory
5748 * (removing the suffix). They can be useful in debugging, too.
5749 *
5750 * If a .done or .ready file already exists for the old timeline, however,
5751 * we had already determined that the segment is complete, so we can let
5752 * it be archived normally. (In particular, if it was restored from the
5753 * archive to begin with, it's expected to have a .done file).
5754 */
/* NOTE(review): the condition guarding this block (listing lines 5755-5756) is missing. */
5757 {
5758 char origfname[MAXFNAMELEN];
5760
5763
/* NOTE(review): the inner condition (listing line 5764) is missing. */
5765 {
5766 char origpath[MAXPGPATH];
5768 char partialpath[MAXPGPATH];
5769
5770 /*
5771 * If we're summarizing WAL, we can't rename the partial file
5772 * until the summarizer finishes with it, else it will fail.
5773 */
5774 if (summarize_wal)
5776
/* Build the ".partial" file name and path from the original ones. */
5778 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5779 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5780
5781 /*
5782 * Make sure there's no .done or .ready file for the .partial
5783 * file.
5784 */
5786
5789 }
5790 }
5791}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:783
#define MAXPGPATH
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition xlog.c:3995
#define XLogArchivingActive()
Definition xlog.h:102
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
void XLogArchiveNotify(const char *xlog)
void XLogArchiveCleanup(const char *xlog)
char * recoveryEndCommand

References durable_rename(), ERROR, ExecuteRecoveryCommand(), fb(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4216 of file xlog.c.

4217{
4218 DIR *xldir;
4219 struct dirent *xlde;
4220 char path[MAXPGPATH + sizeof(XLOGDIR)];
4221
4223
4224 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4225 {
4226 if (IsBackupHistoryFileName(xlde->d_name))
4227 {
4228 if (XLogArchiveCheckDone(xlde->d_name))
4229 {
4230 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4231 xlde->d_name);
4232 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4233 unlink(path);
4234 XLogArchiveCleanup(xlde->d_name);
4235 }
4236 }
4237 }
4238
4239 FreeDir(xldir);
4240}
#define DEBUG2
Definition elog.h:30
int FreeDir(DIR *dir)
Definition fd.c:3009
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
Definition dirent.c:26
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1266 of file xlog.c.

/*
 * Listing body of CopyXLogRecordToWAL(write_len, isLogSwitch, rdata,
 * StartPos, EndPos, tli).
 *
 * Copies the payload of every element of the rdata chain into the WAL
 * buffers, starting at StartPos.  Whenever a chunk does not fit in the free
 * space of the current page, the part that fits is copied, the next page's
 * buffer is fetched with GetXLogBuffer(), and that page's header gets
 * xlp_rem_len and the XLP_FIRST_IS_CONTRECORD flag set.  At the end the
 * function PANICs if the position reached differs from EndPos.
 *
 * write_len: total record length; used to compute xlp_rem_len.
 * tli:       passed through to GetXLogBuffer().
 *
 * NOTE(review): several lines are elided from this extract -- the
 * declarations of CurrPos and pagehdr (listing lines 1272-1273), the
 * long/short page header test and the matching CurrPos advances (1323,
 * 1325, 1330), line 1345, the xlog-switch condition and its two assertions
 * (1352, 1355, 1358), the per-page advance in the flush loop (1393), the
 * alignment statement (1399) and the errcode line (1404).  The References
 * list names XLogSegmentOffset, wal_segment_size, MAXALIGN64,
 * SizeOfXLogRecord and ERRCODE_DATA_CORRUPTED, which presumably appear on
 * those lines -- confirm against upstream.
 */
1268{
1269 char *currpos;
1270 int freespace;
1271 int written;
1274
1275 /*
1276 * Get a pointer to the right place in the right WAL buffer to start
1277 * inserting to.
1278 */
1279 CurrPos = StartPos;
1280 currpos = GetXLogBuffer(CurrPos, tli);
1281 freespace = INSERT_FREESPACE(CurrPos);
1282
1283 /*
1284 * there should be enough space for at least the first field (xl_tot_len)
1285 * on this page.
1286 */
1287 Assert(freespace >= sizeof(uint32));
1288
1289 /* Copy record data */
1290 written = 0;
1291 while (rdata != NULL)
1292 {
1293 const char *rdata_data = rdata->data;
1294 int rdata_len = rdata->len;
1295
/* Loop while the rest of this chunk is larger than the page's free space. */
1296 while (rdata_len > freespace)
1297 {
1298 /*
1299 * Write what fits on this page, and continue on the next page.
1300 */
1301 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1302 memcpy(currpos, rdata_data, freespace);
1303 rdata_data += freespace;
1304 rdata_len -= freespace;
1305 written += freespace;
1306 CurrPos += freespace;
1307
1308 /*
1309 * Get pointer to beginning of next page, and set the xlp_rem_len
1310 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1311 *
1312 * It's safe to set the contrecord flag and xlp_rem_len without a
1313 * lock on the page. All the other flags were already set when the
1314 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1315 * only backend that needs to set the contrecord flag.
1316 */
1317 currpos = GetXLogBuffer(CurrPos, tli);
1318 pagehdr = (XLogPageHeader) currpos;
1319 pagehdr->xlp_rem_len = write_len - written;
1320 pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1321
1322 /* skip over the page header */
/* NOTE(review): the long-vs-short header test and both CurrPos advances are missing here. */
1324 {
1326 currpos += SizeOfXLogLongPHD;
1327 }
1328 else
1329 {
1331 currpos += SizeOfXLogShortPHD;
1332 }
1333 freespace = INSERT_FREESPACE(CurrPos);
1334 }
1335
/* The remainder of the chunk now fits on the current page. */
1336 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1337 memcpy(currpos, rdata_data, rdata_len);
1338 currpos += rdata_len;
1339 CurrPos += rdata_len;
1340 freespace -= rdata_len;
1341 written += rdata_len;
1342
1343 rdata = rdata->next;
1344 }
1346
1347 /*
1348 * If this was an xlog-switch, it's not enough to write the switch record,
1349 * we also have to consume all the remaining space in the WAL segment. We
1350 * have already reserved that space, but we need to actually fill it.
1351 */
/* NOTE(review): the condition guarding this branch (listing line 1352) is missing. */
1353 {
1354 /* An xlog-switch record doesn't contain any data besides the header */
1356
1357 /* Assert that we did reserve the right amount of space */
1359
1360 /* Use up all the remaining space on the current page */
1361 CurrPos += freespace;
1362
1363 /*
1364 * Cause all remaining pages in the segment to be flushed, leaving the
1365 * XLog position where it should be, at the start of the next segment.
1366 * We do this one page at a time, to make sure we don't deadlock
1367 * against ourselves if wal_buffers < wal_segment_size.
1368 */
1369 while (CurrPos < EndPos)
1370 {
1371 /*
1372 * The minimal action to flush the page would be to call
1373 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1374 * AdvanceXLInsertBuffer(...). The page would be left initialized
1375 * mostly to zeros, except for the page header (always the short
1376 * variant, as this is never a segment's first page).
1377 *
1378 * The large vistas of zeros are good for compressibility, but the
1379 * headers interrupting them every XLOG_BLCKSZ (with values that
1380 * differ from page to page) are not. The effect varies with
1381 * compression tool, but bzip2 for instance compresses about an
1382 * order of magnitude worse if those headers are left in place.
1383 *
1384 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1385 * called in heavily-loaded circumstances as well as this lightly-
1386 * loaded one) with variant behavior, we just use GetXLogBuffer
1387 * (which itself calls the two methods we need) to get the pointer
1388 * and zero most of the page. Then we just zero the page header.
1389 */
1390 currpos = GetXLogBuffer(CurrPos, tli);
1391 MemSet(currpos, 0, SizeOfXLogShortPHD);
1392
/* NOTE(review): the statement advancing CurrPos to the next page (listing line 1393) is missing. */
1394 }
1395 }
1396 else
1397 {
1398 /* Align the end position, so that the next record starts aligned */
1400 }
1401
/* Final sanity check: the space consumed must equal the space reserved. */
1402 if (CurrPos != EndPos)
1403 ereport(PANIC,
1405 errmsg_internal("space reserved for WAL record does not match what was written"));
1406}
uint32_t uint32
Definition c.h:624
#define MAXALIGN64(LEN)
Definition c.h:921
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define ERRCODE_DATA_CORRUPTED
#define INSERT_FREESPACE(endptr)
Definition xlog.c:600
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition xlog.c:1673
#define XLP_FIRST_IS_CONTRECORD
#define SizeOfXLogShortPHD

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), fb(), GetXLogBuffer(), INSERT_FREESPACE, MAXALIGN64, memcpy(), MemSet, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, and XLP_FIRST_IS_CONTRECORD.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 7395 of file xlog.c.

7396{
7397 bool shutdown;
7398 CheckPoint checkPoint;
7402 uint32 freespace;
7406 int nvxids;
7407 int oldXLogAllowed = 0;
7408
7409 /*
7410 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
7411 * issued at a different time.
7412 */
7414 shutdown = true;
7415 else
7416 shutdown = false;
7417
7418 /* sanity check */
7419 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
7420 elog(ERROR, "can't create a checkpoint during recovery");
7421
7422 /*
7423 * Prepare to accumulate statistics.
7424 *
7425 * Note: because it is possible for log_checkpoints to change while a
7426 * checkpoint proceeds, we always accumulate stats, even if
7427 * log_checkpoints is currently off.
7428 */
7431
7432 /*
7433 * Let smgr prepare for checkpoint; this has to happen outside the
7434 * critical section and before we determine the REDO pointer. Note that
7435 * smgr must not do anything that'd have to be undone if we decide no
7436 * checkpoint is needed.
7437 */
7439
7440 /* Run these points outside the critical section. */
7441 INJECTION_POINT("create-checkpoint-initial", NULL);
7442 INJECTION_POINT_LOAD("create-checkpoint-run");
7443
7444 /*
7445 * Use a critical section to force system panic if we have trouble.
7446 */
7448
7449 if (shutdown)
7450 {
7455 }
7456
7457 /* Begin filling in the checkpoint WAL record */
7458 MemSet(&checkPoint, 0, sizeof(checkPoint));
7459 checkPoint.time = (pg_time_t) time(NULL);
7460
7461 /*
7462 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7463 * pointer. This allows us to begin accumulating changes to assemble our
7464 * starting snapshot of locks and transactions.
7465 */
7467 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7468 else
7470
7471 /*
7472 * Get location of last important record before acquiring insert locks (as
7473 * GetLastImportantRecPtr() also locks WAL locks).
7474 */
7476
7477 /*
7478 * If this isn't a shutdown or forced checkpoint, and if there has been no
7479 * WAL activity requiring a checkpoint, skip it. The idea here is to
7480 * avoid inserting duplicate checkpoints when the system is idle.
7481 */
7483 CHECKPOINT_FORCE)) == 0)
7484 {
7486 {
7489 (errmsg_internal("checkpoint skipped because system is idle")));
7490 return false;
7491 }
7492 }
7493
7494 /*
7495 * An end-of-recovery checkpoint is created before anyone is allowed to
7496 * write WAL. To allow us to write the checkpoint record, temporarily
7497 * enable XLogInsertAllowed.
7498 */
7499 if (flags & CHECKPOINT_END_OF_RECOVERY)
7501
7503 if (flags & CHECKPOINT_END_OF_RECOVERY)
7505 else
7506 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7507
7508 /*
7509 * We must block concurrent insertions while examining insert state.
7510 */
7512
7513 checkPoint.fullPageWrites = Insert->fullPageWrites;
7514 checkPoint.wal_level = wal_level;
7515
7516 /*
7517 * Get the current data_checksum_version value from xlogctl, valid at the
7518 * time of the checkpoint.
7519 */
7521
7522 if (shutdown)
7523 {
7525
7526 /*
7527 * Compute new REDO record ptr = location of next XLOG record.
7528 *
7529 * Since this is a shutdown checkpoint, there can't be any concurrent
7530 * WAL insertion.
7531 */
7532 freespace = INSERT_FREESPACE(curInsert);
7533 if (freespace == 0)
7534 {
7537 else
7539 }
7540 checkPoint.redo = curInsert;
7541
7542 /*
7543 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7544 * this must be done while holding all the insertion locks.
7545 *
7546 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7547 * left pointing past where it really needs to point. This is okay;
7548 * the only consequence is that XLogInsert might back up whole buffers
7549 * that it didn't really need to. We can't postpone advancing
7550 * RedoRecPtr because XLogInserts that happen while we are dumping
7551 * buffers must assume that their buffer changes are not included in
7552 * the checkpoint.
7553 */
7554 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7555 }
7556
7557 /*
7558 * Now we can release the WAL insertion locks, allowing other xacts to
7559 * proceed while we are flushing disk buffers.
7560 */
7562
7563 /*
7564 * If this is an online checkpoint, we have not yet determined the redo
7565 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7566 * record; the LSN at which it starts becomes the new redo pointer. We
7567 * don't do this for a shutdown checkpoint, because in that case no WAL
7568 * can be written between the redo point and the insertion of the
7569 * checkpoint record itself, so the checkpoint record itself serves to
7570 * mark the redo point.
7571 */
7572 if (!shutdown)
7573 {
7575
7577 redo_rec.wal_level = wal_level;
7579 redo_rec.data_checksum_version = XLogCtl->data_checksum_version;
7582
7583 /* Include WAL level in record for WAL summarizer's benefit. */
7587
7588 /*
7589 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7590 * shared memory and RedoRecPtr in backend-local memory, but we need
7591 * to copy that into the record that will be inserted when the
7592 * checkpoint is complete.
7593 */
7594 checkPoint.redo = RedoRecPtr;
7595 }
7596
7597 /* Update the info_lck-protected copy of RedoRecPtr as well */
7599 XLogCtl->RedoRecPtr = checkPoint.redo;
7601
7602 /*
7603 * If enabled, log checkpoint start. We postpone this until now so as not
7604 * to log anything if we decided to skip the checkpoint.
7605 */
7606 if (log_checkpoints)
7607 LogCheckpointStart(flags, false);
7608
7609 INJECTION_POINT_CACHED("create-checkpoint-run", NULL);
7610
7611 /* Update the process title */
7612 update_checkpoint_display(flags, false, false);
7613
7615
7616 /*
7617 * Get the other info we need for the checkpoint record.
7618 *
7619 * We don't need to save oldestClogXid in the checkpoint; it only matters
7620 * for the short period in which clog is being truncated, and if we crash
7621 * during that we'll redo the clog truncation and fix up oldestClogXid
7622 * there.
7623 */
7625 checkPoint.nextXid = TransamVariables->nextXid;
7626 checkPoint.oldestXid = TransamVariables->oldestXid;
7629
7634
7636 checkPoint.nextOid = TransamVariables->nextOid;
7637 if (!shutdown)
7638 checkPoint.nextOid += TransamVariables->oidCount;
7640
7644
7646
7648 &checkPoint.nextMulti,
7649 &checkPoint.nextMultiOffset,
7650 &checkPoint.oldestMulti,
7651 &checkPoint.oldestMultiDB);
7652
7653 /*
7654 * Having constructed the checkpoint record, ensure all shmem disk buffers
7655 * and commit-log buffers are flushed to disk.
7656 *
7657 * This I/O could fail for various reasons. If so, we will fail to
7658 * complete the checkpoint, but there is no reason to force a system
7659 * panic. Accordingly, exit critical section while doing it.
7660 */
7662
7663 /*
7664 * In some cases there are groups of actions that must all occur on one
7665 * side or the other of a checkpoint record. Before flushing the
7666 * checkpoint record we must explicitly wait for any backend currently
7667 * performing those groups of actions.
7668 *
7669 * One example is end of transaction, so we must wait for any transactions
7670 * that are currently in commit critical sections. If an xact inserted
7671 * its commit record into XLOG just before the REDO point, then a crash
7672 * restart from the REDO point would not replay that record, which means
7673 * that our flushing had better include the xact's update of pg_xact. So
7674 * we wait until it is out of its commit critical section before proceeding.
7675 * See notes in RecordTransactionCommit().
7676 *
7677 * Because we've already released the insertion locks, this test is a bit
7678 * fuzzy: it is possible that we will wait for xacts we didn't really need
7679 * to wait for. But the delay should be short and it seems better to make
7680 * checkpoint take a bit longer than to hold off insertions longer than
7681 * necessary. (In fact, the whole reason we have this issue is that xact.c
7682 * does commit record XLOG insertion and clog update as two separate steps
7683 * protected by different locks, but again that seems best on grounds of
7684 * minimizing lock contention.)
7685 *
7686 * A transaction that has not yet set delayChkptFlags when we look cannot
7687 * be at risk, since it has not inserted its commit record yet; and one
7688 * that's already cleared it is not at risk either, since it's done fixing
7689 * clog and we will correctly flush the update below. So we cannot miss
7690 * any xacts we need to wait for.
7691 */
7693 if (nvxids > 0)
7694 {
7695 do
7696 {
7697 /*
7698 * Keep absorbing fsync requests while we wait. There could even
7699 * be a deadlock if we don't, if the process that prevents the
7700 * checkpoint is trying to add a request to the queue.
7701 */
7703
7705 pg_usleep(10000L); /* wait for 10 msec */
7709 }
7710 pfree(vxids);
7711
7712 CheckPointGuts(checkPoint.redo, flags);
7713
7715 if (nvxids > 0)
7716 {
7717 do
7718 {
7720
7722 pg_usleep(10000L); /* wait for 10 msec */
7726 }
7727 pfree(vxids);
7728
7729 /*
7730 * Take a snapshot of running transactions and write this to WAL. This
7731 * allows us to reconstruct the state of running transactions during
7732 * archive recovery, if required. Skip this step if that information is disabled.
7733 *
7734 * If we are shutting down, or the startup process is completing crash
7735 * recovery, we don't need to write running xact data.
7736 */
7739
7741
7742 /*
7743 * Now insert the checkpoint record into XLOG.
7744 */
7746 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7750
7752
7753 /*
7754 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7755 * overwritten at next startup. No one should even try; this just allows
7756 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7757 * to just temporarily disable writing until the system has exited
7758 * recovery.
7759 */
7760 if (shutdown)
7761 {
7762 if (flags & CHECKPOINT_END_OF_RECOVERY)
7764 else
7765 LocalXLogInsertAllowed = 0; /* never again write WAL */
7766 }
7767
7768 /*
7769 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7770 * = end of actual checkpoint record.
7771 */
7772 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7773 ereport(PANIC,
7774 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7775
7776 /*
7777 * Remember the prior checkpoint's redo ptr for
7778 * UpdateCheckPointDistanceEstimate()
7779 */
7781
7782 /*
7783 * Update the control file.
7784 */
7786 if (shutdown)
7789 ControlFile->checkPointCopy = checkPoint;
7790 /* crash recovery should always recover to the end of WAL */
7793
7794 /* make sure we start with the checksum version as of the checkpoint */
7796
7797 /*
7798 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7799 * unused on non-shutdown checkpoints, but seems useful to store it always
7800 * for debugging purposes.
7801 */
7803
7806
7807 /*
7808 * We are now done with critical updates; no need for system panic if we
7809 * have trouble while fooling with old log segments.
7810 */
7812
7813 /*
7814 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7815 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7816 * where (a) we're not inside of a critical section and (b) we can be
7817 * certain that the relevant record has been flushed to disk, which must
7818 * happen before it can be summarized.
7819 *
7820 * If this is a shutdown checkpoint, then this happens reasonably
7821 * promptly: we've only just inserted and flushed the
7822 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7823 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7824 * record was written before we began flushing data to disk, and that
7825 * could be many minutes ago at this point. However, we don't XLogFlush()
7826 * after inserting that record, so we're not guaranteed that it's on disk
7827 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7828 * record.
7829 */
7831
7832 /*
7833 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7834 */
7836
7837 /*
7838 * Update the average distance between checkpoints if the prior checkpoint
7839 * exists.
7840 */
7843
7844 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7845
7846 /*
7847 * Delete old log files (those no longer needed for the last checkpoint)
7848 * to prevent the disk holding the xlog from growing full.
7849 */
7855 {
7856 /*
7857 * Some slots have been invalidated; recalculate the old-segment
7858 * horizon, starting again from RedoRecPtr.
7859 */
7862 }
7863 _logSegNo--;
7865 checkPoint.ThisTimeLineID);
7866
7867 /*
7868 * Make more log segments if needed. (Do this after recycling old log
7869 * segments, since that may supply some of the needed files.)
7870 */
7871 if (!shutdown)
7873
7874 /*
7875 * Truncate pg_subtrans if possible. We can throw away all data before
7876 * the oldest XMIN of any running transaction. No future transaction will
7877 * attempt to reference any pg_subtrans entry older than that (see Asserts
7878 * in subtrans.c). During recovery, though, we mustn't do this because
7879 * StartupSUBTRANS hasn't been called yet.
7880 */
7881 if (!RecoveryInProgress())
7883
7884 /* Real work is done; log and update stats. */
7885 LogCheckpointEnd(false, flags);
7886
7887 /* Reset the process title */
7888 update_checkpoint_display(flags, false, true);
7889
7891 NBuffers,
7895
7896 return true;
7897}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:476
void AbsorbSyncRequests(void)
static void Insert(File file)
Definition fd.c:1301
int NBuffers
Definition globals.c:144
#define INJECTION_POINT(name, arg)
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:202
@ LW_SHARED
Definition lwlock.h:105
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:2017
#define XLOG_CHECKPOINT_REDO
Definition pg_control.h:86
@ DB_SHUTDOWNING
Definition pg_control.h:102
@ DB_SHUTDOWNED
Definition pg_control.h:100
#define XLOG_CHECKPOINT_ONLINE
Definition pg_control.h:73
#define InvalidOid
#define DELAY_CHKPT_START
Definition proc.h:139
#define DELAY_CHKPT_COMPLETE
Definition proc.h:140
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition procarray.c:1973
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition procarray.c:3062
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition procarray.c:2845
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition procarray.c:3017
void pg_usleep(long microsec)
Definition signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition slot.c:2220
@ RS_INVAL_WAL_REMOVED
Definition slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition slot.h:68
XLogRecPtr LogStandbySnapshot(Oid dbid)
Definition standby.c:1303
TimestampTz ckpt_start_t
Definition xlog.h:173
int ckpt_segs_removed
Definition xlog.h:183
int ckpt_bufs_written
Definition xlog.h:179
int ckpt_segs_recycled
Definition xlog.h:184
XLogRecPtr minRecoveryPoint
Definition pg_control.h:176
uint32 data_checksum_version
Definition pg_control.h:232
XLogRecPtr unloggedLSN
Definition pg_control.h:145
TimeLineID minRecoveryPointTLI
Definition pg_control.h:177
TransactionId oldestCommitTsXid
Definition transam.h:232
TransactionId newestCommitTsXid
Definition transam.h:233
TransactionId oldestXid
Definition transam.h:222
uint32 data_checksum_version
Definition xlog.c:560
TimeLineID InsertTimeLineID
Definition xlog.c:515
XLogRecPtr RedoRecPtr
Definition xlog.c:463
XLogCtlInsert Insert
Definition xlog.c:459
TimeLineID PrevTimeLineID
Definition xlog.c:516
pg_atomic_uint64 unloggedLSN
Definition xlog.c:470
XLogRecPtr RedoRecPtr
Definition xlog.c:437
void TruncateSUBTRANS(TransactionId oldestXact)
Definition subtrans.c:404
void SyncPreCheckpoint(void)
Definition sync.c:178
void SyncPostCheckpoint(void)
Definition sync.c:203
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition xlog.c:260
bool RecoveryInProgress(void)
Definition xlog.c:6830
static void WALInsertLockRelease(void)
Definition xlog.c:1486
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition xlog.c:1899
static void WALInsertLockAcquireExclusive(void)
Definition xlog.c:1457
static void UpdateControlFile(void)
Definition xlog.c:4638
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition xlog.c:3920
static void LogCheckpointStart(int flags, bool restartpoint)
Definition xlog.c:7167
static XLogRecPtr RedoRecPtr
Definition xlog.c:280
static void LogCheckpointEnd(bool restartpoint, int flags)
Definition xlog.c:7185
static void WALInsertLockAcquire(void)
Definition xlog.c:1412
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition xlog.c:3745
bool log_checkpoints
Definition xlog.c:136
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition xlog.c:8498
static int LocalSetXLogInsertAllowed(void)
Definition xlog.c:6918
XLogRecPtr GetLastImportantRecPtr(void)
Definition xlog.c:7052
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition xlog.c:7292
static int LocalXLogInsertAllowed
Definition xlog.c:243
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition xlog.c:8049
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition xlog.c:7330
#define XLogStandbyInfoActive()
Definition xlog.h:126
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogBeginInsert(void)
Definition xloginsert.c:153

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, CheckPoint::dataChecksumState, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg, errmsg_internal(), ERROR, fb(), CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, INJECTION_POINT_CACHED, INJECTION_POINT_LOAD, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsLogicalDecodingEnabled(), KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), CheckPoint::logicalDecodingEnabled, LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, 
TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRecPtrIsValid, XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7908 of file xlog.c.

7909{
7912
7913 /* sanity check */
7914 if (!RecoveryInProgress())
7915 elog(ERROR, "can only be used to end recovery");
7916
7917 xlrec.end_time = GetCurrentTimestamp();
7918 xlrec.wal_level = wal_level;
7919
7921 xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7922 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7924
7926
7930
7932
7933 /*
7934 * Update the control file so that crash recovery can follow the timeline
7935 * changes to this point.
7936 */
7939 ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7940
7941 /* start with the latest checksum version (as of the end of recovery) */
7945
7948
7950}
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:81

References ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7979 of file xlog.c.

7981{
7986
7987 /* sanity checks */
7988 if (!RecoveryInProgress())
7989 elog(ERROR, "can only be used at end of recovery");
7990 if (pagePtr % XLOG_BLCKSZ != 0)
7991 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7993
7994 /* The current WAL insert position should be right after the page header */
7995 startPos = pagePtr;
7998 else
8001 if (recptr != startPos)
8002 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
8004
8006
8007 /*
8008 * Initialize the XLOG page header (by GetXLogBuffer), and set the
8009 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
8010 *
8011 * No other backend is allowed to write WAL yet, so acquiring the WAL
8012 * insertion lock is just pro forma.
8013 */
8018
8019 /*
8020 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
8021 * page. We know it becomes the first record, because no other backend is
8022 * allowed to write WAL yet.
8023 */
8025 xlrec.overwritten_lsn = aborted_lsn;
8026 xlrec.overwrite_time = GetCurrentTimestamp();
8029
8030 /* check that the record was inserted to the right place */
8031 if (ProcLastRecPtr != startPos)
8032 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
8034
8036
8038
8039 return recptr;
8040}
#define XLOG_OVERWRITE_CONTRECORD
Definition pg_control.h:85
XLogRecPtr GetXLogInsertRecPtr(void)
Definition xlog.c:10108
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD

References elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLP_FIRST_IS_OVERWRITE_CONTRECORD.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 8129 of file xlog.c.

8130{
8131 XLogRecPtr lastCheckPointRecPtr;
8132 XLogRecPtr lastCheckPointEndPtr;
8133 CheckPoint lastCheckPoint;
8137 TimeLineID replayTLI;
8138 XLogRecPtr endptr;
8141
8142 /* Concurrent checkpoint/restartpoint cannot happen */
8144
8145 /* Get a local copy of the last safe checkpoint record. */
8147 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
8148 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
8149 lastCheckPoint = XLogCtl->lastCheckPoint;
8151
8152 /*
8153 * Check that we're still in recovery mode. It's ok if we exit recovery
8154 * mode after this check; the restart point is valid anyway.
8155 */
8156 if (!RecoveryInProgress())
8157 {
8159 (errmsg_internal("skipping restartpoint, recovery has already ended")));
8160 return false;
8161 }
8162
8163 /*
8164 * If the last checkpoint record we've replayed is already our last
8165 * restartpoint, we can't perform a new restart point. We still update
8166 * minRecoveryPoint in that case, so that if this is a shutdown restart
8167 * point, we won't start up earlier than before. That's not strictly
8168 * necessary, but when hot standby is enabled, it would be rather weird if
8169 * the database opened up for read-only connections at a point-in-time
8170 * before the last shutdown. Such time travel is still possible in case of
8171 * immediate shutdown, though.
8172 *
8173 * We don't explicitly advance minRecoveryPoint when we do create a
8174 * restartpoint. It's assumed that flushing the buffers will do that as a
8175 * side-effect.
8176 */
8177 if (!XLogRecPtrIsValid(lastCheckPointRecPtr) ||
8178 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
8179 {
8181 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
8182 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
8183
8185 if (flags & CHECKPOINT_IS_SHUTDOWN)
8186 {
8191 }
8192 return false;
8193 }
8194
8195 /*
8196 * Update the shared RedoRecPtr so that the startup process can calculate
8197 * the number of segments replayed since last restartpoint, and request a
8198 * restartpoint if it exceeds CheckPointSegments.
8199 *
8200 * Like in CreateCheckPoint(), hold off insertions to update it, although
8201 * during recovery this is just pro forma, because no WAL insertions are
8202 * happening.
8203 */
8205 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
8207
8208 /* Also update the info_lck-protected copy */
8210 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
8212
8213 /*
8214 * Prepare to accumulate statistics.
8215 *
8216 * Note: because it is possible for log_checkpoints to change while a
8217 * checkpoint proceeds, we always accumulate stats, even if
8218 * log_checkpoints is currently off.
8219 */
8222
8223 if (log_checkpoints)
8224 LogCheckpointStart(flags, true);
8225
8226 /* Update the process title */
8227 update_checkpoint_display(flags, true, false);
8228
8229 CheckPointGuts(lastCheckPoint.redo, flags);
8230
8231 /*
8232 * This location needs to be after CheckPointGuts() to ensure that some
8233 * work has already happened during this checkpoint.
8234 */
8235 INJECTION_POINT("create-restart-point", NULL);
8236
8237 /*
8238 * Remember the prior checkpoint's redo ptr for
8239 * UpdateCheckPointDistanceEstimate()
8240 */
8242
8243 /*
8244 * Update pg_control, using current time. Check that it still shows an
8245 * older checkpoint, else do nothing; this is a quick hack to make sure
8246 * nothing really bad happens if somehow we get here after the
8247 * end-of-recovery checkpoint.
8248 */
8250 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
8251 {
8252 /*
8253 * Update the checkpoint information. We do this even if the cluster
8254 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
8255 * segments recycled below.
8256 */
8257 ControlFile->checkPoint = lastCheckPointRecPtr;
8258 ControlFile->checkPointCopy = lastCheckPoint;
8259
8260 /*
8261 * Ensure minRecoveryPoint is past the checkpoint record and update it
8262 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
8263 * this will have happened already while writing out dirty buffers,
8264 * but not necessarily - e.g. because no buffers were dirtied. We do
8265 * this because a backup performed in recovery uses minRecoveryPoint
8266 * to determine which WAL files must be included in the backup, and
8267 * the file (or files) containing the checkpoint record must be
8268 * included, at a minimum. Note that for an ordinary restart of
8269 * recovery there's no value in having the minimum recovery point any
8270 * earlier than this anyway, because redo will begin just after the
8271 * checkpoint record.
8272 */
8274 {
8275 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
8276 {
8277 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
8279
8280 /* update local copy */
8283 }
8284 if (flags & CHECKPOINT_IS_SHUTDOWN)
8286 }
8287
8288 /* we shall start with the latest checksum version */
8290
8292 }
8294
8295 /*
8296 * Update the average distance between checkpoints/restartpoints if the
8297 * prior checkpoint exists.
8298 */
8301
8302 /*
8303 * Delete old log files (those no longer needed for the last restartpoint)
8304 * to prevent the disk holding the xlog from growing full.
8305 */
8307
8308 /*
8309 * Retreat _logSegNo using the current end of xlog replayed or received,
8310 * whichever is later.
8311 */
8313 replayPtr = GetXLogReplayRecPtr(&replayTLI);
8314 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
8315 KeepLogSeg(endptr, &_logSegNo);
8316
8317 INJECTION_POINT("restartpoint-before-slot-invalidation", NULL);
8318
8322 {
8323 /*
8324 * Some slots have been invalidated; recalculate the old-segment
8325 * horizon, starting again from RedoRecPtr.
8326 */
8328 KeepLogSeg(endptr, &_logSegNo);
8329 }
8330 _logSegNo--;
8331
8332 /*
8333 * Try to recycle segments on a useful timeline. If we've been promoted
8334 * since the beginning of this restartpoint, use the new timeline chosen
8335 * at end of recovery. If we're still in recovery, use the timeline we're
8336 * currently replaying.
8337 *
8338 * There is no guarantee that the WAL segments will be useful on the
8339 * current timeline; if recovery proceeds to a new timeline right after
8340 * this, the pre-allocated WAL segments on this timeline will not be used,
8341 * and will go wasted until recycled on the next restartpoint. We'll live
8342 * with that.
8343 */
8344 if (!RecoveryInProgress())
8345 replayTLI = XLogCtl->InsertTimeLineID;
8346
8347 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
8348
8349 /*
8350 * Make more log segments if needed. (Do this after recycling old log
8351 * segments, since that may supply some of the needed files.)
8352 */
8353 PreallocXlogFiles(endptr, replayTLI);
8354
8355 /*
8356 * Truncate pg_subtrans if possible. We can throw away all data before
8357 * the oldest XMIN of any running transaction. No future transaction will
8358 * attempt to reference any pg_subtrans entry older than that (see Asserts
8359 * in subtrans.c). When hot standby is disabled, though, we mustn't do
8360 * this because StartupSUBTRANS hasn't been called yet.
8361 */
8362 if (EnableHotStandby)
8364
8365 /* Real work is done; log and update stats. */
8366 LogCheckpointEnd(true, flags);
8367
8368 /* Reset the process title */
8369 update_checkpoint_display(flags, true, true);
8370
8373 errmsg("recovery restart point at %X/%08X",
8374 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
8375 xtime ? errdetail("Last completed transaction was at log time %s.",
8377
8378 /*
8379 * Finally, execute archive_cleanup_command, if any.
8380 */
8383 "archive_cleanup_command",
8384 false,
8386
8387 return true;
8388}
const char * timestamptz_to_str(TimestampTz t)
Definition timestamp.c:1856
int64 TimestampTz
Definition timestamp.h:39
bool IsUnderPostmaster
Definition globals.c:122
@ B_CHECKPOINTER
Definition miscadmin.h:375
BackendType MyBackendType
Definition miscinit.c:65
@ DB_IN_ARCHIVE_RECOVERY
Definition pg_control.h:104
@ DB_SHUTDOWNED_IN_RECOVERY
Definition pg_control.h:101
CheckPoint lastCheckPoint
Definition xlog.c:551
XLogRecPtr lastCheckPointRecPtr
Definition xlog.c:549
XLogRecPtr lastCheckPointEndPtr
Definition xlog.c:550
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition xlog.c:2721
static XLogRecPtr LocalMinRecoveryPoint
Definition xlog.c:666
static TimeLineID LocalMinRecoveryPointTLI
Definition xlog.c:667
uint32 TimeLineID
Definition xlogdefs.h:63
char * archiveCleanupCommand
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, ControlFileData::data_checksum_version, CheckPoint::dataChecksumState, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg, errmsg_internal(), ExecuteRecoveryCommand(), fb(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire(), SpinLockRelease(), ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsValid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsInProgressOn()

bool DataChecksumsInProgressOn ( void  )

Definition at line 4686 of file xlog.c.

4687{
4689}
@ PG_DATA_CHECKSUM_INPROGRESS_ON
Definition checksum.h:31
static ChecksumStateType LocalDataChecksumState
Definition xlog.c:677

References LocalDataChecksumState, and PG_DATA_CHECKSUM_INPROGRESS_ON.

Referenced by createdb(), and launcher_exit().

◆ DataChecksumsNeedVerify()

bool DataChecksumsNeedVerify ( void  )

◆ DataChecksumsNeedWrite()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 10067 of file xlog.c.

10068{
10070
10071 /* If called during backup start, there shouldn't be one already running */
10073
10075 {
10079
10082
10085 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
10086 }
10087}
Datum arg
Definition elog.c:1323
#define WARNING
Definition elog.h:37
static bool DatumGetBool(Datum X)
Definition postgres.h:100
int runningBackups
Definition xlog.c:445
static SessionBackupState sessionBackupState
Definition xlog.c:398
@ SESSION_BACKUP_NONE
Definition xlog.h:317

References arg, Assert, DatumGetBool(), ereport, errmsg, fb(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 9471 of file xlog.c.

9473{
9475
9476 Assert(state != NULL);
9478
9479 /*
9480 * During recovery, we don't need to check WAL level, because if WAL
9481 * level is not sufficient, it's impossible to get here during recovery.
9482 */
9484 ereport(ERROR,
9486 errmsg("WAL level not sufficient for making an online backup"),
9487 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9488
9490 ereport(ERROR,
9492 errmsg("backup label too long (max %d bytes)",
9493 MAXPGPATH)));
9494
9495 strlcpy(state->name, backupidstr, sizeof(state->name));
9496
9497 /*
9498 * Mark backup active in shared memory. We must do full-page WAL writes
9499 * during an on-line backup even if not doing so at other times, because
9500 * it's quite possible for the backup dump to obtain a "torn" (partially
9501 * written) copy of a database page if it reads the page concurrently with
9502 * our write to the same page. This can be fixed as long as the first
9503 * write to the page in the WAL sequence is a full-page write. Hence, we
9504 * increment runningBackups then force a CHECKPOINT, to ensure there are
9505 * no dirty pages in shared memory that might get dumped while the backup
9506 * is in progress without having a corresponding WAL record. (Once the
9507 * backup is complete, we need not force full-page writes anymore, since
9508 * we expect that any pages not modified during the backup interval must
9509 * have been correctly captured by the backup.)
9510 *
9511 * Note that forcing full-page writes has no effect during an online
9512 * backup from the standby.
9513 *
9514 * We must hold all the insertion locks to change the value of
9515 * runningBackups, to ensure adequate interlocking against
9516 * XLogInsertRecord().
9517 */
9521
9522 /*
9523 * Ensure we decrement runningBackups if we fail below. NB -- for this to
9524 * work correctly, it is critical that sessionBackupState is only updated
9525 * after this block is over.
9526 */
9528 {
9529 bool gotUniqueStartpoint = false;
9530 DIR *tblspcdir;
9531 struct dirent *de;
9533 int datadirpathlen;
9534
9535 /*
9536 * Force an XLOG file switch before the checkpoint, to ensure that the
9537 * WAL segment the checkpoint is written to doesn't contain pages with
9538 * old timeline IDs. That would otherwise happen if you called
9539 * pg_backup_start() right after restoring from a PITR archive: the
9540 * first WAL segment containing the startup checkpoint has pages in
9541 * the beginning with the old timeline ID. That can cause trouble at
9542 * recovery: we won't have a history file covering the old timeline if
9543 * pg_wal directory was not included in the base backup and the WAL
9544 * archive was cleared too before starting the backup.
9545 *
9546 * During recovery, we skip forcing XLOG file switch, which means that
9547 * the backup taken during recovery is not available for the special
9548 * recovery case described above.
9549 */
9551 RequestXLogSwitch(false);
9552
9553 do
9554 {
9555 bool checkpointfpw;
9556
9557 /*
9558 * Force a CHECKPOINT. Aside from being necessary to prevent torn
9559 * page problems, this guarantees that two successive backup runs
9560 * will have different checkpoint positions and hence different
9561 * history file names, even if nothing happened in between.
9562 *
9563 * During recovery, establish a restartpoint if possible. We use
9564 * the last restartpoint as the backup starting checkpoint. This
9565 * means that two successive backup runs can have same checkpoint
9566 * positions.
9567 *
9568 * Since the fact that we are executing do_pg_backup_start()
9569 * during recovery means that checkpointer is running, we can use
9570 * RequestCheckpoint() to establish a restartpoint.
9571 *
9572 * We use CHECKPOINT_FAST only if requested by user (via passing
9573 * fast = true). Otherwise this can take a while.
9574 */
9576 (fast ? CHECKPOINT_FAST : 0));
9577
9578 /*
9579 * Now we need to fetch the checkpoint record location, and also
9580 * its REDO pointer. The oldest point in WAL that would be needed
9581 * to restore starting from the checkpoint is precisely the REDO
9582 * pointer.
9583 */
9585 state->checkpointloc = ControlFile->checkPoint;
9586 state->startpoint = ControlFile->checkPointCopy.redo;
9590
9592 {
9594
9595 /*
9596 * Check to see if all WAL replayed during online backup
9597 * (i.e., since last restartpoint used as backup starting
9598 * checkpoint) contain full-page writes.
9599 */
9603
9604 if (!checkpointfpw || state->startpoint <= recptr)
9605 ereport(ERROR,
9607 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9608 "since last restartpoint"),
9609 errhint("This means that the backup being taken on the standby "
9610 "is corrupt and should not be used. "
9611 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9612 "and then try an online backup again.")));
9613
9614 /*
9615 * During recovery, since we don't use the end-of-backup WAL
9616 * record and don't write the backup history file, the
9617 * starting WAL location doesn't need to be unique. This means
9618 * that two base backups started at the same time might use
9619 * the same checkpoint as starting locations.
9620 */
9621 gotUniqueStartpoint = true;
9622 }
9623
9624 /*
9625 * If two base backups are started at the same time (in WAL sender
9626 * processes), we need to make sure that they use different
9627 * checkpoints as starting locations, because we use the starting
9628 * WAL location as a unique identifier for the base backup in the
9629 * end-of-backup WAL record and when we write the backup history
9630 * file. Perhaps it would be better to generate a separate unique ID
9631 * for each backup instead of forcing another checkpoint, but
9632 * taking a checkpoint right after another is not that expensive
9633 * either because only few buffers have been dirtied yet.
9634 */
9636 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9637 {
9638 XLogCtl->Insert.lastBackupStart = state->startpoint;
9639 gotUniqueStartpoint = true;
9640 }
9642 } while (!gotUniqueStartpoint);
9643
9644 /*
9645 * Construct tablespace_map file.
9646 */
9648
9649 /* Collect information about all tablespaces */
9651 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9652 {
9653 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9654 char linkpath[MAXPGPATH];
9655 char *relpath = NULL;
9656 char *s;
9658 char *badp;
9659 Oid tsoid;
9660
9661 /*
9662 * Try to parse the directory name as an unsigned integer.
9663 *
9664 * Tablespace directories should be positive integers that can be
9665 * represented in 32 bits, with no leading zeroes or trailing
9666 * garbage. If we come across a name that doesn't meet those
9667 * criteria, skip it.
9668 */
9669 if (de->d_name[0] < '1' || de->d_name[0] > '9')
9670 continue;
9671 errno = 0;
9672 tsoid = strtoul(de->d_name, &badp, 10);
9673 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9674 continue;
9675
9676 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9677
9678 de_type = get_dirent_type(fullpath, de, false, ERROR);
9679
9680 if (de_type == PGFILETYPE_LNK)
9681 {
9683 int rllen;
9684
9685 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9686 if (rllen < 0)
9687 {
9689 (errmsg("could not read symbolic link \"%s\": %m",
9690 fullpath)));
9691 continue;
9692 }
9693 else if (rllen >= sizeof(linkpath))
9694 {
9696 (errmsg("symbolic link \"%s\" target is too long",
9697 fullpath)));
9698 continue;
9699 }
9700 linkpath[rllen] = '\0';
9701
9702 /*
9703 * Relpath holds the relative path of the tablespace directory
9704 * when it's located within PGDATA, or NULL if it's located
9705 * elsewhere.
9706 */
9707 if (rllen > datadirpathlen &&
9711
9712 /*
9713 * Add a backslash-escaped version of the link path to the
9714 * tablespace map file.
9715 */
9717 for (s = linkpath; *s; s++)
9718 {
9719 if (*s == '\n' || *s == '\r' || *s == '\\')
9722 }
9724 de->d_name, escapedpath.data);
9725 pfree(escapedpath.data);
9726 }
9727 else if (de_type == PGFILETYPE_DIR)
9728 {
9729 /*
9730 * It's possible to use allow_in_place_tablespaces to create
9731 * directories directly under pg_tblspc, for testing purposes
9732 * only.
9733 *
9734 * In this case, we store a relative path rather than an
9735 * absolute path into the tablespaceinfo.
9736 */
9737 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9738 PG_TBLSPC_DIR, de->d_name);
9740 }
9741 else
9742 {
9743 /* Skip any other file type that appears here. */
9744 continue;
9745 }
9746
9748 ti->oid = tsoid;
9749 ti->path = pstrdup(linkpath);
9750 ti->rpath = relpath;
9751 ti->size = -1;
9752
9753 if (tablespaces)
9754 *tablespaces = lappend(*tablespaces, ti);
9755 }
9757
9758 state->starttime = (pg_time_t) time(NULL);
9759 }
9761
9762 state->started_in_recovery = backup_started_in_recovery;
9763
9764 /*
9765 * Mark that the start phase has correctly finished for the backup.
9766 */
9768}
static bool backup_started_in_recovery
Definition basebackup.c:129
void RequestCheckpoint(int flags)
#define palloc_object(type)
Definition fe_memutils.h:74
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition file_utils.c:547
PGFileType
Definition file_utils.h:19
@ PGFILETYPE_LNK
Definition file_utils.h:24
@ PGFILETYPE_DIR
Definition file_utils.h:23
char * DataDir
Definition globals.c:73
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
List * lappend(List *list, void *datum)
Definition list.c:339
#define IS_DIR_SEP(ch)
Definition port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
unsigned int Oid
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define PG_TBLSPC_DIR
Definition relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition xlog.c:557
XLogRecPtr lastBackupStart
Definition xlog.c:446
#define readlink(path, buf, size)
Definition win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition xlog.c:8607
void do_pg_abort_backup(int code, Datum arg)
Definition xlog.c:10067
@ SESSION_BACKUP_RUNNING
Definition xlog.h:318
#define XLogIsNeeded()
Definition xlog.h:112

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, palloc_object, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, snprintf, SpinLockAcquire(), SpinLockRelease(), strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9793 of file xlog.c.

9794{
9795 bool backup_stopped_in_recovery = false;
9796 char histfilepath[MAXPGPATH];
9800 FILE *fp;
9802 int waits = 0;
9803 bool reported_waiting = false;
9804
9805 Assert(state != NULL);
9806
9808
9809 /*
9810 * During recovery, we don't need to check WAL level, because if WAL
9811 * level is not sufficient, it's impossible to get here during recovery.
9812 */
9814 ereport(ERROR,
9816 errmsg("WAL level not sufficient for making an online backup"),
9817 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9818
9819 /*
9820 * OK to update backup counter and session-level lock.
9821 *
9822 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9823 * otherwise they can be updated inconsistently, which might cause
9824 * do_pg_abort_backup() to fail.
9825 */
9827
9828 /*
9829 * It is expected that each do_pg_backup_start() call is matched by
9830 * exactly one do_pg_backup_stop() call.
9831 */
9834
9835 /*
9836 * Clean up session-level lock.
9837 *
9838 * You might think that WALInsertLockRelease() can be called before
9839 * cleaning up session-level lock because session-level lock doesn't need
9840 * to be protected with WAL insertion lock. But since
9841 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9842 * cleaned up before it.
9843 */
9845
9847
9848 /*
9849 * If we are taking an online backup from the standby, we confirm that the
9850 * standby has not been promoted during the backup.
9851 */
9852 if (state->started_in_recovery && !backup_stopped_in_recovery)
9853 ereport(ERROR,
9855 errmsg("the standby was promoted during online backup"),
9856 errhint("This means that the backup being taken is corrupt "
9857 "and should not be used. "
9858 "Try taking another online backup.")));
9859
9860 /*
9861 * During recovery, we don't write an end-of-backup record. We assume that
9862 * pg_control was backed up last and its minimum recovery point can be
9863 * available as the backup end location. Since we don't have an
9864 * end-of-backup record, we use the pg_control value to check whether
9865 * we've reached the end of backup when starting recovery from this
9866 * backup. We have no way of checking if pg_control wasn't backed up last
9867 * however.
9868 *
9869 * We don't force a switch to new WAL file but it is still possible to
9870 * wait for all the required files to be archived if waitforarchive is
9871 * true. This is okay if we use the backup to start a standby and fetch
9872 * the missing WAL using streaming replication. But in the case of an
9873 * archive recovery, a user should set waitforarchive to true and wait for
9874 * them to be archived to ensure that all the required files are
9875 * available.
9876 *
9877 * We return the current minimum recovery point as the backup end
9878 * location. Note that it can be greater than the exact backup end
9879 * location if the minimum recovery point is updated after the backup of
9880 * pg_control. This is harmless for current uses.
9881 *
9882 * XXX currently a backup history file is for informational and debug
9883 * purposes only. It's not essential for an online backup. Furthermore,
9884 * even if it's created, it will not be archived during recovery because
9885 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9886 * backup history file during recovery.
9887 */
9889 {
9891
9892 /*
9893 * Check to see if all WAL replayed during online backup contain
9894 * full-page writes.
9895 */
9899
9900 if (state->startpoint <= recptr)
9901 ereport(ERROR,
9903 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9904 "during online backup"),
9905 errhint("This means that the backup being taken on the standby "
9906 "is corrupt and should not be used. "
9907 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9908 "and then try an online backup again.")));
9909
9910
9912 state->stoppoint = ControlFile->minRecoveryPoint;
9915 }
9916 else
9917 {
9918 char *history_file;
9919
9920 /*
9921 * Write the backup-end xlog record
9922 */
9924 XLogRegisterData(&state->startpoint,
9925 sizeof(state->startpoint));
9927
9928 /*
9929 * Given that we're not in recovery, InsertTimeLineID is set and can't
9930 * change, so we can read it without a lock.
9931 */
9932 state->stoptli = XLogCtl->InsertTimeLineID;
9933
9934 /*
9935 * Force a switch to a new xlog segment file, so that the backup is
9936 * valid as soon as archiver moves out the current segment file.
9937 */
9938 RequestXLogSwitch(false);
9939
9940 state->stoptime = (pg_time_t) time(NULL);
9941
9942 /*
9943 * Write the backup history file
9944 */
9947 state->startpoint, wal_segment_size);
9948 fp = AllocateFile(histfilepath, "w");
9949 if (!fp)
9950 ereport(ERROR,
9952 errmsg("could not create file \"%s\": %m",
9953 histfilepath)));
9954
9955 /* Build and save the contents of the backup history file */
9957 fprintf(fp, "%s", history_file);
9959
9960 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9961 ereport(ERROR,
9963 errmsg("could not write file \"%s\": %m",
9964 histfilepath)));
9965
9966 /*
9967 * Clean out any no-longer-needed history files. As a side effect,
9968 * this will post a .ready file for the newly created history file,
9969 * notifying the archiver that history file may be archived
9970 * immediately.
9971 */
9973 }
9974
9975 /*
9976 * If archiving is enabled, wait for all the required WAL files to be
9977 * archived before returning. If archiving isn't enabled, the required WAL
9978 * needs to be transported via streaming replication (hopefully with
9979 * wal_keep_size set high enough), or some more exotic mechanism like
9980 * polling and copying files from pg_wal with a script. We have no knowledge
9981 * of those mechanisms, so it's up to the user to ensure that they get all
9982 * the required WAL.
9983 *
9984 * We wait until both the last WAL file filled during backup and the
9985 * history file have been archived, and assume that the alphabetic sorting
9986 * property of the WAL files ensures any earlier WAL files are safely
9987 * archived as well.
9988 *
9989 * We wait forever, since archive_command is supposed to work and we
9990 * assume the admin wanted his backup to work completely. If you don't
9991 * wish to wait, then either waitforarchive should be passed in as false,
9992 * or you can set statement_timeout. Also, some notices are issued to
9993 * clue in anyone who might be doing this interactively.
9994 */
9995
9996 if (waitforarchive &&
9999 {
10003
10006 state->startpoint, wal_segment_size);
10007
10009 waits = 0;
10010
10013 {
10015
10016 if (!reported_waiting && waits > 5)
10017 {
10019 (errmsg("base backup done, waiting for required WAL segments to be archived")));
10020 reported_waiting = true;
10021 }
10022
10025 1000L,
10028
10029 if (++waits >= seconds_before_warning)
10030 {
10031 seconds_before_warning *= 2; /* This wraps in >10 years... */
10033 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
10034 waits),
10035 errhint("Check that your \"archive_command\" is executing properly. "
10036 "You can safely cancel this backup, "
10037 "but the database backup will not be usable without all the WAL segments.")));
10038 }
10039 }
10040
10042 (errmsg("all required WAL segments have been archived")));
10043 }
10044 else if (waitforarchive)
10046 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
10047}
#define fprintf(file, fmt, msg)
Definition cubescan.l:21
#define NOTICE
Definition elog.h:36
int FreeFile(FILE *file)
Definition fd.c:2827
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2628
struct Latch * MyLatch
Definition globals.c:65
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define XLOG_BACKUP_END
Definition pg_control.h:77
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static void CleanupBackupHistory(void)
Definition xlog.c:4216
#define XLogArchivingAlways()
Definition xlog.h:105
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg, ERROR, fb(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire(), SpinLockRelease(), WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9774 of file xlog.c.

9775{
9776 return sessionBackupState;
9777}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 9284 of file xlog.c.

9285{
9286 int o_direct_flag = 0;
9287
9288 /*
9289 * Use O_DIRECT if requested, except in walreceiver process. The WAL
9290 * written by walreceiver is normally read by the startup process soon
9291 * after it's written. Also, walreceiver performs unaligned writes, which
9292 * don't work with O_DIRECT, so it is required for correctness too.
9293 */
9296
9297 /* If fsync is disabled, never open in sync mode */
9298 if (!enableFsync)
9299 return o_direct_flag;
9300
9301 switch (method)
9302 {
9303 /*
9304 * enum values for all sync options are defined even if they are
9305 * not supported on the current platform. But if not, they are
9306 * not included in the enum option array, and therefore will never
9307 * be seen here.
9308 */
9312 return o_direct_flag;
9313#ifdef O_SYNC
9315 return O_SYNC | o_direct_flag;
9316#endif
9317#ifdef O_DSYNC
9319 return O_DSYNC | o_direct_flag;
9320#endif
9321 default:
9322 /* can't happen (unless we are out of sync with option array) */
9323 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
9324 return 0; /* silence warning */
9325 }
9326}
int io_direct_flags
Definition fd.c:172
#define IO_DIRECT_WAL
Definition fd.h:55
#define PG_O_DIRECT
Definition fd.h:123
bool enableFsync
Definition globals.c:131
#define AmWalReceiverProcess()
Definition miscadmin.h:406
#define O_DSYNC
Definition win32_port.h:346
@ WAL_SYNC_METHOD_OPEN
Definition xlog.h:27
@ WAL_SYNC_METHOD_FDATASYNC
Definition xlog.h:26
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition xlog.h:28
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition xlog.h:29
@ WAL_SYNC_METHOD_FSYNC
Definition xlog.h:25

References AmWalReceiverProcess, elog, enableFsync, ERROR, fb(), io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 5284 of file xlog.c.

5285{
5286 return ControlFile->wal_level;
5287}

References ControlFile, and ControlFileData::wal_level.

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4991 of file xlog.c.

4992{
4994}
bool default_char_signedness
Definition pg_control.h:238

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE(), and trigram_qsort().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 5006 of file xlog.c.

5007{
5009}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition atomics.h:532

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by XLogGetFakeLSN().

◆ GetFlushRecPtr()

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6963 of file xlog.c.

6964{
6967}
static bool doPageWrites
Definition xlog.c:293

References doPageWrites, fb(), and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 7052 of file xlog.c.

7053{
7055 int i;
7056
7057 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
7058 {
7060
7061 /*
7062 * Need to take a lock to prevent torn reads of the LSN, which are
7063 * possible on some of the supported platforms. WAL insert locks only
7064 * support exclusive mode, so we have to use that.
7065 */
7068 LWLockRelease(&WALInsertLocks[i].l.lock);
7069
7070 if (res < last_important)
7071 res = last_important;
7072 }
7073
7074 return res;
7075}
int i
Definition isn.c:77
XLogRecPtr lastImportantAt
Definition xlog.c:378
WALInsertLock l
Definition xlog.c:390
static WALInsertLockPadded * WALInsertLocks
Definition xlog.c:578
#define NUM_XLOGINSERT_LOCKS
Definition xlog.c:157

References fb(), i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 7081 of file xlog.c.

7082{
7084
7085 /* Need WALWriteLock, but shared lock is sufficient */
7090
7091 return result;
7092}
uint32 result
pg_time_t lastSegSwitchTime
Definition xlog.c:473
XLogRecPtr lastSegSwitchLSN
Definition xlog.c:474

References fb(), XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), result, and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4657 of file xlog.c.

4658{
4661}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition pg_control.h:245

References Assert, ControlFile, fb(), and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6866 of file xlog.c.

6867{
6868 RecoveryState retval;
6869
6871 retval = XLogCtl->SharedRecoveryState;
6873
6874 return retval;
6875}
RecoveryState
Definition xlog.h:92

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6933 of file xlog.c.

6934{
6935 XLogRecPtr ptr;
6936
6937 /*
6938 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6939 * grabbed a WAL insertion lock to read the authoritative value in
6940 * Insert->RedoRecPtr, someone might update it just after we've released
6941 * the lock.
6942 */
6944 ptr = XLogCtl->RedoRecPtr;
6946
6947 if (RedoRecPtr < ptr)
6948 RedoRecPtr = ptr;
6949
6950 return RedoRecPtr;
6951}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), reserve_wal_for_local_slot(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 8414 of file xlog.c.

8415{
8416 XLogRecPtr currpos; /* current write LSN */
8417 XLogSegNo currSeg; /* segid of currpos */
8418 XLogSegNo targetSeg; /* segid of targetLSN */
8419 XLogSegNo oldestSeg; /* actual oldest segid */
8420 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
8421 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
8423
8424 /*
8425 * slot does not reserve WAL. Either deactivated, or has never been active
8426 */
8428 return WALAVAIL_INVALID_LSN;
8429
8430 /*
8431 * Calculate the oldest segment currently reserved by all slots,
8432 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
8433 * oldestSlotSeg to the current segment.
8434 */
8435 currpos = GetXLogWriteRecPtr();
8437 KeepLogSeg(currpos, &oldestSlotSeg);
8438
8439 /*
8440 * Find the oldest extant segment file. We get 1 until checkpoint removes
8441 * the first WAL segment file since startup, which causes the status being
8442 * wrong under certain abnormal conditions but that doesn't actually harm.
8443 */
8445
8446 /* calculate oldest segment by max_wal_size */
8449
8450 if (currSeg > keepSegs)
8452 else
8454
8455 /* the segment we care about */
8457
8458 /*
8459 * No point in returning reserved or extended status values if the
8460 * targetSeg is known to be lost.
8461 */
8462 if (targetSeg >= oldestSlotSeg)
8463 {
8464 /* show "reserved" when targetSeg is within max_wal_size */
8466 return WALAVAIL_RESERVED;
8467
8468 /* being retained by slots exceeding max_wal_size */
8469 return WALAVAIL_EXTENDED;
8470 }
8471
8472 /* WAL segments are no longer retained but haven't been removed yet */
8473 if (targetSeg >= oldestSeg)
8474 return WALAVAIL_UNRESERVED;
8475
8476 /* Definitely lost */
8477 return WALAVAIL_REMOVED;
8478}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3813
XLogRecPtr GetXLogWriteRecPtr(void)
Definition xlog.c:10140
@ WALAVAIL_REMOVED
Definition xlog.h:206
@ WALAVAIL_RESERVED
Definition xlog.h:202
@ WALAVAIL_UNRESERVED
Definition xlog.h:205
@ WALAVAIL_EXTENDED
Definition xlog.h:203
@ WALAVAIL_INVALID_LSN
Definition xlog.h:201

References ConvertToXSegs, fb(), GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsValid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 7016 of file xlog.c.

7017{
7019
7020 /* Since the value can't be changing, no lock is required. */
7021 return XLogCtl->InsertTimeLineID;
7022}

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1673 of file xlog.c.

1674{
1675 int idx;
1676 XLogRecPtr endptr;
1677 static uint64 cachedPage = 0;
1678 static char *cachedPos = NULL;
1680
1681 /*
1682 * Fast path for the common case that we need to access again the same
1683 * page as last time.
1684 */
1685 if (ptr / XLOG_BLCKSZ == cachedPage)
1686 {
1688 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1689 return cachedPos + ptr % XLOG_BLCKSZ;
1690 }
1691
1692 /*
1693 * The XLog buffer cache is organized so that a page is always loaded to a
1694 * particular buffer. That way we can easily calculate the buffer a given
1695 * page must be loaded into, from the XLogRecPtr alone.
1696 */
1697 idx = XLogRecPtrToBufIdx(ptr);
1698
1699 /*
1700 * See what page is loaded in the buffer at the moment. It could be the
1701 * page we're looking for, or something older. It can't be anything newer
1702 * - that would imply the page we're looking for has already been written
1703 * out to disk and evicted, and the caller is responsible for making sure
1704 * that doesn't happen.
1705 *
1706 * We don't hold a lock while we read the value. If someone is just about
1707 * to initialize or has just initialized the page, it's possible that we
1708 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1709 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1710 * we're looking for.
1711 */
1712 expectedEndPtr = ptr;
1714
1716 if (expectedEndPtr != endptr)
1717 {
1719
1720 /*
1721 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1722 * know how far we're finished with inserting the record.
1723 *
1724 * NB: If 'ptr' points to just after the page header, advertise a
1725 * position at the beginning of the page rather than 'ptr' itself. If
1726 * there are no other insertions running, someone might try to flush
1727 * up to our advertised location. If we advertised a position after
1728 * the page header, someone might try to flush the page header, even
1729 * though page might actually not be initialized yet. As the first
1730 * inserter on the page, we are effectively responsible for making
1731 * sure that it's initialized, before we let insertingAt to move past
1732 * the page header.
1733 */
1734 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1737 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1740 else
1741 initializedUpto = ptr;
1742
1744
1745 AdvanceXLInsertBuffer(ptr, tli, false);
1747
1748 if (expectedEndPtr != endptr)
1749 elog(PANIC, "could not find WAL buffer for %X/%08X",
1750 LSN_FORMAT_ARGS(ptr));
1751 }
1752 else
1753 {
1754 /*
1755 * Make sure the initialization of the page is visible to us, and
1756 * won't arrive later to overwrite the WAL data we write on the page.
1757 */
1759 }
1760
1761 /*
1762 * Found the buffer holding this page. Return a pointer to the right
1763 * offset within the page.
1764 */
1765 cachedPage = ptr / XLOG_BLCKSZ;
1767
1769 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1770
1771 return cachedPos + ptr % XLOG_BLCKSZ;
1772}
#define pg_memory_barrier()
Definition atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition xlog.c:1512
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition xlog.c:2026

References AdvanceXLInsertBuffer(), Assert, elog, fb(), idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertEndRecPtr()

XLogRecPtr GetXLogInsertEndRecPtr ( void  )

Definition at line 10124 of file xlog.c.

10125{
10128
10129 SpinLockAcquire(&Insert->insertpos_lck);
10130 current_bytepos = Insert->CurrBytePos;
10131 SpinLockRelease(&Insert->insertpos_lck);
10132
10134}
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition xlog.c:1939

References fb(), XLogCtlData::Insert, Insert(), SpinLockAcquire(), SpinLockRelease(), XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by rebuild_relation_finish_concurrent(), WalSndWaitForWal(), and XLogGetFakeLSN().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4259 of file xlog.c.

4260{
4262
4263 /*
4264 * Generate a random nonce. This is used for authentication requests that
4265 * will fail because the user does not exist. The nonce is used to create
4266 * a genuine-looking password challenge for the non-existent user, in lieu
4267 * of an actual stored password.
4268 */
4270 ereport(PANIC,
4272 errmsg("could not generate secret authorization token")));
4273
4274 memset(ControlFile, 0, sizeof(ControlFileData));
4275 /* Initialize pg_control status fields */
4276 ControlFile->system_identifier = sysidentifier;
4280
4281 /* Set important parameter values for use when replaying WAL */
4290 ControlFile->data_checksum_version = data_checksum_version;
4291
4292 /*
4293 * Set the data_checksum_version value into XLogCtl, which is where all
4294 * processes get the current value from.
4295 */
4296 XLogCtl->data_checksum_version = data_checksum_version;
4297}
bool track_commit_timestamp
Definition commit_ts.c:121
#define MOCK_AUTH_NONCE_LEN
Definition pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition pg_control.h:193
bool wal_log_hints
Definition xlog.c:130
#define FirstNormalUnloggedLSN
Definition xlogdefs.h:37

References ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg, fb(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, memcpy(), MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, and XLogCtl.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 5188 of file xlog.c.

5189{
5191
5193 {
5194 struct config_generic *guc;
5195
5196 guc = find_option("wal_consistency_checking", false, false, ERROR);
5197
5199
5200 set_config_option_ext("wal_consistency_checking",
5202 guc->scontext, guc->source, guc->srole,
5203 GUC_ACTION_SET, true, ERROR, false);
5204
5205 /* checking should not be deferred again */
5207 }
5208}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition guc.c:3288
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition guc.c:1114
@ GUC_ACTION_SET
Definition guc.h:203
char * wal_consistency_checking_string
Definition xlog.c:132

References Assert, check_wal_consistency_checking_deferred, ERROR, fb(), find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, set_config_option_ext(), and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InitLocalDataChecksumState()

void InitLocalDataChecksumState ( void  )

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo * segno,
char * tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3618 of file xlog.c.

3620{
3621 char path[MAXPGPATH];
3622 struct stat stat_buf;
3623
3624 Assert(tli != 0);
3625
3626 XLogFilePath(path, tli, *segno, wal_segment_size);
3627
3630 {
3632 return false;
3633 }
3634
3635 if (!find_free)
3636 {
3637 /* Force installation: get rid of any pre-existing segment file */
3638 durable_unlink(path, DEBUG1);
3639 }
3640 else
3641 {
3642 /* Find a free slot to put it in */
3643 while (stat(path, &stat_buf) == 0)
3644 {
3645 if ((*segno) >= max_segno)
3646 {
3647 /* Failed to find a free slot within specified range */
3649 return false;
3650 }
3651 (*segno)++;
3652 XLogFilePath(path, tli, *segno, wal_segment_size);
3653 }
3654 }
3655
3656 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3657 if (durable_rename(tmppath, path, LOG) != 0)
3658 {
3660 /* durable_rename already emitted log message */
3661 return false;
3662 }
3663
3665
3666 return true;
3667}
int durable_unlink(const char *fname, int elevel)
Definition fd.c:873
short access
bool InstallXLogFileSegmentActive
Definition xlog.c:532
#define stat
Definition win32_port.h:74

References Assert, DEBUG1, durable_rename(), durable_unlink(), fb(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 10189 of file xlog.c.

10190{
10191 bool result;
10192
10196
10197 return result;
10198}

References fb(), XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), result, and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 9374 of file xlog.c.

9375{
9376 char *msg = NULL;
9378
9379 Assert(tli != 0);
9380
9381 /*
9382 * Quick exit if fsync is disabled or write() has already synced the WAL
9383 * file.
9384 */
9385 if (!enableFsync ||
9388 return;
9389
9390 /*
9391 * Measure I/O timing to sync the WAL file for pg_stat_io.
9392 */
9394
9396 switch (wal_sync_method)
9397 {
9399 if (pg_fsync_no_writethrough(fd) != 0)
9400 msg = _("could not fsync file \"%s\": %m");
9401 break;
9402#ifdef HAVE_FSYNC_WRITETHROUGH
9404 if (pg_fsync_writethrough(fd) != 0)
9405 msg = _("could not fsync write-through file \"%s\": %m");
9406 break;
9407#endif
9409 if (pg_fdatasync(fd) != 0)
9410 msg = _("could not fdatasync file \"%s\": %m");
9411 break;
9414 /* not reachable */
9415 Assert(false);
9416 break;
9417 default:
9418 ereport(PANIC,
9420 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
9421 break;
9422 }
9423
9424 /* PANIC if failed to fsync */
9425 if (msg)
9426 {
9427 char xlogfname[MAXFNAMELEN];
9428 int save_errno = errno;
9429
9431 errno = save_errno;
9432 ereport(PANIC,
9434 errmsg(msg, xlogfname)));
9435 }
9436
9438
9440 start, 1, 0);
9441}
#define _(x)
Definition elog.c:96
int pg_fsync_no_writethrough(int fd)
Definition fd.c:442
int pg_fdatasync(int fd)
Definition fd.c:481
int pg_fsync_writethrough(int fd)
Definition fd.c:462
return str start
@ IOOBJECT_WAL
Definition pgstat.h:283
@ IOCONTEXT_NORMAL
Definition pgstat.h:293
@ IOOP_FSYNC
Definition pgstat.h:312
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
static int fd(const char *x, int i)
bool track_wal_io_timing
Definition xlog.c:144

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg, errmsg_internal(), fb(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo * logSegNo 
)
static

Definition at line 8498 of file xlog.c.

8499{
8501 XLogSegNo segno;
8503
8505 segno = currSegNo;
8506
8507 /* Calculate how many segments are kept by slots. */
8510 {
8512
8513 /*
8514 * Account for max_slot_wal_keep_size to avoid keeping more than
8515 * configured. However, don't do that during a binary upgrade: if
8516 * slots were to be invalidated because of this, it would not be
8517 * possible to preserve logical ones during the upgrade.
8518 */
8520 {
8522
8525
8526 if (currSegNo - segno > slot_keep_segs)
8527 segno = currSegNo - slot_keep_segs;
8528 }
8529 }
8530
8531 /*
8532 * If WAL summarization is in use, don't remove WAL that has yet to be
8533 * summarized.
8534 */
8537 {
8539
8541 if (unsummarized_segno < segno)
8542 segno = unsummarized_segno;
8543 }
8544
8545 /* but, keep at least wal_keep_size if that's set */
8546 if (wal_keep_size_mb > 0)
8547 {
8549
8551 if (currSegNo - segno < keep_segs)
8552 {
8553 /* avoid underflow, don't go below 1 */
8554 if (currSegNo <= keep_segs)
8555 segno = 1;
8556 else
8557 segno = currSegNo - keep_segs;
8558 }
8559 }
8560
8561 /* don't delete WAL segments newer than the calculated segment */
8562 if (segno < *logSegNo)
8563 *logSegNo = segno;
8564}
bool IsBinaryUpgrade
Definition globals.c:123
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition xlog.c:123
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2700
int max_slot_wal_keep_size_mb
Definition xlog.c:142

References ConvertToXSegs, fb(), GetOldestUnsummarizedLSN(), IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6918 of file xlog.c.

6919{
6921
6923
6924 return oldXLogAllowed;
6925}

References fb(), and LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint,
int  flags 
)
static

Definition at line 7185 of file xlog.c.

7186{
7187 long write_msecs,
7188 sync_msecs,
7193
7195
7198
7201
7202 /* Accumulate checkpoint timing summary data, in milliseconds. */
7205
7206 /*
7207 * All of the published timing statistics are accounted for. Only
7208 * continue if a log message is to be written.
7209 */
7210 if (!log_checkpoints)
7211 return;
7212
7215
7216 /*
7217 * Timing values returned from CheckpointStats are in microseconds.
7218 * Convert to milliseconds for consistent printing.
7219 */
7221
7226 average_msecs = (long) ((average_sync_time + 999) / 1000);
7227
7228 /*
7229 * ControlFileLock is not required to see ControlFile->checkPoint and
7230 * ->checkPointCopy here as we are the only updator of those variables at
7231 * this moment.
7232 */
7233 if (restartpoint)
7234 ereport(LOG,
7235 (errmsg("restartpoint complete:%s: wrote %d buffers (%.1f%%), "
7236 "wrote %d SLRU buffers; %d WAL file(s) added, "
7237 "%d removed, %d recycled; write=%ld.%03d s, "
7238 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
7239 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
7240 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
7241 CheckpointFlagsString(flags),
7248 write_msecs / 1000, (int) (write_msecs % 1000),
7249 sync_msecs / 1000, (int) (sync_msecs % 1000),
7250 total_msecs / 1000, (int) (total_msecs % 1000),
7252 longest_msecs / 1000, (int) (longest_msecs % 1000),
7253 average_msecs / 1000, (int) (average_msecs % 1000),
7254 (int) (PrevCheckPointDistance / 1024.0),
7255 (int) (CheckPointDistanceEstimate / 1024.0),
7258 else
7259 ereport(LOG,
7260 (errmsg("checkpoint complete:%s: wrote %d buffers (%.1f%%), "
7261 "wrote %d SLRU buffers; %d WAL file(s) added, "
7262 "%d removed, %d recycled; write=%ld.%03d s, "
7263 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
7264 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
7265 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
7266 CheckpointFlagsString(flags),
7273 write_msecs / 1000, (int) (write_msecs % 1000),
7274 sync_msecs / 1000, (int) (sync_msecs % 1000),
7275 total_msecs / 1000, (int) (total_msecs % 1000),
7277 longest_msecs / 1000, (int) (longest_msecs % 1000),
7278 average_msecs / 1000, (int) (average_msecs % 1000),
7279 (int) (PrevCheckPointDistance / 1024.0),
7280 (int) (CheckPointDistanceEstimate / 1024.0),
7283}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition timestamp.c:1751
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition xlog.h:188
uint64 ckpt_longest_sync
Definition xlog.h:187
TimestampTz ckpt_end_t
Definition xlog.h:177
int ckpt_slru_written
Definition xlog.h:180
PgStat_Counter sync_time
Definition pgstat.h:269
PgStat_Counter write_time
Definition pgstat.h:268
static const char * CheckpointFlagsString(int flags)
Definition xlog.c:7146
static double CheckPointDistanceEstimate
Definition xlog.c:166
static double PrevCheckPointDistance
Definition xlog.c:167

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointFlagsString(), CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg, fb(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 7167 of file xlog.c.

7168{
7169 if (restartpoint)
7170 ereport(LOG,
7171 /* translator: the placeholder shows checkpoint options */
7172 (errmsg("restartpoint starting:%s",
7173 CheckpointFlagsString(flags))));
7174 else
7175 ereport(LOG,
7176 /* translator: the placeholder shows checkpoint options */
7177 (errmsg("checkpoint starting:%s",
7178 CheckpointFlagsString(flags))));
7179}

References CheckpointFlagsString(), ereport, errmsg, fb(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6780 of file xlog.c.

6781{
6782 bool promoted = false;
6783
6784 /*
6785 * Perform a checkpoint to update all our recovery activity to disk.
6786 *
6787 * Note that we write a shutdown checkpoint rather than an on-line one.
6788 * This is not particularly critical, but since we may be assigning a new
6789 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6790 * only changes in shutdown checkpoints, which allows some extra error
6791 * checking in xlog_redo.
6792 *
6793 * In promotion, only create a lightweight end-of-recovery record instead
6794 * of a full checkpoint. A checkpoint is requested later, after we're
6795 * fully out of recovery mode and already accepting queries.
6796 */
6799 {
6800 promoted = true;
6801
6802 /*
6803 * Insert a special WAL record to mark the end of recovery, since we
6804 * aren't doing a checkpoint. That means that the checkpointer process
6805 * may likely be in the middle of a time-smoothed restartpoint and
6806 * could continue to be for minutes after this. That sounds strange,
6807 * but the effect is roughly the same and it would be stranger to try
6808 * to come out of the restartpoint and then checkpoint. We request a
6809 * checkpoint later anyway, just for safety.
6810 */
6812 }
6813 else
6814 {
6818 }
6819
6820 return promoted;
6821}
static void CreateEndOfRecoveryRecord(void)
Definition xlog.c:7908
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), fb(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3745 of file xlog.c.

3746{
3748 int lf;
3749 bool added;
3750 char path[MAXPGPATH];
3751 uint64 offset;
3752
3754 return; /* unlocked check says no */
3755
3757 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3758 if (offset >= (uint32) (0.75 * wal_segment_size))
3759 {
3760 _logSegNo++;
3761 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3762 if (lf >= 0)
3763 close(lf);
3764 if (added)
3766 }
3767}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition xlog.c:3247

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, fb(), XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6743 of file xlog.c.

6744{
6745 /*
6746 * We have reached the end of base backup, as indicated by pg_control. The
6747 * data on disk is now consistent (unless minRecoveryPoint is further
6748 * ahead, which can happen if we crashed during previous recovery). Reset
6749 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6750 * make sure we don't allow starting up at an earlier point even if
6751 * recovery is stopped and restarted soon after this.
6752 */
6754
6755 if (ControlFile->minRecoveryPoint < EndRecPtr)
6756 {
6757 ControlFile->minRecoveryPoint = EndRecPtr;
6759 }
6760
6765
6767}
XLogRecPtr backupStartPoint
Definition pg_control.h:178
XLogRecPtr backupEndPoint
Definition pg_control.h:179

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, fb(), InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4410 of file xlog.c.

4411{
4412 pg_crc32c crc;
4413 int fd;
4414 char wal_segsz_str[20];
4415 int r;
4416
4417 /*
4418 * Read data...
4419 */
4421 O_RDWR | PG_BINARY);
4422 if (fd < 0)
4423 ereport(PANIC,
4425 errmsg("could not open file \"%s\": %m",
4427
4429 r = read(fd, ControlFile, sizeof(ControlFileData));
4430 if (r != sizeof(ControlFileData))
4431 {
4432 if (r < 0)
4433 ereport(PANIC,
4435 errmsg("could not read file \"%s\": %m",
4437 else
4438 ereport(PANIC,
4440 errmsg("could not read file \"%s\": read %d of %zu",
4441 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4442 }
4444
4445 close(fd);
4446
4447 /*
4448 * Check for expected pg_control format version. If this is wrong, the
4449 * CRC check will likely fail because we'll be checking the wrong number
4450 * of bytes. Complaining about wrong version will probably be more
4451 * enlightening than complaining about wrong CRC.
4452 */
4453
4455 ereport(FATAL,
4457 errmsg("database files are incompatible with server"),
4458 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4459 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4462 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4463
4465 ereport(FATAL,
4467 errmsg("database files are incompatible with server"),
4468 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4469 " but the server was compiled with PG_CONTROL_VERSION %d.",
4471 errhint("It looks like you need to initdb.")));
4472
4473 /* Now check the CRC. */
4478 FIN_CRC32C(crc);
4479
4480 if (!EQ_CRC32C(crc, ControlFile->crc))
4481 ereport(FATAL,
4483 errmsg("incorrect checksum in control file")));
4484
4485 /*
4486 * Do compatibility checking immediately. If the database isn't
4487 * compatible with the backend executable, we want to abort before we can
4488 * possibly do any damage.
4489 */
4491 ereport(FATAL,
4493 errmsg("database files are incompatible with server"),
4494 /* translator: %s is a variable name and %d is its value */
4495 errdetail("The database cluster was initialized with %s %d,"
4496 " but the server was compiled with %s %d.",
4497 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4498 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4499 errhint("It looks like you need to initdb.")));
4501 ereport(FATAL,
4503 errmsg("database files are incompatible with server"),
4504 /* translator: %s is a variable name and %d is its value */
4505 errdetail("The database cluster was initialized with %s %d,"
4506 " but the server was compiled with %s %d.",
4507 "MAXALIGN", ControlFile->maxAlign,
4508 "MAXALIGN", MAXIMUM_ALIGNOF),
4509 errhint("It looks like you need to initdb.")));
4511 ereport(FATAL,
4513 errmsg("database files are incompatible with server"),
4514 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4515 errhint("It looks like you need to initdb.")));
4516 if (ControlFile->blcksz != BLCKSZ)
4517 ereport(FATAL,
4519 errmsg("database files are incompatible with server"),
4520 /* translator: %s is a variable name and %d is its value */
4521 errdetail("The database cluster was initialized with %s %d,"
4522 " but the server was compiled with %s %d.",
4523 "BLCKSZ", ControlFile->blcksz,
4524 "BLCKSZ", BLCKSZ),
4525 errhint("It looks like you need to recompile or initdb.")));
4527 ereport(FATAL,
4529 errmsg("database files are incompatible with server"),
4530 /* translator: %s is a variable name and %d is its value */
4531 errdetail("The database cluster was initialized with %s %d,"
4532 " but the server was compiled with %s %d.",
4533 "RELSEG_SIZE", ControlFile->relseg_size,
4534 "RELSEG_SIZE", RELSEG_SIZE),
4535 errhint("It looks like you need to recompile or initdb.")));
4537 ereport(FATAL,
4539 errmsg("database files are incompatible with server"),
4540 /* translator: %s is a variable name and %d is its value */
4541 errdetail("The database cluster was initialized with %s %d,"
4542 " but the server was compiled with %s %d.",
4543 "SLRU_PAGES_PER_SEGMENT", ControlFile->slru_pages_per_segment,
4544 "SLRU_PAGES_PER_SEGMENT", SLRU_PAGES_PER_SEGMENT),
4545 errhint("It looks like you need to recompile or initdb.")));
4547 ereport(FATAL,
4549 errmsg("database files are incompatible with server"),
4550 /* translator: %s is a variable name and %d is its value */
4551 errdetail("The database cluster was initialized with %s %d,"
4552 " but the server was compiled with %s %d.",
4553 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4554 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4555 errhint("It looks like you need to recompile or initdb.")));
4557 ereport(FATAL,
4559 errmsg("database files are incompatible with server"),
4560 /* translator: %s is a variable name and %d is its value */
4561 errdetail("The database cluster was initialized with %s %d,"
4562 " but the server was compiled with %s %d.",
4563 "NAMEDATALEN", ControlFile->nameDataLen,
4564 "NAMEDATALEN", NAMEDATALEN),
4565 errhint("It looks like you need to recompile or initdb.")));
4567 ereport(FATAL,
4569 errmsg("database files are incompatible with server"),
4570 /* translator: %s is a variable name and %d is its value */
4571 errdetail("The database cluster was initialized with %s %d,"
4572 " but the server was compiled with %s %d.",
4573 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4574 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4575 errhint("It looks like you need to recompile or initdb.")));
4577 ereport(FATAL,
4579 errmsg("database files are incompatible with server"),
4580 /* translator: %s is a variable name and %d is its value */
4581 errdetail("The database cluster was initialized with %s %d,"
4582 " but the server was compiled with %s %d.",
4583 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4584 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4585 errhint("It looks like you need to recompile or initdb.")));
4587 ereport(FATAL,
4589 errmsg("database files are incompatible with server"),
4590 /* translator: %s is a variable name and %d is its value */
4591 errdetail("The database cluster was initialized with %s %d,"
4592 " but the server was compiled with %s %d.",
4593 "LOBLKSIZE", ControlFile->loblksize,
4594 "LOBLKSIZE", (int) LOBLKSIZE),
4595 errhint("It looks like you need to recompile or initdb.")));
4596
4597 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
4598
4600
4603 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4604 "invalid WAL segment size in control file (%d bytes)",
4607 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4608
4610 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4612
4613 /* check and update variables dependent on wal_segment_size */
4616 /* translator: both %s are GUC names */
4617 errmsg("\"%s\" must be at least twice \"%s\"",
4618 "min_wal_size", "wal_segment_size")));
4619
4622 /* translator: both %s are GUC names */
4623 errmsg("\"%s\" must be at least twice \"%s\"",
4624 "max_wal_size", "wal_segment_size")));
4625
4629
4631}
#define PG_BINARY
Definition c.h:1374
#define CATALOG_VERSION_NO
Definition catversion.h:60
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
int BasicOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1090
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4234
@ PGC_S_DYNAMIC_DEFAULT
Definition guc.h:114
@ PGC_INTERNAL
Definition guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition heaptoast.h:84
#define read(a, b, c)
Definition win32.h:13
#define LOBLKSIZE
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define SLRU_PAGES_PER_SEGMENT
#define FLOATFORMAT_VALUE
Definition pg_control.h:209
#define PG_CONTROL_VERSION
Definition pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
uint32 pg_control_version
Definition pg_control.h:133
uint32 xlog_seg_size
Definition pg_control.h:221
uint32 slru_pages_per_segment
Definition pg_control.h:218
uint32 indexMaxKeys
Definition pg_control.h:224
uint32 catalog_version_no
Definition pg_control.h:134
pg_crc32c crc
Definition pg_control.h:248
uint32 toast_max_chunk_size
Definition pg_control.h:226
#define UsableBytesInPage
Definition xlog.c:617
static int UsableBytesInSegment
Definition xlog.c:626
int min_wal_size_mb
Definition xlog.c:122
#define XLOG_CONTROL_FILE

References Assert, BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg, errmsg_plural(), ERROR, FATAL, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6830 of file xlog.c.

6831{
6832 /*
6833 * We check shared state each time only until we leave recovery mode. We
6834 * can't re-enter recovery, so there's no need to keep checking after the
6835 * shared variable has once been seen false.
6836 */
6838 return false;
6839 else
6840 {
6841 /*
6842 * use volatile pointer to make sure we make a fresh read of the
6843 * shared variable.
6844 */
6845 volatile XLogCtlData *xlogctl = XLogCtl;
6846
6847 LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6848
6849 /*
6850 * Note: We don't need a memory barrier when we're still in recovery.
6851 * We might exit recovery immediately after return, so the caller
6852 * can't rely on 'true' meaning that we're still in recovery anyway.
6853 */
6854
6856 }
6857}
static bool LocalRecoveryInProgress
Definition xlog.c:231

References fb(), LocalRecoveryInProgress, RECOVERY_STATE_DONE, and XLogCtl.

Referenced by AbsorbDataChecksumsBarrier(), amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), DisableLogicalDecoding(), DisableLogicalDecodingIfNecessary(), do_pg_backup_start(), do_pg_backup_stop(), EnableLogicalDecoding(), EnsureLogicalDecodingEnabled(), error_commit_ts_disabled(), ExecCheckpoint(), ExecWaitStmt(), extended_statistics_update(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkSharedBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_clear_extended_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_stat_get_recovery(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), 
pg_walfile_name_offset(), pgstat_report_replslotsync(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_effective_wal_level(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), verify_heapam(), WaitForLSN(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint *checkPoint,
XLogReaderState *record 
)
static

Definition at line 8089 of file xlog.c.

8090{
8091 /*
8092 * Also refrain from creating a restartpoint if we have seen any
8093 * references to non-existent pages. Restarting recovery from the
8094 * restartpoint would not see the references, so we would lose the
8095 * cross-check that the pages belonged to a relation that was dropped
8096 * later.
8097 */
8099 {
8100 elog(DEBUG2,
8101 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
8102 LSN_FORMAT_ARGS(checkPoint->redo));
8103 return;
8104 }
8105
8106 /*
8107 * Copy the checkpoint record to shared memory, so that checkpointer can
8108 * work out the next time it wants to perform a restartpoint.
8109 */
8113 XLogCtl->lastCheckPoint = *checkPoint;
8115}
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
bool XLogHaveInvalidPages(void)
Definition xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire(), SpinLockRelease(), XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 10094 of file xlog.c.

10095{
10096 static bool already_done = false;
10097
10098 if (already_done)
10099 return;
10101 already_done = true;
10102}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344

References before_shmem_exit(), BoolGetDatum(), do_pg_abort_backup(), and fb().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3995 of file xlog.c.

3996{
3997 DIR *xldir;
3998 struct dirent *xlde;
3999 char switchseg[MAXFNAMELEN];
4003
4004 /*
4005 * Initialize info about where to begin the work. This will recycle,
4006 * somewhat arbitrarily, 10 future segments.
4007 */
4011
4012 /*
4013 * Construct a filename of the last segment to be kept.
4014 */
4016
4017 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
4018 switchseg);
4019
4021
4022 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4023 {
4024 /* Ignore files that are not XLOG segments */
4025 if (!IsXLogFileName(xlde->d_name))
4026 continue;
4027
4028 /*
4029 * Remove files that are on a timeline older than the new one we're
4030 * switching to, but with a segment number >= the first segment on the
4031 * new timeline.
4032 */
4033 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4034 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4035 {
4036 /*
4037 * If the file has already been marked as .ready, however, don't
4038 * remove it yet. It should be OK to remove it - files that are
4039 * not part of our timeline history are not required for recovery
4040 * - but seems safer to let them be archived and removed later.
4041 */
4042 if (!XLogArchiveIsReady(xlde->d_name))
4044 }
4045 }
4046
4047 FreeDir(xldir);
4048}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition xlog.c:4064
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3920 of file xlog.c.

3922{
3923 DIR *xldir;
3924 struct dirent *xlde;
3925 char lastoff[MAXFNAMELEN];
3928
3929 /* Initialize info about where to try to recycle to */
3932
3933 /*
3934 * Construct a filename of the last segment to be kept. The timeline ID
3935 * doesn't matter, we ignore that in the comparison. (During recovery,
3936 * InsertTimeLineID isn't set, so we can't use that.)
3937 */
3939
3940 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3941 lastoff);
3942
3944
3945 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3946 {
3947 /* Ignore files that are not XLOG segments */
3948 if (!IsXLogFileName(xlde->d_name) &&
3949 !IsPartialXLogFileName(xlde->d_name))
3950 continue;
3951
3952 /*
3953 * We ignore the timeline part of the XLOG segment identifiers in
3954 * deciding whether a segment is still needed. This ensures that we
3955 * won't prematurely remove a segment from a parent timeline. We could
3956 * probably be a little more proactive about removing segments of
3957 * non-parent timelines, but that would be a whole lot more
3958 * complicated.
3959 *
3960 * We use the alphanumeric sorting property of the filenames to decide
3961 * which ones are earlier than the lastoff segment.
3962 */
3963 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3964 {
3965 if (XLogArchiveCheckDone(xlde->d_name))
3966 {
3967 /* Update the last removed location in shared memory first */
3968 UpdateLastRemovedPtr(xlde->d_name);
3969
3971 }
3972 }
3973 }
3974
3975 FreeDir(xldir);
3976}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition xlog.c:2251
static void UpdateLastRemovedPtr(char *filename)
Definition xlog.c:3867
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3887 of file xlog.c.

3888{
3889 DIR *xldir;
3890 struct dirent *xlde;
3891
3892 elog(DEBUG2, "removing all temporary WAL segments");
3893
3895 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3896 {
3897 char path[MAXPGPATH];
3898
3899 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3900 continue;
3901
3902 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3903 unlink(path);
3904 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3905 }
3906 FreeDir(xldir);
3907}

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent *segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo *endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4064 of file xlog.c.

4067{
4068 char path[MAXPGPATH];
4069#ifdef WIN32
4070 char newpath[MAXPGPATH];
4071#endif
4072 const char *segname = segment_de->d_name;
4073
4074 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4075
4076 /*
4077 * Before deleting the file, see if it can be recycled as a future log
4078 * segment. Only recycle normal files, because we don't want to recycle
4079 * symbolic links pointing to a separate archive directory.
4080 */
4081 if (wal_recycle &&
4083 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4084 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4086 true, recycleSegNo, insertTLI))
4087 {
4089 (errmsg_internal("recycled write-ahead log file \"%s\"",
4090 segname)));
4092 /* Needn't recheck that slot on future iterations */
4093 (*endlogSegNo)++;
4094 }
4095 else
4096 {
4097 /* No need for any more future segments, or recycling failed ... */
4098 int rc;
4099
4101 (errmsg_internal("removing write-ahead log file \"%s\"",
4102 segname)));
4103
4104#ifdef WIN32
4105
4106 /*
4107 * On Windows, if another process (e.g. another backend) holds the file
4108 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4109 * will still show up in directory listing until the last handle is
4110 * closed. To avoid confusing the lingering deleted file for a live
4111 * WAL file that needs to be archived, rename it before deleting it.
4112 *
4113 * If another process holds the file open without FILE_SHARE_DELETE
4114 * flag, rename will fail. We'll try again at the next checkpoint.
4115 */
4116 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4117 if (rename(path, newpath) != 0)
4118 {
4119 ereport(LOG,
4121 errmsg("could not rename file \"%s\": %m",
4122 path)));
4123 return;
4124 }
4125 rc = durable_unlink(newpath, LOG);
4126#else
4127 rc = durable_unlink(path, LOG);
4128#endif
4129 if (rc != 0)
4130 {
4131 /* Message already logged by durable_unlink() */
4132 return;
4133 }
4135 }
4136
4138}
@ PGFILETYPE_REG
Definition file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition xlog.c:3618
bool wal_recycle
Definition xlog.c:135

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg, errmsg_internal(), fb(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8607 of file xlog.c.

8608{
8610
8611 /* XLOG SWITCH has no data */
8613
8614 if (mark_unimportant)
8617
8618 return RecPtr;
8619}
#define XLOG_SWITCH
Definition pg_control.h:76
#define XLOG_MARK_UNIMPORTANT
Definition xlog.h:167
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:464

References fb(), XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr *StartPos,
XLogRecPtr *EndPos,
XLogRecPtr *PrevPtr 
)
static

Definition at line 1149 of file xlog.c.

1151{
1156
1157 size = MAXALIGN(size);
1158
1159 /* All (non xlog-switch) records should contain data. */
1160 Assert(size > SizeOfXLogRecord);
1161
1162 /*
1163 * The duration the spinlock needs to be held is minimized by minimizing
1164 * the calculations that have to be done while holding the lock. The
1165 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1166 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1167 * page headers. The mapping between "usable" byte positions and physical
1168 * positions (XLogRecPtrs) can be done outside the locked region, and
1169 * because the usable byte position doesn't include any headers, reserving
1170 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1171 */
1172 SpinLockAcquire(&Insert->insertpos_lck);
1173
1174 startbytepos = Insert->CurrBytePos;
1175 endbytepos = startbytepos + size;
1176 prevbytepos = Insert->PrevBytePos;
1177 Insert->CurrBytePos = endbytepos;
1178 Insert->PrevBytePos = startbytepos;
1179
1180 SpinLockRelease(&Insert->insertpos_lck);
1181
1185
1186 /*
1187 * Check that the conversions between "usable byte positions" and
1188 * XLogRecPtrs work consistently in both directions.
1189 */
1193}
#define MAXALIGN(LEN)
Definition c.h:896
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition xlog.c:1982

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire(), SpinLockRelease(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr *StartPos,
XLogRecPtr *EndPos,
XLogRecPtr *PrevPtr 
)
static

Definition at line 1205 of file xlog.c.

1206{
1212 XLogRecPtr ptr;
1214
1215 /*
1216 * These calculations are a bit heavy-weight to be done while holding a
1217 * spinlock, but since we're holding all the WAL insertion locks, there
1218 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1219 * compete for it, but that's not called very frequently.
1220 */
1221 SpinLockAcquire(&Insert->insertpos_lck);
1222
1223 startbytepos = Insert->CurrBytePos;
1224
1226 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1227 {
1228 SpinLockRelease(&Insert->insertpos_lck);
1229 *EndPos = *StartPos = ptr;
1230 return false;
1231 }
1232
1233 endbytepos = startbytepos + size;
1234 prevbytepos = Insert->PrevBytePos;
1235
1238
1241 {
1242 /* consume the rest of the segment */
1243 *EndPos += segleft;
1245 }
1246 Insert->CurrBytePos = endbytepos;
1247 Insert->PrevBytePos = startbytepos;
1248
1249 SpinLockRelease(&Insert->insertpos_lck);
1250
1252
1257
1258 return true;
1259}

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ ResetInstallXLogFileSegmentActive()

◆ SetDataChecksumsOff()

void SetDataChecksumsOff ( void  )

Definition at line 4858 of file xlog.c.

4859{
4861
4863
4865
4866 /* If data checksums are already disabled there is nothing to do */
4868 {
4870 return;
4871 }
4872
4873 /*
4874 * If data checksums are currently enabled we first transition to the
4875 * "inprogress-off" state during which backends continue to write
4876 * checksums without verifying them. When all backends are in
4877 * "inprogress-off" the next transition to "off" can be performed, after
4878 * which all data checksum processing is disabled.
4879 */
4881 {
4883
4886
4888
4892
4894
4897
4902
4904
4905 /*
4906 * Update local state in all backends to ensure that any backend in
4907 * "on" state is changed to "inprogress-off".
4908 */
4910
4911 /*
4912 * At this point we know that no backends are verifying data checksums
4913 * during reading. Next, we can safely move to state "off" to also
4914 * stop writing checksums.
4915 */
4916 }
4917 else
4918 {
4919 /*
4920 * Ending up here implies that the checksums state is "inprogress-on"
4921 * or "inprogress-off" and we can transition directly to "off" from
4922 * there.
4923 */
4925 }
4926
4928 /* Ensure that we don't incur a checkpoint while disabling checksums */
4930
4932
4936
4938
4941
4946
4948
4950}
@ PG_DATA_CHECKSUM_OFF
Definition checksum.h:28
static THREAD_BARRIER_T barrier
Definition pgbench.c:488
void WaitForProcSignalBarrier(uint64 generation)
Definition procsignal.c:428
uint64 EmitProcSignalBarrier(ProcSignalBarrierType type)
Definition procsignal.c:360
@ PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF
Definition procsignal.h:55
@ PROCSIGNAL_BARRIER_CHECKSUM_OFF
Definition procsignal.h:53
PGPROC * MyProc
Definition proc.c:71
int delayChkptFlags
Definition proc.h:260
static void XLogChecksums(uint32 new_type)
Definition xlog.c:8733

References Assert, barrier, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_OFF, PG_DATA_CHECKSUM_VERSION, PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF, PROCSIGNAL_BARRIER_CHECKSUM_OFF, RequestCheckpoint(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain(), launcher_exit(), ProcessAllDatabases(), and SetDataChecksumsOn().

◆ SetDataChecksumsOn()

void SetDataChecksumsOn ( void  )

Definition at line 4786 of file xlog.c.

4787{
4789
4791
4793
4794 /*
4795 * The only allowed state transition to "on" is from "inprogress-on" since
4796 * that state ensures that all pages will have data checksums written. No
4797 * such state transition exists, if it does happen it's likely due to a
4798 * programmer error.
4799 */
4801 {
4803 elog(WARNING,
4804 "cannot set data checksums to \"on\", current state is not \"inprogress-on\", disabling");
4806 return;
4807 }
4808
4810
4811 INJECTION_POINT("datachecksums-enable-checksums-delay", NULL);
4814
4816
4820
4822
4825
4826 /*
4827 * Update the controlfile before waiting since if we have an immediate
4828 * shutdown while waiting we want to come back up with checksums enabled.
4829 */
4834
4836
4837 /*
4838 * Await state transition to "on" in all backends. When done we know that
4839 * data checksums are both written and verified in all backends.
4840 */
4842}
@ PROCSIGNAL_BARRIER_CHECKSUM_ON
Definition procsignal.h:56
void SetDataChecksumsOff(void)
Definition xlog.c:4858

References Assert, barrier, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, INJECTION_POINT, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_VERSION, PROCSIGNAL_BARRIER_CHECKSUM_ON, RequestCheckpoint(), SetDataChecksumsOff(), SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), WARNING, XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain().

◆ SetDataChecksumsOnInProgress()

void SetDataChecksumsOnInProgress ( void  )

Definition at line 4722 of file xlog.c.

4723{
4725
4727
4728 /*
4729 * The state transition is performed in a critical section with
4730 * checkpoints held off to provide crash safety.
4731 */
4734
4736
4740
4742
4745
4746 /*
4747 * Update the controlfile before waiting since if we have an immediate
4748 * shutdown while waiting we want to come back up with checksums enabled.
4749 */
4754
4755 /*
4756 * Await state change in all backends to ensure that all backends are in
4757 * "inprogress-on". Once done we know that all backends are writing data
4758 * checksums.
4759 */
4761}
@ PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON
Definition procsignal.h:54

References Assert, barrier, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, DELAY_CHKPT_START, PGPROC::delayChkptFlags, EmitProcSignalBarrier(), END_CRIT_SECTION, fb(), XLogCtlData::info_lck, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MyProc, PG_DATA_CHECKSUM_INPROGRESS_ON, PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateControlFile(), WaitForProcSignalBarrier(), XLogChecksums(), and XLogCtl.

Referenced by DataChecksumsWorkerLauncherMain().

◆ SetInstallXLogFileSegmentActive()

◆ SetLocalDataChecksumState()

void SetLocalDataChecksumState ( uint32  data_checksum_version)

Definition at line 4969 of file xlog.c.

/*
 * Record the given data_checksum_version locally: the same value is assigned
 * to both LocalDataChecksumState and data_checksums.
 */
4970{
4971 LocalDataChecksumState = data_checksum_version;
4972
4973 data_checksums = data_checksum_version;
4974}
int data_checksums
Definition xlog.c:683

References data_checksums, and LocalDataChecksumState.

Referenced by AbsorbDataChecksumsBarrier(), InitLocalDataChecksumState(), LocalProcessControlFile(), StartupXLOG(), xlog_redo(), and XLOGShmemInit().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 5214 of file xlog.c.

/*
 * Returns XLogArchiveCommand while XLogArchivingActive() is true, and the
 * literal string "(disabled)" otherwise.
 * NOTE(review): presumably the GUC show hook for archive_command - confirm
 * against the GUC tables.
 */
5215{
5216 if (XLogArchivingActive())
5217 return XLogArchiveCommand;
5218 else
5219 return "(disabled)";
5220}
char * XLogArchiveCommand
Definition xlog.c:127

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_data_checksums()

const char * show_data_checksums ( void  )

Definition at line 4978 of file xlog.c.

4979{
4981}
const char * get_checksum_state_string(uint32 state)
Definition xlogdesc.c:59

References get_checksum_state_string(), and LocalDataChecksumState.

◆ show_effective_wal_level()

const char * show_effective_wal_level ( void  )

Definition at line 5241 of file xlog.c.

5242{
5244 return "minimal";
5245
5246 /*
5247 * During recovery, effective_wal_level reflects the primary's
5248 * configuration rather than the local wal_level value.
5249 */
5250 if (RecoveryInProgress())
5251 return IsXLogLogicalInfoEnabled() ? "logical" : "replica";
5252
5253 return XLogLogicalInfoActive() ? "logical" : "replica";
5254}
bool IsXLogLogicalInfoEnabled(void)
Definition logicalctl.c:218
#define XLogLogicalInfoActive()
Definition xlog.h:137

References IsXLogLogicalInfoEnabled(), RecoveryInProgress(), wal_level, WAL_LEVEL_MINIMAL, and XLogLogicalInfoActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 5226 of file xlog.c.

/*
 * Returns "on" when RecoveryInProgress() reports true and "off" otherwise.
 */
5227{
5228 /*
5229 * We display the actual state based on shared memory, so that this GUC
5230 * reports up-to-date state if examined intra-query. The underlying
5231 * variable (in_hot_standby_guc) changes only when we transmit a new value
5232 * to the client.
5233 */
5234 return RecoveryInProgress() ? "on" : "off";
5235}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 7098 of file xlog.c.

7099{
7100 /*
7101 * We should have an aux process resource owner to use, and we should not
7102 * be in a transaction that's installed some other resowner.
7103 */
7108
7109 /* Don't be chatty in standalone mode */
7111 (errmsg("shutting down")));
7112
7113 /*
7114 * Signal walsenders to move to stopping state.
7115 */
7117
7118 /*
7119 * Wait for WAL senders to be in stopping state. This prevents commands
7120 * from writing new WAL.
7121 */
7123
7124 if (RecoveryInProgress())
7126 else
7127 {
7128 /*
7129 * If archiving is enabled, rotate the last XLOG file so that all the
7130 * remaining records are archived (postmaster wakes up the archiver
7131 * process one more time at the end of shutdown). The checkpoint
7132 * record will go to the next XLOG file and won't be archived (yet).
7133 */
7134 if (XLogArchivingActive())
7135 RequestXLogSwitch(false);
7136
7138 }
7139}
bool IsPostmasterEnvironment
Definition globals.c:121
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition resowner.c:176
void WalSndInitStopping(void)
Definition walsender.c:4037
void WalSndWaitStopping(void)
Definition walsender.c:4063
bool CreateRestartPoint(int flags)
Definition xlog.c:8129
bool CreateCheckPoint(int flags)
Definition xlog.c:7395

References Assert, AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg, fb(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5846 of file xlog.c.

5847{
5849 CheckPoint checkPoint;
5850 bool wasShutdown;
5851 bool didCrash;
5852 bool haveTblspcMap;
5853 bool haveBackupLabel;
5862 bool promoted = false;
5863 char timebuf[128];
5864
5865 /*
5866 * We should have an aux process resource owner to use, and we should not
5867 * be in a transaction that's installed some other resowner.
5868 */
5873
5874 /*
5875 * Check that contents look valid.
5876 */
5878 ereport(FATAL,
5880 errmsg("control file contains invalid checkpoint location")));
5881
5882 switch (ControlFile->state)
5883 {
5884 case DB_SHUTDOWNED:
5885
5886 /*
5887 * This is the expected case, so don't be chatty in standalone
5888 * mode
5889 */
5891 (errmsg("database system was shut down at %s",
5892 str_time(ControlFile->time,
5893 timebuf, sizeof(timebuf)))));
5894 break;
5895
5897 ereport(LOG,
5898 (errmsg("database system was shut down in recovery at %s",
5900 timebuf, sizeof(timebuf)))));
5901 break;
5902
5903 case DB_SHUTDOWNING:
5904 ereport(LOG,
5905 (errmsg("database system shutdown was interrupted; last known up at %s",
5907 timebuf, sizeof(timebuf)))));
5908 break;
5909
5911 ereport(LOG,
5912 (errmsg("database system was interrupted while in recovery at %s",
5914 timebuf, sizeof(timebuf))),
5915 errhint("This probably means that some data is corrupted and"
5916 " you will have to use the last backup for recovery.")));
5917 break;
5918
5920 ereport(LOG,
5921 (errmsg("database system was interrupted while in recovery at log time %s",
5923 timebuf, sizeof(timebuf))),
5924 errhint("If this has occurred more than once some data might be corrupted"
5925 " and you might need to choose an earlier recovery target.")));
5926 break;
5927
5928 case DB_IN_PRODUCTION:
5929 ereport(LOG,
5930 (errmsg("database system was interrupted; last known up at %s",
5932 timebuf, sizeof(timebuf)))));
5933 break;
5934
5935 default:
5936 ereport(FATAL,
5938 errmsg("control file contains invalid database cluster state")));
5939 }
5940
5941 /* This is just to allow attaching to startup process with a debugger */
5942#ifdef XLOG_REPLAY_DELAY
5944 pg_usleep(60000000L);
5945#endif
5946
5947 /*
5948 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5949 * In cases where someone has performed a copy for PITR, these directories
5950 * may have been excluded and need to be re-created.
5951 */
5953
5954 /* Set up timeout handler needed to report startup progress. */
5958
5959 /*----------
5960 * If we previously crashed, perform a couple of actions:
5961 *
5962 * - The pg_wal directory may still include some temporary WAL segments
5963 * used when creating a new segment, so perform some clean up to not
5964 * bloat this path. This is done first as there is no point to sync
5965 * this temporary data.
5966 *
5967 * - There might be data which we had written, intending to fsync it, but
5968 * which we had not actually fsync'd yet. Therefore, a power failure in
5969 * the near future might cause earlier unflushed writes to be lost, even
5970 * though more recent data written to disk from here on would be
5971 * persisted. To avoid that, fsync the entire data directory.
5972 */
5975 {
5978 didCrash = true;
5979 }
5980 else
5981 didCrash = false;
5982
5983 /*
5984 * Prepare for WAL recovery if needed.
5985 *
5986 * InitWalRecovery analyzes the control file and the backup label file, if
5987 * any. It updates the in-memory ControlFile buffer according to the
5988 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5989 * It also applies the tablespace map file, if any.
5990 */
5993 checkPoint = ControlFile->checkPointCopy;
5994
5995 /* initialize shared memory variables from the checkpoint record */
5996 TransamVariables->nextXid = checkPoint.nextXid;
5997 TransamVariables->nextOid = checkPoint.nextOid;
5999 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
6000 AdvanceOldestClogXid(checkPoint.oldestXid);
6001 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
6002 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
6004 checkPoint.newestCommitTsXid);
6005
6006 /*
6007 * Clear out any old relcache cache files. This is *necessary* if we do
6008 * any WAL replay, since that would probably result in the cache files
6009 * being out of sync with database reality. In theory we could leave them
6010 * in place if the database had been cleanly shut down, but it seems
6011 * safest to just remove them always and let them be rebuilt during the
6012 * first backend startup. These files need to be removed from all
6013 * directories including pg_tblspc; however, the symlinks are created only
6014 * after reading the tablespace_map file in case of archive recovery from
6015 * a backup, so we need to clear old relcache files here, after creating
6016 * the symlinks.
6017 */
6019
6020 /*
6021 * Initialize replication slots, before there's a chance to remove
6022 * required resources.
6023 */
6025
6026 /*
6027 * Startup the logical decoding status with the last status stored in the
6028 * checkpoint record.
6029 */
6031
6032 /*
6033 * Startup logical state, needs to be set up now so we have proper data
6034 * during crash recovery.
6035 */
6037
6038 /*
6039 * Startup CLOG. This must be done after TransamVariables->nextXid has
6040 * been initialized and before we accept connections or begin WAL replay.
6041 */
6042 StartupCLOG();
6043
6044 /*
6045 * Startup MultiXact. We need to do this early to be able to replay
6046 * truncations.
6047 */
6049
6050 /*
6051 * Ditto for commit timestamps. Activate the facility if the setting is
6052 * enabled in the control file, as there should be no tracking of commit
6053 * timestamps done when the setting was disabled. This facility can be
6054 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
6055 */
6058
6059 /*
6060 * Recover knowledge about replay progress of known replication partners.
6061 */
6063
6064 /*
6065 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
6066 * control file. On recovery, all unlogged relations are blown away, so
6067 * the unlogged LSN counter can be reset too.
6068 */
6072 else
6075
6076 /*
6077 * Copy any missing timeline history files between 'now' and the recovery
6078 * target timeline from archive to pg_wal. While we don't need those files
6079 * ourselves - the history file of the recovery target timeline covers all
6080 * the previous timelines in the history too - a cascading standby server
6081 * might be interested in them. Or, if you archive the WAL from this
6082 * server to a different archive than the primary, it'd be good for all
6083 * the history files to get archived there after failover, so that you can
6084 * use one of the old timelines as a PITR target. Timeline history files
6085 * are small, so it's better to copy them unnecessarily than not copy them
6086 * and regret later.
6087 */
6089
6090 /*
6091 * Before running in recovery, scan pg_twophase and fill in its status to
6092 * be able to work on entries generated by redo. Doing a scan before
6093 * taking any recovery action has the merit of discarding any 2PC files that
6094 * are newer than the first record to replay, avoiding any conflicts at
6095 * replay. This also avoids any subsequent scans when doing recovery
6096 * of the on-disk two-phase data.
6097 */
6099
6100 /*
6101 * When starting with crash recovery, reset pgstat data - it might not be
6102 * valid. Otherwise restore pgstat data. It's safe to do this here,
6103 * because postmaster will not yet have started any other processes.
6104 *
6105 * NB: Restoring replication slot stats relies on slot state to have
6106 * already been restored from disk.
6107 *
6108 * TODO: With a bit of extra work we could just start with a pgstat file
6109 * associated with the checkpoint redo location we're starting from.
6110 */
6111 if (didCrash)
6113 else
6115
6117
6120
6121 /* REDO */
6122 if (InRecovery)
6123 {
6124 /* Initialize state for RecoveryInProgress() */
6128 else
6131
6132 /*
6133 * Update pg_control to show that we are recovering and to show the
6134 * selected checkpoint as the place we are starting from. We also mark
6135 * pg_control with any minimum recovery stop point obtained from a
6136 * backup history file.
6137 *
6138 * No need to hold ControlFileLock yet, we aren't up far enough.
6139 */
6141
6142 /*
6143 * If there was a backup label file, it's done its job and the info
6144 * has now been propagated into pg_control. We must get rid of the
6145 * label file so that if we crash during recovery, we'll pick up at
6146 * the latest recovery restartpoint instead of going all the way back
6147 * to the backup start point. It seems prudent though to just rename
6148 * the file out of the way rather than delete it completely.
6149 */
6150 if (haveBackupLabel)
6151 {
6154 }
6155
6156 /*
6157 * If there was a tablespace_map file, it's done its job and the
6158 * symlinks have been created. We must get rid of the map file so
6159 * that if we crash during recovery, we don't create symlinks again.
6160 * It seems prudent though to just rename the file out of the way
6161 * rather than delete it completely.
6162 */
6163 if (haveTblspcMap)
6164 {
6167 }
6168
6169 /*
6170 * Initialize our local copy of minRecoveryPoint. When doing crash
6171 * recovery we want to replay up to the end of WAL. Particularly, in
6172 * the case of a promoted standby minRecoveryPoint value in the
6173 * control file is only updated after the first checkpoint. However,
6174 * if the instance crashes before the first post-recovery checkpoint
6175 * is completed then recovery will use a stale location causing the
6176 * startup process to think that there are still invalid page
6177 * references when checking for data consistency.
6178 */
6180 {
6183 }
6184 else
6185 {
6188 }
6189
6190 /* Check that the GUCs used to generate the WAL allow recovery */
6192
6193 /*
6194 * We're in recovery, so unlogged relations may be trashed and must be
6195 * reset. This should be done BEFORE allowing Hot Standby
6196 * connections, so that read-only backends don't try to read whatever
6197 * garbage is left over from before.
6198 */
6200
6201 /*
6202 * Likewise, delete any saved transaction snapshot files that got left
6203 * behind by crashed backends.
6204 */
6206
6207 /*
6208 * Initialize for Hot Standby, if enabled. We won't let backends in
6209 * yet, not until we've reached the min recovery point specified in
6210 * control file and we've established a recovery snapshot from a
6211 * running-xacts WAL record.
6212 */
6214 {
6215 TransactionId *xids;
6216 int nxids;
6217
6219 (errmsg_internal("initializing for hot standby")));
6220
6222
6223 if (wasShutdown)
6225 else
6226 oldestActiveXID = checkPoint.oldestActiveXid;
6228
6229 /* Tell procarray about the range of xids it has to deal with */
6231
6232 /*
6233 * Startup subtrans only. CLOG, MultiXact and commit timestamp
6234 * have already been started up and other SLRUs are not maintained
6235 * during recovery and need not be started yet.
6236 */
6238
6239 /*
6240 * If we're beginning at a shutdown checkpoint, we know that
6241 * nothing was running on the primary at this point. So fake-up an
6242 * empty running-xacts record and use that here and now. Recover
6243 * additional standby state for prepared transactions.
6244 */
6245 if (wasShutdown)
6246 {
6248 TransactionId latestCompletedXid;
6249
6250 /* Update pg_subtrans entries for any prepared transactions */
6252
6253 /*
6254 * Construct a RunningTransactions snapshot representing a
6255 * shut down server, with only prepared transactions still
6256 * alive. We're never overflowed at this point because all
6257 * subxids are listed with their parent prepared transactions.
6258 */
6259 running.xcnt = nxids;
6260 running.subxcnt = 0;
6262 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
6264 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
6265 TransactionIdRetreat(latestCompletedXid);
6266 Assert(TransactionIdIsNormal(latestCompletedXid));
6267 running.latestCompletedXid = latestCompletedXid;
6268 running.xids = xids;
6269
6271 }
6272 }
6273
6274 /*
6275 * We're all set for replaying the WAL now. Do it.
6276 */
6278 performedWalRecovery = true;
6279 }
6280 else
6281 performedWalRecovery = false;
6282
6283 /*
6284 * Finish WAL recovery.
6285 */
6287 EndOfLog = endOfRecoveryInfo->endOfLog;
6288 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
6289 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
6290 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
6291
6292 /*
6293 * Reset ps status display, so that no information related to recovery shows
6294 * up.
6295 */
6296 set_ps_display("");
6297
6298 /*
6299 * When recovering from a backup (we are in recovery, and archive recovery
6300 * was requested), complain if we did not roll forward far enough to reach
6301 * the point where the database is consistent. For regular online
6302 * backup-from-primary, that means reaching the end-of-backup WAL record
6303 * (at which point we reset backupStartPoint to be Invalid), for
6304 * backup-from-replica (which can't inject records into the WAL stream),
6305 * that point is when we reach the minRecoveryPoint in pg_control (which
6306 * we purposefully copy last when backing up from a replica). For
6307 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
6308 * or snapshot-style backups (which don't), backupEndRequired will be set
6309 * to false.
6310 *
6311 * Note: it is indeed okay to look at the local variable
6312 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
6313 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
6314 * been advanced beyond the WAL we processed.
6315 */
6316 if (InRecovery &&
6319 {
6320 /*
6321 * Ran off end of WAL before reaching end-of-backup WAL record, or
6322 * minRecoveryPoint. That's a bad sign, indicating that you tried to
6323 * recover from an online backup but never called pg_backup_stop(), or
6324 * you didn't archive all the WAL needed.
6325 */
6327 {
6329 ereport(FATAL,
6331 errmsg("WAL ends before end of online backup"),
6332 errhint("All WAL generated while online backup was taken must be available at recovery.")));
6333 else
6334 ereport(FATAL,
6336 errmsg("WAL ends before consistent recovery point")));
6337 }
6338 }
6339
6340 /*
6341 * Reset unlogged relations to the contents of their INIT fork. This is
6342 * done AFTER recovery is complete so as to include any unlogged relations
6343 * created during recovery, but BEFORE recovery is marked as having
6344 * completed successfully. Otherwise we'd not retry if any of the post
6345 * end-of-recovery steps fail.
6346 */
6347 if (InRecovery)
6349
6350 /*
6351 * Pre-scan prepared transactions to find out the range of XIDs present.
6352 * This information is not quite needed yet, but it is positioned here so
6353 * that potential problems are detected before any on-disk change is done.
6354 */
6356
6357 /*
6358 * Allow ordinary WAL segment creation before possibly switching to a new
6359 * timeline, which creates a new segment, and after the last ReadRecord().
6360 */
6362
6363 /*
6364 * Consider whether we need to assign a new timeline ID.
6365 *
6366 * If we did archive recovery, we always assign a new ID. This handles a
6367 * couple of issues. If we stopped short of the end of WAL during
6368 * recovery, then we are clearly generating a new timeline and must assign
6369 * it a unique new ID. Even if we ran to the end, modifying the current
6370 * last segment is problematic because it may result in trying to
6371 * overwrite an already-archived copy of that segment, and we encourage
6372 * DBAs to make their archive_commands reject that. We can dodge the
6373 * problem by making the new active segment have a new timeline ID.
6374 *
6375 * In a normal crash recovery, we can just extend the timeline we were in.
6376 */
6377 newTLI = endOfRecoveryInfo->lastRecTLI;
6379 {
6381 ereport(LOG,
6382 (errmsg("selected new timeline ID: %u", newTLI)));
6383
6384 /*
6385 * Make a writable copy of the last WAL segment. (Note that we also
6386 * have a copy of the last block of the old WAL in
6387 * endOfRecoveryInfo->lastPage; we will use that below.)
6388 */
6390
6391 /*
6392 * Remove the signal files out of the way, so that we don't
6393 * accidentally re-enter archive recovery mode in a subsequent crash.
6394 */
6395 if (endOfRecoveryInfo->standby_signal_file_found)
6397
6398 if (endOfRecoveryInfo->recovery_signal_file_found)
6400
6401 /*
6402 * Write the timeline history file, and have it archived. After this
6403 * point (or rather, as soon as the file is archived), the timeline
6404 * will appear as "taken" in the WAL archive and to any standby
6405 * servers. If we crash before actually switching to the new
6406 * timeline, standby servers will nevertheless think that we switched
6407 * to the new timeline, and will try to connect to the new timeline.
6408 * To minimize the window for that, try to do as little as possible
6409 * between here and writing the end-of-recovery record.
6410 */
6412 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6413
6414 ereport(LOG,
6415 (errmsg("archive recovery complete")));
6416 }
6417
6418 /* Save the selected TimeLineID in shared memory, too */
6423
6424 /*
6425 * Actually, if WAL ended in an incomplete record, skip the parts that
6426 * made it through and start writing after the portion that persisted.
6427 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6428 * we'll do as soon as we're open for writing new WAL.)
6429 */
6431 {
6432 /*
6433 * We should only have a missingContrecPtr if we're not switching to a
6434 * new timeline. When a timeline switch occurs, WAL is copied from the
6435 * old timeline to the new only up to the end of the last complete
6436 * record, so there can't be an incomplete WAL record that we need to
6437 * disregard.
6438 */
6439 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6442 }
6443
6444 /*
6445 * Prepare to write WAL starting at EndOfLog location, and init xlog
6446 * buffer cache using the block containing the last record from the
6447 * previous incarnation.
6448 */
6449 Insert = &XLogCtl->Insert;
6451 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6452
6453 /*
6454 * Tricky point here: lastPage contains the *last* block that the LastRec
6455 * record spans, not the one it starts in. The last block is indeed the
6456 * one we want to use.
6457 */
6458 if (EndOfLog % XLOG_BLCKSZ != 0)
6459 {
6460 char *page;
6461 int len;
6462 int firstIdx;
6463
6465 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6467
6468 /* Copy the valid part of the last block, and zero the rest */
6469 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6470 memcpy(page, endOfRecoveryInfo->lastPage, len);
6471 memset(page + len, 0, XLOG_BLCKSZ - len);
6472
6475 }
6476 else
6477 {
6478 /*
6479 * There is no partial block to copy. Just set InitializedUpTo, and
6480 * let the first attempt to insert a log record initialize the next
6481 * buffer.
6482 */
6484 }
6485
6486 /*
6487 * Update local and shared status. This is OK to do without any locks
6488 * because no other process can be reading or writing WAL yet.
6489 */
6496
6497 /*
6498 * Preallocate additional log files, if wanted.
6499 */
6501
6502 /*
6503 * Okay, we're officially UP.
6504 */
6505 InRecovery = false;
6506
6507 /* start the archive_timeout timer and LSN running */
6510
6511 /* also initialize latestCompletedXid, to nextXid - 1 */
6516
6517 /*
6518 * Start up subtrans, if not already done for hot standby. (commit
6519 * timestamps are started below, if necessary.)
6520 */
6523
6524 /*
6525 * Perform end of recovery actions for any SLRUs that need it.
6526 */
6527 TrimCLOG();
6528 TrimMultiXact();
6529
6530 /*
6531 * Reload shared-memory state for prepared transactions. This needs to
6532 * happen before renaming the last partial segment of the old timeline as
6533 * it may be possible that we have to recover some transactions from it.
6534 */
6536
6537 /* Shut down xlogreader */
6539
6540 /* Enable WAL writes for this backend only. */
6542
6543 /* If necessary, write overwrite-contrecord before doing anything else */
6545 {
6548 }
6549
6550 /*
6551 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6552 * record before resource manager writes cleanup WAL records or checkpoint
6553 * record is written.
6554 */
6555 Insert->fullPageWrites = lastFullPageWrites;
6557
6558 /*
6559 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6560 */
6563
6564 /*
6565 * If any of the critical GUCs have changed, log them before we allow
6566 * backends to write WAL.
6567 */
6569
6570 /* If this is archive recovery, perform post-recovery cleanup actions. */
6573
6574 /*
6575 * Local WAL inserts enabled, so it's time to finish initialization of
6576 * commit timestamp.
6577 */
6579
6580 /*
6581 * Update logical decoding status in shared memory and write an
6582 * XLOG_LOGICAL_DECODING_STATUS_CHANGE, if necessary.
6583 */
6585
6586 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6587 if (endOfRecoveryInfo->lastPage)
6588 pfree(endOfRecoveryInfo->lastPage);
6589 pfree(endOfRecoveryInfo->recoveryStopReason);
6591
6592 /*
6593 * If we reach this point with checksums in the state inprogress-on, it
6594 * means that data checksums were in the process of being enabled when the
6595 * cluster shut down. Since processing didn't finish, the operation will
6596 * have to be restarted from scratch since there is no capability to
6597 * continue where it was when the cluster shut down. Thus, revert the
6598 * state back to off, and inform the user with a warning message. Being
6599 * able to restart processing is a TODO, but it wouldn't be possible to
6600 * restart here since we cannot launch a dynamic background worker
6601 * directly from here (it has to be from a regular backend).
6602 */
6604 {
6606
6611
6613 errmsg("enabling data checksums was interrupted"),
6614 errhint("Data checksum processing must be manually restarted for checksums to be enabled"));
6615 }
6616
6617 /*
6618 * If data checksums were being disabled when the cluster was shut down,
6619 * we know that we have a state where all backends have stopped validating
6620 * checksums and we can move to off instead of prompting the user to
6621 * perform any action.
6622 */
6624 {
6626
6631 }
6632
6633 /*
6634 * All done with end-of-recovery actions.
6635 *
6636 * Now allow backends to write WAL and update the control file status in
6637 * consequence. SharedRecoveryState, that controls if backends can write
6638 * WAL, is updated while holding ControlFileLock to prevent other backends
6639 * from looking at an inconsistent state of the control file in shared memory.
6640 * There is still a small window during which backends can write WAL and
6641 * the control file is still referring to a system not in DB_IN_PRODUCTION
6642 * state while looking at the on-disk control file.
6643 *
6644 * Also, we use info_lck to update SharedRecoveryState to ensure that
6645 * there are no race conditions concerning visibility of other recent
6646 * updates to shared memory.
6647 */
6650
6654
6657
6658 /*
6659 * Wake up the checkpointer process as there might be a request to disable
6660 * logical decoding by concurrent slot drop.
6661 */
6663
6664 /*
6665 * Wake up all waiters. They need to report an error that recovery was
6666 * ended before reaching the target LSN.
6667 */
6671
6672 /*
6673 * Shutdown the recovery environment. This must occur after
6674 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6675 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6676 * any session building a snapshot will not rely on KnownAssignedXids as
6677 * RecoveryInProgress() would return false at this stage. This is
6678 * particularly critical for prepared 2PC transactions, that would still
6679 * need to be included in snapshots once recovery has ended.
6680 */
6683
6684 /*
6685 * If there were cascading standby servers connected to us, nudge any wal
6686 * sender processes to notice that we've been promoted.
6687 */
6688 WalSndWakeup(true, true);
6689
6690 /*
6691 * If this was a promotion, request an (online) checkpoint now. This isn't
6692 * required for consistency, but the last restartpoint might be far back,
6693 * and in case of a crash, recovering from it might take longer than is
6694 * appropriate now that we're not in standby mode anymore.
6695 */
6696 if (promoted)
6698}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:504
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition timeline.c:265
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition timeline.c:51
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition timeline.c:305
void startup_progress_timeout_handler(void)
Definition startup.c:302
uint32 TransactionId
Definition c.h:736
void WakeupCheckpointer(void)
void StartupCLOG(void)
Definition clog.c:862
void TrimCLOG(void)
Definition clog.c:877
void StartupCommitTs(void)
Definition commit_ts.c:613
void CompleteCommitTsInitialization(void)
Definition commit_ts.c:623
void SyncDataDirectory(void)
Definition fd.c:3594
void UpdateLogicalDecodingStatusEndOfRecovery(void)
Definition logicalctl.c:551
void StartupLogicalDecodingStatus(bool last_status)
Definition logicalctl.c:144
#define IsBootstrapProcessingMode()
Definition miscadmin.h:495
void TrimMultiXact(void)
Definition multixact.c:1904
void StartupMultiXact(void)
Definition multixact.c:1879
void StartupReplicationOrigin(void)
Definition origin.c:740
@ DB_IN_PRODUCTION
Definition pg_control.h:105
@ DB_IN_CRASH_RECOVERY
Definition pg_control.h:103
const void size_t len
void pgstat_restore_stats(void)
Definition pgstat.c:525
void pgstat_discard_stats(void)
Definition pgstat.c:537
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition procarray.c:1045
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition procarray.c:1014
static void set_ps_display(const char *activity)
Definition ps_status.h:40
void ResetUnloggedRelations(int op)
Definition reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition reinit.h:27
void RelationCacheInitFileRemove(void)
Definition relcache.c:6916
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition slot.c:2402
void DeleteAllExportedSnapshotFiles(void)
Definition snapmgr.c:1587
void InitRecoveryTransactionEnvironment(void)
Definition standby.c:96
void ShutdownRecoveryTransactionEnvironment(void)
Definition standby.c:162
@ SUBXIDS_IN_SUBTRANS
Definition standby.h:123
TransactionId oldestRunningXid
Definition standby.h:134
TransactionId nextXid
Definition standby.h:133
TransactionId latestCompletedXid
Definition standby.h:137
subxids_array_status subxid_status
Definition standby.h:132
TransactionId * xids
Definition standby.h:139
FullTransactionId latestCompletedXid
Definition transam.h:238
pg_atomic_uint64 logInsertResult
Definition xlog.c:477
uint64 PrevBytePos
Definition xlog.c:415
XLogRecPtr Flush
Definition xlog.c:329
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition subtrans.c:302
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition timeout.h:38
#define TransactionIdRetreat(dest)
Definition transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition transam.h:103
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
void RecoverPreparedTransactions(void)
Definition twophase.c:2089
void restoreTwoPhaseData(void)
Definition twophase.c:1910
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1972
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2051
void WalSndWakeup(bool physical, bool logical)
Definition walsender.c:3958
void UpdateFullPageWrites(void)
Definition xlog.c:8755
static void ValidateXLOGDirectoryStructure(void)
Definition xlog.c:4154
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition xlog.c:7979
static void XLogReportParameters(void)
Definition xlog.c:8674
static bool PerformRecoveryXLogAction(void)
Definition xlog.c:6780
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition xlog.c:5706
static bool lastFullPageWrites
Definition xlog.c:224
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition xlog.c:5631
static void CheckRequiredParameterValues(void)
Definition xlog.c:5802
static void RemoveTempXlogFiles(void)
Definition xlog.c:3887
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition xlog.c:5618
#define TABLESPACE_MAP_OLD
Definition xlog.h:336
#define TABLESPACE_MAP
Definition xlog.h:335
#define STANDBY_SIGNAL_FILE
Definition xlog.h:331
#define BACKUP_LABEL_OLD
Definition xlog.h:333
#define BACKUP_LABEL_FILE
Definition xlog.h:332
#define RECOVERY_SIGNAL_FILE
Definition xlog.h:330
@ RECOVERY_STATE_CRASH
Definition xlog.h:93
@ RECOVERY_STATE_ARCHIVE
Definition xlog.h:94
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
static XLogRecPtr abortedRecPtr
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
TimeLineID recoveryTargetTLI
HotStandbyState standbyState
Definition xlogutils.c:53
bool InRecovery
Definition xlogutils.c:50
@ STANDBY_DISABLED
Definition xlogutils.h:52
void WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
Definition xlogwait.c:320
@ WAIT_LSN_TYPE_STANDBY_REPLAY
Definition xlogwait.h:39
@ WAIT_LSN_TYPE_STANDBY_FLUSH
Definition xlogwait.h:41
@ WAIT_LSN_TYPE_STANDBY_WRITE
Definition xlogwait.h:40

References abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, XLogCtlData::data_checksum_version, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg, errmsg_internal(), FATAL, fb(), findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, CheckPoint::logicalDecodingEnabled, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), memcpy(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, 
TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_OFF, pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetLocalDataChecksumState(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire(), SpinLockRelease(), STANDBY_DISABLED, STANDBY_SIGNAL_FILE, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupLogicalDecodingStatus(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, 
CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), ValidateXLOGDirectoryStructure(), WAIT_LSN_TYPE_STANDBY_FLUSH, WAIT_LSN_TYPE_STANDBY_REPLAY, WAIT_LSN_TYPE_STANDBY_WRITE, WaitLSNWakeup(), WakeupCheckpointer(), WalSndWakeup(), WARNING, XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogChecksums(), XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsValid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char * buf,
size_t  bufsize 
)
static

Definition at line 5618 of file xlog.c.

5619{
5621 "%Y-%m-%d %H:%M:%S %Z",
5623
5624 return buf;
5625}
#define bufsize
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition localtime.c:1345
PGDLLIMPORT pg_tz * log_timezone
Definition pgtz.c:31

References buf, bufsize, fb(), log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6705 of file xlog.c.

6706{
6707 /* initialize minRecoveryPoint to this record */
6710 if (ControlFile->minRecoveryPoint < EndRecPtr)
6711 {
6712 ControlFile->minRecoveryPoint = EndRecPtr;
6713 ControlFile->minRecoveryPointTLI = replayTLI;
6714 }
6715 /* update local copy */
6718
6719 /*
6720 * The startup process can update its local copy of minRecoveryPoint from
6721 * this point.
6722 */
6724
6726
6727 /*
6728 * We update SharedRecoveryState while holding the lock on ControlFileLock
6729 * so both states are consistent in shared memory.
6730 */
6734
6736}
static bool updateMinRecoveryPoint
Definition xlog.c:668

References ControlFile, DB_IN_ARCHIVE_RECOVERY, fb(), XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire(), SpinLockRelease(), ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 7330 of file xlog.c.

7331{
7332 /*
7333 * The status is reported only for end-of-recovery and shutdown
7334 * checkpoints or shutdown restartpoints. Updating the ps display is
7335 * useful in those situations as it may not be possible to rely on
7336 * pg_stat_activity to see the status of the checkpointer or the startup
7337 * process.
7338 */
7340 return;
7341
7342 if (reset)
7343 set_ps_display("");
7344 else
7345 {
7346 char activitymsg[128];
7347
7348 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
7349 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
7350 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
7351 restartpoint ? "restartpoint" : "checkpoint");
7353 }
7354}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, fb(), reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 7292 of file xlog.c.

7293{
7294 /*
7295 * To estimate the number of segments consumed between checkpoints, keep a
7296 * moving average of the amount of WAL generated in previous checkpoint
7297 * cycles. However, if the load is bursty, with quiet periods and busy
7298 * periods, we want to cater for the peak load. So instead of a plain
7299 * moving average, let the average decline slowly if the previous cycle
7300 * used less WAL than estimated, but bump it up immediately if it used
7301 * more.
7302 *
7303 * When checkpoints are triggered by max_wal_size, this should converge to
7304 * CheckpointSegments * wal_segment_size.
7305 *
7306 * Note: This doesn't pay any attention to what caused the checkpoint.
7307 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
7308 * starting a base backup, are counted the same as those created
7309 * automatically. The slow-decline will largely mask them out, if they are
7310 * not frequent. If they are frequent, it seems reasonable to count them
7311 * in as any others; if you issue a manual checkpoint every 5 minutes and
7312 * never let a timed checkpoint happen, it makes sense to base the
7313 * preallocation on that 5 minute interval rather than whatever
7314 * checkpoint_timeout is set to.
7315 */
7316 PrevCheckPointDistance = nbytes;
7317 if (CheckPointDistanceEstimate < nbytes)
7319 else
7321 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
7322}

References CheckPointDistanceEstimate, fb(), and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8755 of file xlog.c.

8756{
8758 bool recoveryInProgress;
8759
8760 /*
8761 * Do nothing if full_page_writes has not been changed.
8762 *
8763 * It's safe to check the shared full_page_writes without the lock,
8764 * because we assume that there is no concurrently running process which
8765 * can update it.
8766 */
8767 if (fullPageWrites == Insert->fullPageWrites)
8768 return;
8769
8770 /*
8771 * Perform this outside critical section so that the WAL insert
8772 * initialization done by RecoveryInProgress() doesn't trigger an
8773 * assertion failure.
8774 */
8776
8778
8779 /*
8780 * It's always safe to take full page images, even when not strictly
8781 * required, but not the other way round. So if we're setting full_page_writes
8782 * to true, first set it true and then write the WAL record. If we're
8783 * setting it to false, first write the WAL record and then set the global
8784 * flag.
8785 */
8786 if (fullPageWrites)
8787 {
8789 Insert->fullPageWrites = true;
8791 }
8792
8793 /*
8794 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8795 * full_page_writes during archive recovery, if required.
8796 */
8798 {
8800 XLogRegisterData(&fullPageWrites, sizeof(bool));
8801
8803 }
8804
8805 if (!fullPageWrites)
8806 {
8808 Insert->fullPageWrites = false;
8810 }
8812}
#define XLOG_FPW_CHANGE
Definition pg_control.h:80

References END_CRIT_SECTION, fb(), fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char * filename)
static

Definition at line 3867 of file xlog.c.

3868{
3869 uint32 tli;
3870 XLogSegNo segno;
3871
3873
3875 if (segno > XLogCtl->lastRemovedSegNo)
3876 XLogCtl->lastRemovedSegNo = segno;
3878}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire(), SpinLockRelease(), wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2721 of file xlog.c.

2722{
2723 /* Quick check using our local copy of the variable */
2724 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2725 return;
2726
2727 /*
2728 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2729 * i.e., we're doing crash recovery. We never modify the control file's
2730 * value in that case, so we can short-circuit future checks here too. The
2731 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2732 * updated until crash recovery finishes. We only do this for the startup
2733 * process as it should not update its own reference of minRecoveryPoint
2734 * until it has finished crash recovery to make sure that all WAL
2735 * available is replayed in this case. This also saves from extra locks
2736 * taken on the control file from the startup process.
2737 */
2739 {
2740 updateMinRecoveryPoint = false;
2741 return;
2742 }
2743
2745
2746 /* update local copy */
2749
2751 updateMinRecoveryPoint = false;
2752 else if (force || LocalMinRecoveryPoint < lsn)
2753 {
2756
2757 /*
2758 * To avoid having to update the control file too often, we update it
2759 * all the way to the last record being replayed, even though 'lsn'
2760 * would suffice for correctness. This also allows the 'force' case
2761 * to not need a valid 'lsn' value.
2762 *
2763 * Another important reason for doing it this way is that the passed
2764 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2765 * the caller got it from a corrupted heap page. Accepting such a
2766 * value as the min recovery point would prevent us from coming up at
2767 * all. Instead, we just log a warning and continue with recovery.
2768 * (See also the comments about corrupt LSNs in XLogFlush.)
2769 */
2771 if (!force && newMinRecoveryPoint < lsn)
2772 elog(WARNING,
2773 "xlog min recovery request %X/%08X is past current point %X/%08X",
2775
2776 /* update control file */
2778 {
2784
2786 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2789 }
2790 }
2792}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), fb(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsValid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4154 of file xlog.c.

4155{
4156 char path[MAXPGPATH];
4157 struct stat stat_buf;
4158
4159 /* Check for pg_wal; if it doesn't exist, error out */
4160 if (stat(XLOGDIR, &stat_buf) != 0 ||
4161 !S_ISDIR(stat_buf.st_mode))
4162 ereport(FATAL,
4164 errmsg("required WAL directory \"%s\" does not exist",
4165 XLOGDIR)));
4166
4167 /* Check for archive_status */
4168 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4169 if (stat(path, &stat_buf) == 0)
4170 {
4171 /* Check for weird cases where it exists but isn't a directory */
4172 if (!S_ISDIR(stat_buf.st_mode))
4173 ereport(FATAL,
4175 errmsg("required WAL directory \"%s\" does not exist",
4176 path)));
4177 }
4178 else
4179 {
4180 ereport(LOG,
4181 (errmsg("creating missing WAL directory \"%s\"", path)));
4182 if (MakePGDirectory(path) < 0)
4183 ereport(FATAL,
4185 errmsg("could not create missing directory \"%s\": %m",
4186 path)));
4187 }
4188
4189 /* Check for summaries */
4190 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4191 if (stat(path, &stat_buf) == 0)
4192 {
4193 /* Check for weird cases where it exists but isn't a directory */
4194 if (!S_ISDIR(stat_buf.st_mode))
4195 ereport(FATAL,
4196 (errmsg("required WAL directory \"%s\" does not exist",
4197 path)));
4198 }
4199 else
4200 {
4201 ereport(LOG,
4202 (errmsg("creating missing WAL directory \"%s\"", path)));
4203 if (MakePGDirectory(path) < 0)
4204 ereport(FATAL,
4205 (errmsg("could not create missing directory \"%s\": %m",
4206 path)));
4207 }
4208}
int MakePGDirectory(const char *directoryName)
Definition fd.c:3963
#define S_ISDIR(m)
Definition win32_port.h:315

References ereport, errcode_for_file_access(), errmsg, FATAL, fb(), LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1545 of file xlog.c.

1546{
1552 int i;
1553
1554 if (MyProc == NULL)
1555 elog(PANIC, "cannot wait without a PGPROC structure");
1556
1557 /*
1558 * Check if there's any work to do. Use a barrier to ensure we get the
1559 * freshest value.
1560 */
1562 if (upto <= inserted)
1563 return inserted;
1564
1565 /* Read the current insert position */
1566 SpinLockAcquire(&Insert->insertpos_lck);
1567 bytepos = Insert->CurrBytePos;
1568 SpinLockRelease(&Insert->insertpos_lck);
1570
1571 /*
1572 * No-one should request to flush a piece of WAL that hasn't even been
1573 * reserved yet. However, it can happen if there is a block with a bogus
1574 * LSN on disk, for example. XLogFlush checks for that situation and
1575 * complains, but only after the flush. Here we just assume that to mean
1576 * that all WAL that has been reserved needs to be finished. In this
1577 * corner-case, the return value can be smaller than 'upto' argument.
1578 */
1579 if (upto > reservedUpto)
1580 {
1581 ereport(LOG,
1582 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1585 }
1586
1587 /*
1588 * Loop through all the locks, sleeping on any in-progress insert older
1589 * than 'upto'.
1590 *
1591 * finishedUpto is our return value, indicating the point up to which all
1592 * the WAL insertions have been finished. Initialize it to the head of
1593 * reserved WAL, and as we iterate through the insertion locks, back it
1594 * out for any insertion that's still in progress.
1595 */
1597 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1598 {
1600
1601 do
1602 {
1603 /*
1604 * See if this insertion is in progress. LWLockWaitForVar will
1605 * wait for the lock to be released, or for the 'value' to be set
1606 * by a LWLockUpdateVar call. When a lock is initially acquired,
1607 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1608 * know where it's inserting yet. We will have to wait for it. If
1609 * it's a small insertion, the record will most likely fit on the
1610 * same page and the inserter will release the lock without ever
1611 * calling LWLockUpdateVar. But if it has to sleep, it will
1612 * advertise the insertion point with LWLockUpdateVar before
1613 * sleeping.
1614 *
1615 * In this loop we are only waiting for insertions that started
1616 * before WaitXLogInsertionsToFinish was called. The lack of
1617 * memory barriers in the loop means that we might see locks as
1618 * "unused" that have since become used. This is fine because
1619 * they only can be used for later insertions that we would not
1620 * want to wait on anyway. Not taking a lock to acquire the
1621 * current insertingAt value means that we might see older
1622 * insertingAt values. This is also fine, because if we read a
1623 * value too old, we will add ourselves to the wait queue, which
1624 * contains atomic operations.
1625 */
1626 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1629 {
1630 /* the lock was free, so no insertion in progress */
1632 break;
1633 }
1634
1635 /*
1636 * This insertion is still in progress. Have to wait, unless the
1637 * inserter has proceeded past 'upto'.
1638 */
1639 } while (insertingat < upto);
1640
1643 }
1644
1645 /*
1646 * Advance the limit we know to have been inserted and return the freshest
1647 * value we know of, which might be beyond what we requested if somebody
1648 * is concurrently doing this with an 'upto' pointer ahead of us.
1649 */
1651 finishedUpto);
1652
1653 return finishedUpto;
1654}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition atomics.h:595
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition lwlock.c:1566
pg_atomic_uint64 insertingAt
Definition xlog.c:377

References elog, ereport, errmsg, fb(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire(), SpinLockRelease(), WALInsertLocks, XLogBytePosToEndRecPtr(), XLogCtl, and XLogRecPtrIsValid.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1412 of file xlog.c.

1413{
1414 bool immed;
1415
1416 /*
1417 * It doesn't matter which of the WAL insertion locks we acquire, so try
1418 * the one we used last time. If the system isn't particularly busy, it's
1419 * a good bet that it's still available, and it's good to have some
1420 * affinity to a particular lock so that you don't unnecessarily bounce
1421 * cache lines between processes when there's no contention.
1422 *
1423 * If this is the first time through in this backend, pick a lock
1424 * (semi-)randomly. This allows the locks to be used evenly if you have a
1425 * lot of very short connections.
1426 */
1427 static int lockToTry = -1;
1428
1429 if (lockToTry == -1)
1432
1433 /*
1434 * The insertingAt value is initially set to 0, as we don't know our
1435 * insert location yet.
1436 */
1438 if (!immed)
1439 {
1440 /*
1441 * If we couldn't get the lock immediately, try another lock next
1442 * time. On a system with more insertion locks than concurrent
1443 * inserters, this causes all the inserters to eventually migrate to a
1444 * lock that no-one else is using. On a system with more inserters
1445 * than locks, it still helps to distribute the inserters evenly
1446 * across the locks.
1447 */
1449 }
1450}
ProcNumber MyProcNumber
Definition globals.c:92
static int MyLockNo
Definition xlog.c:686

References fb(), LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1457 of file xlog.c.

1458{
1459 int i;
1460
1461 /*
1462 * When holding all the locks, all but the last lock's insertingAt
1463 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1464 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1465 */
1466 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1467 {
1472 }
1473 /* Variable value reset to 0 at release */
1475
1476 holdingAllLocks = true;
1477}
#define PG_UINT64_MAX
Definition c.h:677
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1702
static bool holdingAllLocks
Definition xlog.c:687

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1486 of file xlog.c.

1487{
1488 if (holdingAllLocks)
1489 {
1490 int i;
1491
1492 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1495 0);
1496
1497 holdingAllLocks = false;
1498 }
1499 else
1500 {
1503 0);
1504 }
1505}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1840

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1512 of file xlog.c.

1513{
1514 if (holdingAllLocks)
1515 {
1516 /*
1517 * We use the last lock to mark our actual position, see comments in
1518 * WALInsertLockAcquireExclusive.
1519 */
1522 insertingAt);
1523 }
1524 else
1527 insertingAt);
1528}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char * dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1789 of file xlog.c.

1791{
1792 char *pdst = dstbuf;
1793 XLogRecPtr recptr = startptr;
1795 Size nbytes = count;
1796
1798 return 0;
1799
1800 Assert(XLogRecPtrIsValid(startptr));
1801
1802 /*
1803 * Caller should ensure that the requested data has been inserted into WAL
1804 * buffers before we try to read it.
1805 */
1807 if (startptr + count > inserted)
1808 ereport(ERROR,
1809 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1810 LSN_FORMAT_ARGS(startptr + count),
1812
1813 /*
1814 * Loop through the buffers without a lock. For each buffer, atomically
1815 * read and verify the end pointer, then copy the data out, and finally
1816 * re-read and re-verify the end pointer.
1817 *
1818 * Once a page is evicted, it never returns to the WAL buffers, so if the
1819 * end pointer matches the expected end pointer before and after we copy
1820 * the data, then the right page must have been present during the data
1821 * copy. Read barriers are necessary to ensure that the data copy actually
1822 * happens between the two verification steps.
1823 *
1824 * If either verification fails, we simply terminate the loop and return
1825 * with the data that had been already copied out successfully.
1826 */
1827 while (nbytes > 0)
1828 {
1829 uint32 offset = recptr % XLOG_BLCKSZ;
1832 XLogRecPtr endptr;
1833 const char *page;
1834 const char *psrc;
1836
1837 /*
1838 * Calculate the end pointer we expect in the xlblocks array if the
1839 * correct page is present.
1840 */
1841 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1842
1843 /*
1844 * First verification step: check that the correct page is present in
1845 * the WAL buffers.
1846 */
1848 if (expectedEndPtr != endptr)
1849 break;
1850
1851 /*
1852 * The correct page is present (or was at the time the endptr was
1853 * read; must re-verify later). Calculate pointer to source data and
1854 * determine how much data to read from this page.
1855 */
1856 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1857 psrc = page + offset;
1858 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1859
1860 /*
1861 * Ensure that the data copy and the first verification step are not
1862 * reordered.
1863 */
1865
1866 /* data copy */
1868
1869 /*
1870 * Ensure that the data copy and the second verification step are not
1871 * reordered.
1872 */
1874
1875 /*
1876 * Second verification step: check that the page we read from wasn't
1877 * evicted while we were copying the data.
1878 */
1880 if (expectedEndPtr != endptr)
1881 break;
1882
1883 pdst += npagebytes;
1884 recptr += npagebytes;
1885 nbytes -= npagebytes;
1886 }
1887
1888 Assert(pdst - dstbuf <= count);
1889
1890 return pdst - dstbuf;
1891}
#define pg_read_barrier()
Definition atomics.h:154
#define Min(x, y)
Definition c.h:1091
TimeLineID GetWALInsertionTimeLine(void)
Definition xlog.c:7016

References Assert, ereport, errmsg, ERROR, fb(), GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, memcpy(), Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsValid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4300 of file xlog.c.

4301{
4302 int fd;
4303 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4304
4305 /*
4306 * Initialize version and compatibility-check fields
4307 */
4310
4313
4319
4322
4325
4326 ControlFile->float8ByVal = true; /* vestigial */
4327
4328 /*
4329 * Initialize the default 'char' signedness.
4330 *
4331 * The signedness of the char type is implementation-defined. For instance
4332 * on x86 architecture CPUs, the char data type is typically treated as
4333 * signed by default, whereas on aarch architecture CPUs, it is typically
4334 * treated as unsigned by default. In v17 or earlier, we accidentally let
4335 * C implementation signedness affect persistent data. This led to
4336 * inconsistent results when comparing char data across different
4337 * platforms.
4338 *
4339 * This flag can be used as a hint to ensure consistent behavior for
4340 * pre-v18 data files that store data sorted by the 'char' type on disk,
4341 * especially in cross-platform replication scenarios.
4342 *
4343 * Newly created database clusters unconditionally set the default char
4344 * signedness to true. pg_upgrade changes this flag for clusters that were
4345 * initialized on signedness=false platforms. As a result,
4346 * signedness=false setting will become rare over time. If we had known
4347 * about this problem during the last development cycle that forced initdb
4348 * (v8.3), we would have made all clusters signed or all clusters
4349 * unsigned. Making pg_upgrade the only source of signedness=false will
4350 * cause the population of database clusters to converge toward that
4351 * retrospective ideal.
4352 */
4354
4355 /* Contents are protected with a CRC */
4361
4362 /*
4363 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4364 * the excess over sizeof(ControlFileData). This reduces the odds of
4365 * premature-EOF errors when reading pg_control. We'll still fail when we
4366 * check the contents of the file, but hopefully with a more specific
4367 * error than "couldn't read pg_control".
4368 */
4369 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4370 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4371
4374 if (fd < 0)
4375 ereport(PANIC,
4377 errmsg("could not create file \"%s\": %m",
4379
4380 errno = 0;
4383 {
4384 /* if write didn't set errno, assume problem is no disk space */
4385 if (errno == 0)
4386 errno = ENOSPC;
4387 ereport(PANIC,
4389 errmsg("could not write to file \"%s\": %m",
4391 }
4393
4395 if (pg_fsync(fd) != 0)
4396 ereport(PANIC,
4398 errmsg("could not fsync file \"%s\": %m",
4401
4402 if (close(fd) != 0)
4403 ereport(PANIC,
4405 errmsg("could not close file \"%s\": %m",
4407}
#define PG_CONTROL_FILE_SIZE
Definition pg_control.h:266

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, memcpy(), ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog2_redo()

void xlog2_redo ( XLogReaderState * record)

Definition at line 9256 of file xlog.c.

9257{
9258 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
9259
9260 if (info == XLOG2_CHECKSUMS)
9261 {
9263
9264 memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state));
9265
9267 XLogCtl->data_checksum_version = state.new_checksum_state;
9269
9270 /*
9271 * Block on a procsignalbarrier to await all processes having seen the
9272 * change to checksum status. Once the barrier has been passed we can
9273 * initiate the corresponding processing.
9274 */
9275 EmitAndWaitDataChecksumsBarrier(state.new_checksum_state);
9276 }
9277}
uint8_t uint8
Definition c.h:622
void EmitAndWaitDataChecksumsBarrier(uint32 state)
#define XLOG2_CHECKSUMS
Definition pg_control.h:90
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:410
#define XLogRecGetData(decoder)
Definition xlogreader.h:415

References XLogCtlData::data_checksum_version, EmitAndWaitDataChecksumsBarrier(), fb(), XLogCtlData::info_lck, memcpy(), SpinLockAcquire(), SpinLockRelease(), XLOG2_CHECKSUMS, XLogCtl, XLogRecGetData, and XLogRecGetInfo.

◆ xlog_redo()

void xlog_redo ( XLogReaderState * record)

Definition at line 8824 of file xlog.c.

8825{
8826 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8827 XLogRecPtr lsn = record->EndRecPtr;
8828
8829 /*
8830 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8831 * XLOG_FPI_FOR_HINT records.
8832 */
8833 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8834 !XLogRecHasAnyBlockRefs(record));
8835
8836 if (info == XLOG_NEXTOID)
8837 {
8838 Oid nextOid;
8839
8840 /*
8841 * We used to try to take the maximum of TransamVariables->nextOid and
8842 * the recorded nextOid, but that fails if the OID counter wraps
8843 * around. Since no OID allocation should be happening during replay
8844 * anyway, better to just believe the record exactly. We still take
8845 * OidGenLock while setting the variable, just in case.
8846 */
8847 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8849 TransamVariables->nextOid = nextOid;
8852 }
8853 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8854 {
8855 CheckPoint checkPoint;
8856 TimeLineID replayTLI;
8857
8858 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8859 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8861 TransamVariables->nextXid = checkPoint.nextXid;
8864 TransamVariables->nextOid = checkPoint.nextOid;
8868 checkPoint.nextMultiOffset);
8869
8871 checkPoint.oldestMultiDB);
8872
8877
8878 /*
8879 * No need to set oldestClogXid here as well; it'll be set when we
8880 * redo an xl_clog_truncate if it changed since initialization.
8881 */
8882 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8883
8884 /*
8885 * If we see a shutdown checkpoint while waiting for an end-of-backup
8886 * record, the backup was canceled and the end-of-backup record will
8887 * never arrive.
8888 */
8892 ereport(PANIC,
8893 (errmsg("online backup was canceled, recovery cannot continue")));
8894
8895 /*
8896 * If we see a shutdown checkpoint, we know that nothing was running
8897 * on the primary at this point. So fake-up an empty running-xacts
8898 * record and use that here and now. Recover additional standby state
8899 * for prepared transactions.
8900 */
8902 {
8903 TransactionId *xids;
8904 int nxids;
8906 TransactionId latestCompletedXid;
8908
8910
8911 /* Update pg_subtrans entries for any prepared transactions */
8913
8914 /*
8915 * Construct a RunningTransactions snapshot representing a shut
8916 * down server, with only prepared transactions still alive. We're
8917 * never overflowed at this point because all subxids are listed
8918 * with their parent prepared transactions.
8919 */
8920 running.xcnt = nxids;
8921 running.subxcnt = 0;
8923 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8925 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8926 TransactionIdRetreat(latestCompletedXid);
8927 Assert(TransactionIdIsNormal(latestCompletedXid));
8928 running.latestCompletedXid = latestCompletedXid;
8929 running.xids = xids;
8930
8932 }
8933
8934 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8939
8940 /*
8941 * We should've already switched to the new TLI before replaying this
8942 * record.
8943 */
8944 (void) GetCurrentReplayRecPtr(&replayTLI);
8945 if (checkPoint.ThisTimeLineID != replayTLI)
8946 ereport(PANIC,
8947 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8948 checkPoint.ThisTimeLineID, replayTLI)));
8949
8950 RecoveryRestartPoint(&checkPoint, record);
8951
8952 /*
8953 * After replaying a checkpoint record, free all smgr objects.
8954 * Otherwise we would never do so for dropped relations, as the
8955 * startup does not process shared invalidation messages or call
8956 * AtEOXact_SMgr().
8957 */
8959 }
8960 else if (info == XLOG_CHECKPOINT_ONLINE)
8961 {
8962 CheckPoint checkPoint;
8963 TimeLineID replayTLI;
8964 bool new_state = false;
8965 int old_state;
8966
8967 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8968 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8971 checkPoint.nextXid))
8972 TransamVariables->nextXid = checkPoint.nextXid;
8974
8975 /*
8976 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8977 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8978 * counter is from the start of the checkpoint and might well be stale
8979 * compared to later XLOG_NEXTOID records. We could try to take the
8980 * maximum of the nextOid counter and our latest value, but since
8981 * there's no particular guarantee about the speed with which the OID
8982 * counter wraps around, that's a risky thing to do. In any case,
8983 * users of the nextOid counter are required to avoid assignment of
8984 * duplicates, so that a somewhat out-of-date value should be safe.
8985 */
8986
8987 /* Handle multixact */
8989 checkPoint.nextMultiOffset);
8990
8991 /*
8992 * NB: This may perform multixact truncation when replaying WAL
8993 * generated by an older primary.
8994 */
8996 checkPoint.oldestMultiDB);
8998 checkPoint.oldestXid))
9000 checkPoint.oldestXidDB);
9001 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
9007
9008 /* TLI should not change in an on-line checkpoint */
9009 (void) GetCurrentReplayRecPtr(&replayTLI);
9010 if (checkPoint.ThisTimeLineID != replayTLI)
9011 ereport(PANIC,
9012 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
9013 checkPoint.ThisTimeLineID, replayTLI)));
9014
9015 RecoveryRestartPoint(&checkPoint, record);
9016
9017 /*
9018 * If the data checksum state changed, we need to emit a barrier.
9019 */
9022 if (checkPoint.dataChecksumState != old_state)
9023 new_state = true;
9025
9026 if (new_state)
9028
9029 /*
9030 * After replaying a checkpoint record, free all smgr objects.
9031 * Otherwise we would never do so for dropped relations, as the
9032 * startup does not process shared invalidation messages or call
9033 * AtEOXact_SMgr().
9034 */
9036 }
9037 else if (info == XLOG_OVERWRITE_CONTRECORD)
9038 {
9039 /* nothing to do here, handled in xlogrecovery_redo() */
9040 }
9041 else if (info == XLOG_END_OF_RECOVERY)
9042 {
9044 TimeLineID replayTLI;
9045
9046 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
9047
9048 /*
9049 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
9050 * but this case is rarer and harder to test, so the benefit doesn't
9051 * outweigh the potential extra cost of maintenance.
9052 */
9053
9054 /*
9055 * We should've already switched to the new TLI before replaying this
9056 * record.
9057 */
9058 (void) GetCurrentReplayRecPtr(&replayTLI);
9059 if (xlrec.ThisTimeLineID != replayTLI)
9060 ereport(PANIC,
9061 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
9062 xlrec.ThisTimeLineID, replayTLI)));
9063 }
9064 else if (info == XLOG_NOOP)
9065 {
9066 /* nothing to do here */
9067 }
9068 else if (info == XLOG_SWITCH)
9069 {
9070 /* nothing to do here */
9071 }
9072 else if (info == XLOG_RESTORE_POINT)
9073 {
9074 /* nothing to do here, handled in xlogrecovery.c */
9075 }
9076 else if (info == XLOG_ASSIGN_LSN)
9077 {
9078 /* nothing to do here, see XLogGetFakeLSN() */
9079 }
9080 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
9081 {
9082 /*
9083 * XLOG_FPI records contain nothing else but one or more block
9084 * references. Every block reference must include a full-page image
9085 * even if full_page_writes was disabled when the record was generated
9086 * - otherwise there would be no point in this record.
9087 *
9088 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
9089 * WAL-logged because of a hint bit update. They are only generated
9090 * when checksums and/or wal_log_hints are enabled. They may include
9091 * no full-page images if full_page_writes was disabled when they were
9092 * generated. In this case there is nothing to do here.
9093 *
9094 * No recovery conflicts are generated by these generic records - if a
9095 * resource manager needs to generate conflicts, it has to define a
9096 * separate WAL record type and redo routine.
9097 */
9098 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
9099 {
9100 Buffer buffer;
9101
9102 if (!XLogRecHasBlockImage(record, block_id))
9103 {
9104 if (info == XLOG_FPI)
9105 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
9106 continue;
9107 }
9108
9109 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
9110 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
9111 UnlockReleaseBuffer(buffer);
9112 }
9113 }
9114 else if (info == XLOG_BACKUP_END)
9115 {
9116 /* nothing to do here, handled in xlogrecovery_redo() */
9117 }
9118 else if (info == XLOG_PARAMETER_CHANGE)
9119 {
9121
9122 /* Update our copy of the parameters in pg_control */
9123 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
9124
9126 ControlFile->MaxConnections = xlrec.MaxConnections;
9127 ControlFile->max_worker_processes = xlrec.max_worker_processes;
9128 ControlFile->max_wal_senders = xlrec.max_wal_senders;
9129 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
9130 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
9131 ControlFile->wal_level = xlrec.wal_level;
9132 ControlFile->wal_log_hints = xlrec.wal_log_hints;
9133
9134 /*
9135 * Update minRecoveryPoint to ensure that if recovery is aborted, we
9136 * recover back up to this point before allowing hot standby again.
9137 * This is important if the max_* settings are decreased, to ensure
9138 * you don't run queries against the WAL preceding the change. The
9139 * local copies cannot be updated as long as crash recovery is
9140 * happening and we expect all the WAL to be replayed.
9141 */
9143 {
9146 }
9148 {
9149 TimeLineID replayTLI;
9150
9151 (void) GetCurrentReplayRecPtr(&replayTLI);
9153 ControlFile->minRecoveryPointTLI = replayTLI;
9154 }
9155
9156 CommitTsParameterChange(xlrec.track_commit_timestamp,
9158 ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
9159
9162
9163 /* Check to see if any parameter change gives a problem on recovery */
9165 }
9166 else if (info == XLOG_FPW_CHANGE)
9167 {
9168 bool fpw;
9169
9170 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
9171
9172 /*
9173 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
9174 * do_pg_backup_start() and do_pg_backup_stop() can check whether
9175 * full_page_writes has been disabled during online backup.
9176 */
9177 if (!fpw)
9178 {
9183 }
9184
9185 /* Keep track of full_page_writes */
9187 }
9188 else if (info == XLOG_CHECKPOINT_REDO)
9189 {
9191 bool new_state = false;
9192
9194
9196 XLogCtl->data_checksum_version = redo_rec.data_checksum_version;
9197 if (redo_rec.data_checksum_version != ControlFile->data_checksum_version)
9198 new_state = true;
9200
9201 if (new_state)
9202 EmitAndWaitDataChecksumsBarrier(redo_rec.data_checksum_version);
9203 }
9204 else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE)
9205 {
9206 bool status;
9207
9208 memcpy(&status, XLogRecGetData(record), sizeof(bool));
9209
9210 /*
9211 * We need to toggle the logical decoding status and update the
9212 * XLogLogicalInfo cache of processes synchronously because
9213 * XLogLogicalInfoActive() is used even during read-only queries
9214 * (e.g., via RelationIsAccessibleInLogicalDecoding()). In the
9215 * 'disable' case, it is safe to invalidate existing slots after
9216 * disabling logical decoding because logical decoding cannot process
9217 * subsequent WAL records, which may not contain logical information.
9218 */
9219 if (status)
9221 else
9223
9224 elog(DEBUG1, "update logical decoding status to %d during recovery",
9225 status);
9226
9227 if (InRecovery && InHotStandby)
9228 {
9229 if (!status)
9230 {
9231 /*
9232 * Invalidate logical slots if we are in hot standby and the
9233 * primary disabled logical decoding.
9234 */
9236 0, InvalidOid,
9238 }
9239 else if (sync_replication_slots)
9240 {
9241 /*
9242 * Signal the postmaster to launch the slotsync worker.
9243 *
9244 * XXX: For simplicity, we keep the slotsync worker running
9245 * even after logical decoding is disabled. A future
9246 * improvement can consider starting and stopping the worker
9247 * based on logical decoding status change.
9248 */
9250 }
9251 }
9252 }
9253}
int Buffer
Definition buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5603
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition commit_ts.c:645
pid_t PostmasterPid
Definition globals.c:108
void DisableLogicalDecoding(void)
Definition logicalctl.c:489
void EnableLogicalDecoding(void)
Definition logicalctl.c:338
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2266
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2239
#define XLOG_RESTORE_POINT
Definition pg_control.h:79
#define XLOG_ASSIGN_LSN
Definition pg_control.h:84
#define XLOG_FPI
Definition pg_control.h:83
#define XLOG_FPI_FOR_HINT
Definition pg_control.h:82
#define XLOG_NEXTOID
Definition pg_control.h:75
#define XLOG_NOOP
Definition pg_control.h:74
#define XLOG_PARAMETER_CHANGE
Definition pg_control.h:78
#define XLOG_LOGICAL_DECODING_STATUS_CHANGE
Definition pg_control.h:87
@ RS_INVAL_WAL_LEVEL
Definition slot.h:66
bool sync_replication_slots
Definition slotsync.c:132
void smgrdestroyall(void)
Definition smgr.c:386
#define FullTransactionIdPrecedes(a, b)
Definition transam.h:51
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition xlog.c:8089
#define XLogRecMaxBlockId(decoder)
Definition xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:417
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition xlogutils.c:303
@ STANDBY_INITIALIZED
Definition xlogutils.h:53
#define InHotStandby
Definition xlogutils.h:60
@ BLK_RESTORED
Definition xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, CheckPoint::dataChecksumState, DEBUG1, DisableLogicalDecoding(), elog, EmitAndWaitDataChecksumsBarrier(), EnableLogicalDecoding(), XLogReaderState::EndRecPtr, ereport, errmsg, ERROR, fb(), FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, kill, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::max_locks_per_xact, ControlFileData::max_prepared_xacts, ControlFileData::max_wal_senders, ControlFileData::max_worker_processes, ControlFileData::MaxConnections, memcpy(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PostmasterPid, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetLocalDataChecksumState(), SetTransactionIdLimit(), SIGUSR1, smgrdestroyall(), SpinLockAcquire(), SpinLockRelease(), STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, 
RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, sync_replication_slots, CheckPoint::ThisTimeLineID, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), ControlFileData::wal_level, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_ASSIGN_LSN, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_LOGICAL_DECODING_STATUS_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsValid.

◆ XLogAssignLSN()

XLogRecPtr XLogAssignLSN ( void  )

Definition at line 8655 of file xlog.c.

8656{
8657 int dummy = 0;
8658
8659 /*
8660 * Records other than XLOG_SWITCH must have content. We use an integer 0
8661 * to satisfy this restriction.
8662 */
8665 XLogRegisterData(&dummy, sizeof(dummy));
8667}

References fb(), XLOG_ASSIGN_LSN, XLOG_MARK_UNIMPORTANT, XLogBeginInsert(), XLogInsert(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by XLogGetFakeLSN().

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 3006 of file xlog.c.

3007{
3009 bool flexible = true;
3010 static TimestampTz lastflush;
3012 int flushblocks;
3014
3015 /* XLOG doesn't need flushing during recovery */
3016 if (RecoveryInProgress())
3017 return false;
3018
3019 /*
3020 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3021 * so we can read it without a lock.
3022 */
3024
3025 /* read updated LogwrtRqst */
3029
3030 /* back off to last completed page boundary */
3031 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3032
3033 /* if we have already flushed that far, consider async commit records */
3035 if (WriteRqst.Write <= LogwrtResult.Flush)
3036 {
3040 flexible = false; /* ensure it all gets written */
3041 }
3042
3043 /*
3044 * If already known flushed, we're done. Just need to check if we are
3045 * holding an open file handle to a logfile that's no longer in use,
3046 * preventing the file from being deleted.
3047 */
3048 if (WriteRqst.Write <= LogwrtResult.Flush)
3049 {
3050 if (openLogFile >= 0)
3051 {
3054 {
3055 XLogFileClose();
3056 }
3057 }
3058 return false;
3059 }
3060
3061 /*
3062 * Determine how far to flush WAL, based on the wal_writer_delay and
3063 * wal_writer_flush_after GUCs.
3064 *
3065 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3066 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3067 * logic is the same in both places if you change this.
3068 */
3070 flushblocks =
3072
3073 if (WalWriterFlushAfter == 0 || lastflush == 0)
3074 {
3075 /* first call, or block based limits disabled */
3076 WriteRqst.Flush = WriteRqst.Write;
3077 lastflush = now;
3078 }
3080 {
3081 /*
3082 * Flush the writes at least every WalWriterDelay ms. This is
3083 * important to bound the amount of time it takes for an asynchronous
3084 * commit to hit disk.
3085 */
3086 WriteRqst.Flush = WriteRqst.Write;
3087 lastflush = now;
3088 }
3089 else if (flushblocks >= WalWriterFlushAfter)
3090 {
3091 /* exceeded wal_writer_flush_after blocks, flush */
3092 WriteRqst.Flush = WriteRqst.Write;
3093 lastflush = now;
3094 }
3095 else
3096 {
3097 /* no flushing, this time round */
3099 }
3100
3101#ifdef WAL_DEBUG
3102 if (XLOG_DEBUG)
3103 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3108#endif
3109
3111
3112 /* now wait for any in-progress insertions to finish and get write lock */
3116 if (WriteRqst.Write > LogwrtResult.Write ||
3118 {
3120 }
3122
3124
3125 /* wake up walsenders now that we've released heavily contended locks */
3127
3128 /*
3129 * If we flushed an LSN that someone was waiting for, notify the waiters.
3130 */
3131 if (waitLSNState &&
3135
3136 /*
3137 * Great, done. To take some work off the critical path, try to initialize
3138 * as many of the no-longer-needed WAL buffers for future use as we can.
3139 */
3141
3142 /*
3143 * If we determined that we need to write data, but somebody else
3144 * wrote/flushed already, it should be considered as being active, to
3145 * avoid hibernating too early.
3146 */
3147 return true;
3148}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1775
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1603
pg_atomic_uint64 minWaitedLSN[WAIT_LSN_TYPE_COUNT]
Definition xlogwait.h:85
XLogRecPtr asyncXactLSN
Definition xlog.c:464
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition walsender.h:64
int WalWriterFlushAfter
Definition walwriter.c:72
int WalWriterDelay
Definition walwriter.c:71
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
struct WaitLSNState * waitLSNState
Definition xlogwait.c:70
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, fb(), XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), WaitLSNState::minWaitedLSN, now(), openLogFile, openLogSegNo, pg_atomic_read_u64(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, TimestampDifferenceExceeds(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1939 of file xlog.c.

1940{
1946
1949
1951 {
1952 /* fits on first page of segment */
1953 if (bytesleft == 0)
1954 seg_offset = 0;
1955 else
1957 }
1958 else
1959 {
1960 /* account for the first page on segment with long header */
1963
1966
1967 if (bytesleft == 0)
1969 else
1971 }
1972
1974
1975 return result;
1976}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by GetXLogInsertEndRecPtr(), ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1899 of file xlog.c.

1900{
1906
1909
1911 {
1912 /* fits on first page of segment */
1914 }
1915 else
1916 {
1917 /* account for the first page on segment with long header */
1920
1923
1925 }
1926
1928
1929 return result;
1930}

References fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2301 of file xlog.c.

2302{
2304
2306
2308 return true;
2309 return false;
2310}

References CheckPointSegments, fb(), RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLogChecksums()

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 5023 of file xlog.c.

5024{
5025 int xbuffers;
5026
5027 xbuffers = NBuffers / 32;
5030 if (xbuffers < 8)
5031 xbuffers = 8;
5032 return xbuffers;
5033}

References fb(), NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemRequest().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3694 of file xlog.c.

3695{
3696 Assert(openLogFile >= 0);
3697
3698 /*
3699 * WAL segment files will not be re-read in normal operation, so we advise
3700 * the OS to release any cached pages. But do not do so if WAL archiving
3701 * or streaming is active, because archiver and walsender process could
3702 * use the cache to read the WAL segment.
3703 */
3704#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3705 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3707#endif
3708
3709 if (close(openLogFile) != 0)
3710 {
3711 char xlogfname[MAXFNAMELEN];
3712 int save_errno = errno;
3713
3715 errno = save_errno;
3716 ereport(PANIC,
3718 errmsg("could not close file \"%s\": %m", xlogfname)));
3719 }
3720
3721 openLogFile = -1;
3723}
void ReleaseExternalFD(void)
Definition fd.c:1225

References Assert, close, ereport, errcode_for_file_access(), errmsg, fb(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3473 of file xlog.c.

3476{
3477 char path[MAXPGPATH];
3478 char tmppath[MAXPGPATH];
3479 PGAlignedXLogBlock buffer;
3480 int srcfd;
3481 int fd;
3482 int nbytes;
3483
3484 /*
3485 * Open the source file
3486 */
3489 if (srcfd < 0)
3490 ereport(ERROR,
3492 errmsg("could not open file \"%s\": %m", path)));
3493
3494 /*
3495 * Copy into a temp file name.
3496 */
3497 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3498
3499 unlink(tmppath);
3500
3501 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3503 if (fd < 0)
3504 ereport(ERROR,
3506 errmsg("could not create file \"%s\": %m", tmppath)));
3507
3508 /*
3509 * Do the data copying.
3510 */
3511 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3512 {
3513 int nread;
3514
3515 nread = upto - nbytes;
3516
3517 /*
3518 * The part that is not read from the source file is filled with
3519 * zeros.
3520 */
3521 if (nread < sizeof(buffer))
3522 memset(buffer.data, 0, sizeof(buffer));
3523
3524 if (nread > 0)
3525 {
3526 int r;
3527
3528 if (nread > sizeof(buffer))
3529 nread = sizeof(buffer);
3531 r = read(srcfd, buffer.data, nread);
3532 if (r != nread)
3533 {
3534 if (r < 0)
3535 ereport(ERROR,
3537 errmsg("could not read file \"%s\": %m",
3538 path)));
3539 else
3540 ereport(ERROR,
3542 errmsg("could not read file \"%s\": read %d of %zu",
3543 path, r, (Size) nread)));
3544 }
3546 }
3547 errno = 0;
3549 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3550 {
3551 int save_errno = errno;
3552
3553 /*
3554 * If we fail to make the file, delete it to release disk space
3555 */
3556 unlink(tmppath);
3557 /* if write didn't set errno, assume problem is no disk space */
3559
3560 ereport(ERROR,
3562 errmsg("could not write to file \"%s\": %m", tmppath)));
3563 }
3565 }
3566
3568 if (pg_fsync(fd) != 0)
3571 errmsg("could not fsync file \"%s\": %m", tmppath)));
3573
3574 if (CloseTransientFile(fd) != 0)
3575 ereport(ERROR,
3577 errmsg("could not close file \"%s\": %m", tmppath)));
3578
3579 if (CloseTransientFile(srcfd) != 0)
3580 ereport(ERROR,
3582 errmsg("could not close file \"%s\": %m", path)));
3583
3584 /*
3585 * Now move the segment into place with its final name.
3586 */
3588 elog(ERROR, "InstallXLogFileSegment should not have failed");
3589}
int CloseTransientFile(int fd)
Definition fd.c:2855
int data_sync_elevel(int elevel)
Definition fd.c:3986
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
char data[XLOG_BLCKSZ]
Definition c.h:1231

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3435 of file xlog.c.

3436{
3437 bool ignore_added;
3438 char path[MAXPGPATH];
3439 int fd;
3440
3441 Assert(logtli != 0);
3442
3444 if (fd >= 0)
3445 return fd;
3446
3447 /* Now open original target segment (might not be file I just made) */
3450 if (fd < 0)
3451 ereport(ERROR,
3453 errmsg("could not open file \"%s\": %m", path)));
3454 return fd;
3455}
#define O_CLOEXEC
Definition win32_port.h:344

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool * added,
char * path 
)
static

Definition at line 3247 of file xlog.c.

3249{
3250 char tmppath[MAXPGPATH];
3253 int fd;
3254 int save_errno;
3257
3258 Assert(logtli != 0);
3259
3261
3262 /*
3263 * Try to use existent file (checkpoint maker may have created it already)
3264 */
3265 *added = false;
3268 if (fd < 0)
3269 {
3270 if (errno != ENOENT)
3271 ereport(ERROR,
3273 errmsg("could not open file \"%s\": %m", path)));
3274 }
3275 else
3276 return fd;
3277
3278 /*
3279 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3280 * another process is doing the same thing. If so, we will end up
3281 * pre-creating an extra log segment. That seems OK, and better than
3282 * holding the lock throughout this lengthy process.
3283 */
3284 elog(DEBUG2, "creating and filling new WAL file");
3285
3286 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3287
3288 unlink(tmppath);
3289
3292
3293 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3295 if (fd < 0)
3296 ereport(ERROR,
3298 errmsg("could not create file \"%s\": %m", tmppath)));
3299
3300 /* Measure I/O timing when initializing segment */
3302
3304 save_errno = 0;
3305 if (wal_init_zero)
3306 {
3307 ssize_t rc;
3308
3309 /*
3310 * Zero-fill the file. With this setting, we do this the hard way to
3311 * ensure that all the file space has really been allocated. On
3312 * platforms that allow "holes" in files, just seeking to the end
3313 * doesn't allocate intermediate space. This way, we know that we
3314 * have all the space and (after the fsync below) that all the
3315 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3316 * O_DSYNC will be sufficient to sync future writes to the log file.
3317 */
3319
3320 if (rc < 0)
3321 save_errno = errno;
3322 }
3323 else
3324 {
3325 /*
3326 * Otherwise, seeking to the end and writing a solitary byte is
3327 * enough.
3328 */
3329 errno = 0;
3330 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3331 {
3332 /* if write didn't set errno, assume no disk space */
3334 }
3335 }
3337
3338 /*
3339 * A full segment worth of data is written when using wal_init_zero. One
3340 * byte is written when not using it.
3341 */
3343 io_start, 1,
3345
3346 if (save_errno)
3347 {
3348 /*
3349 * If we fail to make the file, delete it to release disk space
3350 */
3351 unlink(tmppath);
3352
3353 close(fd);
3354
3355 errno = save_errno;
3356
3357 ereport(ERROR,
3359 errmsg("could not write to file \"%s\": %m", tmppath)));
3360 }
3361
3362 /* Measure I/O timing when flushing segment */
3364
3366 if (pg_fsync(fd) != 0)
3367 {
3368 save_errno = errno;
3369 close(fd);
3370 errno = save_errno;
3371 ereport(ERROR,
3373 errmsg("could not fsync file \"%s\": %m", tmppath)));
3374 }
3376
3378 IOOP_FSYNC, io_start, 1, 0);
3379
3380 if (close(fd) != 0)
3381 ereport(ERROR,
3383 errmsg("could not close file \"%s\": %m", tmppath)));
3384
3385 /*
3386 * Now move the segment into place with its final name. Cope with
3387 * possibility that someone else has created the file while we were
3388 * filling ours: if so, use ours to pre-create a future log segment.
3389 */
3391
3392 /*
3393 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3394 * that was a constant, but that was always a bit dubious: normally, at a
3395 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3396 * here, it was the offset from the insert location. We can't do the
3397 * normal XLOGfileslop calculation here because we don't have access to
3398 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3399 * CheckPointSegments.
3400 */
3403 logtli))
3404 {
3405 *added = true;
3406 elog(DEBUG2, "done creating and filling new WAL file");
3407 }
3408 else
3409 {
3410 /*
3411 * No need for any more future segments, or InstallXLogFileSegment()
3412 * failed to rename the file into place. If the rename failed, a
3413 * caller opening the file may fail.
3414 */
3415 unlink(tmppath);
3416 elog(DEBUG2, "abandoned new WAL file");
3417 }
3418
3419 return -1;
3420}
#define IO_DIRECT_WAL_INIT
Definition fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
Definition file_utils.c:709
@ IOCONTEXT_INIT
Definition pgstat.h:292
@ IOOP_WRITE
Definition pgstat.h:320
#define pg_pwrite
Definition port.h:248
bool wal_init_zero
Definition xlog.c:134

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3673 of file xlog.c.

3674{
3675 char path[MAXPGPATH];
3676 int fd;
3677
3678 XLogFilePath(path, tli, segno, wal_segment_size);
3679
3682 if (fd < 0)
3683 ereport(PANIC,
3685 errmsg("could not open file \"%s\": %m", path)));
3686
3687 return fd;
3688}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2251 of file xlog.c.

2252{
2255 double distance;
2257
2258 /*
2259 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2260 * correspond to. Always recycle enough segments to meet the minimum, and
2261 * remove enough segments to stay below the maximum.
2262 */
2267
2268 /*
2269 * Between those limits, recycle enough segments to get us through to the
2270 * estimated end of next checkpoint.
2271 *
2272 * To estimate where the next checkpoint will finish, assume that the
2273 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2274 * every checkpoint.
2275 */
2277 /* add 10% for good measure. */
2278 distance *= 1.10;
2279
2280 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2282
2283 if (recycleSegNo < minSegNo)
2285 if (recycleSegNo > maxSegNo)
2287
2288 return recycleSegNo;
2289}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, fb(), max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2801 of file xlog.c.

2802{
2806
2807 /*
2808 * During REDO, we are reading not writing WAL. Therefore, instead of
2809 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2810 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2811 * to act this way too, and because when it tries to write the
2812 * end-of-recovery checkpoint, it should indeed flush.
2813 */
2814 if (!XLogInsertAllowed())
2815 {
2816 UpdateMinRecoveryPoint(record, false);
2817 return;
2818 }
2819
2820 /* Quick exit if already known flushed */
2821 if (record <= LogwrtResult.Flush)
2822 return;
2823
2824#ifdef WAL_DEBUG
2825 if (XLOG_DEBUG)
2826 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
2827 LSN_FORMAT_ARGS(record),
2830#endif
2831
2833
2834 /*
2835 * Since fsync is usually a horribly expensive operation, we try to
2836 * piggyback as much data as we can on each fsync: if we see any more data
2837 * entered into the xlog buffer, we'll write and fsync that too, so that
2838 * the final value of LogwrtResult.Flush is as large as possible. This
2839 * gives us some chance of avoiding another fsync immediately after.
2840 */
2841
2842 /* initialize to given target; may increase below */
2843 WriteRqstPtr = record;
2844
2845 /*
2846 * Now wait until we get the write lock, or someone else does the flush
2847 * for us.
2848 */
2849 for (;;)
2850 {
2852
2853 /* done already? */
2855 if (record <= LogwrtResult.Flush)
2856 break;
2857
2858 /*
2859 * Before actually performing the write, wait for all in-flight
2860 * insertions to the pages we're about to write to finish.
2861 */
2863 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2867
2868 /*
2869 * Try to get the write lock. If we can't get it immediately, wait
2870 * until it's released, and recheck if we still need to do the flush
2871 * or if the backend that held the lock did it for us already. This
2872 * helps to maintain a good rate of group committing when the system
2873 * is bottlenecked by the speed of fsyncing.
2874 */
2876 {
2877 /*
2878 * The lock is now free, but we didn't acquire it yet. Before we
2879 * do, loop back to check if someone else flushed the record for
2880 * us already.
2881 */
2882 continue;
2883 }
2884
2885 /* Got the lock; recheck whether request is satisfied */
2887 if (record <= LogwrtResult.Flush)
2888 {
2890 break;
2891 }
2892
2893 /*
2894 * Sleep before flush! By adding a delay here, we may give further
2895 * backends the opportunity to join the backlog of group commit
2896 * followers; this can significantly improve transaction throughput,
2897 * at the risk of increasing transaction latency.
2898 *
2899 * We do not sleep if enableFsync is not turned on, nor if there are
2900 * fewer than CommitSiblings other backends with active transactions.
2901 */
2902 if (CommitDelay > 0 && enableFsync &&
2904 {
2908
2909 /*
2910 * Re-check how far we can now flush the WAL. It's generally not
2911 * safe to call WaitXLogInsertionsToFinish while holding
2912 * WALWriteLock, because an in-progress insertion might need to
2913 * also grab WALWriteLock to make progress. But we know that all
2914 * the insertions up to insertpos have already finished, because
2915 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2916 * We're only calling it again to allow insertpos to be moved
2917 * further forward, not to actually wait for anyone.
2918 */
2920 }
2921
2922 /* try to write/flush later additions to XLOG as well */
2923 WriteRqst.Write = insertpos;
2924 WriteRqst.Flush = insertpos;
2925
2926 XLogWrite(WriteRqst, insertTLI, false);
2927
2929 /* done */
2930 break;
2931 }
2932
2934
2935 /* wake up walsenders now that we've released heavily contended locks */
2937
2938 /*
2939 * If we flushed an LSN that someone was waiting for, notify the waiters.
2940 */
2941 if (waitLSNState &&
2945
2946 /*
2947 * If we still haven't flushed to the request point then we have a
2948 * problem; most likely, the requested flush point is past end of XLOG.
2949 * This has been seen to occur when a disk page has a corrupted LSN.
2950 *
2951 * Formerly we treated this as a PANIC condition, but that hurts the
2952 * system's robustness rather than helping it: we do not want to take down
2953 * the whole system due to corruption on one data page. In particular, if
2954 * the bad page is encountered again during recovery then we would be
2955 * unable to restart the database at all! (This scenario actually
2956 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2957 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2958 * the only time we can reach here during recovery is while flushing the
2959 * end-of-recovery checkpoint record, and we don't expect that to have a
2960 * bad LSN.
2961 *
2962 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2963 * since xact.c calls this routine inside a critical section. However,
2964 * calls from bufmgr.c are not within critical sections and so we will not
2965 * force a restart for a bad LSN on a data page.
2966 */
2967 if (LogwrtResult.Flush < record)
2968 elog(ERROR,
2969 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2970 LSN_FORMAT_ARGS(record),
2972
2973 /*
2974 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2975 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2976 * remain consistent.
2977 */
2978 Assert(!XLogNeedsFlush(record));
2979}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1378
bool MinimumActiveBackends(int min)
Definition procarray.c:3589
int CommitDelay
Definition xlog.c:139
int CommitSiblings
Definition xlog.c:140
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3163
bool XLogInsertAllowed(void)
Definition xlog.c:6885

References Assert, CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, fb(), XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), WaitLSNState::minWaitedLSN, pg_atomic_read_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, UpdateMinRecoveryPoint(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FinishSyncWorker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), rebuild_relation_finish_concurrent(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), WalSndWaitForWal(), write_logical_decoding_status_update_record(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogChecksums(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3813 of file xlog.c.

3814{
3815 XLogSegNo lastRemovedSegNo;
3816
3818 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3820
3821 return lastRemovedSegNo;
3822}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3829 of file xlog.c.

3830{
3831 DIR *xldir;
3832 struct dirent *xlde;
3834
3836 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3837 {
3840
3841 /* Ignore files that are not XLOG segments. */
3842 if (!IsXLogFileName(xlde->d_name))
3843 continue;
3844
3845 /* Parse filename to get TLI and segno. */
3848
3849 /* Ignore anything that's not from the TLI of interest. */
3850 if (tli != file_tli)
3851 continue;
3852
3853 /* If it's the oldest so far, update oldest_segno. */
3854 if (oldest_segno == 0 || file_segno < oldest_segno)
3856 }
3857
3858 FreeDir(xldir);
3859 return oldest_segno;
3860}

References AllocateDir(), fb(), FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), and MaybeRemoveOldWalSummaries().

◆ XLogGetReplicationSlotMinimumLSN()

XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )

Definition at line 2700 of file xlog.c.

2701{
2702 XLogRecPtr retval;
2703
2707
2708 return retval;
2709}
XLogRecPtr replicationSlotMinLSN
Definition xlog.c:465

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire(), SpinLockRelease(), and XLogCtl.

Referenced by KeepLogSeg(), and reserve_wal_for_local_slot().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5631 of file xlog.c.

5632{
5633 char xlogfname[MAXFNAMELEN];
5636
5637 /* we always switch to a new timeline after archive recovery */
5638 Assert(endTLI != newTLI);
5639
5640 /*
5641 * Update min recovery point one last time.
5642 */
5644
5645 /*
5646 * Calculate the last segment on the old timeline, and the first segment
5647 * on the new timeline. If the switch happens in the middle of a segment,
5648 * they are the same, but if the switch happens exactly at a segment
5649 * boundary, startLogSegNo will be endLogSegNo + 1.
5650 */
5653
5654 /*
5655 * Initialize the starting WAL segment for the new timeline. If the switch
5656 * happens in the middle of a segment, copy data from the last WAL segment
5657 * of the old timeline up to the switch point, to the starting WAL segment
5658 * on the new timeline.
5659 */
5661 {
5662 /*
5663 * Make a copy of the file on the new timeline.
5664 *
5665 * Writing WAL isn't allowed yet, so there are no locking
5666 * considerations. But we should be just as tense as XLogFileInit to
5667 * avoid emplacing a bogus file.
5668 */
5671 }
5672 else
5673 {
5674 /*
5675 * The switch happened at a segment boundary, so just create the next
5676 * segment on the new timeline.
5677 */
5678 int fd;
5679
5681
5682 if (close(fd) != 0)
5683 {
5684 int save_errno = errno;
5685
5687 errno = save_errno;
5688 ereport(ERROR,
5690 errmsg("could not close file \"%s\": %m", xlogfname)));
5691 }
5692 }
5693
5694 /*
5695 * Let's just make real sure there are not .ready or .done flags posted
5696 * for the new segment.
5697 */
5700}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition xlog.c:3473

References Assert, close, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6885 of file xlog.c.

6886{
6887 /*
6888 * If value is "unconditionally true" or "unconditionally false", just
6889 * return it. This provides the normal fast path once recovery is known
6890 * done.
6891 */
6892 if (LocalXLogInsertAllowed >= 0)
6893 return (bool) LocalXLogInsertAllowed;
6894
6895 /*
6896 * Else, must check to see if we're still in recovery.
6897 */
6898 if (RecoveryInProgress())
6899 return false;
6900
6901 /*
6902 * On exit from recovery, reset to "unconditionally true", since there is
6903 * no need to keep checking.
6904 */
6906 return true;
6907}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData *rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
uint64  fpi_bytes,
bool  topxid_included 
)

Definition at line 784 of file xlog.c.

790{
793 bool inserted;
794 XLogRecord *rechdr = (XLogRecord *) rdata->data;
795 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
801
802 /* Does this record type require special handling? */
803 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
804 {
805 if (info == XLOG_SWITCH)
807 else if (info == XLOG_CHECKPOINT_REDO)
809 }
810
811 /* we assume that all of the record header is in the first chunk */
813
814 /* cross-check on whether we should be here or not */
815 if (!XLogInsertAllowed())
816 elog(ERROR, "cannot make new WAL entries during recovery");
817
818 /*
819 * Given that we're not in recovery, InsertTimeLineID is set and can't
820 * change, so we can read it without a lock.
821 */
823
824 /*----------
825 *
826 * We have now done all the preparatory work we can without holding a
827 * lock or modifying shared state. From here on, inserting the new WAL
828 * record to the shared WAL buffer cache is a two-step process:
829 *
830 * 1. Reserve the right amount of space from the WAL. The current head of
831 * reserved space is kept in Insert->CurrBytePos, and is protected by
832 * insertpos_lck.
833 *
834 * 2. Copy the record to the reserved WAL space. This involves finding the
835 * correct WAL buffer containing the reserved space, and copying the
836 * record in place. This can be done concurrently in multiple processes.
837 *
838 * To keep track of which insertions are still in-progress, each concurrent
839 * inserter acquires an insertion lock. In addition to just indicating that
840 * an insertion is in progress, the lock tells others how far the inserter
841 * has progressed. There is a small fixed number of insertion locks,
842 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
843 * boundary, it updates the value stored in the lock to how far it has
844 * inserted, to allow the previous buffer to be flushed.
845 *
846 * Holding onto an insertion lock also protects RedoRecPtr and
847 * fullPageWrites from changing until the insertion is finished.
848 *
849 * Step 2 can usually be done completely in parallel. If the required WAL
850 * page is not initialized yet, you have to grab WALBufMappingLock to
851 * initialize it, but the WAL writer tries to do that ahead of insertions
852 * to avoid that from happening in the critical path.
853 *
854 *----------
855 */
857
858 if (likely(class == WALINSERT_NORMAL))
859 {
861
862 /*
863 * Check to see if my copy of RedoRecPtr is out of date. If so, may
864 * have to go back and have the caller recompute everything. This can
865 * only happen just after a checkpoint, so it's better to be slow in
866 * this case and fast otherwise.
867 *
868 * Also check to see if fullPageWrites was just turned on or there's a
869 * running backup (which forces full-page writes); if we weren't
870 * already doing full-page writes then go back and recompute.
871 *
872 * If we aren't doing full-page writes then RedoRecPtr doesn't
873 * actually affect the contents of the XLOG record, so we'll update
874 * our local copy but not force a recomputation. (If doPageWrites was
875 * just turned off, we could recompute the record without full pages,
876 * but we choose not to bother.)
877 */
878 if (RedoRecPtr != Insert->RedoRecPtr)
879 {
881 RedoRecPtr = Insert->RedoRecPtr;
882 }
883 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
884
885 if (doPageWrites &&
888 {
889 /*
890 * Oops, some buffer now needs to be backed up that the caller
891 * didn't back up. Start over.
892 */
895 return InvalidXLogRecPtr;
896 }
897
898 /*
899 * Reserve space for the record in the WAL. This also sets the xl_prev
900 * pointer.
901 */
903 &rechdr->xl_prev);
904
905 /* Normal records are always inserted. */
906 inserted = true;
907 }
908 else if (class == WALINSERT_SPECIAL_SWITCH)
909 {
910 /*
911 * In order to insert an XLOG_SWITCH record, we need to hold all of
912 * the WAL insertion locks, not just one, so that no one else can
913 * begin inserting a record until we've figured out how much space
914 * remains in the current WAL segment and claimed all of it.
915 *
916 * Nonetheless, this case is simpler than the normal cases handled
917 * above, which must check for changes in doPageWrites and RedoRecPtr.
918 * Those checks are only needed for records that can contain buffer
919 * references, and an XLOG_SWITCH record never does.
920 */
924 }
925 else
926 {
928
929 /*
930 * We need to update both the local and shared copies of RedoRecPtr,
931 * which means that we need to hold all the WAL insertion locks.
932 * However, there can't be any buffer references, so as above, we need
933 * not check RedoRecPtr before inserting the record; we just need to
934 * update it afterwards.
935 */
939 &rechdr->xl_prev);
940 RedoRecPtr = Insert->RedoRecPtr = StartPos;
941 inserted = true;
942 }
943
944 if (inserted)
945 {
946 /*
947 * Now that xl_prev has been filled in, calculate CRC of the record
948 * header.
949 */
950 rdata_crc = rechdr->xl_crc;
953 rechdr->xl_crc = rdata_crc;
954
955 /*
956 * All the record data, including the header, is now ready to be
957 * inserted. Copy the record in the space reserved.
958 */
959 CopyXLogRecordToWAL(rechdr->xl_tot_len,
962
963 /*
964 * Unless record is flagged as not important, update LSN of last
965 * important record in the current slot. When holding all locks, just
966 * update the first one.
967 */
968 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
969 {
970 int lockno = holdingAllLocks ? 0 : MyLockNo;
971
973 }
974 }
975 else
976 {
977 /*
978 * This was an xlog-switch record, but the current insert location was
979 * already exactly at the beginning of a segment, so there was no need
980 * to do anything.
981 */
982 }
983
984 /*
985 * Done! Let others know that we're finished.
986 */
988
990
992
993 /*
994 * Mark the top transaction id as logged (if needed) so that we do not try
995 * to log it again with the next WAL record in the current subtransaction.
996 */
997 if (topxid_included)
999
1000 /*
1001 * Update shared LogwrtRqst.Write, if we crossed page boundary.
1002 */
1004 {
1006 /* advance global request to include new block(s) */
1011 }
1012
1013 /*
1014 * If this was an XLOG_SWITCH record, flush the record and the empty
1015 * padding space that fills the rest of the segment, and perform
1016 * end-of-segment actions (eg, notifying archiver).
1017 */
1018 if (class == WALINSERT_SPECIAL_SWITCH)
1019 {
1022
1023 /*
1024 * Even though we reserved the rest of the segment for us, which is
1025 * reflected in EndPos, we return a pointer to just the end of the
1026 * xlog-switch record.
1027 */
1028 if (inserted)
1029 {
1032 {
1034
1035 if (offset == EndPos % XLOG_BLCKSZ)
1037 else
1039 }
1040 }
1041 }
1042
1043#ifdef WAL_DEBUG
1044 if (XLOG_DEBUG)
1045 {
1047 XLogRecord *record;
1051 char *errormsg = NULL;
1053
1055
1057 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1058
1059 /*
1060 * We have to piece together the WAL record data from the XLogRecData
1061 * entries, so that we can pass it to the rm_desc function as one
1062 * contiguous chunk.
1063 */
1065 for (; rdata != NULL; rdata = rdata->next)
1067
1068 /* We also need temporary space to decode the record. */
1069 record = (XLogRecord *) recordBuf.data;
1072
1073 if (!debug_reader)
1075 XL_ROUTINE(.page_read = NULL,
1076 .segment_open = NULL,
1077 .segment_close = NULL),
1078 NULL);
1079 if (!debug_reader)
1080 {
1081 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1082 }
1084 decoded,
1085 record,
1086 EndPos,
1087 &errormsg))
1088 {
1089 appendStringInfo(&buf, "error decoding record: %s",
1090 errormsg ? errormsg : "no error message");
1091 }
1092 else
1093 {
1094 appendStringInfoString(&buf, " - ");
1095
1096 debug_reader->record = decoded;
1098 debug_reader->record = NULL;
1099 }
1100 elog(LOG, "%s", buf.data);
1101
1102 pfree(decoded);
1103 pfree(buf.data);
1104 pfree(recordBuf.data);
1106 }
1107#endif
1108
1109 /*
1110 * Update our global variables
1111 */
1114
1115 /* Report WAL traffic to the instrumentation. */
1116 if (inserted)
1117 {
1118 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1122
1123 /* Required for the flush of pending stats WAL data */
1124 pgstat_report_fixed = true;
1125 }
1126
1127 return EndPos;
1128}
#define likely(x)
Definition c.h:437
#define unlikely(x)
Definition c.h:438
void * palloc(Size size)
Definition mcxt.c:1387
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
void MarkSubxactTopXidLogged(void)
Definition xact.c:593
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition xact.c:543
XLogRecPtr XactLastRecEnd
Definition xlog.c:261
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition xlog.c:1266
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1149
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1205
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:108
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, fb(), FIN_CRC32C, holdingAllLocks, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire(), SpinLockRelease(), START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogRecPtrIsValid, and XLogSegmentOffset.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3163 of file xlog.c.

3164{
3165 /*
3166 * During recovery, we don't flush WAL but update minRecoveryPoint
3167 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3168 * would need to be updated.
3169 *
3170 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3171 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3172 * This check should be consistent with the one in XLogFlush().
3173 */
3174 if (!XLogInsertAllowed())
3175 {
3176 /* Quick exit if already known to be updated or cannot be updated */
3178 return false;
3179
3180 /*
3181 * An invalid minRecoveryPoint means that we need to recover all the
3182 * WAL, i.e., we're doing crash recovery. We never modify the control
3183 * file's value in that case, so we can short-circuit future checks
3184 * here too. This triggers a quick exit path for the startup process,
3185 * which cannot update its local copy of minRecoveryPoint as long as
3186 * it has not replayed all WAL available when doing crash recovery.
3187 */
3189 {
3190 updateMinRecoveryPoint = false;
3191 return false;
3192 }
3193
3194 /*
3195 * Update local copy of minRecoveryPoint. But if the lock is busy,
3196 * just return a conservative guess.
3197 */
3199 return true;
3203
3204 /*
3205 * Check minRecoveryPoint for any other process than the startup
3206 * process doing crash recovery, which should not update the control
3207 * file value if crash recovery is still running.
3208 */
3210 updateMinRecoveryPoint = false;
3211
3212 /* check again */
3214 return false;
3215 else
3216 return true;
3217 }
3218
3219 /* Quick exit if already known flushed */
3220 if (record <= LogwrtResult.Flush)
3221 return false;
3222
3223 /* read LogwrtResult and update local state */
3225
3226 /* check again */
3227 if (record <= LogwrtResult.Flush)
3228 return false;
3229
3230 return true;
3231}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1321

References ControlFile, fb(), XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsValid.

Referenced by GetVictimBuffer(), SetHintBitsExt(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8570 of file xlog.c.

8571{
8572 XLogBeginInsert();
8573 XLogRegisterData(&nextOid, sizeof(Oid));
8574 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8575
8576 /*
8577 * We need not flush the NEXTOID record immediately, because any of the
8578 * just-allocated OIDs could only reach disk as part of a tuple insert or
8579 * update that would have its own XLOG record that must follow the NEXTOID
8580 * record. Therefore, the standard buffer LSN interlock applied to those
8581 * records will ensure no such OID reaches disk before the NEXTOID record
8582 * does.
8583 *
8584 * Note, however, that the above statement only covers state "within" the
8585 * database. When we use a generated OID as a file or directory name, we
8586 * are in a sense violating the basic WAL rule, because that filesystem
8587 * change may reach disk before the NEXTOID WAL record does. The impact
8588 * of this is that if a database crash occurs immediately afterward, we
8589 * might after restart re-generate the same OID and find that it conflicts
8590 * with the leftover file or directory. But since for safety's sake we
8591 * always loop until finding a nonconflicting filename, this poses no real
8592 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8593 */
8594}

References fb(), XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1982 of file xlog.c.

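1983{
1984 uint64 fullsegs;
1985 uint32 fullpages;
1986 uint32 offset;
1987 uint64 result;
1988
1989 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1990
1991 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1992 offset = ptr % XLOG_BLCKSZ;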
1993
1994 if (fullpages == 0)
1995 {
1996 result = fullsegs * UsableBytesInSegment;
1997 if (offset > 0)
1998 {
1999 Assert(offset >= SizeOfXLogLongPHD);
2000 result += offset - SizeOfXLogLongPHD;
2001 }
2002 }
2003 else
2004 {
2005 result = fullsegs * UsableBytesInSegment +
2006 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
2007 (fullpages - 1) * UsableBytesInPage; /* full pages */
2008 if (offset > 0)
2009 {
2010 Assert(offset >= SizeOfXLogShortPHD);
2011 result += offset - SizeOfXLogShortPHD;
2012 }
2013 }
2014
2015 return result;
2016}

References Assert, fb(), result, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8674 of file xlog.c.

8675{
8684 {
8685 /*
8686 * The change in number of backend slots doesn't need to be WAL-logged
8687 * if archiving is not enabled, as you can't start archive recovery
8688 * with wal_level=minimal anyway. We don't really care about the
8689 * values in pg_control either if wal_level=minimal, but seems better
8690 * to keep them up-to-date to avoid confusion.
8691 */
8693 {
8696
8698 xlrec.max_worker_processes = max_worker_processes;
8699 xlrec.max_wal_senders = max_wal_senders;
8700 xlrec.max_prepared_xacts = max_prepared_xacts;
8701 xlrec.max_locks_per_xact = max_locks_per_xact;
8702 xlrec.wal_level = wal_level;
8703 xlrec.wal_log_hints = wal_log_hints;
8704 xlrec.track_commit_timestamp = track_commit_timestamp;
8705
8707 XLogRegisterData(&xlrec, sizeof(xlrec));
8708
8711 }
8712
8714
8724
8726 }
8727}

References ControlFile, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char * rpName)

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2630 of file xlog.c.

2631{
2632 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2633 bool sleeping;
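2634 bool wakeup = false;
2635 XLogRecPtr prevAsyncXactLSN;
2636
2637 SpinLockAcquire(&XLogCtl->info_lck);
2638 sleeping = XLogCtl->WalWriterSleeping;
2639 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2640 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2641 XLogCtl->asyncXactLSN = asyncXactLSN;
2642 SpinLockRelease(&XLogCtl->info_lck);
2643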
2644 /*
2645 * If somebody else already called this function with a more aggressive
2646 * LSN, they will have done what we needed (and perhaps more).
2647 */
2648 if (asyncXactLSN <= prevAsyncXactLSN)
2649 return;
2650
2651 /*
2652 * If the WALWriter is sleeping, kick it to make it come out of low-power
2653 * mode, so that this async commit will reach disk within the expected
2654 * amount of time. Otherwise, determine whether it has enough WAL
2655 * available to flush, the same way that XLogBackgroundFlush() does.
2656 */
2657 if (sleeping)
2658 wakeup = true;
2659 else
2660 {
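2661 int flushblocks;
2662
2663 RefreshXLogWriteResult(LogwrtResult);
2664
2665 flushblocks =
2666 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2667
2668 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2669 wakeup = true;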
2670 }
2671
2672 if (wakeup)
2673 {
2674 volatile PROC_HDR *procglobal = ProcGlobal;
2675 ProcNumber walwriterProc = procglobal->walwriterProc;
2676
2677 if (walwriterProc != INVALID_PROC_NUMBER)
2678 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2679 }
2680}
void SetLatch(Latch *latch)
Definition latch.c:290
#define GetPGProcByNumber(n)
Definition proc.h:504
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
PROC_HDR * ProcGlobal
Definition proc.c:74
ProcNumber walwriterProc
Definition proc.h:488
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References XLogCtlData::asyncXactLSN, fb(), XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire(), SpinLockRelease(), wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemAttach()

static void XLOGShmemAttach ( void * arg)
static

Definition at line 5444 of file xlog.c.

5445{
5446 WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
5447}
WALInsertLockPadded * WALInsertLocks
Definition xlog.c:451

References XLogCtlData::Insert, XLogCtlInsert::WALInsertLocks, WALInsertLocks, and XLogCtl.

◆ XLOGShmemInit()

static void XLOGShmemInit ( void * arg)
static

Definition at line 5347 of file xlog.c.

5348{
5349 char *allocptr;
5350 int i;
5351
5352#ifdef WAL_DEBUG
5353
5354 /*
5355 * Create a memory context for WAL debugging that's exempt from the normal
5356 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
5357 * an allocation fails, but wal_debug is not for production use anyway.
5358 */
5359 if (walDebugCxt == NULL)
5360 {
5362 "WAL Debug",
5365 }
5366#endif
5367
5368 memset(XLogCtl, 0, sizeof(XLogCtlData));
5369
5370 /*
5371 * Already have read control file locally, unless in bootstrap mode. Move
5372 * contents into shared memory.
5373 */
5374 if (LocalControlFile)
5375 {
5379 }
5380
5381 /*
5382 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5383 * multiple of the alignment for same, so no extra alignment padding is
5384 * needed here.
5385 */
5386 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5389
5390 for (i = 0; i < XLOGbuffers; i++)
5391 {
5393 }
5394
5395 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5396 allocptr += sizeof(WALInsertLockPadded) -
5401
5402 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5403 {
5407 }
5408
5409 /*
5410 * Align the start of the page buffers to a full xlog block size boundary.
5411 * This simplifies some calculations in XLOG insertion. It is also
5412 * required for O_DIRECT.
5413 */
5417
5418 /*
5419 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5420 * in additional info.)
5421 */
5425 XLogCtl->WalWriterSleeping = false;
5426
5427 /* Use the checksum info from control file */
5429
5431
5438}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:889
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
MemoryContext TopMemoryContext
Definition mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition mcxt.c:743
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
int XLogCacheBlck
Definition xlog.c:500
slock_t insertpos_lck
Definition xlog.c:405

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, ControlFile, XLogCtlData::data_checksum_version, ControlFileData::data_checksum_version, fb(), i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LocalControlFile, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), memcpy(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, SetLocalDataChecksumState(), XLogCtlData::SharedRecoveryState, SpinLockInit(), TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, and XLogCtl.

◆ XLOGShmemRequest()

static void XLOGShmemRequest ( void * arg)
static

Definition at line 5293 of file xlog.c.

5294{
5295 Size size;
5296
5297 /*
5298 * If the value of wal_buffers is -1, use the preferred auto-tune value.
5299 * This isn't an amazingly clean place to do this, but we must wait till
5300 * NBuffers has received its final value, and must do it before using the
5301 * value of XLOGbuffers to do anything important.
5302 *
5303 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
5304 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
5305 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
5306 * the matter with PGC_S_OVERRIDE.
5307 */
5308 if (XLOGbuffers == -1)
5309 {
5310 char buf[32];
5311
5312 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
5313 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5314 PGC_S_DYNAMIC_DEFAULT);
5315 if (XLOGbuffers == -1) /* failed to apply it? */
5316 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5317 PGC_S_OVERRIDE);
5318 }
5319 Assert(XLOGbuffers > 0);
5320
5321 /* XLogCtl */
5322 size = sizeof(XLogCtlData);
5323
5324 /* WAL insertion locks, plus alignment */
5325 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
5326 /* xlblocks array */
5327 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
5328 /* extra alignment padding for XLOG I/O buffers */
5329 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
5330 /* and the buffers themselves */
5331 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
5332
5333 ShmemRequestStruct(.name = "XLOG Ctl",
5334 .size = size,
5335 .ptr = (void **) &XLogCtl,
5336 );
5337 ShmemRequestStruct(.name = "Control File",
5338 .size = sizeof(ControlFileData),
5339 .ptr = (void **) &ControlFile,
5340 );
5341}
#define Max(x, y)
Definition c.h:1085
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_POSTMASTER
Definition guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
#define ShmemRequestStruct(...)
Definition shmem.h:176
const char * name

References add_size(), Assert, buf, ControlFile, fb(), Max, mul_size(), name, NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), ShmemRequestStruct, snprintf, XLOGbuffers, XLOGChooseNumBuffers(), and XLogCtl.

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 10162 of file xlog.c.

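10163{
10164 Assert(AmStartupProcess() || !IsUnderPostmaster);
10165
10166 ShutdownWalRcv();
10167 ResetInstallXLogFileSegmentActive();
10168}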
#define AmStartupProcess()
Definition miscadmin.h:405
void ShutdownWalRcv(void)
void ResetInstallXLogFileSegmentActive(void)
Definition xlog.c:10181

References AmStartupProcess, Assert, IsUnderPostmaster, ResetInstallXLogFileSegmentActive(), and ShutdownWalRcv().

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2325 of file xlog.c.

2326{
2327 bool ispartialpage;
2328 bool last_iteration;
2329 bool finishing_seg;
2330 int curridx;
2331 int npages;
2332 int startidx;
2334
2335 /* We should always be inside a critical section here */
2337
2338 /*
2339 * Update local LogwrtResult (caller probably did this already, but...)
2340 */
2342
2343 /*
2344 * Since successive pages in the xlog cache are consecutively allocated,
2345 * we can usually gather multiple pages together and issue just one
2346 * write() call. npages is the number of pages we have determined can be
2347 * written together; startidx is the cache block index of the first one,
2348 * and startoffset is the file offset at which it should go. The latter
2349 * two variables are only valid when npages > 0, but we must initialize
2350 * all of them to keep the compiler quiet.
2351 */
2352 npages = 0;
2353 startidx = 0;
2354 startoffset = 0;
2355
2356 /*
2357 * Within the loop, curridx is the cache block index of the page to
2358 * consider writing. Begin at the buffer containing the next unwritten
2359 * page, or last partially written page.
2360 */
2362
2363 while (LogwrtResult.Write < WriteRqst.Write)
2364 {
2365 /*
2366 * Make sure we're not ahead of the insert process. This could happen
2367 * if we're passed a bogus WriteRqst.Write that is past the end of the
2368 * last page that's been initialized by AdvanceXLInsertBuffer.
2369 */
2371
2372 if (LogwrtResult.Write >= EndPtr)
2373 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2376
2377 /* Advance LogwrtResult.Write to end of current buffer page */
2380
2383 {
2384 /*
2385 * Switch to new logfile segment. We cannot have any pending
2386 * pages here (since we dump what we have at segment end).
2387 */
2388 Assert(npages == 0);
2389 if (openLogFile >= 0)
2390 XLogFileClose();
2393 openLogTLI = tli;
2394
2395 /* create/use new log file */
2398 }
2399
2400 /* Make sure we have the current logfile open */
2401 if (openLogFile < 0)
2402 {
2405 openLogTLI = tli;
2408 }
2409
2410 /* Add current page to the set of pending pages-to-dump */
2411 if (npages == 0)
2412 {
2413 /* first of group */
2414 startidx = curridx;
2417 }
2418 npages++;
2419
2420 /*
2421 * Dump the set if this will be the last loop iteration, or if we are
2422 * at the last page of the cache area (since the next page won't be
2423 * contiguous in memory), or if we are at the end of the logfile
2424 * segment.
2425 */
2427
2430
2431 if (last_iteration ||
2434 {
2435 char *from;
2436 Size nbytes;
2437 Size nleft;
2440
2441 /* OK to write the page(s) */
2442 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2443 nbytes = npages * (Size) XLOG_BLCKSZ;
2444 nleft = nbytes;
2445 do
2446 {
2447 errno = 0;
2448
2449 /*
2450 * Measure I/O timing to write WAL data, for pg_stat_io.
2451 */
2453
2457
2459 IOOP_WRITE, start, 1, written);
2460
2461 if (written <= 0)
2462 {
2463 char xlogfname[MAXFNAMELEN];
2464 int save_errno;
2465
2466 if (errno == EINTR)
2467 continue;
2468
2469 save_errno = errno;
2472 errno = save_errno;
2473 ereport(PANIC,
2475 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2477 }
2478 nleft -= written;
2479 from += written;
2481 } while (nleft > 0);
2482
2483 npages = 0;
2484
2485 /*
2486 * If we just wrote the whole last page of a logfile segment,
2487 * fsync the segment immediately. This avoids having to go back
2488 * and re-open prior segments when an fsync request comes along
2489 * later. Doing it here ensures that one and only one backend will
2490 * perform this fsync.
2491 *
2492 * This is also the right place to notify the Archiver that the
2493 * segment is ready to copy to archival storage, and to update the
2494 * timer for archive_timeout, and to signal for a checkpoint if
2495 * too many logfile segments have been used since the last
2496 * checkpoint.
2497 */
2498 if (finishing_seg)
2499 {
2501
2502 /* signal that we need to wakeup walsenders later */
2504
2505 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2506
2507 if (XLogArchivingActive())
2509
2512
2513 /*
2514 * Request a checkpoint if we've consumed too much xlog since
2515 * the last one. For speed, we first check using the local
2516 * copy of RedoRecPtr, which might be out of date; if it looks
2517 * like a checkpoint is needed, forcibly update RedoRecPtr and
2518 * recheck.
2519 */
2521 {
2522 (void) GetRedoRecPtr();
2525 }
2526 }
2527 }
2528
2529 if (ispartialpage)
2530 {
2531 /* Only asked to write a partial page */
2533 break;
2534 }
2536
2537 /* If flexible, break out of loop as soon as we wrote something */
2538 if (flexible && npages == 0)
2539 break;
2540 }
2541
2542 Assert(npages == 0);
2543
2544 /*
2545 * If asked to flush, do so
2546 */
2547 if (LogwrtResult.Flush < WriteRqst.Flush &&
2549 {
2550 /*
2551 * Could get here without iterating above loop, in which case we might
2552 * have no open file or the wrong one. However, we do not need to
2553 * fsync more than one file.
2554 */
2557 {
2558 if (openLogFile >= 0 &&
2561 XLogFileClose();
2562 if (openLogFile < 0)
2563 {
2566 openLogTLI = tli;
2569 }
2570
2572 }
2573
2574 /* signal that we need to wakeup walsenders later */
2576
2578 }
2579
2580 /*
2581 * Update shared-memory status
2582 *
2583 * We make sure that the shared 'request' values do not fall behind the
2584 * 'result' values. This is not absolutely essential, but it saves some
2585 * code in a couple of places.
2586 */
2593
2594 /*
2595 * We write Write first, bar, then Flush. When reading, the opposite must
2596 * be done (with a matching barrier in between), so that we always see a
2597 * Flush value that trails behind the Write value seen.
2598 */
2602
2603#ifdef USE_ASSERT_CHECKING
2604 {
2608
2614
2615 /* WAL written to disk is always ahead of WAL flushed */
2616 Assert(Write >= Flush);
2617
2618 /* WAL inserted to buffers is always ahead of WAL written */
2619 Assert(Insert >= Write);
2620 }
2621#endif
2622}
void ReserveExternalFD(void)
Definition fd.c:1207
volatile uint32 CritSectionCount
Definition globals.c:45
XLogRecPtr Flush
XLogRecPtr Write
#define WalSndWakeupRequest()
Definition walsender.h:57
#define EINTR
Definition win32_port.h:361
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6933
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition xlog.c:3673
#define NextBufIdx(idx)
Definition xlog.c:604
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition xlog.c:9374
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition xlog.c:2301
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg, fb(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire(), SpinLockRelease(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition xlog.h:69
@ ARCHIVE_MODE_OFF
Definition xlog.h:67
@ ARCHIVE_MODE_ON
Definition xlog.h:68

Definition at line 198 of file xlog.c.

198 {
199 {"always", ARCHIVE_MODE_ALWAYS, false},
200 {"on", ARCHIVE_MODE_ON, false},
201 {"off", ARCHIVE_MODE_OFF, false},
202 {"true", ARCHIVE_MODE_ON, true},
203 {"false", ARCHIVE_MODE_OFF, true},
204 {"yes", ARCHIVE_MODE_ON, true},
205 {"no", ARCHIVE_MODE_OFF, true},
206 {"1", ARCHIVE_MODE_ON, true},
207 {"0", ARCHIVE_MODE_OFF, true},
208 {NULL, 0, false}
209};

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 173 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 166 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 139 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 140 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ data_checksums

int data_checksums = 0

Definition at line 683 of file xlog.c.

Referenced by AbsorbDataChecksumsBarrier(), and SetLocalDataChecksumState().

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 129 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 224 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalControlFile

ControlFileData* LocalControlFile = NULL
static

Definition at line 583 of file xlog.c.

Referenced by LocalProcessControlFile(), and XLOGShmemInit().

◆ LocalDataChecksumState

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 231 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 243 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 142 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 122 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 656 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 657 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 167 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 668 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 131 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 133 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 132 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 143 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 134 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 123 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 130 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 135 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

Definition at line 141 of file xlog.c.

Referenced by ApplyLauncherMain(), launch_sync_worker(), and WaitForWALToBecomeAvailable().

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 150 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), decode_concurrent_changes(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), repack_setup_logical_decoding(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 178 of file xlog.c.

178 {
179 {"fsync", WAL_SYNC_METHOD_FSYNC, false},
180#ifdef HAVE_FSYNC_WRITETHROUGH
181 {"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
182#endif
183 {"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
184#ifdef O_SYNC
185 {"open_sync", WAL_SYNC_METHOD_OPEN, false},
186#endif
187#ifdef O_DSYNC
188 {"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
189#endif
190 {NULL, 0, false}
191};

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 125 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 124 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemRequest().

◆ XLogCtl

◆ XLOGShmemCallbacks

const ShmemCallbacks XLOGShmemCallbacks
Initial value:
= {
.request_fn = XLOGShmemRequest,
.init_fn = XLOGShmemInit,
.attach_fn = XLOGShmemAttach,
}
static void XLOGShmemInit(void *arg)
Definition xlog.c:5347
static void XLOGShmemAttach(void *arg)
Definition xlog.c:5444
static void XLOGShmemRequest(void *arg)
Definition xlog.c:5293

Definition at line 590 of file xlog.c.

590 {
591 .request_fn = XLOGShmemRequest,
592 .init_fn = XLOGShmemInit,
593 .attach_fn = XLOGShmemAttach,
594};