PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
bool check_max_slot_wal_keep_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 111 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 614 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 591 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 595 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 150 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
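Value:
do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)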
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
pg_atomic_uint64 logWriteResult
Definition: xlog.c:467
pg_atomic_uint64 logFlushResult
Definition: xlog.c:468
static XLogCtlData * XLogCtl
Definition: xlog.c:577

Definition at line 631 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 608 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 602 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 570 of file xlog.c.

571{
WalInsertClass
Definition: xlog.c:571
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:573
@ WALINSERT_NORMAL
Definition: xlog.c:572
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:574

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1996 of file xlog.c.

1997{
1999 int nextidx;
2000 XLogRecPtr OldPageRqstPtr;
2001 XLogwrtRqst WriteRqst;
2002 XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
2003 XLogRecPtr NewPageBeginPtr;
2004 XLogPageHeader NewPage;
2005 XLogRecPtr ReservedPtr;
2006 int npages pg_attribute_unused() = 0;
2007
2008 /*
2009 * We must run the loop below inside the critical section as we expect
2010 * XLogCtl->InitializedUpTo to eventually keep up. Most callers
2011 * already run inside the critical section. The exception is the WAL writer,
2012 * which passes 'opportunistic == true', and in that case we don't perform
2013 * operations that could error out.
2014 *
2015 * Start an explicit critical section anyway though.
2016 */
2017 Assert(CritSectionCount > 0 || opportunistic);
2019
2020 /*--
2021 * Loop till we get all the pages in WAL buffer before 'upto' reserved for
2022 * initialization. Multiple processes can initialize different buffers with
2023 * this loop in parallel, as follows.
2024 *
2025 * 1. Reserve page for initialization using XLogCtl->InitializeReserved.
2026 * 2. Initialize the reserved page.
2027 * 3. Attempt to advance XLogCtl->InitializedUpTo.
2028 */
2030 while (upto >= ReservedPtr || opportunistic)
2031 {
2032 Assert(ReservedPtr % XLOG_BLCKSZ == 0);
2033
2034 /*
2035 * Get ending-offset of the buffer page we need to replace.
2036 *
2037 * We don't look into xlblocks, but rather calculate the position we
2038 * must wait to be written. If it was written, xlblocks will have this
2039 * position (or be uninitialized).
2040 */
2041 if (ReservedPtr + XLOG_BLCKSZ > XLogCtl->InitializedFrom + XLOG_BLCKSZ * XLOGbuffers)
2042 OldPageRqstPtr = ReservedPtr + XLOG_BLCKSZ - (XLogRecPtr) XLOG_BLCKSZ * XLOGbuffers;
2043 else
2044 OldPageRqstPtr = InvalidXLogRecPtr;
2045
2046 if (LogwrtResult.Write < OldPageRqstPtr && opportunistic)
2047 {
2048 /*
2049 * If we just want to pre-initialize as much as we can without
2050 * flushing, give up now.
2051 */
2052 upto = ReservedPtr - 1;
2053 break;
2054 }
2055
2056 /*
2057 * Attempt to reserve the page for initialization. Failure means that
2058 * this page got reserved by another process.
2059 */
2061 &ReservedPtr,
2062 ReservedPtr + XLOG_BLCKSZ))
2063 continue;
2064
2065 /*
2066 * Wait till page gets correctly initialized up to OldPageRqstPtr.
2067 */
2068 nextidx = XLogRecPtrToBufIdx(ReservedPtr);
2069 while (pg_atomic_read_u64(&XLogCtl->InitializedUpTo) < OldPageRqstPtr)
2070 ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
2072 Assert(pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) == OldPageRqstPtr);
2073
2074 /* Fall through if it's already written out. */
2075 if (LogwrtResult.Write < OldPageRqstPtr)
2076 {
2077 /* Nope, got work to do. */
2078
2079 /* Advance shared memory write request position */
2081 if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2082 XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2084
2085 /*
2086 * Acquire an up-to-date LogwrtResult value and see if we still
2087 * need to write it or if someone else already did.
2088 */
2090 if (LogwrtResult.Write < OldPageRqstPtr)
2091 {
2092 WaitXLogInsertionsToFinish(OldPageRqstPtr);
2093
2094 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2095
2097 if (LogwrtResult.Write >= OldPageRqstPtr)
2098 {
2099 /* OK, someone wrote it already */
2100 LWLockRelease(WALWriteLock);
2101 }
2102 else
2103 {
2104 /* Have to write it ourselves */
2105 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2106 WriteRqst.Write = OldPageRqstPtr;
2107 WriteRqst.Flush = 0;
2108 XLogWrite(WriteRqst, tli, false);
2109 LWLockRelease(WALWriteLock);
2111 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2112 }
2113 }
2114 }
2115
2116 /*
2117 * Now the next buffer slot is free and we can set it up to be the
2118 * next output page.
2119 */
2120 NewPageBeginPtr = ReservedPtr;
2121 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2122
2123 NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2124
2125 /*
2126 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2127 * before initializing. Otherwise, the old page may be partially
2128 * zeroed but look valid.
2129 */
2132
2133 /*
2134 * Be sure to re-zero the buffer so that bytes beyond what we've
2135 * written will look like zeroes and not valid XLOG records...
2136 */
2137 MemSet(NewPage, 0, XLOG_BLCKSZ);
2138
2139 /*
2140 * Fill the new page's header
2141 */
2142 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2143
2144 /* NewPage->xlp_info = 0; */ /* done by memset */
2145 NewPage->xlp_tli = tli;
2146 NewPage->xlp_pageaddr = NewPageBeginPtr;
2147
2148 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2149
2150 /*
2151 * If online backup is not in progress, mark the header to indicate
2152 * that WAL records beginning in this page have removable backup
2153 * blocks. This allows the WAL archiver to know whether it is safe to
2154 * compress archived WAL data by transforming full-block records into
2155 * the non-full-block format. It is sufficient to record this at the
2156 * page level because we force a page switch (in fact a segment
2157 * switch) when starting a backup, so the flag will be off before any
2158 * records can be written during the backup. At the end of a backup,
2159 * the last page will be marked as all unsafe when perhaps only part
2160 * is unsafe, but at worst the archiver would miss the opportunity to
2161 * compress a few records.
2162 */
2163 if (Insert->runningBackups == 0)
2164 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2165
2166 /*
2167 * If first page of an XLOG segment file, make it a long header.
2168 */
2169 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2170 {
2171 XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2172
2173 NewLongPage->xlp_sysid = ControlFile->system_identifier;
2174 NewLongPage->xlp_seg_size = wal_segment_size;
2175 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2176 NewPage->xlp_info |= XLP_LONG_HEADER;
2177 }
2178
2179 /*
2180 * Make sure the initialization of the page becomes visible to others
2181 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2182 * holding a lock.
2183 */
2185
2186 /*-----
2187 * Update the value of XLogCtl->xlblocks[nextidx] and try to advance
2188 * XLogCtl->InitializedUpTo in a lock-less manner.
2189 *
2190 * First, let's provide a formal proof of the algorithm. Let there be 'n'
2191 * processes with the following variables in shared memory:
2192 * f - an array of 'n' boolean flags,
2193 * v - atomic integer variable.
2194 *
2195 * Also, let
2196 * i - a number of a process,
2197 * j - local integer variable,
2198 * CAS(var, oldval, newval) - compare-and-swap atomic operation
2199 * returning true on success,
2200 * write_barrier()/read_barrier() - memory barriers.
2201 *
2202 * The pseudocode for each process is the following.
2203 *
2204 * j := i
2205 * f[i] := true
2206 * write_barrier()
2207 * while CAS(v, j, j + 1):
2208 * j := j + 1
2209 * read_barrier()
2210 * if not f[j]:
2211 * break
2212 *
2213 * Let's prove that v eventually reaches the value of n.
2214 * 1. Prove by contradiction. Assume v doesn't reach n and gets stuck
2215 * at k, where k < n.
2216 * 2. Process k attempts CAS(v, k, k + 1). If, as we assumed, v
2217 * gets stuck at k, then this CAS operation must fail. Therefore,
2218 * v < k when process k attempts CAS(v, k, k + 1).
2219 * 3. If, as we assumed, v gets stuck at k, then the value k of v
2220 * must be achieved by some process m, where m < k. The process
2221 * m must observe f[k] == false. Otherwise, it will later attempt
2222 * CAS(v, k, k + 1) with success.
2223 * 4. Therefore, corresponding read_barrier() (while j == k) on
2224 * process m happened before write_barrier() of process k. But then
2225 * process k attempts CAS(v, k, k + 1) after process m successfully
2226 * incremented v to k, and that CAS operation must succeed.
2227 * That leads to a contradiction. So, there is no such k (k < n)
2228 * where v gets stuck. Q.E.D.
2229 *
2230 * To apply this proof to the code below, we assume
2231 * XLogCtl->InitializedUpTo will play the role of v with XLOG_BLCKSZ
2232 * granularity. We also assume setting XLogCtl->xlblocks[nextidx] to
2233 * NewPageEndPtr to play the role of setting f[i] to true. Also, note
2234 * that processes can't concurrently map different xlog locations to
2235 * the same nextidx because we previously requested that
2236 * XLogCtl->InitializedUpTo >= OldPageRqstPtr. So, an xlog buffer can
2237 * be taken for initialization only once the previous initialization
2238 * takes effect on XLogCtl->InitializedUpTo.
2239 */
2240
2241 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2242
2244
2245 while (pg_atomic_compare_exchange_u64(&XLogCtl->InitializedUpTo, &NewPageBeginPtr, NewPageEndPtr))
2246 {
2247 NewPageBeginPtr = NewPageEndPtr;
2248 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2249 nextidx = XLogRecPtrToBufIdx(NewPageBeginPtr);
2250
2252
2253 if (pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) != NewPageEndPtr)
2254 {
2255 /*
2256 * Page at nextidx wasn't initialized yet, so we can't move
2257 * InitializedUpTo further. It will be moved by the backend which
2258 * will initialize nextidx.
2259 */
2261 break;
2262 }
2263 }
2264
2265 npages++;
2266 }
2267
2269
2270 /*
2271 * All the pages in WAL buffer before 'upto' were reserved for
2272 * initialization. However, some pages might be reserved by concurrent
2273 * processes. Wait till they finish initialization.
2274 */
2275 while (upto >= pg_atomic_read_u64(&XLogCtl->InitializedUpTo))
2276 ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
2278
2280
2281#ifdef WAL_DEBUG
2282 if (XLOG_DEBUG && npages > 0)
2283 {
2284 elog(DEBUG1, "initialized %d pages, up to %X/%X",
2285 npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2286 }
2287#endif
2288}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static bool pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 *expected, uint64 newval)
Definition: atomics.h:512
#define pg_read_barrier()
Definition: atomics.h:156
#define pg_write_barrier()
Definition: atomics.h:157
#define pg_attribute_unused()
Definition: c.h:133
#define MemSet(start, val, len)
Definition: c.h:991
size_t Size
Definition: c.h:576
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:226
static void Insert(File file)
Definition: fd.c:1317
volatile uint32 CritSectionCount
Definition: globals.c:46
Assert(PointerIsAligned(start, uint64))
WalUsage pgWalUsage
Definition: instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
uint64 system_identifier
Definition: pg_control.h:110
int64 wal_buffers_full
Definition: instrument.h:56
XLogwrtRqst LogwrtRqst
Definition: xlog.c:450
slock_t info_lck
Definition: xlog.c:564
char * pages
Definition: xlog.c:503
pg_atomic_uint64 InitializeReserved
Definition: xlog.c:486
pg_atomic_uint64 InitializedUpTo
Definition: xlog.c:494
ConditionVariable InitializedUpToCondVar
Definition: xlog.c:495
pg_atomic_uint64 * xlblocks
Definition: xlog.c:504
XLogCtlInsert Insert
Definition: xlog.c:447
XLogRecPtr InitializedFrom
Definition: xlog.c:473
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:322
XLogRecPtr Flush
Definition: xlog.c:317
XLogRecPtr Write
Definition: xlog.c:316
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1515
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:631
int wal_segment_size
Definition: xlog.c:143
static XLogwrtResult LogwrtResult
Definition: xlog.c:623
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:602
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2447
int XLOGbuffers
Definition: xlog.c:117
static ControlFileData * ControlFile
Definition: xlog.c:585
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), ConditionVariableBroadcast(), ConditionVariableCancelSleep(), ConditionVariableSleep(), ControlFile, CritSectionCount, DEBUG1, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedFrom, XLogCtlData::InitializedUpTo, XLogCtlData::InitializedUpToCondVar, XLogCtlData::InitializeReserved, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_read_barrier, pg_write_barrier, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLOGbuffers, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2331 of file xlog.c.

2332{
2335}
double CheckPointCompletionTarget
Definition: checkpointer.c:143
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2295

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2324 of file xlog.c.

2325{
2328}
int max_wal_size_mb
Definition: xlog.c:114

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4938 of file xlog.c.

4939{
4940 /*
4941 * If some checks were deferred, it's possible that the checks will fail
4942 * later during InitializeWalConsistencyChecking(). But in that case, the
4943 * postmaster will exit anyway, so it's safe to proceed with the
4944 * assignment.
4945 *
4946 * Any built-in resource managers specified are assigned immediately,
4947 * which affects WAL created before shared_preload_libraries are
4948 * processed. Any custom resource managers specified won't be assigned
4949 * until after shared_preload_libraries are processed, but that's OK
4950 * because WAL for a custom resource manager can't be written before the
4951 * module is loaded anyway.
4952 */
4954}
bool * wal_consistency_checking
Definition: xlog.c:126

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8829 of file xlog.c.

8830{
8831 if (wal_sync_method != new_wal_sync_method)
8832 {
8833 /*
8834 * To ensure that no blocks escape unsynced, force an fsync on the
8835 * currently open log segment (if any). Also, if the open flag is
8836 * changing, close the log file so it will be reopened (with new flag
8837 * bit) at next use.
8838 */
8839 if (openLogFile >= 0)
8840 {
8841 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8842 if (pg_fsync(openLogFile) != 0)
8843 {
8844 char xlogfname[MAXFNAMELEN];
8845 int save_errno;
8846
8847 save_errno = errno;
8850 errno = save_errno;
8851 ereport(PANIC,
8853 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8854 }
8855
8857 if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
8858 XLogFileClose();
8859 }
8860 }
8861}
int errcode_for_file_access(void)
Definition: elog.c:877
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
static int openLogFile
Definition: xlog.c:646
static int get_sync_bit(int method)
Definition: xlog.c:8781
int wal_sync_method
Definition: xlog.c:130
static TimeLineID openLogTLI
Definition: xlog.c:648
static void XLogFileClose(void)
Definition: xlog.c:3778
static XLogSegNo openLogSegNo
Definition: xlog.c:647
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5218 of file xlog.c.

5219{
5220 CheckPoint checkPoint;
5221 char *buffer;
5222 XLogPageHeader page;
5223 XLogLongPageHeader longpage;
5224 XLogRecord *record;
5225 char *recptr;
5226 uint64 sysidentifier;
5227 struct timeval tv;
5228 pg_crc32c crc;
5229
5230 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5231 SetInstallXLogFileSegmentActive();
5232
5233 /*
5234 * Select a hopefully-unique system identifier code for this installation.
5235 * We use the result of gettimeofday(), including the fractional seconds
5236 * field, as being about as unique as we can easily get. (Think not to
5237 * use random(), since it hasn't been seeded and there's no portable way
5238 * to seed it other than the system clock value...) The upper half of the
5239 * uint64 value is just the tv_sec part, while the lower half contains the
5240 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5241 * PID for a little extra uniqueness. A person knowing this encoding can
5242 * determine the initialization time of the installation, which could
5243 * perhaps be useful sometimes.
5244 */
5245 gettimeofday(&tv, NULL);
5246 sysidentifier = ((uint64) tv.tv_sec) << 32;
5247 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5248 sysidentifier |= getpid() & 0xFFF;
5249
5250 /* page buffer must be aligned suitably for O_DIRECT */
5251 buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5252 page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5253 memset(page, 0, XLOG_BLCKSZ);
5254
5255 /*
5256 * Set up information for the initial checkpoint record
5257 *
5258 * The initial checkpoint record is written to the beginning of the WAL
5259 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5260 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5261 */
5265 checkPoint.fullPageWrites = fullPageWrites;
5266 checkPoint.wal_level = wal_level;
5267 checkPoint.nextXid =
5268 FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
5269 checkPoint.nextOid = FirstGenbkiObjectId;
5270 checkPoint.nextMulti = FirstMultiXactId;
5271 checkPoint.nextMultiOffset = 0;
5273 checkPoint.oldestXidDB = Template1DbOid;
5274 checkPoint.oldestMulti = FirstMultiXactId;
5275 checkPoint.oldestMultiDB = Template1DbOid;
5278 checkPoint.time = (pg_time_t) time(NULL);
5280
5281 TransamVariables->nextXid = checkPoint.nextXid;
5282 TransamVariables->nextOid = checkPoint.nextOid;
5284 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5285 AdvanceOldestClogXid(checkPoint.oldestXid);
5286 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5287 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5289
5290 /* Set up the XLOG page header */
5291 page->xlp_magic = XLOG_PAGE_MAGIC;
5292 page->xlp_info = XLP_LONG_HEADER;
5295 longpage = (XLogLongPageHeader) page;
5296 longpage->xlp_sysid = sysidentifier;
5297 longpage->xlp_seg_size = wal_segment_size;
5298 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5299
5300 /* Insert the initial checkpoint record */
5301 recptr = ((char *) page + SizeOfXLogLongPHD);
5302 record = (XLogRecord *) recptr;
5303 record->xl_prev = 0;
5304 record->xl_xid = InvalidTransactionId;
5305 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5307 record->xl_rmid = RM_XLOG_ID;
5308 recptr += SizeOfXLogRecord;
5309 /* fill the XLogRecordDataHeaderShort struct */
5310 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5311 *(recptr++) = sizeof(checkPoint);
5312 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5313 recptr += sizeof(checkPoint);
5314 Assert(recptr - (char *) record == record->xl_tot_len);
5315
5317 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5318 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5319 FIN_CRC32C(crc);
5320 record->xl_crc = crc;
5321
5322 /* Create first XLOG segment file */
5323 openLogTLI = BootstrapTimeLineID;
5324 openLogFile = XLogFileInit(1, BootstrapTimeLineID);
5325
5326 /*
5327 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5328 * close the file again in a moment.
5329 */
5330
5331 /* Write the first page with the initial record */
5332 errno = 0;
5333 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5334 if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5335 {
5336 /* if write didn't set errno, assume problem is no disk space */
5337 if (errno == 0)
5338 errno = ENOSPC;
5339 ereport(PANIC,
5340 (errcode_for_file_access(),
5341 errmsg("could not write bootstrap write-ahead log file: %m")));
5342 }
5343 pgstat_report_wait_end();
5344
5345 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5346 if (pg_fsync(openLogFile) != 0)
5347 ereport(PANIC,
5348 (errcode_for_file_access(),
5349 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5350 pgstat_report_wait_end();
5351
5352 if (close(openLogFile) != 0)
5353 ereport(PANIC,
5354 (errcode_for_file_access(),
5355 errmsg("could not close bootstrap write-ahead log file: %m")));
5356
5357 openLogFile = -1;
5358
5359 /* Now create pg_control */
5360 InitControlFile(sysidentifier, data_checksum_version);
5361 ControlFile->time = checkPoint.time;
5362 ControlFile->checkPoint = checkPoint.redo;
5363 ControlFile->checkPointCopy = checkPoint;
5364
5365 /* some additional ControlFile fields are set in WriteControlFile() */
5367
5368 /* Bootstrap the commit log, too */
5369 BootStrapCLOG();
5373
5374 pfree(buffer);
5375
5376 /*
5377 * Force control file to be read - in contrast to normal processing we'd
5378 * otherwise never run the checks and GUC related initializations therein.
5379 */
5381}
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:775
uint64_t uint64
Definition: c.h:503
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:2147
void * palloc(Size size)
Definition: mcxt.c:1940
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2328
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2362
void BootStrapMultiXact(void)
Definition: multixact.c:2034
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3519
bool fullPageWrites
Definition: xlog.c:122
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4343
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9661
int wal_level
Definition: xlog.c:131
static void WriteControlFile(void)
Definition: xlog.c:4378
#define BootstrapTimeLineID
Definition: xlog.c:111
static void ReadControlFile(void)
Definition: xlog.c:4487
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert(), BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2295 of file xlog.c.

2296{
2297 double target;
2298
2299 /*-------
2300 * Calculate the distance at which to trigger a checkpoint, to avoid
2301 * exceeding max_wal_size_mb. This is based on two assumptions:
2302 *
2303 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2304 * WAL for two checkpoint cycles to allow us to recover from the
2305 * secondary checkpoint if the first checkpoint failed, though we
2306 * only did this on the primary anyway, not on standby. Keeping just
2307 * one checkpoint simplifies processing and reduces disk space in
2308 * many smaller databases.)
2309 * b) during checkpoint, we consume checkpoint_completion_target *
2310 * number of segments consumed between checkpoints.
2311 *-------
2312 */
2313 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2314 (1.0 + CheckPointCompletionTarget);
2315
2316 /* round down */
2317 CheckPointSegments = (int) target;
2318
2319 if (CheckPointSegments < 1)
2320 CheckPointSegments = 1;
2321}
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:614
int CheckPointSegments
Definition: xlog.c:156

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_max_slot_wal_keep_size()

bool check_max_slot_wal_keep_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2356 of file xlog.c.

2357{
2358 if (IsBinaryUpgrade && *newval != -1)
2359 {
2360 GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
2361 "max_slot_wal_keep_size");
2362 return false;
2363 }
2364
2365 return true;
2366}
bool IsBinaryUpgrade
Definition: globals.c:122
#define GUC_check_errdetail
Definition: guc.h:481

References GUC_check_errdetail, IsBinaryUpgrade, and newval.

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4816 of file xlog.c.

4817{
4818 /*
4819 * -1 indicates a request for auto-tune.
4820 */
4821 if (*newval == -1)
4822 {
4823 /*
4824 * If we haven't yet changed the boot_val default of -1, just let it
4825 * be. We'll fix it when XLOGShmemSize is called.
4826 */
4827 if (XLOGbuffers == -1)
4828 return true;
4829
4830 /* Otherwise, substitute the auto-tune value */
4831 *newval = XLOGChooseNumBuffers();
4832 }
4833
4834 /*
4835 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4836 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4837 * the case, we just silently treat such values as a request for the
4838 * minimum. (We could throw an error instead, but that doesn't seem very
4839 * helpful.)
4840 */
4841 if (*newval < 4)
4842 *newval = 4;
4843
4844 return true;
4845}
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4800

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4851 of file xlog.c.

4852{
4853 char *rawstring;
4854 List *elemlist;
4855 ListCell *l;
4856 bool newwalconsistency[RM_MAX_ID + 1];
4857
4858 /* Initialize the array */
4859 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4860
4861 /* Need a modifiable copy of string */
4862 rawstring = pstrdup(*newval);
4863
4864 /* Parse string into list of identifiers */
4865 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4866 {
4867 /* syntax error in list */
4868 GUC_check_errdetail("List syntax is invalid.");
4869 pfree(rawstring);
4870 list_free(elemlist);
4871 return false;
4872 }
4873
4874 foreach(l, elemlist)
4875 {
4876 char *tok = (char *) lfirst(l);
4877 int rmid;
4878
4879 /* Check for 'all'. */
4880 if (pg_strcasecmp(tok, "all") == 0)
4881 {
4882 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4883 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4884 newwalconsistency[rmid] = true;
4885 }
4886 else
4887 {
4888 /* Check if the token matches any known resource manager. */
4889 bool found = false;
4890
4891 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4892 {
4893 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4894 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4895 {
4896 newwalconsistency[rmid] = true;
4897 found = true;
4898 break;
4899 }
4900 }
4901 if (!found)
4902 {
4903 /*
4904 * During startup, it might be a not-yet-loaded custom
4905 * resource manager. Defer checking until
4906 * InitializeWalConsistencyChecking().
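4907 */
4908 if (!process_shared_preload_libraries_done)
4909 {
4910 check_wal_consistency_checking_deferred = true;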
4911 }
4912 else
4913 {
4914 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4915 pfree(rawstring);
4916 list_free(elemlist);
4917 return false;
4918 }
4919 }
4920 }
4921 }
4922
4923 pfree(rawstring);
4924 list_free(elemlist);
4925
4926 /* assign new value */
4927 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
4928 if (!*extra)
4929 return false;
4930 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4931 return true;
4932}
#define LOG
Definition: elog.h:31
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:638
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:2322
bool process_shared_preload_libraries_done
Definition: miscinit.c:1838
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3525
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:166
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2338 of file xlog.c.

2339{
2340 if (!IsValidWalSegSize(*newval))
2341 {
2342 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2343 return false;
2344 }
2345
2346 return true;
2347}
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7698 of file xlog.c.

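7699{
7700 CheckPointRelationMap();
7701 CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
7702 CheckPointSnapBuild();
7703 CheckPointLogicalRewriteHeap();
7704 CheckPointReplicationOrigin();
7705
7706 /* Write out all dirty data in SLRUs and the main buffer pool */
7707 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7708 CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
7709 CheckPointCLOG();
7710 CheckPointCommitTs();
7711 CheckPointSUBTRANS();
7712 CheckPointMultiXact();
7713 CheckPointPredicate();
7714 CheckPointBuffers(flags);
7715
7716 /* Perform all queued up fsyncs */
7717 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7718 CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
7719 ProcessSyncRequests();
7720 CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
7721 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();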
7722
7723 /* We deliberately delay 2PC checkpointing as long as possible */
7724 CheckPointTwoPhase(checkPointRedo);
7725}
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void CheckPointBuffers(int flags)
Definition: bufmgr.c:4147
void CheckPointCLOG(void)
Definition: clog.c:937
void CheckPointCommitTs(void)
Definition: commit_ts.c:820
void CheckPointMultiXact(void)
Definition: multixact.c:2304
void CheckPointReplicationOrigin(void)
Definition: origin.c:573
void CheckPointPredicate(void)
Definition: predicate.c:1041
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:2032
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1962
TimestampTz ckpt_write_t
Definition: xlog.h:162
TimestampTz ckpt_sync_end_t
Definition: xlog.h:164
TimestampTz ckpt_sync_t
Definition: xlog.h:163
void CheckPointSUBTRANS(void)
Definition: subtrans.c:355
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1806
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5570 of file xlog.c.

5571{
5572 /*
5573 * For archive recovery, the WAL must be generated with at least 'replica'
5574 * wal_level.
5575 */
5576 if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
5577 {
5578 ereport(FATAL,
5579 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5580 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5581 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5582 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5583 }
5584
5585 /*
5586 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5587 * must have at least as many backend slots as the primary.
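5588 */
5589 if (ArchiveRecoveryRequested && EnableHotStandby)
5590 {
5591 /* We ignore autovacuum_worker_slots when we make this test. */
5592 RecoveryRequiresIntParameter("max_connections",
5593 MaxConnections,
5594 ControlFile->MaxConnections);
5595 RecoveryRequiresIntParameter("max_worker_processes",
5596 max_worker_processes,
5597 ControlFile->max_worker_processes);
5598 RecoveryRequiresIntParameter("max_wal_senders",
5599 max_wal_senders,
5600 ControlFile->max_wal_senders);
5601 RecoveryRequiresIntParameter("max_prepared_transactions",
5602 max_prepared_xacts,
5603 ControlFile->max_prepared_xacts);
5604 RecoveryRequiresIntParameter("max_locks_per_transaction",
5605 max_locks_per_xact,
5606 ControlFile->max_locks_per_xact);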
5607 }
5608}
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:144
int max_worker_processes
Definition: globals.c:145
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
int max_prepared_xacts
Definition: twophase.c:115
int max_wal_senders
Definition: walsender.c:126
bool EnableHotStandby
Definition: xlog.c:121
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:138
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3866 of file xlog.c.

3867{
3868 int save_errno = errno;
3869 XLogSegNo lastRemovedSegNo;
3870
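3871 SpinLockAcquire(&XLogCtl->info_lck);
3872 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3873 SpinLockRelease(&XLogCtl->info_lck);
3874
3875 if (segno <= lastRemovedSegNo)
3876 {
3877 char filename[MAXFNAMELEN];
3878
3879 XLogFileName(filename, tli, segno, wal_segment_size);
3880 errno = save_errno;
3881 ereport(ERROR,
3882 (errcode_for_file_access(),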
3883 errmsg("requested WAL segment %s has already been removed",
3884 filename)));
3885 }
3886 errno = save_errno;
3887}
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:123
XLogSegNo lastRemovedSegNo
Definition: xlog.c:456
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5474 of file xlog.c.

5476{
5477 /*
5478 * Execute the recovery_end_command, if any.
5479 */
5480 if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5481 ExecuteRecoveryCommand(recoveryEndCommand,
5482 "recovery_end_command",
5483 true,
5484 WAIT_EVENT_RECOVERY_END_COMMAND);
5485
5486 /*
5487 * We switched to a new timeline. Clean up segments on the old timeline.
5488 *
5489 * If there are any higher-numbered segments on the old timeline, remove
5490 * them. They might contain valid WAL, but they might also be
5491 * pre-allocated files containing garbage. In any case, they are not part
5492 * of the new timeline's history so we don't need them.
5493 */
5494 RemoveNonParentXlogFiles(EndOfLog, newTLI);
5495
5496 /*
5497 * If the switch happened in the middle of a segment, what to do with the
5498 * last, partial segment on the old timeline? If we don't archive it, and
5499 * the server that created the WAL never archives it either (e.g. because
5500 * it was hit by a meteor), it will never make it to the archive. That's
5501 * OK from our point of view, because the new segment that we created with
5502 * the new TLI contains all the WAL from the old timeline up to the switch
5503 * point. But if you later try to do PITR to the "missing" WAL on the old
5504 * timeline, recovery won't find it in the archive. It's physically
5505 * present in the new file with new TLI, but recovery won't look there
5506 * when it's recovering to the older timeline. On the other hand, if we
5507 * archive the partial segment, and the original server on that timeline
5508 * is still running and archives the completed version of the same segment
5509 * later, it will fail. (We used to do that in 9.4 and below, and it
5510 * caused such problems).
5511 *
5512 * As a compromise, we rename the last segment with the .partial suffix,
5513 * and archive it. Archive recovery will never try to read .partial
5514 * segments, so they will normally go unused. But in the odd PITR case,
5515 * the administrator can copy them manually to the pg_wal directory
5516 * (removing the suffix). They can be useful in debugging, too.
5517 *
5518 * If a .done or .ready file already exists for the old timeline, however,
5519 * we had already determined that the segment is complete, so we can let
5520 * it be archived normally. (In particular, if it was restored from the
5521 * archive to begin with, it's expected to have a .done file).
5522 */
5523 if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5524 XLogArchivingActive())
5525 {
5526 char origfname[MAXFNAMELEN];
5527 XLogSegNo endLogSegNo;
5528
5529 XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5530 XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5531
5532 if (!XLogArchiveIsReadyOrDone(origfname))
5533 {
5534 char origpath[MAXPGPATH];
5535 char partialfname[MAXFNAMELEN];
5536 char partialpath[MAXPGPATH];
5537
5538 /*
5539 * If we're summarizing WAL, we can't rename the partial file
5540 * until the summarizer finishes with it, else it will fail.
5541 */
5542 if (summarize_wal)
5543 WaitForWalSummarization(EndOfLog);
5544
5545 XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5546 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5547 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5548
5549 /*
5550 * Make sure there's no .done or .ready file for the .partial
5551 * file.
5552 */
5553 XLogArchiveCleanup(partialfname);
5554
5555 durable_rename(origpath, partialpath, ERROR);
5556 XLogArchiveNotify(partialfname);
5557 }
5558 }
5559}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
#define MAXPGPATH
#define snprintf
Definition: port.h:239
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:4079
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:84

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4300 of file xlog.c.

4301{
4302 DIR *xldir;
4303 struct dirent *xlde;
4304 char path[MAXPGPATH + sizeof(XLOGDIR)];
4305
4306 xldir = AllocateDir(XLOGDIR);
4307
4308 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4309 {
4310 if (IsBackupHistoryFileName(xlde->d_name))
4311 {
4312 if (XLogArchiveCheckDone(xlde->d_name))
4313 {
4314 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4315 xlde->d_name);
4316 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4317 unlink(path);
4318 XLogArchiveCleanup(xlde->d_name);
4319 }
4320 }
4321 }
4322
4323 FreeDir(xldir);
4324}
#define DEBUG2
Definition: elog.h:29
int FreeDir(DIR *dir)
Definition: fd.c:3025
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2907
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2973
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData * rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1236 of file xlog.c.

1238{
1239 char *currpos;
1240 int freespace;
1241 int written;
1242 XLogRecPtr CurrPos;
1243 XLogPageHeader pagehdr;
1244
1245 /*
1246 * Get a pointer to the right place in the right WAL buffer to start
1247 * inserting to.
1248 */
1249 CurrPos = StartPos;
1250 currpos = GetXLogBuffer(CurrPos, tli);
1251 freespace = INSERT_FREESPACE(CurrPos);
1252
1253 /*
1254 * there should be enough space for at least the first field (xl_tot_len)
1255 * on this page.
1256 */
1257 Assert(freespace >= sizeof(uint32));
1258
1259 /* Copy record data */
1260 written = 0;
1261 while (rdata != NULL)
1262 {
1263 const char *rdata_data = rdata->data;
1264 int rdata_len = rdata->len;
1265
1266 while (rdata_len > freespace)
1267 {
1268 /*
1269 * Write what fits on this page, and continue on the next page.
1270 */
1271 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1272 memcpy(currpos, rdata_data, freespace);
1273 rdata_data += freespace;
1274 rdata_len -= freespace;
1275 written += freespace;
1276 CurrPos += freespace;
1277
1278 /*
1279 * Get pointer to beginning of next page, and set the xlp_rem_len
1280 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1281 *
1282 * It's safe to set the contrecord flag and xlp_rem_len without a
1283 * lock on the page. All the other flags were already set when the
1284 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1285 * only backend that needs to set the contrecord flag.
1286 */
1287 currpos = GetXLogBuffer(CurrPos, tli);
1288 pagehdr = (XLogPageHeader) currpos;
1289 pagehdr->xlp_rem_len = write_len - written;
1290 pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1291
1292 /* skip over the page header */
1293 if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1294 {
1295 CurrPos += SizeOfXLogLongPHD;
1296 currpos += SizeOfXLogLongPHD;
1297 }
1298 else
1299 {
1300 CurrPos += SizeOfXLogShortPHD;
1301 currpos += SizeOfXLogShortPHD;
1302 }
1303 freespace = INSERT_FREESPACE(CurrPos);
1304 }
1305
1306 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1307 memcpy(currpos, rdata_data, rdata_len);
1308 currpos += rdata_len;
1309 CurrPos += rdata_len;
1310 freespace -= rdata_len;
1311 written += rdata_len;
1312
1313 rdata = rdata->next;
1314 }
1315 Assert(written == write_len);
1316
1317 /*
1318 * If this was an xlog-switch, it's not enough to write the switch record,
1319 * we also have to consume all the remaining space in the WAL segment. We
1320 * have already reserved that space, but we need to actually fill it.
1321 */
1322 if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1323 {
1324 /* An xlog-switch record doesn't contain any data besides the header */
1325 Assert(write_len == SizeOfXLogRecord);
1326
1327 /* Assert that we did reserve the right amount of space */
1328 Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1329
1330 /* Use up all the remaining space on the current page */
1331 CurrPos += freespace;
1332
1333 /*
1334 * Cause all remaining pages in the segment to be flushed, leaving the
1335 * XLog position where it should be, at the start of the next segment.
1336 * We do this one page at a time, to make sure we don't deadlock
1337 * against ourselves if wal_buffers < wal_segment_size.
1338 */
1339 while (CurrPos < EndPos)
1340 {
1341 /*
1342 * The minimal action to flush the page would be to call
1343 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1344 * AdvanceXLInsertBuffer(...). The page would be left initialized
1345 * mostly to zeros, except for the page header (always the short
1346 * variant, as this is never a segment's first page).
1347 *
1348 * The large vistas of zeros are good for compressibility, but the
1349 * headers interrupting them every XLOG_BLCKSZ (with values that
1350 * differ from page to page) are not. The effect varies with
1351 * compression tool, but bzip2 for instance compresses about an
1352 * order of magnitude worse if those headers are left in place.
1353 *
1354 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1355 * called in heavily-loaded circumstances as well as this lightly-
1356 * loaded one) with variant behavior, we just use GetXLogBuffer
1357 * (which itself calls the two methods we need) to get the pointer
1358 * and zero most of the page. Then we just zero the page header.
1359 */
1360 currpos = GetXLogBuffer(CurrPos, tli);
1361 MemSet(currpos, 0, SizeOfXLogShortPHD);
1362
1363 CurrPos += XLOG_BLCKSZ;
1364 }
1365 }
1366 else
1367 {
1368 /* Align the end position, so that the next record starts aligned */
1369 CurrPos = MAXALIGN64(CurrPos);
1370 }
1371
1372 if (CurrPos != EndPos)
1373 ereport(PANIC,
1374 errcode(ERRCODE_DATA_CORRUPTED),
1375 errmsg_internal("space reserved for WAL record does not match what was written"));
1376}
uint32_t uint32
Definition: c.h:502
#define MAXALIGN64(LEN)
Definition: c.h:807
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
const void * data
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:591
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1643
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert(), XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 7077 of file xlog.c.

7078{
7079 bool shutdown;
7080 CheckPoint checkPoint;
7081 XLogRecPtr recptr;
7082 XLogSegNo _logSegNo;
7083 XLogCtlInsert *Insert = &XLogCtl->Insert;
7084 uint32 freespace;
7085 XLogRecPtr PriorRedoPtr;
7086 XLogRecPtr last_important_lsn;
7087 VirtualTransactionId *vxids;
7088 int nvxids;
7089 int oldXLogAllowed = 0;
7090
7091 /*
7092 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
7093 * issued at a different time.
7094 */
7095 if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
7096 shutdown = true;
7097 else
7098 shutdown = false;
7099
7100 /* sanity check */
7101 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
7102 elog(ERROR, "can't create a checkpoint during recovery");
7103
7104 /*
7105 * Prepare to accumulate statistics.
7106 *
7107 * Note: because it is possible for log_checkpoints to change while a
7108 * checkpoint proceeds, we always accumulate stats, even if
7109 * log_checkpoints is currently off.
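7110 */
7111 MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7112 CheckpointStats.ckpt_start_t = GetCurrentTimestamp();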
7113
7114 /*
7115 * Let smgr prepare for checkpoint; this has to happen outside the
7116 * critical section and before we determine the REDO pointer. Note that
7117 * smgr must not do anything that'd have to be undone if we decide no
7118 * checkpoint is needed.
7119 */
7120 SyncPreCheckpoint();
7121
7122 /*
7123 * Use a critical section to force system panic if we have trouble.
7124 */
7125 START_CRIT_SECTION();
7126
7127 if (shutdown)
7128 {
7129 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7130 ControlFile->state = DB_SHUTDOWNING;
7131 UpdateControlFile();
7132 LWLockRelease(ControlFileLock);
7133 }
7134
7135 /* Begin filling in the checkpoint WAL record */
7136 MemSet(&checkPoint, 0, sizeof(checkPoint));
7137 checkPoint.time = (pg_time_t) time(NULL);
7138
7139 /*
7140 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7141 * pointer. This allows us to begin accumulating changes to assemble our
7142 * starting snapshot of locks and transactions.
7143 */
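7144 if (!shutdown && XLogStandbyInfoActive())
7145 checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
7146 else
7147 checkPoint.oldestActiveXid = InvalidTransactionId;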
7148
7149 /*
7150 * Get location of last important record before acquiring insert locks (as
7151 * GetLastImportantRecPtr() also locks WAL locks).
7152 */
7153 last_important_lsn = GetLastImportantRecPtr();
7154
7155 /*
7156 * If this isn't a shutdown or forced checkpoint, and if there has been no
7157 * WAL activity requiring a checkpoint, skip it. The idea here is to
7158 * avoid inserting duplicate checkpoints when the system is idle.
7159 */
7160 if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
7161 CHECKPOINT_FORCE)) == 0)
7162 {
7163 if (last_important_lsn == ControlFile->checkPoint)
7164 {
7165 END_CRIT_SECTION();
7166 ereport(DEBUG1,
7167 (errmsg_internal("checkpoint skipped because system is idle")));
7168 return false;
7169 }
7170 }
7171
7172 /*
7173 * An end-of-recovery checkpoint is created before anyone is allowed to
7174 * write WAL. To allow us to write the checkpoint record, temporarily
7175 * enable XLogInsertAllowed.
7176 */
7177 if (flags & CHECKPOINT_END_OF_RECOVERY)
7178 oldXLogAllowed = LocalSetXLogInsertAllowed();
7179
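7180 checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7181 if (flags & CHECKPOINT_END_OF_RECOVERY)
7182 checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7183 else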
7184 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7185
7186 /*
7187 * We must block concurrent insertions while examining insert state.
7188 */
7189 WALInsertLockAcquireExclusive();
7190
7191 checkPoint.fullPageWrites = Insert->fullPageWrites;
7192 checkPoint.wal_level = wal_level;
7193
7194 if (shutdown)
7195 {
7196 XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7197
7198 /*
7199 * Compute new REDO record ptr = location of next XLOG record.
7200 *
7201 * Since this is a shutdown checkpoint, there can't be any concurrent
7202 * WAL insertion.
7203 */
7204 freespace = INSERT_FREESPACE(curInsert);
7205 if (freespace == 0)
7206 {
7207 if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7208 curInsert += SizeOfXLogLongPHD;
7209 else
7210 curInsert += SizeOfXLogShortPHD;
7211 }
7212 checkPoint.redo = curInsert;
7213
7214 /*
7215 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7216 * this must be done while holding all the insertion locks.
7217 *
7218 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7219 * left pointing past where it really needs to point. This is okay;
7220 * the only consequence is that XLogInsert might back up whole buffers
7221 * that it didn't really need to. We can't postpone advancing
7222 * RedoRecPtr because XLogInserts that happen while we are dumping
7223 * buffers must assume that their buffer changes are not included in
7224 * the checkpoint.
7225 */
7226 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7227 }
7228
7229 /*
7230 * Now we can release the WAL insertion locks, allowing other xacts to
7231 * proceed while we are flushing disk buffers.
7232 */
7233 WALInsertLockRelease();
7234
7235 /*
7236 * If this is an online checkpoint, we have not yet determined the redo
7237 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7238 * record; the LSN at which it starts becomes the new redo pointer. We
7239 * don't do this for a shutdown checkpoint, because in that case no WAL
7240 * can be written between the redo point and the insertion of the
7241 * checkpoint record itself, so the checkpoint record itself serves to
7242 * mark the redo point.
7243 */
7244 if (!shutdown)
7245 {
7246 /* Include WAL level in record for WAL summarizer's benefit. */
7247 XLogBeginInsert();
7248 XLogRegisterData(&wal_level, sizeof(wal_level));
7249 (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7250
7251 /*
7252 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7253 * shared memory and RedoRecPtr in backend-local memory, but we need
7254 * to copy that into the record that will be inserted when the
7255 * checkpoint is complete.
7256 */
7257 checkPoint.redo = RedoRecPtr;
7258 }
7259
7260 /* Update the info_lck-protected copy of RedoRecPtr as well */
7261 SpinLockAcquire(&XLogCtl->info_lck);
7262 XLogCtl->RedoRecPtr = checkPoint.redo;
7263 SpinLockRelease(&XLogCtl->info_lck);
7264
7265 /*
7266 * If enabled, log checkpoint start. We postpone this until now so as not
7267 * to log anything if we decided to skip the checkpoint.
7268 */
7269 if (log_checkpoints)
7270 LogCheckpointStart(flags, false);
7271
7272 /* Update the process title */
7273 update_checkpoint_display(flags, false, false);
7274
7275 TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7276
7277 /*
7278 * Get the other info we need for the checkpoint record.
7279 *
7280 * We don't need to save oldestClogXid in the checkpoint, it only matters
7281 * for the short period in which clog is being truncated, and if we crash
7282 * during that we'll redo the clog truncation and fix up oldestClogXid
7283 * there.
7284 */
7285 LWLockAcquire(XidGenLock, LW_SHARED);
7286 checkPoint.nextXid = TransamVariables->nextXid;
7287 checkPoint.oldestXid = TransamVariables->oldestXid;
7288 checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7289 LWLockRelease(XidGenLock);
7290
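7291 LWLockAcquire(CommitTsLock, LW_SHARED);
7292 checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7293 checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;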
7294 LWLockRelease(CommitTsLock);
7295
7296 LWLockAcquire(OidGenLock, LW_SHARED);
7297 checkPoint.nextOid = TransamVariables->nextOid;
7298 if (!shutdown)
7299 checkPoint.nextOid += TransamVariables->oidCount;
7300 LWLockRelease(OidGenLock);
7301
7302 MultiXactGetCheckptMulti(shutdown,
7303 &checkPoint.nextMulti,
7304 &checkPoint.nextMultiOffset,
7305 &checkPoint.oldestMulti,
7306 &checkPoint.oldestMultiDB);
7307
7308 /*
7309 * Having constructed the checkpoint record, ensure all shmem disk buffers
7310 * and commit-log buffers are flushed to disk.
7311 *
7312 * This I/O could fail for various reasons. If so, we will fail to
7313 * complete the checkpoint, but there is no reason to force a system
7314 * panic. Accordingly, exit critical section while doing it.
7315 */
7316 END_CRIT_SECTION();
7317
7318 /*
7319 * In some cases there are groups of actions that must all occur on one
7320 * side or the other of a checkpoint record. Before flushing the
7321 * checkpoint record we must explicitly wait for any backend currently
7322 * performing those groups of actions.
7323 *
7324 * One example is end of transaction, so we must wait for any transactions
7325 * that are currently in commit critical sections. If an xact inserted
7326 * its commit record into XLOG just before the REDO point, then a crash
7327 * restart from the REDO point would not replay that record, which means
7328 * that our flushing had better include the xact's update of pg_xact. So
7329 * we wait till he's out of his commit critical section before proceeding.
7330 * See notes in RecordTransactionCommit().
7331 *
7332 * Because we've already released the insertion locks, this test is a bit
7333 * fuzzy: it is possible that we will wait for xacts we didn't really need
7334 * to wait for. But the delay should be short and it seems better to make
7335 * checkpoint take a bit longer than to hold off insertions longer than
7336 * necessary. (In fact, the whole reason we have this issue is that xact.c
7337 * does commit record XLOG insertion and clog update as two separate steps
7338 * protected by different locks, but again that seems best on grounds of
7339 * minimizing lock contention.)
7340 *
7341 * A transaction that has not yet set delayChkptFlags when we look cannot
7342 * be at risk, since it has not inserted its commit record yet; and one
7343 * that's already cleared it is not at risk either, since it's done fixing
7344 * clog and we will correctly flush the update below. So we cannot miss
7345 * any xacts we need to wait for.
7346 */
7347 vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7348 if (nvxids > 0)
7349 {
7350 do
7351 {
7352 /*
7353 * Keep absorbing fsync requests while we wait. There could even
7354 * be a deadlock if we don't, if the process that prevents the
7355 * checkpoint is trying to add a request to the queue.
7356 */
7357 AbsorbSyncRequests();
7358
7359 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7360 pg_usleep(10000L); /* wait for 10 msec */
7361 pgstat_report_wait_end();
7362 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7363 DELAY_CHKPT_START));
7364 }
7365 pfree(vxids);
7366
7367 CheckPointGuts(checkPoint.redo, flags);
7368
7369 vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7370 if (nvxids > 0)
7371 {
7372 do
7373 {
7374 AbsorbSyncRequests();
7375
7376 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7377 pg_usleep(10000L); /* wait for 10 msec */
7378 pgstat_report_wait_end();
7379 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7380 DELAY_CHKPT_COMPLETE));
7381 }
7382 pfree(vxids);
7383
7384 /*
7385 * Take a snapshot of running transactions and write this to WAL. This
7386 * allows us to reconstruct the state of running transactions during
7387 * archive recovery, if required. Skip, if this info disabled.
7388 *
7389 * If we are shutting down, or Startup process is completing crash
7390 * recovery we don't need to write running xact data.
7391 */
7392 if (!shutdown && XLogStandbyInfoActive())
7393 LogStandbySnapshot();
7394
7395 START_CRIT_SECTION();
7396
7397 /*
7398 * Now insert the checkpoint record into XLOG.
7399 */
7400 XLogBeginInsert();
7401 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7402 recptr = XLogInsert(RM_XLOG_ID,
7403 shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7404 XLOG_CHECKPOINT_ONLINE);
7405
7406 XLogFlush(recptr);
7407
7408 /*
7409 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7410 * overwritten at next startup. No-one should even try, this just allows
7411 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7412 * to just temporarily disable writing until the system has exited
7413 * recovery.
7414 */
7415 if (shutdown)
7416 {
7417 if (flags & CHECKPOINT_END_OF_RECOVERY)
7418 LocalXLogInsertAllowed = oldXLogAllowed;
7419 else
7420 LocalXLogInsertAllowed = 0; /* never again write WAL */
7421 }
7422
7423 /*
7424 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7425 * = end of actual checkpoint record.
7426 */
7427 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7428 ereport(PANIC,
7429 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7430
7431 /*
7432 * Remember the prior checkpoint's redo ptr for
7433 * UpdateCheckPointDistanceEstimate()
7434 */
7435 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7436
7437 /*
7438 * Update the control file.
7439 */
7440 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7441 if (shutdown)
7442 ControlFile->state = DB_SHUTDOWNED;
7443 ControlFile->checkPoint = ProcLastRecPtr;
7444 ControlFile->checkPointCopy = checkPoint;
7445 /* crash recovery should always recover to the end of WAL */
7446 ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7447 ControlFile->minRecoveryPointTLI = 0;
7448
7449 /*
7450 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7451 * unused on non-shutdown checkpoints, but seems useful to store it always
7452 * for debugging purposes.
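7453 */
7454 ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7455
7456 UpdateControlFile();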
7457 LWLockRelease(ControlFileLock);
7458
7459 /* Update shared-memory copy of checkpoint XID/epoch */
7460 SpinLockAcquire(&XLogCtl->info_lck);
7461 XLogCtl->ckptFullXid = checkPoint.nextXid;
7462 SpinLockRelease(&XLogCtl->info_lck);
7463
7464 /*
7465 * We are now done with critical updates; no need for system panic if we
7466 * have trouble while fooling with old log segments.
7467 */
7468 END_CRIT_SECTION();
7469
7470 /*
7471 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7472 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7473 * where (a) we're not inside of a critical section and (b) we can be
7474 * certain that the relevant record has been flushed to disk, which must
7475 * happen before it can be summarized.
7476 *
7477 * If this is a shutdown checkpoint, then this happens reasonably
7478 * promptly: we've only just inserted and flushed the
7479 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7480 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7481 * record was written before we began flushing data to disk, and that
7482 * could be many minutes ago at this point. However, we don't XLogFlush()
7483 * after inserting that record, so we're not guaranteed that it's on disk
7484 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7485 * record.
7486 */
7487 WakeupWalSummarizer();
7488
7489 /*
7490 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7491 */
7492 SyncPostCheckpoint();
7493
7494 /*
7495 * Update the average distance between checkpoints if the prior checkpoint
7496 * exists.
7497 */
7498 if (PriorRedoPtr != InvalidXLogRecPtr)
7499 UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7500
7501 /*
7502 * Delete old log files, those no longer needed for last checkpoint to
7503 * prevent the disk holding the xlog from growing full.
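7504 */
7505 XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7506 KeepLogSeg(recptr, &_logSegNo);
7507 if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED | RS_INVAL_IDLE_TIMEOUT,
7508 _logSegNo, InvalidOid,
7509 InvalidTransactionId))
7510 {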
7511 /*
7512 * Some slots have been invalidated; recalculate the old-segment
7513 * horizon, starting again from RedoRecPtr.
7514 */
7515 XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7516 KeepLogSeg(recptr, &_logSegNo);
7517 }
7518 _logSegNo--;
7519 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7520 checkPoint.ThisTimeLineID);
7521
7522 /*
7523 * Make more log segments if needed. (Do this after recycling old log
7524 * segments, since that may supply some of the needed files.)
7525 */
7526 if (!shutdown)
7527 PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7528
7529 /*
7530 * Truncate pg_subtrans if possible. We can throw away all data before
7531 * the oldest XMIN of any running transaction. No future transaction will
7532 * attempt to reference any pg_subtrans entry older than that (see Asserts
7533 * in subtrans.c). During recovery, though, we mustn't do this because
7534 * StartupSUBTRANS hasn't been called yet.
7535 */
7536 if (!RecoveryInProgress())
7537 TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7538
7539 /* Real work is done; log and update stats. */
7540 LogCheckpointEnd(false);
7541
7542 /* Reset the process title */
7543 update_checkpoint_display(flags, false, true);
7544
7545 TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
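7546 NBuffers,
7547 CheckpointStats.ckpt_segs_added,
7548 CheckpointStats.ckpt_segs_removed,
7549 CheckpointStats.ckpt_segs_recycled);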
7550
7551 return true;
7552}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:476
void AbsorbSyncRequests(void)
int NBuffers
Definition: globals.c:143
@ LW_SHARED
Definition: lwlock.h:115
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2282
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:35
#define DELAY_CHKPT_START
Definition: proc.h:120
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:121
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2880
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3089
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3043
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1976
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:55
@ RS_INVAL_IDLE_TIMEOUT
Definition: slot.h:61
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1282
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:452
TimeLineID InsertTimeLineID
Definition: xlog.c:520
XLogRecPtr RedoRecPtr
Definition: xlog.c:451
TimeLineID PrevTimeLineID
Definition: xlog.c:521
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:459
XLogRecPtr RedoRecPtr
Definition: xlog.c:425
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:253
bool RecoveryInProgress(void)
Definition: xlog.c:6522
static void WALInsertLockRelease(void)
Definition: xlog.c:1456
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1869
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1427
static void UpdateControlFile(void)
Definition: xlog.c:4725
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:4004
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6837
static XLogRecPtr RedoRecPtr
Definition: xlog.c:273
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6869
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3829
bool log_checkpoints
Definition: xlog.c:129
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:8141
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6610
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6744
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6974
static int LocalXLogInsertAllowed
Definition: xlog.c:236
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7698
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:7012
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7563 of file xlog.c.

7564{
7565 xl_end_of_recovery xlrec;
7566 XLogRecPtr recptr;
7567
7568 /* sanity check */
7569 if (!RecoveryInProgress())
7570 elog(ERROR, "can only be used to end recovery");
7571
7572 xlrec.end_time = GetCurrentTimestamp();
7573 xlrec.wal_level = wal_level;
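7574
7575 WALInsertLockAcquireExclusive();
7576 xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7577 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7578 WALInsertLockRelease();
7579
7580 START_CRIT_SECTION();
7581
7582 XLogBeginInsert();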
7583 XLogRegisterData(&xlrec, sizeof(xl_end_of_recovery));
7584 recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7585
7586 XLogFlush(recptr);
7587
7588 /*
7589 * Update the control file so that crash recovery can follow the timeline
7590 * changes to this point.
7591 */
7592 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7593 ControlFile->minRecoveryPoint = recptr;
7594 ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7595 UpdateControlFile();
7596 LWLockRelease(ControlFileLock);
7597
7598 END_CRIT_SECTION();
7599}
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), wal_level, xl_end_of_recovery::wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7628 of file xlog.c.

7630{
7631 xl_overwrite_contrecord xlrec;
7632 XLogRecPtr recptr;
7633 XLogPageHeader pagehdr;
7634 XLogRecPtr startPos;
7635
7636 /* sanity checks */
7637 if (!RecoveryInProgress())
7638 elog(ERROR, "can only be used at end of recovery");
7639 if (pagePtr % XLOG_BLCKSZ != 0)
7640 elog(ERROR, "invalid position for missing continuation record %X/%X",
7641 LSN_FORMAT_ARGS(pagePtr));
7642
7643 /* The current WAL insert position should be right after the page header */
7644 startPos = pagePtr;
7645 if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7646 startPos += SizeOfXLogLongPHD;
7647 else
7648 startPos += SizeOfXLogShortPHD;
7649 recptr = GetXLogInsertRecPtr();
7650 if (recptr != startPos)
7651 elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
7652 LSN_FORMAT_ARGS(recptr));
7653
7654 START_CRIT_SECTION();
7655
7656 /*
7657 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7658 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7659 *
7660 * No other backend is allowed to write WAL yet, so acquiring the WAL
7661 * insertion lock is just pro forma.
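7662 */
7663 WALInsertLockAcquire();
7664 pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7665 pagehdr->xlp_info |= XLP_FIRST_IS_OVERWRITE_CONTRECORD;
7666 WALInsertLockRelease();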
7667
7668 /*
7669 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7670 * page. We know it becomes the first record, because no other backend is
7671 * allowed to write WAL yet.
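7672 */
7673 XLogBeginInsert();
7674 xlrec.overwritten_lsn = aborted_lsn;
7675 xlrec.overwrite_time = GetCurrentTimestamp();
7676 XLogRegisterData(&xlrec, sizeof(xl_overwrite_contrecord));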
7677 recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7678
7679 /* check that the record was inserted to the right place */
7680 if (ProcLastRecPtr != startPos)
7681 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
7682 LSN_FORMAT_ARGS(ProcLastRecPtr));
7683
7684 XLogFlush(recptr);
7685
7686 END_CRIT_SECTION();
7687
7688 return recptr;
7689}
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
static void WALInsertLockAcquire(void)
Definition: xlog.c:1382
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9612
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7779 of file xlog.c.

/*
 * Try to establish a restartpoint from the last checkpoint record copied out
 * of XLogCtl.
 *
 * flags is tested for CHECKPOINT_IS_SHUTDOWN and is passed on to
 * LogCheckpointStart(), update_checkpoint_display() and CheckPointGuts().
 *
 * Returns false if recovery has already ended, or if the copied checkpoint
 * is invalid or its redo pointer is not newer than the one in ControlFile.
 * Returns true once the restartpoint work below has run.
 *
 * NOTE(review): the embedded listing numbers skip in many places (e.g. 7793,
 * 7796, 7800, 7808, 7830), so locking, logging and several other statements
 * are absent from this listing; check the real xlog.c before relying on it.
 */
7780{
7781 XLogRecPtr lastCheckPointRecPtr;
7782 XLogRecPtr lastCheckPointEndPtr;
7783 CheckPoint lastCheckPoint;
7784 XLogRecPtr PriorRedoPtr;
7785 XLogRecPtr receivePtr;
7786 XLogRecPtr replayPtr;
7787 TimeLineID replayTLI;
7788 XLogRecPtr endptr;
7789 XLogSegNo _logSegNo;
7790 TimestampTz xtime;
7791
7792 /* Concurrent checkpoint/restartpoint cannot happen */
7794
7795 /* Get a local copy of the last safe checkpoint record. */
7797 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7798 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7799 lastCheckPoint = XLogCtl->lastCheckPoint;
7801
7802 /*
7803 * Check that we're still in recovery mode. It's ok if we exit recovery
7804 * mode after this check, the restart point is valid anyway.
7805 */
7806 if (!RecoveryInProgress())
7807 {
7809 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7810 return false;
7811 }
7812
7813 /*
7814 * If the last checkpoint record we've replayed is already our last
7815 * restartpoint, we can't perform a new restart point. We still update
7816 * minRecoveryPoint in that case, so that if this is a shutdown restart
7817 * point, we won't start up earlier than before. That's not strictly
7818 * necessary, but when hot standby is enabled, it would be rather weird if
7819 * the database opened up for read-only connections at a point-in-time
7820 * before the last shutdown. Such time travel is still possible in case of
7821 * immediate shutdown, though.
7822 *
7823 * We don't explicitly advance minRecoveryPoint when we do create a
7824 * restartpoint. It's assumed that flushing the buffers will do that as a
7825 * side-effect.
7826 */
7827 if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7828 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7829 {
7831 (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7832 LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7833
7835 if (flags & CHECKPOINT_IS_SHUTDOWN)
7836 {
7837 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7840 LWLockRelease(ControlFileLock);
7841 }
7842 return false;
7843 }
7844
7845 /*
7846 * Update the shared RedoRecPtr so that the startup process can calculate
7847 * the number of segments replayed since last restartpoint, and request a
7848 * restartpoint if it exceeds CheckPointSegments.
7849 *
7850 * Like in CreateCheckPoint(), hold off insertions to update it, although
7851 * during recovery this is just pro forma, because no WAL insertions are
7852 * happening.
7853 */
7855 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7857
7858 /* Also update the info_lck-protected copy */
7860 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7862
7863 /*
7864 * Prepare to accumulate statistics.
7865 *
7866 * Note: because it is possible for log_checkpoints to change while a
7867 * checkpoint proceeds, we always accumulate stats, even if
7868 * log_checkpoints is currently off.
7869 */
7872
7873 if (log_checkpoints)
7874 LogCheckpointStart(flags, true);
7875
7876 /* Update the process title */
7877 update_checkpoint_display(flags, true, false);
7878
7879 CheckPointGuts(lastCheckPoint.redo, flags);
7880
7881 /*
7882 * This location needs to be after CheckPointGuts() to ensure that some
7883 * work has already happened during this checkpoint.
7884 */
7885 INJECTION_POINT("create-restart-point");
7886
7887 /*
7888 * Remember the prior checkpoint's redo ptr for
7889 * UpdateCheckPointDistanceEstimate()
7890 */
7891 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7892
7893 /*
7894 * Update pg_control, using current time. Check that it still shows an
7895 * older checkpoint, else do nothing; this is a quick hack to make sure
7896 * nothing really bad happens if somehow we get here after the
7897 * end-of-recovery checkpoint.
7898 */
7899 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7900 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7901 {
7902 /*
7903 * Update the checkpoint information. We do this even if the cluster
7904 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7905 * segments recycled below.
7906 */
7907 ControlFile->checkPoint = lastCheckPointRecPtr;
7908 ControlFile->checkPointCopy = lastCheckPoint;
7909
7910 /*
7911 * Ensure minRecoveryPoint is past the checkpoint record and update it
7912 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7913 * this will have happened already while writing out dirty buffers,
7914 * but not necessarily - e.g. because no buffers were dirtied. We do
7915 * this because a backup performed in recovery uses minRecoveryPoint
7916 * to determine which WAL files must be included in the backup, and
7917 * the file (or files) containing the checkpoint record must be
7918 * included, at a minimum. Note that for an ordinary restart of
7919 * recovery there's no value in having the minimum recovery point any
7920 * earlier than this anyway, because redo will begin just after the
7921 * checkpoint record.
7922 */
7924 {
7925 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7926 {
7927 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7929
7930 /* update local copy */
7933 }
7934 if (flags & CHECKPOINT_IS_SHUTDOWN)
7936 }
7938 }
7939 LWLockRelease(ControlFileLock);
7940
7941 /*
7942 * Update the average distance between checkpoints/restartpoints if the
7943 * prior checkpoint exists.
7944 */
7945 if (PriorRedoPtr != InvalidXLogRecPtr)
7947
7948 /*
7949 * Delete old log files, those no longer needed for last restartpoint to
7950 * prevent the disk holding the xlog from growing full.
7951 */
7953
7954 /*
7955 * Retreat _logSegNo using the current end of xlog replayed or received,
7956 * whichever is later.
7957 */
7958 receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7959 replayPtr = GetXLogReplayRecPtr(&replayTLI);
7960 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7961 KeepLogSeg(endptr, &_logSegNo);
7963 _logSegNo, InvalidOid,
7965 {
7966 /*
7967 * Some slots have been invalidated; recalculate the old-segment
7968 * horizon, starting again from RedoRecPtr.
7969 */
7971 KeepLogSeg(endptr, &_logSegNo);
7972 }
7973 _logSegNo--;
7974
7975 /*
7976 * Try to recycle segments on a useful timeline. If we've been promoted
7977 * since the beginning of this restartpoint, use the new timeline chosen
7978 * at end of recovery. If we're still in recovery, use the timeline we're
7979 * currently replaying.
7980 *
7981 * There is no guarantee that the WAL segments will be useful on the
7982 * current timeline; if recovery proceeds to a new timeline right after
7983 * this, the pre-allocated WAL segments on this timeline will not be used,
7984 * and will go wasted until recycled on the next restartpoint. We'll live
7985 * with that.
7986 */
7987 if (!RecoveryInProgress())
7988 replayTLI = XLogCtl->InsertTimeLineID;
7989
7990 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7991
7992 /*
7993 * Make more log segments if needed. (Do this after recycling old log
7994 * segments, since that may supply some of the needed files.)
7995 */
7996 PreallocXlogFiles(endptr, replayTLI);
7997
7998 /*
7999 * Truncate pg_subtrans if possible. We can throw away all data before
8000 * the oldest XMIN of any running transaction. No future transaction will
8001 * attempt to reference any pg_subtrans entry older than that (see Asserts
8002 * in subtrans.c). When hot standby is disabled, though, we mustn't do
8003 * this because StartupSUBTRANS hasn't been called yet.
8004 */
8005 if (EnableHotStandby)
8007
8008 /* Real work is done; log and update stats. */
8009 LogCheckpointEnd(true);
8010
8011 /* Reset the process title */
8012 update_checkpoint_display(flags, true, true);
8013
/* The errdetail below is attached only when xtime is nonzero. */
8014 xtime = GetLatestXTime();
8016 (errmsg("recovery restart point at %X/%X",
8017 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
8018 xtime ? errdetail("Last completed transaction was at log time %s.",
8019 timestamptz_to_str(xtime)) : 0));
8020
8021 /*
8022 * Finally, execute archive_cleanup_command, if any.
8023 */
8024 if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
8026 "archive_cleanup_command",
8027 false,
8028 WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
8029
8030 return true;
8031}
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1862
int64 TimestampTz
Definition: timestamp.h:39
bool IsUnderPostmaster
Definition: globals.c:121
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:363
BackendType MyBackendType
Definition: miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:556
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:554
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:555
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2843
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:657
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:658
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:85
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert(), B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9571 of file xlog.c.

/*
 * Abort handling for a backup.  arg is a boolean Datum: true when the call
 * happens while a backup is still being started.  code is not used in the
 * visible lines.
 *
 * The guarded branch is entered when called during backup start or when this
 * session's backup state is not SESSION_BACKUP_NONE; a message is reported
 * only when not called during backup start.
 *
 * NOTE(review): the embedded listing numbers skip 9580-9582, 9584-9585 and
 * 9588, so the statements that actually undo the backup state are absent
 * from this listing; check the real xlog.c before relying on it.
 */
9572{
9573 bool during_backup_start = DatumGetBool(arg);
9574
9575 /* If called during backup start, there shouldn't be one already running */
9576 Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9577
9578 if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9579 {
9583
9586
9587 if (!during_backup_start)
9589 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9590 }
9591}
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
int runningBackups
Definition: xlog.c:433
static SessionBackupState sessionBackupState
Definition: xlog.c:386
@ SESSION_BACKUP_NONE
Definition: xlog.h:288

References arg, Assert(), DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8969 of file xlog.c.

/*
 * Start an online backup: force a checkpoint (repeating until the start
 * point is usable), record its location in *state, and collect information
 * about the entries found under PG_TBLSPC_DIR.
 *
 * backupidstr    backup label; copied into state->name.  ERROR if its length
 *                exceeds MAXPGPATH.
 * fast           when true, CHECKPOINT_IMMEDIATE is added to the checkpoint
 *                request flags.
 * tablespaces    if non-NULL, a tablespaceinfo is appended to *tablespaces
 *                for each accepted symlink or directory entry.
 * state          must not be NULL; name, checkpointloc, startpoint,
 *                starttime and started_in_recovery are filled in here.
 * tblspcmapfile  receives one "<entry name> <escaped link target>" line per
 *                symlink entry.
 *
 * NOTE(review): the embedded listing numbers skip in many places (e.g. 8972,
 * 8975, 8981, 9016-9018, 9025, 9054, 9079), so the recovery-state setup,
 * locking and several conditions are absent from this listing; check the
 * real xlog.c before relying on it.
 */
8971{
8973
8974 Assert(state != NULL);
8976
8977 /*
8978 * During recovery, we don't need to check WAL level. Because, if WAL
8979 * level is not sufficient, it's impossible to get here during recovery.
8980 */
8982 ereport(ERROR,
8983 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8984 errmsg("WAL level not sufficient for making an online backup"),
8985 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8986
8987 if (strlen(backupidstr) > MAXPGPATH)
8988 ereport(ERROR,
8989 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8990 errmsg("backup label too long (max %d bytes)",
8991 MAXPGPATH)));
8992
8993 strlcpy(state->name, backupidstr, sizeof(state->name));
8994
8995 /*
8996 * Mark backup active in shared memory. We must do full-page WAL writes
8997 * during an on-line backup even if not doing so at other times, because
8998 * it's quite possible for the backup dump to obtain a "torn" (partially
8999 * written) copy of a database page if it reads the page concurrently with
9000 * our write to the same page. This can be fixed as long as the first
9001 * write to the page in the WAL sequence is a full-page write. Hence, we
9002 * increment runningBackups then force a CHECKPOINT, to ensure there are
9003 * no dirty pages in shared memory that might get dumped while the backup
9004 * is in progress without having a corresponding WAL record. (Once the
9005 * backup is complete, we need not force full-page writes anymore, since
9006 * we expect that any pages not modified during the backup interval must
9007 * have been correctly captured by the backup.)
9008 *
9009 * Note that forcing full-page writes has no effect during an online
9010 * backup from the standby.
9011 *
9012 * We must hold all the insertion locks to change the value of
9013 * runningBackups, to ensure adequate interlocking against
9014 * XLogInsertRecord().
9015 */
9019
9020 /*
9021 * Ensure we decrement runningBackups if we fail below. NB -- for this to
9022 * work correctly, it is critical that sessionBackupState is only updated
9023 * after this block is over.
9024 */
9026 {
9027 bool gotUniqueStartpoint = false;
9028 DIR *tblspcdir;
9029 struct dirent *de;
9030 tablespaceinfo *ti;
9031 int datadirpathlen;
9032
9033 /*
9034 * Force an XLOG file switch before the checkpoint, to ensure that the
9035 * WAL segment the checkpoint is written to doesn't contain pages with
9036 * old timeline IDs. That would otherwise happen if you called
9037 * pg_backup_start() right after restoring from a PITR archive: the
9038 * first WAL segment containing the startup checkpoint has pages in
9039 * the beginning with the old timeline ID. That can cause trouble at
9040 * recovery: we won't have a history file covering the old timeline if
9041 * pg_wal directory was not included in the base backup and the WAL
9042 * archive was cleared too before starting the backup.
9043 *
9044 * This also ensures that we have emitted a WAL page header that has
9045 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
9046 * Therefore, if a WAL archiver (such as pglesslog) is trying to
9047 * compress out removable backup blocks, it won't remove any that
9048 * occur after this point.
9049 *
9050 * During recovery, we skip forcing XLOG file switch, which means that
9051 * the backup taken during recovery is not available for the special
9052 * recovery case described above.
9053 */
9055 RequestXLogSwitch(false);
9056
9057 do
9058 {
9059 bool checkpointfpw;
9060
9061 /*
9062 * Force a CHECKPOINT. Aside from being necessary to prevent torn
9063 * page problems, this guarantees that two successive backup runs
9064 * will have different checkpoint positions and hence different
9065 * history file names, even if nothing happened in between.
9066 *
9067 * During recovery, establish a restartpoint if possible. We use
9068 * the last restartpoint as the backup starting checkpoint. This
9069 * means that two successive backup runs can have same checkpoint
9070 * positions.
9071 *
9072 * Since the fact that we are executing do_pg_backup_start()
9073 * during recovery means that checkpointer is running, we can use
9074 * RequestCheckpoint() to establish a restartpoint.
9075 *
9076 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
9077 * passing fast = true). Otherwise this can take a while.
9078 */
9080 (fast ? CHECKPOINT_IMMEDIATE : 0));
9081
9082 /*
9083 * Now we need to fetch the checkpoint record location, and also
9084 * its REDO pointer. The oldest point in WAL that would be needed
9085 * to restore starting from the checkpoint is precisely the REDO
9086 * pointer.
9087 */
9088 LWLockAcquire(ControlFileLock, LW_SHARED);
9089 state->checkpointloc = ControlFile->checkPoint;
9090 state->startpoint = ControlFile->checkPointCopy.redo;
9092 checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
9093 LWLockRelease(ControlFileLock);
9094
9096 {
9097 XLogRecPtr recptr;
9098
9099 /*
9100 * Check to see if all WAL replayed during online backup
9101 * (i.e., since last restartpoint used as backup starting
9102 * checkpoint) contain full-page writes.
9103 */
9105 recptr = XLogCtl->lastFpwDisableRecPtr;
9107
9108 if (!checkpointfpw || state->startpoint <= recptr)
9109 ereport(ERROR,
9110 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9111 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9112 "since last restartpoint"),
9113 errhint("This means that the backup being taken on the standby "
9114 "is corrupt and should not be used. "
9115 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9116 "and then try an online backup again.")));
9117
9118 /*
9119 * During recovery, since we don't use the end-of-backup WAL
9120 * record and don't write the backup history file, the
9121 * starting WAL location doesn't need to be unique. This means
9122 * that two base backups started at the same time might use
9123 * the same checkpoint as starting locations.
9124 */
9125 gotUniqueStartpoint = true;
9126 }
9127
9128 /*
9129 * If two base backups are started at the same time (in WAL sender
9130 * processes), we need to make sure that they use different
9131 * checkpoints as starting locations, because we use the starting
9132 * WAL location as a unique identifier for the base backup in the
9133 * end-of-backup WAL record and when we write the backup history
9134 * file. Perhaps it would be better to generate a separate unique ID
9135 * for each backup instead of forcing another checkpoint, but
9136 * taking a checkpoint right after another is not that expensive
9137 * either because only few buffers have been dirtied yet.
9138 */
9140 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9141 {
9142 XLogCtl->Insert.lastBackupStart = state->startpoint;
9143 gotUniqueStartpoint = true;
9144 }
9146 } while (!gotUniqueStartpoint);
9147
9148 /*
9149 * Construct tablespace_map file.
9150 */
9151 datadirpathlen = strlen(DataDir);
9152
9153 /* Collect information about all tablespaces */
9154 tblspcdir = AllocateDir(PG_TBLSPC_DIR);
9155 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9156 {
9157 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9158 char linkpath[MAXPGPATH];
9159 char *relpath = NULL;
9160 char *s;
9161 PGFileType de_type;
9162 char *badp;
9163 Oid tsoid;
9164
9165 /*
9166 * Try to parse the directory name as an unsigned integer.
9167 *
9168 * Tablespace directories should be positive integers that can be
9169 * represented in 32 bits, with no leading zeroes or trailing
9170 * garbage. If we come across a name that doesn't meet those
9171 * criteria, skip it.
9172 */
/*
 * NOTE(review): the second test below reads d_name[1], not d_name[0], so a
 * first character above '9' is not rejected here; such names appear to be
 * left to the strtoul()/badp check that follows -- confirm this is intended.
 */
9173 if (de->d_name[0] < '1' || de->d_name[1] > '9')
9174 continue;
9175 errno = 0;
9176 tsoid = strtoul(de->d_name, &badp, 10);
9177 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9178 continue;
9179
9180 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9181
9182 de_type = get_dirent_type(fullpath, de, false, ERROR);
9183
9184 if (de_type == PGFILETYPE_LNK)
9185 {
9186 StringInfoData escapedpath;
9187 int rllen;
9188
9189 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9190 if (rllen < 0)
9191 {
9193 (errmsg("could not read symbolic link \"%s\": %m",
9194 fullpath)));
9195 continue;
9196 }
9197 else if (rllen >= sizeof(linkpath))
9198 {
9200 (errmsg("symbolic link \"%s\" target is too long",
9201 fullpath)));
9202 continue;
9203 }
9204 linkpath[rllen] = '\0';
9205
9206 /*
9207 * Relpath holds the relative path of the tablespace directory
9208 * when it's located within PGDATA, or NULL if it's located
9209 * elsewhere.
9210 */
9211 if (rllen > datadirpathlen &&
9212 strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
9213 IS_DIR_SEP(linkpath[datadirpathlen]))
9214 relpath = pstrdup(linkpath + datadirpathlen + 1);
9215
9216 /*
9217 * Add a backslash-escaped version of the link path to the
9218 * tablespace map file.
9219 */
9220 initStringInfo(&escapedpath);
9221 for (s = linkpath; *s; s++)
9222 {
9223 if (*s == '\n' || *s == '\r' || *s == '\\')
9224 appendStringInfoChar(&escapedpath, '\\');
9225 appendStringInfoChar(&escapedpath, *s);
9226 }
9227 appendStringInfo(tblspcmapfile, "%s %s\n",
9228 de->d_name, escapedpath.data);
9229 pfree(escapedpath.data);
9230 }
9231 else if (de_type == PGFILETYPE_DIR)
9232 {
9233 /*
9234 * It's possible to use allow_in_place_tablespaces to create
9235 * directories directly under pg_tblspc, for testing purposes
9236 * only.
9237 *
9238 * In this case, we store a relative path rather than an
9239 * absolute path into the tablespaceinfo.
9240 */
9241 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9242 PG_TBLSPC_DIR, de->d_name);
9243 relpath = pstrdup(linkpath);
9244 }
9245 else
9246 {
9247 /* Skip any other file type that appears here. */
9248 continue;
9249 }
9250
9251 ti = palloc(sizeof(tablespaceinfo));
9252 ti->oid = tsoid;
9253 ti->path = pstrdup(linkpath);
9254 ti->rpath = relpath;
9255 ti->size = -1;
9256
9257 if (tablespaces)
9258 *tablespaces = lappend(*tablespaces, ti);
9259 }
9260 FreeDir(tblspcdir);
9261
9262 state->starttime = (pg_time_t) time(NULL);
9263 }
9265
9266 state->started_in_recovery = backup_started_in_recovery;
9267
9268 /*
9269 * Mark that the start phase has correctly finished for the backup.
9270 */
9272}
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:547
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:72
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:30
#define relpath(rlocator, forknum)
Definition: relpath.h:150
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:562
XLogRecPtr lastBackupStart
Definition: xlog.c:434
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8248
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9571
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:289
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert(), backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9297 of file xlog.c.

/*
 * Finish an online backup previously started for *state.
 *
 * state           must not be NULL; stoppoint is filled in on both paths,
 *                 stoptli and stoptime on the non-recovery path in the
 *                 visible lines.
 * waitforarchive  when true and the applicable archiving mode is enabled,
 *                 loop until neither the last WAL file nor the backup
 *                 history file is reported busy by XLogArchiveIsBusy().
 *
 * When stopping in recovery, the stop point is taken from
 * ControlFile->minRecoveryPoint.  Otherwise an XLOG_BACKUP_END record is
 * inserted, a segment switch is requested and a backup history file is
 * written.
 *
 * NOTE(review): the embedded listing numbers skip in many places (e.g. 9330,
 * 9335-9337, 9348, 9350, 9400, 9402, 9417, 9427), so locking, counter
 * updates and several statements are absent from this listing; check the
 * real xlog.c before relying on it.
 */
9298{
9299 bool backup_stopped_in_recovery = false;
9300 char histfilepath[MAXPGPATH];
9301 char lastxlogfilename[MAXFNAMELEN];
9302 char histfilename[MAXFNAMELEN];
9303 XLogSegNo _logSegNo;
9304 FILE *fp;
9305 int seconds_before_warning;
9306 int waits = 0;
9307 bool reported_waiting = false;
9308
9309 Assert(state != NULL);
9310
9311 backup_stopped_in_recovery = RecoveryInProgress();
9312
9313 /*
9314 * During recovery, we don't need to check WAL level. Because, if WAL
9315 * level is not sufficient, it's impossible to get here during recovery.
9316 */
9317 if (!backup_stopped_in_recovery && !XLogIsNeeded())
9318 ereport(ERROR,
9319 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9320 errmsg("WAL level not sufficient for making an online backup"),
9321 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9322
9323 /*
9324 * OK to update backup counter and session-level lock.
9325 *
9326 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9327 * otherwise they can be updated inconsistently, which might cause
9328 * do_pg_abort_backup() to fail.
9329 */
9331
9332 /*
9333 * It is expected that each do_pg_backup_start() call is matched by
9334 * exactly one do_pg_backup_stop() call.
9335 */
9338
9339 /*
9340 * Clean up session-level lock.
9341 *
9342 * You might think that WALInsertLockRelease() can be called before
9343 * cleaning up session-level lock because session-level lock doesn't need
9344 * to be protected with WAL insertion lock. But since
9345 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9346 * cleaned up before it.
9347 */
9349
9351
9352 /*
9353 * If we are taking an online backup from the standby, we confirm that the
9354 * standby has not been promoted during the backup.
9355 */
9356 if (state->started_in_recovery && !backup_stopped_in_recovery)
9357 ereport(ERROR,
9358 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9359 errmsg("the standby was promoted during online backup"),
9360 errhint("This means that the backup being taken is corrupt "
9361 "and should not be used. "
9362 "Try taking another online backup.")));
9363
9364 /*
9365 * During recovery, we don't write an end-of-backup record. We assume that
9366 * pg_control was backed up last and its minimum recovery point can be
9367 * available as the backup end location. Since we don't have an
9368 * end-of-backup record, we use the pg_control value to check whether
9369 * we've reached the end of backup when starting recovery from this
9370 * backup. We have no way of checking if pg_control wasn't backed up last
9371 * however.
9372 *
9373 * We don't force a switch to new WAL file but it is still possible to
9374 * wait for all the required files to be archived if waitforarchive is
9375 * true. This is okay if we use the backup to start a standby and fetch
9376 * the missing WAL using streaming replication. But in the case of an
9377 * archive recovery, a user should set waitforarchive to true and wait for
9378 * them to be archived to ensure that all the required files are
9379 * available.
9380 *
9381 * We return the current minimum recovery point as the backup end
9382 * location. Note that it can be greater than the exact backup end
9383 * location if the minimum recovery point is updated after the backup of
9384 * pg_control. This is harmless for current uses.
9385 *
9386 * XXX currently a backup history file is for informational and debug
9387 * purposes only. It's not essential for an online backup. Furthermore,
9388 * even if it's created, it will not be archived during recovery because
9389 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9390 * backup history file during recovery.
9391 */
9392 if (backup_stopped_in_recovery)
9393 {
9394 XLogRecPtr recptr;
9395
9396 /*
9397 * Check to see if all WAL replayed during online backup contain
9398 * full-page writes.
9399 */
9401 recptr = XLogCtl->lastFpwDisableRecPtr;
9403
9404 if (state->startpoint <= recptr)
9405 ereport(ERROR,
9406 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9407 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9408 "during online backup"),
9409 errhint("This means that the backup being taken on the standby "
9410 "is corrupt and should not be used. "
9411 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9412 "and then try an online backup again.")));
9413
9414
9415 LWLockAcquire(ControlFileLock, LW_SHARED);
9416 state->stoppoint = ControlFile->minRecoveryPoint;
9418 LWLockRelease(ControlFileLock);
9419 }
9420 else
9421 {
9422 char *history_file;
9423
9424 /*
9425 * Write the backup-end xlog record
9426 */
9428 XLogRegisterData(&state->startpoint,
9429 sizeof(state->startpoint));
9430 state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9431
9432 /*
9433 * Given that we're not in recovery, InsertTimeLineID is set and can't
9434 * change, so we can read it without a lock.
9435 */
9436 state->stoptli = XLogCtl->InsertTimeLineID;
9437
9438 /*
9439 * Force a switch to a new xlog segment file, so that the backup is
9440 * valid as soon as archiver moves out the current segment file.
9441 */
9442 RequestXLogSwitch(false);
9443
9444 state->stoptime = (pg_time_t) time(NULL);
9445
9446 /*
9447 * Write the backup history file
9448 */
9449 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9450 BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9451 state->startpoint, wal_segment_size);
9452 fp = AllocateFile(histfilepath, "w");
9453 if (!fp)
9454 ereport(ERROR,
9456 errmsg("could not create file \"%s\": %m",
9457 histfilepath)));
9458
9459 /* Build and save the contents of the backup history file */
9460 history_file = build_backup_content(state, true);
9461 fprintf(fp, "%s", history_file);
9462 pfree(history_file);
9463
9464 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9465 ereport(ERROR,
9467 errmsg("could not write file \"%s\": %m",
9468 histfilepath)));
9469
9470 /*
9471 * Clean out any no-longer-needed history files. As a side effect,
9472 * this will post a .ready file for the newly created history file,
9473 * notifying the archiver that history file may be archived
9474 * immediately.
9475 */
9477 }
9478
9479 /*
9480 * If archiving is enabled, wait for all the required WAL files to be
9481 * archived before returning. If archiving isn't enabled, the required WAL
9482 * needs to be transported via streaming replication (hopefully with
9483 * wal_keep_size set high enough), or some more exotic mechanism like
9484 * polling and copying files from pg_wal with script. We have no knowledge
9485 * of those mechanisms, so it's up to the user to ensure that he gets all
9486 * the required WAL.
9487 *
9488 * We wait until both the last WAL file filled during backup and the
9489 * history file have been archived, and assume that the alphabetic sorting
9490 * property of the WAL files ensures any earlier WAL files are safely
9491 * archived as well.
9492 *
9493 * We wait forever, since archive_command is supposed to work and we
9494 * assume the admin wanted his backup to work completely. If you don't
9495 * wish to wait, then either waitforarchive should be passed in as false,
9496 * or you can set statement_timeout. Also, some notices are issued to
9497 * clue in anyone who might be doing this interactively.
9498 */
9499
9500 if (waitforarchive &&
9501 ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9502 (backup_stopped_in_recovery && XLogArchivingAlways())))
9503 {
9504 XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9505 XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9507
9508 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9509 BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9510 state->startpoint, wal_segment_size);
9511
9512 seconds_before_warning = 60;
9513 waits = 0;
9514
9515 while (XLogArchiveIsBusy(lastxlogfilename) ||
9516 XLogArchiveIsBusy(histfilename))
9517 {
9519
9520 if (!reported_waiting && waits > 5)
9521 {
9523 (errmsg("base backup done, waiting for required WAL segments to be archived")));
9524 reported_waiting = true;
9525 }
9526
9527 (void) WaitLatch(MyLatch,
9529 1000L,
9530 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9532
/* The warning threshold doubles each time it is reached. */
9533 if (++waits >= seconds_before_warning)
9534 {
9535 seconds_before_warning *= 2; /* This wraps in >10 years... */
9537 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9538 waits),
9539 errhint("Check that your \"archive_command\" is executing properly. "
9540 "You can safely cancel this backup, "
9541 "but the database backup will not be usable without all the WAL segments.")));
9542 }
9543 }
9544
9546 (errmsg("all required WAL segments have been archived")));
9547 }
9548 else if (waitforarchive)
9550 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9551}
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
#define NOTICE
Definition: elog.h:35
int FreeFile(FILE *file)
Definition: fd.c:2843
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2644
struct Latch * MyLatch
Definition: globals.c:64
void ResetLatch(Latch *latch)
Definition: latch.c:372
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define WL_TIMEOUT
Definition: waiteventset.h:37
#define WL_EXIT_ON_PM_DEATH
Definition: waiteventset.h:39
#define WL_LATCH_SET
Definition: waiteventset.h:34
static void CleanupBackupHistory(void)
Definition: xlog.c:4300
#define XLogArchivingAlways()
Definition: xlog.h:102
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert(), BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9278 of file xlog.c.

9279{
9280 return sessionBackupState;
9281}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8781 of file xlog.c.

8782{
8783 int o_direct_flag = 0;
8784
8785 /*
8786 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8787 * written by walreceiver is normally read by the startup process soon
8788 * after it's written. Also, walreceiver performs unaligned writes, which
8789 * don't work with O_DIRECT, so it is required for correctness too.
8790 */
8791 if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
8792 o_direct_flag = PG_O_DIRECT;
8793
8794 /* If fsync is disabled, never open in sync mode */
8795 if (!enableFsync)
8796 return o_direct_flag;
8797
8798 switch (method)
8799 {
8800 /*
8801 * enum values for all sync options are defined even if they are
8802 * not supported on the current platform. But if not, they are
8803 * not included in the enum option array, and therefore will never
8804 * be seen here.
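8805 */
8806 case WAL_SYNC_METHOD_FSYNC:
8807 case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8808 case WAL_SYNC_METHOD_FDATASYNC:
8809 return o_direct_flag;
8810#ifdef O_SYNC
8811 case WAL_SYNC_METHOD_OPEN:
8812 return O_SYNC | o_direct_flag;
8813#endif
8814#ifdef O_DSYNC
8815 case WAL_SYNC_METHOD_OPEN_DSYNC:
8816 return O_DSYNC | o_direct_flag;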
8817#endif
8818 default:
8819 /* can't happen (unless we are out of sync with option array) */
8820 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8821 return 0; /* silence warning */
8822 }
8823}
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:130
#define AmWalReceiverProcess()
Definition: miscadmin.h:391
#define O_DSYNC
Definition: win32_port.h:342
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 5040 of file xlog.c.

5041{
5042 return ControlFile->wal_level;
5043}

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4768 of file xlog.c.

4769{
4770 return ControlFile->default_char_signedness;
4771}
bool default_char_signedness
Definition: pg_control.h:228

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4783 of file xlog.c.

4784{
4785 return pg_atomic_fetch_add_u64(&XLogCtl->unloggedLSN, 1);
4786}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:522

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool *  doPageWrites_p 
)

Definition at line 6655 of file xlog.c.

6656{
6657 *RedoRecPtr_p = RedoRecPtr;
6658 *doPageWrites_p = doPageWrites;
6659}
static bool doPageWrites
Definition: xlog.c:286

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6744 of file xlog.c.

6745{
6746 XLogRecPtr res = InvalidXLogRecPtr;
6747 int i;
6748
6749 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6750 {
6751 XLogRecPtr last_important;
6752
6753 /*
6754 * Need to take a lock to prevent torn reads of the LSN, which are
6755 * possible on some of the supported platforms. WAL insert locks only
6756 * support exclusive mode, so we have to use that.
6757 */
6758 LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6759 last_important = WALInsertLocks[i].l.lastImportantAt;
6760 LWLockRelease(&WALInsertLocks[i].l.lock);
6761
6762 if (res < last_important)
6763 res = last_important;
6764 }
6765
6766 return res;
6767}
int i
Definition: isn.c:77
XLogRecPtr lastImportantAt
Definition: xlog.c:366
WALInsertLock l
Definition: xlog.c:378
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:580
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:150

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6773 of file xlog.c.

6774{
6775 pg_time_t result;
6776
6777 /* Need WALWriteLock, but shared lock is sufficient */
6778 LWLockAcquire(WALWriteLock, LW_SHARED);
6779 result = XLogCtl->lastSegSwitchTime;
6780 *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6781 LWLockRelease(WALWriteLock);
6782
6783 return result;
6784}
pg_time_t lastSegSwitchTime
Definition: xlog.c:462
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:463

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4744 of file xlog.c.

4745{
4746 Assert(ControlFile != NULL);
4747 return ControlFile->mock_authentication_nonce;
4748}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:235

References Assert(), ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6558 of file xlog.c.

6559{
6560 RecoveryState retval;
6561
6562 SpinLockAcquire(&XLogCtl->info_lck);
6563 retval = XLogCtl->SharedRecoveryState;
6564 SpinLockRelease(&XLogCtl->info_lck);
6565
6566 return retval;
6567}
RecoveryState
Definition: xlog.h:90

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6625 of file xlog.c.

6626{
6627 XLogRecPtr ptr;
6628
6629 /*
6630 * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6631 * grabbed a WAL insertion lock to read the authoritative value in
6632 * Insert->RedoRecPtr, someone might update it just after we've released
6633 * the lock.
6634 */
6635 SpinLockAcquire(&XLogCtl->info_lck);
6636 ptr = XLogCtl->RedoRecPtr;
6637 SpinLockRelease(&XLogCtl->info_lck);
6638
6639 if (RedoRecPtr < ptr)
6640 RedoRecPtr = ptr;
6641
6642 return RedoRecPtr;
6643}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 8057 of file xlog.c.

8058{
8059 XLogRecPtr currpos; /* current write LSN */
8060 XLogSegNo currSeg; /* segid of currpos */
8061 XLogSegNo targetSeg; /* segid of targetLSN */
8062 XLogSegNo oldestSeg; /* actual oldest segid */
8063 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
8064 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
8065 uint64 keepSegs;
8066
8067 /*
8068 * slot does not reserve WAL. Either deactivated, or has never been active
8069 */
8070 if (XLogRecPtrIsInvalid(targetLSN))
8071 return WALAVAIL_INVALID_LSN;
8072
8073 /*
8074 * Calculate the oldest segment currently reserved by all slots,
8075 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
8076 * oldestSlotSeg to the current segment.
8077 */
8078 currpos = GetXLogWriteRecPtr();
8079 XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
8080 KeepLogSeg(currpos, &oldestSlotSeg);
8081
8082 /*
8083 * Find the oldest extant segment file. We get 1 until checkpoint removes
8084 * the first WAL segment file since startup, which causes the status being
8085 * wrong under certain abnormal conditions but that doesn't actually harm.
8086 */
8087 oldestSeg = XLogGetLastRemovedSegno() + 1;
8088
8089 /* calculate oldest segment by max_wal_size */
8090 XLByteToSeg(currpos, currSeg, wal_segment_size);
8091 keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
8092
8093 if (currSeg > keepSegs)
8094 oldestSegMaxWalSize = currSeg - keepSegs;
8095 else
8096 oldestSegMaxWalSize = 1;
8097
8098 /* the segment we care about */
8099 XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
8100
8101 /*
8102 * No point in returning reserved or extended status values if the
8103 * targetSeg is known to be lost.
8104 */
8105 if (targetSeg >= oldestSlotSeg)
8106 {
8107 /* show "reserved" when targetSeg is within max_wal_size */
8108 if (targetSeg >= oldestSegMaxWalSize)
8109 return WALAVAIL_RESERVED;
8110
8111 /* being retained by slots exceeding max_wal_size */
8112 return WALAVAIL_EXTENDED;
8113 }
8114
8115 /* WAL segments are no longer retained but haven't been removed yet */
8116 if (targetSeg >= oldestSeg)
8117 return WALAVAIL_UNRESERVED;
8118
8119 /* Definitely lost */
8120 return WALAVAIL_REMOVED;
8121}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3897
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9628
@ WALAVAIL_REMOVED
Definition: xlog.h:194
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6724 of file xlog.c.

6725{
6726 TimeLineID insertTLI;
6727
6728 SpinLockAcquire(&XLogCtl->info_lck);
6729 insertTLI = XLogCtl->InsertTimeLineID;
6730 SpinLockRelease(&XLogCtl->info_lck);
6731
6732 return insertTLI;
6733}

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1643 of file xlog.c.

1644{
1645 int idx;
1646 XLogRecPtr endptr;
1647 static uint64 cachedPage = 0;
1648 static char *cachedPos = NULL;
1649 XLogRecPtr expectedEndPtr;
1650
1651 /*
1652 * Fast path for the common case that we need to access again the same
1653 * page as last time.
1654 */
1655 if (ptr / XLOG_BLCKSZ == cachedPage)
1656 {
1657 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1658 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1659 return cachedPos + ptr % XLOG_BLCKSZ;
1660 }
1661
1662 /*
1663 * The XLog buffer cache is organized so that a page is always loaded to a
1664 * particular buffer. That way we can easily calculate the buffer a given
1665 * page must be loaded into, from the XLogRecPtr alone.
1666 */
1667 idx = XLogRecPtrToBufIdx(ptr);
1668
1669 /*
1670 * See what page is loaded in the buffer at the moment. It could be the
1671 * page we're looking for, or something older. It can't be anything newer
1672 * - that would imply the page we're looking for has already been written
1673 * out to disk and evicted, and the caller is responsible for making sure
1674 * that doesn't happen.
1675 *
1676 * We don't hold a lock while we read the value. If someone is just about
1677 * to initialize or has just initialized the page, it's possible that we
1678 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1679 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1680 * we're looking for.
1681 */
1682 expectedEndPtr = ptr;
1683 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1684
1685 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1686 if (expectedEndPtr != endptr)
1687 {
1688 XLogRecPtr initializedUpto;
1689
1690 /*
1691 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1692 * know how far we're finished with inserting the record.
1693 *
1694 * NB: If 'ptr' points to just after the page header, advertise a
1695 * position at the beginning of the page rather than 'ptr' itself. If
1696 * there are no other insertions running, someone might try to flush
1697 * up to our advertised location. If we advertised a position after
1698 * the page header, someone might try to flush the page header, even
1699 * though page might actually not be initialized yet. As the first
1700 * inserter on the page, we are effectively responsible for making
1701 * sure that it's initialized, before we let insertingAt to move past
1702 * the page header.
1703 */
1704 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1705 XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1706 initializedUpto = ptr - SizeOfXLogShortPHD;
1707 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1708 XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1709 initializedUpto = ptr - SizeOfXLogLongPHD;
1710 else
1711 initializedUpto = ptr;
1712
1713 WALInsertLockUpdateInsertingAt(initializedUpto);
1714
1715 AdvanceXLInsertBuffer(ptr, tli, false);
1716 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1717
1718 if (expectedEndPtr != endptr)
1719 elog(PANIC, "could not find WAL buffer for %X/%X",
1720 LSN_FORMAT_ARGS(ptr));
1721 }
1722 else
1723 {
1724 /*
1725 * Make sure the initialization of the page is visible to us, and
1726 * won't arrive later to overwrite the WAL data we write on the page.
1727 */
1728 pg_memory_barrier();
1729 }
1730
1731 /*
1732 * Found the buffer holding this page. Return a pointer to the right
1733 * offset within the page.
1734 */
1735 cachedPage = ptr / XLOG_BLCKSZ;
1736 cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1737
1738 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1739 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1740
1741 return cachedPos + ptr % XLOG_BLCKSZ;
1742}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
#define pg_memory_barrier()
Definition: atomics.h:143
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1482
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1996

References AdvanceXLInsertBuffer(), Assert(), elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9612 of file xlog.c.

9613{
9614 XLogCtlInsert *Insert = &XLogCtl->Insert;
9615 uint64 current_bytepos;
9616
9617 SpinLockAcquire(&Insert->insertpos_lck);
9618 current_bytepos = Insert->CurrBytePos;
9619 SpinLockRelease(&Insert->insertpos_lck);
9620
9621 return XLogBytePosToRecPtr(current_bytepos);
9622}

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4343 of file xlog.c.

4344{
4345 char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4346
4347 /*
4348 * Generate a random nonce. This is used for authentication requests that
4349 * will fail because the user does not exist. The nonce is used to create
4350 * a genuine-looking password challenge for the non-existent user, in lieu
4351 * of an actual stored password.
4352 */
4353 if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4354 ereport(PANIC,
4355 (errcode(ERRCODE_INTERNAL_ERROR),
4356 errmsg("could not generate secret authorization token")));
4357
4358 memset(ControlFile, 0, sizeof(ControlFileData));
4359 /* Initialize pg_control status fields */
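4360 ControlFile->system_identifier = sysidentifier;
4361 memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4362 ControlFile->state = DB_SHUTDOWNED;
4363 ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4364
4365 /* Set important parameter values for use when replaying WAL */
4366 ControlFile->MaxConnections = MaxConnections;
4367 ControlFile->max_worker_processes = max_worker_processes;
4368 ControlFile->max_wal_senders = max_wal_senders;
4369 ControlFile->max_prepared_xacts = max_prepared_xacts;
4370 ControlFile->max_locks_per_xact = max_locks_per_xact;
4371 ControlFile->wal_level = wal_level;
4372 ControlFile->wal_log_hints = wal_log_hints;
4373 ControlFile->track_commit_timestamp = track_commit_timestamp;
4374 ControlFile->data_checksum_version = data_checksum_version;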
4375}
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:185
bool wal_log_hints
Definition: xlog.c:123
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4965 of file xlog.c.

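4966{
4967 Assert(process_shared_preload_libraries_done);
4968
4969 if (check_wal_consistency_checking_deferred)
4970 {
4971 struct config_generic *guc;
4972
4973 guc = find_option("wal_consistency_checking", false, false, ERROR);
4974
4975 check_wal_consistency_checking_deferred = false;
4976
4977 set_config_option_ext("wal_consistency_checking",
4978 wal_consistency_checking_string,
4979 guc->scontext, guc->source, guc->srole,
4980 GUC_ACTION_SET, true, ERROR, false);
4981
4982 /* checking should not be deferred again */
4983 Assert(!check_wal_consistency_checking_deferred);
4984 }
4985}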
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3382
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1235
@ GUC_ACTION_SET
Definition: guc.h:203
GucContext scontext
Definition: guc_tables.h:185
GucSource source
Definition: guc_tables.h:183
char * wal_consistency_checking_string
Definition: xlog.c:125

References Assert(), check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3702 of file xlog.c.

3704{
3705 char path[MAXPGPATH];
3706 struct stat stat_buf;
3707
3708 Assert(tli != 0);
3709
3710 XLogFilePath(path, tli, *segno, wal_segment_size);
3711
3712 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3713 if (!XLogCtl->InstallXLogFileSegmentActive)
3714 {
3715 LWLockRelease(ControlFileLock);
3716 return false;
3717 }
3718
3719 if (!find_free)
3720 {
3721 /* Force installation: get rid of any pre-existing segment file */
3722 durable_unlink(path, DEBUG1);
3723 }
3724 else
3725 {
3726 /* Find a free slot to put it in */
3727 while (stat(path, &stat_buf) == 0)
3728 {
3729 if ((*segno) >= max_segno)
3730 {
3731 /* Failed to find a free slot within specified range */
3732 LWLockRelease(ControlFileLock);
3733 return false;
3734 }
3735 (*segno)++;
3736 XLogFilePath(path, tli, *segno, wal_segment_size);
3737 }
3738 }
3739
3740 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3741 if (durable_rename(tmppath, path, LOG) != 0)
3742 {
3743 LWLockRelease(ControlFileLock);
3744 /* durable_rename already emitted log message */
3745 return false;
3746 }
3747
3748 LWLockRelease(ControlFileLock);
3749
3750 return true;
3751}
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:537
#define stat
Definition: win32_port.h:274

References Assert(), DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9669 of file xlog.c.

9670{
9671 bool result;
9672
9673 LWLockAcquire(ControlFileLock, LW_SHARED);
9674 result = XLogCtl->InstallXLogFileSegmentActive;
9675 LWLockRelease(ControlFileLock);
9676
9677 return result;
9678}

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8871 of file xlog.c.

8872{
8873 char *msg = NULL;
8874 instr_time start;
8875
8876 Assert(tli != 0);
8877
8878 /*
8879 * Quick exit if fsync is disabled or write() has already synced the WAL
8880 * file.
8881 */
8882 if (!enableFsync ||
8883 wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8884 wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8885 return;
8886
8887 /*
8888 * Measure I/O timing to sync the WAL file for pg_stat_io.
8889 */
8890 start = pgstat_prepare_io_time(track_wal_io_timing);
8891
8892 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8893 switch (wal_sync_method)
8894 {
8895 case WAL_SYNC_METHOD_FSYNC:
8896 if (pg_fsync_no_writethrough(fd) != 0)
8897 msg = _("could not fsync file \"%s\": %m");
8898 break;
8899#ifdef HAVE_FSYNC_WRITETHROUGH
8900 case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8901 if (pg_fsync_writethrough(fd) != 0)
8902 msg = _("could not fsync write-through file \"%s\": %m");
8903 break;
8904#endif
8905 case WAL_SYNC_METHOD_FDATASYNC:
8906 if (pg_fdatasync(fd) != 0)
8907 msg = _("could not fdatasync file \"%s\": %m");
8908 break;
8909 case WAL_SYNC_METHOD_OPEN:
8910 case WAL_SYNC_METHOD_OPEN_DSYNC:
8911 /* not reachable */
8912 Assert(false);
8913 break;
8914 default:
8915 ereport(PANIC,
8916 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8917 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8918 break;
8919 }
8920
8921 /* PANIC if failed to fsync */
8922 if (msg)
8923 {
8924 char xlogfname[MAXFNAMELEN];
8925 int save_errno = errno;
8926
8927 XLogFileName(xlogfname, tli, segno, wal_segment_size);
8928 errno = save_errno;
8929 ereport(PANIC,
8930 (errcode_for_file_access(),
8931 errmsg(msg, xlogfname)));
8932 }
8933
8934 pgstat_report_wait_end();
8935
8936 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_FSYNC,
8937 start, 1, 0);
8938}
#define _(x)
Definition: elog.c:91
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
return str start
@ IOOBJECT_WAL
Definition: pgstat.h:276
@ IOCONTEXT_NORMAL
Definition: pgstat.h:286
@ IOOP_FSYNC
Definition: pgstat.h:305
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:90
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:121
static int fd(const char *x, int i)
Definition: preproc-init.c:105
bool track_wal_io_timing
Definition: xlog.c:137

References _, Assert(), enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 8141 of file xlog.c.

8142{
8143 XLogSegNo currSegNo;
8144 XLogSegNo segno;
8145 XLogRecPtr keep;
8146
8147 XLByteToSeg(recptr, currSegNo, wal_segment_size);
8148 segno = currSegNo;
8149
8150 /*
8151 * Calculate how many segments are kept by slots first, adjusting for
8152 * max_slot_wal_keep_size.
8153 */
8154 keep = XLogGetReplicationSlotMinimumLSN();
8155 if (keep != InvalidXLogRecPtr && keep < recptr)
8156 {
8157 XLByteToSeg(keep, segno, wal_segment_size);
8158
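8159 /* Cap by max_slot_wal_keep_size ... */
8160 if (max_slot_wal_keep_size_mb >= 0)
8161 {
8162 uint64 slot_keep_segs;
8163
8164 slot_keep_segs =
8165 ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
8166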
8167 if (currSegNo - segno > slot_keep_segs)
8168 segno = currSegNo - slot_keep_segs;
8169 }
8170 }
8171
8172 /*
8173 * If WAL summarization is in use, don't remove WAL that has yet to be
8174 * summarized.
8175 */
8176 keep = GetOldestUnsummarizedLSN(NULL, NULL);
8177 if (keep != InvalidXLogRecPtr)
8178 {
8179 XLogSegNo unsummarized_segno;
8180
8181 XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
8182 if (unsummarized_segno < segno)
8183 segno = unsummarized_segno;
8184 }
8185
8186 /* but, keep at least wal_keep_size if that's set */
8187 if (wal_keep_size_mb > 0)
8188 {
8189 uint64 keep_segs;
8190
8191 keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
8192 if (currSegNo - segno < keep_segs)
8193 {
8194 /* avoid underflow, don't go below 1 */
8195 if (currSegNo <= keep_segs)
8196 segno = 1;
8197 else
8198 segno = currSegNo - keep_segs;
8199 }
8200 }
8201
8202 /* don't delete WAL segments newer than the calculated segment */
8203 if (segno < *logSegNo)
8204 *logSegNo = segno;
8205}
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition: xlog.c:116
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2822
int max_slot_wal_keep_size_mb
Definition: xlog.c:135

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 5027 of file xlog.c.

5028{
5029 Assert(reset || ControlFile == NULL);
5030 ControlFile = palloc(sizeof(ControlFileData));
5031 ReadControlFile();
5032}
void reset(void)
Definition: sql-declare.c:600

References Assert(), ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6610 of file xlog.c.

6611{
6612 int oldXLogAllowed = LocalXLogInsertAllowed;
6613
6614 LocalXLogInsertAllowed = 1;
6615
6616 return oldXLogAllowed;
6617}

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6869 of file xlog.c.

6870{
6871 long write_msecs,
6872 sync_msecs,
6873 total_msecs,
6874 longest_msecs,
6875 average_msecs;
6876 uint64 average_sync_time;
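6877
6878 CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
6879
6880 write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
6881 CheckpointStats.ckpt_sync_t);
6882
6883 sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
6884 CheckpointStats.ckpt_sync_end_t);
6885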
6886 /* Accumulate checkpoint timing summary data, in milliseconds. */
6887 PendingCheckpointerStats.write_time += write_msecs;
6888 PendingCheckpointerStats.sync_time += sync_msecs;
6889
6890 /*
6891 * All of the published timing statistics are accounted for. Only
6892 * continue if a log message is to be written.
6893 */
6894 if (!log_checkpoints)
6895 return;
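6896
6897 total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
6898 CheckpointStats.ckpt_end_t);
6899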
6900 /*
6901 * Timing values returned from CheckpointStats are in microseconds.
6902 * Convert to milliseconds for consistent printing.
6903 */
6904 longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6905
6906 average_sync_time = 0;
6907 if (CheckpointStats.ckpt_sync_rels > 0)
6908 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6909 CheckpointStats.ckpt_sync_rels;
6910 average_msecs = (long) ((average_sync_time + 999) / 1000);
6911
6912 /*
6913 * ControlFileLock is not required to see ControlFile->checkPoint and
6914 * ->checkPointCopy here as we are the only updator of those variables at
6915 * this moment.
6916 */
6917 if (restartpoint)
6918 ereport(LOG,
6919 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6920 "wrote %d SLRU buffers; %d WAL file(s) added, "
6921 "%d removed, %d recycled; write=%ld.%03d s, "
6922 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6923 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
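6924 "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
6925 CheckpointStats.ckpt_bufs_written,
6926 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6927 CheckpointStats.ckpt_slru_written,
6928 CheckpointStats.ckpt_segs_added,
6929 CheckpointStats.ckpt_segs_removed,
6930 CheckpointStats.ckpt_segs_recycled,
6931 write_msecs / 1000, (int) (write_msecs % 1000),
6932 sync_msecs / 1000, (int) (sync_msecs % 1000),
6933 total_msecs / 1000, (int) (total_msecs % 1000),
6934 CheckpointStats.ckpt_sync_rels,
6935 longest_msecs / 1000, (int) (longest_msecs % 1000),
6936 average_msecs / 1000, (int) (average_msecs % 1000),
6937 (int) (PrevCheckPointDistance / 1024.0),
6938 (int) (CheckPointDistanceEstimate / 1024.0),
6939 LSN_FORMAT_ARGS(ControlFile->checkPoint),
6940 LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6941 else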
6942 ereport(LOG,
6943 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6944 "wrote %d SLRU buffers; %d WAL file(s) added, "
6945 "%d removed, %d recycled; write=%ld.%03d s, "
6946 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6947 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
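6948 "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
6949 CheckpointStats.ckpt_bufs_written,
6950 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6951 CheckpointStats.ckpt_slru_written,
6952 CheckpointStats.ckpt_segs_added,
6953 CheckpointStats.ckpt_segs_removed,
6954 CheckpointStats.ckpt_segs_recycled,
6955 write_msecs / 1000, (int) (write_msecs % 1000),
6956 sync_msecs / 1000, (int) (sync_msecs % 1000),
6957 total_msecs / 1000, (int) (total_msecs % 1000),
6958 CheckpointStats.ckpt_sync_rels,
6959 longest_msecs / 1000, (int) (longest_msecs % 1000),
6960 average_msecs / 1000, (int) (average_msecs % 1000),
6961 (int) (PrevCheckPointDistance / 1024.0),
6962 (int) (CheckPointDistanceEstimate / 1024.0),
6963 LSN_FORMAT_ARGS(ControlFile->checkPoint),
6964 LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6965}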
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1757
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:176
uint64 ckpt_longest_sync
Definition: xlog.h:175
TimestampTz ckpt_end_t
Definition: xlog.h:165
int ckpt_slru_written
Definition: xlog.h:168
int ckpt_sync_rels
Definition: xlog.h:174
PgStat_Counter sync_time
Definition: pgstat.h:262
PgStat_Counter write_time
Definition: pgstat.h:261
static double CheckPointDistanceEstimate
Definition: xlog.c:159
static double PrevCheckPointDistance
Definition: xlog.c:160

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6837 of file xlog.c.

/*
 * Emit a LOG-level message announcing that a checkpoint is starting or,
 * when 'restartpoint' is true, that a restartpoint is starting.
 *
 * flags: bitmask of CHECKPOINT_* options.  Each of the eight bits tested
 * below that is set contributes one word to the message (" shutdown",
 * " end-of-recovery", " immediate", " force", " wait", " wal", " time",
 * " flush-all"); a bit that is clear contributes an empty string.
 * restartpoint: selects which of the two message texts is used.
 *
 * Returns nothing; the only effect is the ereport() call.
 */
6838{
/*
 * The two branches differ only in the leading word of the format string.
 * NOTE(review): presumably they are kept as two complete literals so each
 * message can be translated as a whole -- confirm before merging them.
 */
6839 if (restartpoint)
6840 ereport(LOG,
6841 /* translator: the placeholders show checkpoint options */
6842 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6843 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6844 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6845 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6846 (flags & CHECKPOINT_FORCE) ? " force" : "",
6847 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6848 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6849 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6850 (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6851 else
6852 ereport(LOG,
6853 /* translator: the placeholders show checkpoint options */
6854 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6855 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6856 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6857 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6858 (flags & CHECKPOINT_FORCE) ? " force" : "",
6859 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6860 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6861 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6862 (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6863}
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:148
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:143
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:149

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FLUSH_ALL, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6472 of file xlog.c.

6473{
6474 bool promoted = false;
6475
6476 /*
6477 * Perform a checkpoint to update all our recovery activity to disk.
6478 *
6479 * Note that we write a shutdown checkpoint rather than an on-line one.
6480 * This is not particularly critical, but since we may be assigning a new
6481 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6482 * only changes in shutdown checkpoints, which allows some extra error
6483 * checking in xlog_redo.
6484 *
6485 * In promotion, only create a lightweight end-of-recovery record instead
6486 * of a full checkpoint. A checkpoint is requested later, after we're
6487 * fully out of recovery mode and already accepting queries.
6488 */
6491 {
6492 promoted = true;
6493
6494 /*
6495 * Insert a special WAL record to mark the end of recovery, since we
6496 * aren't doing a checkpoint. That means that the checkpointer process
6497 * may likely be in the middle of a time-smoothed restartpoint and
6498 * could continue to be for minutes after this. That sounds strange,
6499 * but the effect is roughly the same and it would be stranger to try
6500 * to come out of the restartpoint and then checkpoint. We request a
6501 * checkpoint later anyway, just for safety.
6502 */
6504 }
6505 else
6506 {
6510 }
6511
6512 return promoted;
6513}
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7563
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3829 of file xlog.c.

3830{
3831 XLogSegNo _logSegNo;
3832 int lf;
3833 bool added;
3834 char path[MAXPGPATH];
3835 uint64 offset;
3836
3838 return; /* unlocked check says no */
3839
3840 XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3841 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3842 if (offset >= (uint32) (0.75 * wal_segment_size))
3843 {
3844 _logSegNo++;
3845 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3846 if (lf >= 0)
3847 close(lf);
3848 if (added)
3850 }
3851}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3331

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6435 of file xlog.c.

6436{
6437 /*
6438 * We have reached the end of base backup, as indicated by pg_control. The
6439 * data on disk is now consistent (unless minRecoveryPoint is further
6440 * ahead, which can happen if we crashed during previous recovery). Reset
6441 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6442 * make sure we don't allow starting up at an earlier point even if
6443 * recovery is stopped and restarted soon after this.
6444 */
6445 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6446
6447 if (ControlFile->minRecoveryPoint < EndRecPtr)
6448 {
6449 ControlFile->minRecoveryPoint = EndRecPtr;
6451 }
6452
6457
6458 LWLockRelease(ControlFileLock);
6459}
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4487 of file xlog.c.

4488{
4489 pg_crc32c crc;
4490 int fd;
4491 char wal_segsz_str[20];
4492 int r;
4493
4494 /*
4495 * Read data...
4496 */
4498 O_RDWR | PG_BINARY);
4499 if (fd < 0)
4500 ereport(PANIC,
4502 errmsg("could not open file \"%s\": %m",
4504
4505 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4506 r = read(fd, ControlFile, sizeof(ControlFileData));
4507 if (r != sizeof(ControlFileData))
4508 {
4509 if (r < 0)
4510 ereport(PANIC,
4512 errmsg("could not read file \"%s\": %m",
4514 else
4515 ereport(PANIC,
4517 errmsg("could not read file \"%s\": read %d of %zu",
4518 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4519 }
4521
4522 close(fd);
4523
4524 /*
4525 * Check for expected pg_control format version. If this is wrong, the
4526 * CRC check will likely fail because we'll be checking the wrong number
4527 * of bytes. Complaining about wrong version will probably be more
4528 * enlightening than complaining about wrong CRC.
4529 */
4530
4532 ereport(FATAL,
4533 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4534 errmsg("database files are incompatible with server"),
4535 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4536 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4539 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4540
4542 ereport(FATAL,
4543 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4544 errmsg("database files are incompatible with server"),
4545 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4546 " but the server was compiled with PG_CONTROL_VERSION %d.",
4548 errhint("It looks like you need to initdb.")));
4549
4550 /* Now check the CRC. */
4554 offsetof(ControlFileData, crc));
4555 FIN_CRC32C(crc);
4556
4557 if (!EQ_CRC32C(crc, ControlFile->crc))
4558 ereport(FATAL,
4559 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4560 errmsg("incorrect checksum in control file")));
4561
4562 /*
4563 * Do compatibility checking immediately. If the database isn't
4564 * compatible with the backend executable, we want to abort before we can
4565 * possibly do any damage.
4566 */
4568 ereport(FATAL,
4569 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4570 errmsg("database files are incompatible with server"),
4571 /* translator: %s is a variable name and %d is its value */
4572 errdetail("The database cluster was initialized with %s %d,"
4573 " but the server was compiled with %s %d.",
4574 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4575 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4576 errhint("It looks like you need to initdb.")));
4577 if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4578 ereport(FATAL,
4579 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4580 errmsg("database files are incompatible with server"),
4581 /* translator: %s is a variable name and %d is its value */
4582 errdetail("The database cluster was initialized with %s %d,"
4583 " but the server was compiled with %s %d.",
4584 "MAXALIGN", ControlFile->maxAlign,
4585 "MAXALIGN", MAXIMUM_ALIGNOF),
4586 errhint("It looks like you need to initdb.")));
4588 ereport(FATAL,
4589 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4590 errmsg("database files are incompatible with server"),
4591 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4592 errhint("It looks like you need to initdb.")));
4593 if (ControlFile->blcksz != BLCKSZ)
4594 ereport(FATAL,
4595 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4596 errmsg("database files are incompatible with server"),
4597 /* translator: %s is a variable name and %d is its value */
4598 errdetail("The database cluster was initialized with %s %d,"
4599 " but the server was compiled with %s %d.",
4600 "BLCKSZ", ControlFile->blcksz,
4601 "BLCKSZ", BLCKSZ),
4602 errhint("It looks like you need to recompile or initdb.")));
4603 if (ControlFile->relseg_size != RELSEG_SIZE)
4604 ereport(FATAL,
4605 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4606 errmsg("database files are incompatible with server"),
4607 /* translator: %s is a variable name and %d is its value */
4608 errdetail("The database cluster was initialized with %s %d,"
4609 " but the server was compiled with %s %d.",
4610 "RELSEG_SIZE", ControlFile->relseg_size,
4611 "RELSEG_SIZE", RELSEG_SIZE),
4612 errhint("It looks like you need to recompile or initdb.")));
4613 if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4614 ereport(FATAL,
4615 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4616 errmsg("database files are incompatible with server"),
4617 /* translator: %s is a variable name and %d is its value */
4618 errdetail("The database cluster was initialized with %s %d,"
4619 " but the server was compiled with %s %d.",
4620 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4621 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4622 errhint("It looks like you need to recompile or initdb.")));
4624 ereport(FATAL,
4625 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4626 errmsg("database files are incompatible with server"),
4627 /* translator: %s is a variable name and %d is its value */
4628 errdetail("The database cluster was initialized with %s %d,"
4629 " but the server was compiled with %s %d.",
4630 "NAMEDATALEN", ControlFile->nameDataLen,
4631 "NAMEDATALEN", NAMEDATALEN),
4632 errhint("It looks like you need to recompile or initdb.")));
4634 ereport(FATAL,
4635 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4636 errmsg("database files are incompatible with server"),
4637 /* translator: %s is a variable name and %d is its value */
4638 errdetail("The database cluster was initialized with %s %d,"
4639 " but the server was compiled with %s %d.",
4640 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4641 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4642 errhint("It looks like you need to recompile or initdb.")));
4644 ereport(FATAL,
4645 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4646 errmsg("database files are incompatible with server"),
4647 /* translator: %s is a variable name and %d is its value */
4648 errdetail("The database cluster was initialized with %s %d,"
4649 " but the server was compiled with %s %d.",
4650 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4651 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4652 errhint("It looks like you need to recompile or initdb.")));
4654 ereport(FATAL,
4655 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4656 errmsg("database files are incompatible with server"),
4657 /* translator: %s is a variable name and %d is its value */
4658 errdetail("The database cluster was initialized with %s %d,"
4659 " but the server was compiled with %s %d.",
4660 "LOBLKSIZE", ControlFile->loblksize,
4661 "LOBLKSIZE", (int) LOBLKSIZE),
4662 errhint("It looks like you need to recompile or initdb.")));
4663
4664#ifdef USE_FLOAT8_BYVAL
4665 if (ControlFile->float8ByVal != true)
4666 ereport(FATAL,
4667 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4668 errmsg("database files are incompatible with server"),
4669 errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
4670 " but the server was compiled with USE_FLOAT8_BYVAL."),
4671 errhint("It looks like you need to recompile or initdb.")));
4672#else
4673 if (ControlFile->float8ByVal != false)
4674 ereport(FATAL,
4675 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4676 errmsg("database files are incompatible with server"),
4677 errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
4678 " but the server was compiled without USE_FLOAT8_BYVAL."),
4679 errhint("It looks like you need to recompile or initdb.")));
4680#endif
4681
4683
4685 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4686 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4687 "invalid WAL segment size in control file (%d bytes)",
4690 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4691
4692 snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4693 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4695
4696 /* check and update variables dependent on wal_segment_size */
4698 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4699 /* translator: both %s are GUC names */
4700 errmsg("\"%s\" must be at least twice \"%s\"",
4701 "min_wal_size", "wal_segment_size")));
4702
4704 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4705 /* translator: both %s are GUC names */
4706 errmsg("\"%s\" must be at least twice \"%s\"",
4707 "max_wal_size", "wal_segment_size")));
4708
4710 (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4712
4714
4715 /* Make the initdb settings visible as GUC variables, too */
4716 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4718}
#define PG_BINARY
Definition: c.h:1244
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1181
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1089
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:114
@ PGC_INTERNAL
Definition: guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:201
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
uint32 nameDataLen
Definition: pg_control.h:213
uint32 indexMaxKeys
Definition: pg_control.h:214
uint32 relseg_size
Definition: pg_control.h:208
uint32 catalog_version_no
Definition: pg_control.h:126
double floatFormat
Definition: pg_control.h:200
uint32 xlog_blcksz
Definition: pg_control.h:210
uint32 loblksize
Definition: pg_control.h:217
pg_crc32c crc
Definition: pg_control.h:238
uint32 toast_max_chunk_size
Definition: pg_control.h:216
#define UsableBytesInPage
Definition: xlog.c:608
bool DataChecksumsEnabled(void)
Definition: xlog.c:4754
static int UsableBytesInSegment
Definition: xlog.c:617
int min_wal_size_mb
Definition: xlog.c:115
#define XLOG_CONTROL_FILE

References BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6522 of file xlog.c.

6523{
6524 /*
6525 * We check shared state each time only until we leave recovery mode. We
6526 * can't re-enter recovery, so there's no need to keep checking after the
6527 * shared variable has once been seen false.
6528 */
6530 return false;
6531 else
6532 {
6533 /*
6534 * use volatile pointer to make sure we make a fresh read of the
6535 * shared variable.
6536 */
6537 volatile XLogCtlData *xlogctl = XLogCtl;
6538
6540
6541 /*
6542 * Note: We don't need a memory barrier when we're still in recovery.
6543 * We might exit recovery immediately after return, so the caller
6544 * can't rely on 'true' meaning that we're still in recovery anyway.
6545 */
6546
6548 }
6549}
static bool LocalRecoveryInProgress
Definition: xlog.c:224

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), 
ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7738 of file xlog.c.

7739{
7740 /*
7741 * Also refrain from creating a restartpoint if we have seen any
7742 * references to non-existent pages. Restarting recovery from the
7743 * restartpoint would not see the references, so we would lose the
7744 * cross-check that the pages belonged to a relation that was dropped
7745 * later.
7746 */
7748 {
7749 elog(DEBUG2,
7750 "could not record restart point at %X/%X because there "
7751 "are unresolved references to invalid pages",
7752 LSN_FORMAT_ARGS(checkPoint->redo));
7753 return;
7754 }
7755
7756 /*
7757 * Copy the checkpoint record to shared memory, so that checkpointer can
7758 * work out the next time it wants to perform a restartpoint.
7759 */
7763 XLogCtl->lastCheckPoint = *checkPoint;
7765}
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9598 of file xlog.c.

9599{
9600 static bool already_done = false;
9601
9602 if (already_done)
9603 return;
9605 already_done = true;
9606}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 4079 of file xlog.c.

/*
 * Scan the WAL directory (XLOGDIR) and hand to RemoveXlogFile() every WAL
 * segment file whose timeline prefix sorts before that of 'newTLI' and
 * whose remaining name sorts after the last segment to be kept.
 *
 * switchpoint: WAL location from which the last segment to keep
 * (switchLogSegNo) and the first recycle target (endLogSegNo) are derived.
 * newTLI: timeline used to build the reference file name, and passed on to
 * RemoveXlogFile().
 *
 * Files that XLogArchiveIsReady() reports as ready for archiving are left
 * in place.  Returns nothing.
 */
4080{
4081 DIR *xldir;
4082 struct dirent *xlde;
4083 char switchseg[MAXFNAMELEN];
4084 XLogSegNo endLogSegNo;
4085 XLogSegNo switchLogSegNo;
4086 XLogSegNo recycleSegNo;
4087
4088 /*
4089 * Initialize info about where to begin the work. This will recycle,
4090 * somewhat arbitrarily, 10 future segments.
4091 */
/*
 * NOTE(review): switchLogSegNo uses XLByteToPrevSeg while endLogSegNo uses
 * XLByteToSeg; presumably the two differ only when switchpoint lies exactly
 * on a segment boundary -- confirm against the macro definitions.
 */
4092 XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
4093 XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
4094 recycleSegNo = endLogSegNo + 10;
4095
4096 /*
4097 * Construct a filename of the last segment to be kept.
4098 */
4099 XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
4100
4101 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
4102 switchseg);
4103
4104 xldir = AllocateDir(XLOGDIR);
4105
4106 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4107 {
4108 /* Ignore files that are not XLOG segments */
4109 if (!IsXLogFileName(xlde->d_name))
4110 continue;
4111
4112 /*
4113 * Remove files that are on a timeline older than the new one we're
4114 * switching to, but with a segment number >= the first segment on the
4115 * new timeline.
4116 */
/*
 * The first 8 characters of the name are compared separately from the
 * rest: the prefix must sort strictly before switchseg's prefix and the
 * remainder strictly after switchseg's remainder.
 */
4117 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4118 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4119 {
4120 /*
4121 * If the file has already been marked as .ready, however, don't
4122 * remove it yet. It should be OK to remove it - files that are
4123 * not part of our timeline history are not required for recovery
4124 * - but seems safer to let them be archived and removed later.
4125 */
4126 if (!XLogArchiveIsReady(xlde->d_name))
4127 RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
4128 }
4129 }
4130
4131 FreeDir(xldir);
4132}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4148
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 4004 of file xlog.c.

4006{
4007 DIR *xldir;
4008 struct dirent *xlde;
4009 char lastoff[MAXFNAMELEN];
4010 XLogSegNo endlogSegNo;
4011 XLogSegNo recycleSegNo;
4012
4013 /* Initialize info about where to try to recycle to */
4014 XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
4015 recycleSegNo = XLOGfileslop(lastredoptr);
4016
4017 /*
4018 * Construct a filename of the last segment to be kept. The timeline ID
4019 * doesn't matter, we ignore that in the comparison. (During recovery,
4020 * InsertTimeLineID isn't set, so we can't use that.)
4021 */
4022 XLogFileName(lastoff, 0, segno, wal_segment_size);
4023
4024 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
4025 lastoff);
4026
4027 xldir = AllocateDir(XLOGDIR);
4028
4029 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4030 {
4031 /* Ignore files that are not XLOG segments */
4032 if (!IsXLogFileName(xlde->d_name) &&
4034 continue;
4035
4036 /*
4037 * We ignore the timeline part of the XLOG segment identifiers in
4038 * deciding whether a segment is still needed. This ensures that we
4039 * won't prematurely remove a segment from a parent timeline. We could
4040 * probably be a little more proactive about removing segments of
4041 * non-parent timelines, but that would be a whole lot more
4042 * complicated.
4043 *
4044 * We use the alphanumeric sorting property of the filenames to decide
4045 * which ones are earlier than the lastoff segment.
4046 */
4047 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
4048 {
4049 if (XLogArchiveCheckDone(xlde->d_name))
4050 {
4051 /* Update the last removed location in shared memory first */
4053
4054 RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
4055 }
4056 }
4057 }
4058
4059 FreeDir(xldir);
4060}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2373
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3951
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3971 of file xlog.c.

/*
 * Unlink every entry in the WAL directory (XLOGDIR) whose name begins with
 * "xlogtemp.".
 *
 * Takes no arguments and returns nothing.  Progress is reported with
 * DEBUG2 messages only.
 */
3972{
3973 DIR *xldir;
3974 struct dirent *xlde;
3975
3976 elog(DEBUG2, "removing all temporary WAL segments");
3977
3978 xldir = AllocateDir(XLOGDIR);
3979 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3980 {
3981 char path[MAXPGPATH];
3982
/* 9 is the length of the "xlogtemp." prefix; skip anything else. */
3983 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3984 continue;
3985
3986 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
/*
 * NOTE(review): the result of unlink() is not checked, so the "removed"
 * message below is emitted even if the file could not be deleted.
 */
3987 unlink(path);
3988 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3989 }
3990 FreeDir(xldir);
3991}

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4148 of file xlog.c.

4151{
4152 char path[MAXPGPATH];
4153#ifdef WIN32
4154 char newpath[MAXPGPATH];
4155#endif
4156 const char *segname = segment_de->d_name;
4157
4158 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4159
4160 /*
4161 * Before deleting the file, see if it can be recycled as a future log
4162 * segment. Only recycle normal files, because we don't want to recycle
4163 * symbolic links pointing to a separate archive directory.
4164 */
4165 if (wal_recycle &&
4166 *endlogSegNo <= recycleSegNo &&
4167 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4168 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4169 InstallXLogFileSegment(endlogSegNo, path,
4170 true, recycleSegNo, insertTLI))
4171 {
4173 (errmsg_internal("recycled write-ahead log file \"%s\"",
4174 segname)));
4176 /* Needn't recheck that slot on future iterations */
4177 (*endlogSegNo)++;
4178 }
4179 else
4180 {
4181 /* No need for any more future segments, or recycling failed ... */
4182 int rc;
4183
4185 (errmsg_internal("removing write-ahead log file \"%s\"",
4186 segname)));
4187
4188#ifdef WIN32
4189
4190 /*
4191 * On Windows, if another process (e.g. another backend) holds the file
4192 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4193 * will still show up in directory listing until the last handle is
4194 * closed. To avoid confusing the lingering deleted file for a live
4195 * WAL file that needs to be archived, rename it before deleting it.
4196 *
4197 * If another process holds the file open without FILE_SHARE_DELETE
4198 * flag, rename will fail. We'll try again at the next checkpoint.
4199 */
4200 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4201 if (rename(path, newpath) != 0)
4202 {
4203 ereport(LOG,
4205 errmsg("could not rename file \"%s\": %m",
4206 path)));
4207 return;
4208 }
4209 rc = durable_unlink(newpath, LOG);
4210#else
4211 rc = durable_unlink(path, LOG);
4212#endif
4213 if (rc != 0)
4214 {
4215 /* Message already logged by durable_unlink() */
4216 return;
4217 }
4219 }
4220
4221 XLogArchiveCleanup(segname);
4222}
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3702
bool wal_recycle
Definition: xlog.c:128

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8248 of file xlog.c.

8249{
8250 XLogRecPtr RecPtr;
8251
8252 /* XLOG SWITCH has no data */
8253 XLogBeginInsert();
8254
8255 if (mark_unimportant)
8256 XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
8257 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
8258
8259 return RecPtr;
8260}
#define XLOG_SWITCH
Definition: pg_control.h:72
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr * PrevPtr 
)
static

Definition at line 1119 of file xlog.c.

1121{
1122 XLogCtlInsert *Insert = &XLogCtl->Insert;
1123 uint64 startbytepos;
1124 uint64 endbytepos;
1125 uint64 prevbytepos;
1126
1127 size = MAXALIGN(size);
1128
1129 /* All (non xlog-switch) records should contain data. */
1130 Assert(size > SizeOfXLogRecord);
1131
1132 /*
1133 * The duration the spinlock needs to be held is minimized by minimizing
1134 * the calculations that have to be done while holding the lock. The
1135 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1136 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1137 * page headers. The mapping between "usable" byte positions and physical
1138 * positions (XLogRecPtrs) can be done outside the locked region, and
1139 * because the usable byte position doesn't include any headers, reserving
1140 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1141 */
1142 SpinLockAcquire(&Insert->insertpos_lck);
1143
1144 startbytepos = Insert->CurrBytePos;
1145 endbytepos = startbytepos + size;
1146 prevbytepos = Insert->PrevBytePos;
1147 Insert->CurrBytePos = endbytepos;
1148 Insert->PrevBytePos = startbytepos;
1149
1150 SpinLockRelease(&Insert->insertpos_lck);
1151
1152 *StartPos = XLogBytePosToRecPtr(startbytepos);
1153 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1154 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1155
1156 /*
1157 * Check that the conversions between "usable byte positions" and
1158 * XLogRecPtrs work consistently in both directions.
1159 */
1160 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1161 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1162 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1163}
#define MAXALIGN(LEN)
Definition: c.h:782
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1909
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1952

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr * PrevPtr 
)
static

Definition at line 1175 of file xlog.c.

1176{
1177 XLogCtlInsert *Insert = &XLogCtl->Insert;
1178 uint64 startbytepos;
1179 uint64 endbytepos;
1180 uint64 prevbytepos;
1181 uint32 size = MAXALIGN(SizeOfXLogRecord);
1182 XLogRecPtr ptr;
1183 uint32 segleft;
1184
1185 /*
1186 * These calculations are a bit heavy-weight to be done while holding a
1187 * spinlock, but since we're holding all the WAL insertion locks, there
1188 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1189 * compete for it, but that's not called very frequently.
1190 */
1191 SpinLockAcquire(&Insert->insertpos_lck);
1192
1193 startbytepos = Insert->CurrBytePos;
1194
1195 ptr = XLogBytePosToEndRecPtr(startbytepos);
1196 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1197 {
1198 SpinLockRelease(&Insert->insertpos_lck);
1199 *EndPos = *StartPos = ptr;
1200 return false;
1201 }
1202
1203 endbytepos = startbytepos + size;
1204 prevbytepos = Insert->PrevBytePos;
1205
1206 *StartPos = XLogBytePosToRecPtr(startbytepos);
1207 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1208
1209 segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1210 if (segleft != wal_segment_size)
1211 {
1212 /* consume the rest of the segment */
1213 *EndPos += segleft;
1214 endbytepos = XLogRecPtrToBytePos(*EndPos);
1215 }
1216 Insert->CurrBytePos = endbytepos;
1217 Insert->PrevBytePos = startbytepos;
1218
1219 SpinLockRelease(&Insert->insertpos_lck);
1220
1221 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1222
1223 Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1224 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1225 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1226 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1227
1228 return true;
1229}

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9661 of file xlog.c.

9662{
9663 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9664 XLogCtl->InstallXLogFileSegmentActive = true;
9665 LWLockRelease(ControlFileLock);
9666}

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9684 of file xlog.c.

9685{
9686 SpinLockAcquire(&XLogCtl->info_lck);
9687 XLogCtl->WalWriterSleeping = sleeping;
9688 SpinLockRelease(&XLogCtl->info_lck);
9689}
bool WalWriterSleeping
Definition: xlog.c:544

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4991 of file xlog.c.

4992{
4993 if (XLogArchivingActive())
4994 return XLogArchiveCommand;
4995 else
4996 return "(disabled)";
4997}
char * XLogArchiveCommand
Definition: xlog.c:120

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 5003 of file xlog.c.

5004{
5005 /*
5006 * We display the actual state based on shared memory, so that this GUC
5007 * reports up-to-date state if examined intra-query. The underlying
5008 * variable (in_hot_standby_guc) changes only when we transmit a new value
5009 * to the client.
5010 */
5011 return RecoveryInProgress() ? "on" : "off";
5012}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6790 of file xlog.c.

6791{
6792 /*
6793 * We should have an aux process resource owner to use, and we should not
6794 * be in a transaction that's installed some other resowner.
6795 */
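6796 Assert(AuxProcessResourceOwner != NULL);
6797 Assert(CurrentResourceOwner == NULL ||
6798 CurrentResourceOwner == AuxProcessResourceOwner);
6799 CurrentResourceOwner = AuxProcessResourceOwner;
6800
6801 /* Don't be chatty in standalone mode */
6802 ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6803 (errmsg("shutting down")));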
6804
6805 /*
6806 * Signal walsenders to move to stopping state.
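6807 */
6808 WalSndInitStopping();
6809
6810 /*
6811 * Wait for WAL senders to be in stopping state. This prevents commands
6812 * from writing new WAL.
6813 */
6814 WalSndWaitStopping();
6815
6816 if (RecoveryInProgress())
6817 CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6818 else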
6819 {
6820 /*
6821 * If archiving is enabled, rotate the last XLOG file so that all the
6822 * remaining records are archived (postmaster wakes up the archiver
6823 * process one more time at the end of shutdown). The checkpoint
6824 * record will go to the next XLOG file and won't be archived (yet).
6825 */
6826 if (XLogArchivingActive())
6827 RequestXLogSwitch(false);
6828
6829 CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6830 }
6831}
bool IsPostmasterEnvironment
Definition: globals.c:120
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:176
void WalSndInitStopping(void)
Definition: walsender.c:3750
void WalSndWaitStopping(void)
Definition: walsender.c:3776
bool CreateRestartPoint(int flags)
Definition: xlog.c:7779
bool CreateCheckPoint(int flags)
Definition: xlog.c:7077

References Assert(), AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5614 of file xlog.c.

5615{
5616 XLogCtlInsert *Insert;
5617 CheckPoint checkPoint;
5618 bool wasShutdown;
5619 bool didCrash;
5620 bool haveTblspcMap;
5621 bool haveBackupLabel;
5622 XLogRecPtr EndOfLog;
5623 TimeLineID EndOfLogTLI;
5624 TimeLineID newTLI;
5625 bool performedWalRecovery;
5626 EndOfWalRecoveryInfo *endOfRecoveryInfo;
5627 XLogRecPtr abortedRecPtr;
5628 XLogRecPtr missingContrecPtr;
5629 TransactionId oldestActiveXID;
5630 bool promoted = false;
5631
5632 /*
5633 * We should have an aux process resource owner to use, and we should not
5634 * be in a transaction that's installed some other resowner.
5635 */
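5636 Assert(AuxProcessResourceOwner != NULL);
5637 Assert(CurrentResourceOwner == NULL ||
5638 CurrentResourceOwner == AuxProcessResourceOwner);
5639 CurrentResourceOwner = AuxProcessResourceOwner;
5640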
5641 /*
5642 * Check that contents look valid.
5643 */
5645 ereport(FATAL,
5647 errmsg("control file contains invalid checkpoint location")));
5648
5649 switch (ControlFile->state)
5650 {
5651 case DB_SHUTDOWNED:
5652
5653 /*
5654 * This is the expected case, so don't be chatty in standalone
5655 * mode
5656 */
5658 (errmsg("database system was shut down at %s",
5660 break;
5661
5662 case DB_SHUTDOWNED_IN_RECOVERY:
5663 ereport(LOG,
5664 (errmsg("database system was shut down in recovery at %s",
5666 break;
5667
5668 case DB_SHUTDOWNING:
5669 ereport(LOG,
5670 (errmsg("database system shutdown was interrupted; last known up at %s",
5672 break;
5673
5674 case DB_IN_CRASH_RECOVERY:
5675 ereport(LOG,
5676 (errmsg("database system was interrupted while in recovery at %s",
5678 errhint("This probably means that some data is corrupted and"
5679 " you will have to use the last backup for recovery.")));
5680 break;
5681
5682 case DB_IN_ARCHIVE_RECOVERY:
5683 ereport(LOG,
5684 (errmsg("database system was interrupted while in recovery at log time %s",
5686 errhint("If this has occurred more than once some data might be corrupted"
5687 " and you might need to choose an earlier recovery target.")));
5688 break;
5689
5690 case DB_IN_PRODUCTION:
5691 ereport(LOG,
5692 (errmsg("database system was interrupted; last known up at %s",
5694 break;
5695
5696 default:
5697 ereport(FATAL,
5699 errmsg("control file contains invalid database cluster state")));
5700 }
5701
5702 /* This is just to allow attaching to startup process with a debugger */
5703#ifdef XLOG_REPLAY_DELAY
5705 pg_usleep(60000000L);
5706#endif
5707
5708 /*
5709 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5710 * In cases where someone has performed a copy for PITR, these directories
5711 * may have been excluded and need to be re-created.
5712 */
5714
5715 /* Set up timeout handler needed to report startup progress. */
5719
5720 /*----------
5721 * If we previously crashed, perform a couple of actions:
5722 *
5723 * - The pg_wal directory may still include some temporary WAL segments
5724 * used when creating a new segment, so perform some clean up to not
5725 * bloat this path. This is done first as there is no point to sync
5726 * this temporary data.
5727 *
5728 * - There might be data which we had written, intending to fsync it, but
5729 * which we had not actually fsync'd yet. Therefore, a power failure in
5730 * the near future might cause earlier unflushed writes to be lost, even
5731 * though more recent data written to disk from here on would be
5732 * persisted. To avoid that, fsync the entire data directory.
5733 */
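5734 if (ControlFile->state != DB_SHUTDOWNED &&
5735 ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
5736 {
5737 RemoveTempXlogFiles();
5738 SyncDataDirectory();
5739 didCrash = true;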
5740 }
5741 else
5742 didCrash = false;
5743
5744 /*
5745 * Prepare for WAL recovery if needed.
5746 *
5747 * InitWalRecovery analyzes the control file and the backup label file, if
5748 * any. It updates the in-memory ControlFile buffer according to the
5749 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5750 * It also applies the tablespace map file, if any.
5751 */
5752 InitWalRecovery(ControlFile, &wasShutdown,
5753 &haveBackupLabel, &haveTblspcMap);
5754 checkPoint = ControlFile->checkPointCopy;
5755
5756 /* initialize shared memory variables from the checkpoint record */
5757 TransamVariables->nextXid = checkPoint.nextXid;
5758 TransamVariables->nextOid = checkPoint.nextOid;
5760 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5761 AdvanceOldestClogXid(checkPoint.oldestXid);
5762 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5763 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5765 checkPoint.newestCommitTsXid);
5766 XLogCtl->ckptFullXid = checkPoint.nextXid;
5767
5768 /*
5769 * Clear out any old relcache cache files. This is *necessary* if we do
5770 * any WAL replay, since that would probably result in the cache files
5771 * being out of sync with database reality. In theory we could leave them
5772 * in place if the database had been cleanly shut down, but it seems
5773 * safest to just remove them always and let them be rebuilt during the
5774 * first backend startup. These files needs to be removed from all
5775 * directories including pg_tblspc, however the symlinks are created only
5776 * after reading tablespace_map file in case of archive recovery from
5777 * backup, so needs to clear old relcache files here after creating
5778 * symlinks.
5779 */
5781
5782 /*
5783 * Initialize replication slots, before there's a chance to remove
5784 * required resources.
5785 */
5787
5788 /*
5789 * Startup logical state, needs to be setup now so we have proper data
5790 * during crash recovery.
5791 */
5793
5794 /*
5795 * Startup CLOG. This must be done after TransamVariables->nextXid has
5796 * been initialized and before we accept connections or begin WAL replay.
5797 */
5798 StartupCLOG();
5799
5800 /*
5801 * Startup MultiXact. We need to do this early to be able to replay
5802 * truncations.
5803 */
5805
5806 /*
5807 * Ditto for commit timestamps. Activate the facility if the setting is
5808 * enabled in the control file, as there should be no tracking of commit
5809 * timestamps done when the setting was disabled. This facility can be
5810 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5811 */
5814
5815 /*
5816 * Recover knowledge about replay progress of known replication partners.
5817 */
5819
5820 /*
5821 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5822 * control file. On recovery, all unlogged relations are blown away, so
5823 * the unlogged LSN counter can be reset too.
5824 */
5828 else
5831
5832 /*
5833 * Copy any missing timeline history files between 'now' and the recovery
5834 * target timeline from archive to pg_wal. While we don't need those files
5835 * ourselves - the history file of the recovery target timeline covers all
5836 * the previous timelines in the history too - a cascading standby server
5837 * might be interested in them. Or, if you archive the WAL from this
5838 * server to a different archive than the primary, it'd be good for all
5839 * the history files to get archived there after failover, so that you can
5840 * use one of the old timelines as a PITR target. Timeline history files
5841 * are small, so it's better to copy them unnecessarily than not copy them
5842 * and regret later.
5843 */
5845
5846 /*
5847 * Before running in recovery, scan pg_twophase and fill in its status to
5848 * be able to work on entries generated by redo. Doing a scan before
5849 * taking any recovery action has the merit to discard any 2PC files that
5850 * are newer than the first record to replay, saving from any conflicts at
5851 * replay. This avoids as well any subsequent scans when doing recovery
5852 * of the on-disk two-phase data.
5853 */
5855
5856 /*
5857 * When starting with crash recovery, reset pgstat data - it might not be
5858 * valid. Otherwise restore pgstat data. It's safe to do this here,
5859 * because postmaster will not yet have started any other processes.
5860 *
5861 * NB: Restoring replication slot stats relies on slot state to have
5862 * already been restored from disk.
5863 *
5864 * TODO: With a bit of extra work we could just start with a pgstat file
5865 * associated with the checkpoint redo location we're starting from.
5866 */
5867 if (didCrash)
5868 pgstat_discard_stats();
5869 else
5870 pgstat_restore_stats();
5871
5873
5876
5877 /* REDO */
5878 if (InRecovery)
5879 {
5880 /* Initialize state for RecoveryInProgress() */
5884 else
5887
5888 /*
5889 * Update pg_control to show that we are recovering and to show the
5890 * selected checkpoint as the place we are starting from. We also mark
5891 * pg_control with any minimum recovery stop point obtained from a
5892 * backup history file.
5893 *
5894 * No need to hold ControlFileLock yet, we aren't up far enough.
5895 */
5897
5898 /*
5899 * If there was a backup label file, it's done its job and the info
5900 * has now been propagated into pg_control. We must get rid of the
5901 * label file so that if we crash during recovery, we'll pick up at
5902 * the latest recovery restartpoint instead of going all the way back
5903 * to the backup start point. It seems prudent though to just rename
5904 * the file out of the way rather than delete it completely.
5905 */
5906 if (haveBackupLabel)
5907 {
5908 unlink(BACKUP_LABEL_OLD);
5910 }
5911
5912 /*
5913 * If there was a tablespace_map file, it's done its job and the
5914 * symlinks have been created. We must get rid of the map file so
5915 * that if we crash during recovery, we don't create symlinks again.
5916 * It seems prudent though to just rename the file out of the way
5917 * rather than delete it completely.
5918 */
5919 if (haveTblspcMap)
5920 {
5921 unlink(TABLESPACE_MAP_OLD);
5923 }
5924
5925 /*
5926 * Initialize our local copy of minRecoveryPoint. When doing crash
5927 * recovery we want to replay up to the end of WAL. Particularly, in
5928 * the case of a promoted standby minRecoveryPoint value in the
5929 * control file is only updated after the first checkpoint. However,
5930 * if the instance crashes before the first post-recovery checkpoint
5931 * is completed then recovery will use a stale location causing the
5932 * startup process to think that there are still invalid page
5933 * references when checking for data consistency.
5934 */
5936 {
5939 }
5940 else
5941 {
5944 }
5945
5946 /* Check that the GUCs used to generate the WAL allow recovery */
5948
5949 /*
5950 * We're in recovery, so unlogged relations may be trashed and must be
5951 * reset. This should be done BEFORE allowing Hot Standby
5952 * connections, so that read-only backends don't try to read whatever
5953 * garbage is left over from before.
5954 */
5955 ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
5956
5957 /*
5958 * Likewise, delete any saved transaction snapshot files that got left
5959 * behind by crashed backends.
5960 */
5962
5963 /*
5964 * Initialize for Hot Standby, if enabled. We won't let backends in
5965 * yet, not until we've reached the min recovery point specified in
5966 * control file and we've established a recovery snapshot from a
5967 * running-xacts WAL record.
5968 */
5970 {
5971 TransactionId *xids;
5972 int nxids;
5973
5975 (errmsg_internal("initializing for hot standby")));
5976
5978
5979 if (wasShutdown)
5980 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5981 else
5982 oldestActiveXID = checkPoint.oldestActiveXid;
5983 Assert(TransactionIdIsValid(oldestActiveXID));
5984
5985 /* Tell procarray about the range of xids it has to deal with */
5987
5988 /*
5989 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5990 * have already been started up and other SLRUs are not maintained
5991 * during recovery and need not be started yet.
5992 */
5993 StartupSUBTRANS(oldestActiveXID);
5994
5995 /*
5996 * If we're beginning at a shutdown checkpoint, we know that
5997 * nothing was running on the primary at this point. So fake-up an
5998 * empty running-xacts record and use that here and now. Recover
5999 * additional standby state for prepared transactions.
6000 */
6001 if (wasShutdown)
6002 {
6004 TransactionId latestCompletedXid;
6005
6006 /* Update pg_subtrans entries for any prepared transactions */
6008
6009 /*
6010 * Construct a RunningTransactions snapshot representing a
6011 * shut down server, with only prepared transactions still
6012 * alive. We're never overflowed at this point because all
6013 * subxids are listed with their parent prepared transactions.
6014 */
6015 running.xcnt = nxids;
6016 running.subxcnt = 0;
6018 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
6019 running.oldestRunningXid = oldestActiveXID;
6020 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
6021 TransactionIdRetreat(latestCompletedXid);
6022 Assert(TransactionIdIsNormal(latestCompletedXid));
6023 running.latestCompletedXid = latestCompletedXid;
6024 running.xids = xids;
6025
6027 }
6028 }
6029
6030 /*
6031 * We're all set for replaying the WAL now. Do it.
6032 */
6033 PerformWalRecovery();
6034 performedWalRecovery = true;
6035 }
6036 else
6037 performedWalRecovery = false;
6038
6039 /*
6040 * Finish WAL recovery.
6041 */
6042 endOfRecoveryInfo = FinishWalRecovery();
6043 EndOfLog = endOfRecoveryInfo->endOfLog;
6044 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
6045 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
6046 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
6047
6048 /*
6049 * Reset ps status display, so as no information related to recovery shows
6050 * up.
6051 */
6052 set_ps_display("");
6053
6054 /*
6055 * When recovering from a backup (we are in recovery, and archive recovery
6056 * was requested), complain if we did not roll forward far enough to reach
6057 * the point where the database is consistent. For regular online
6058 * backup-from-primary, that means reaching the end-of-backup WAL record
6059 * (at which point we reset backupStartPoint to be Invalid), for
6060 * backup-from-replica (which can't inject records into the WAL stream),
6061 * that point is when we reach the minRecoveryPoint in pg_control (which
6062 * we purposefully copy last when backing up from a replica). For
6063 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
6064 * or snapshot-style backups (which don't), backupEndRequired will be set
6065 * to false.
6066 *
6067 * Note: it is indeed okay to look at the local variable
6068 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
6069 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
6070 * been advanced beyond the WAL we processed.
6071 */
6072 if (InRecovery &&
6073 (EndOfLog < LocalMinRecoveryPoint ||
6075 {
6076 /*
6077 * Ran off end of WAL before reaching end-of-backup WAL record, or
6078 * minRecoveryPoint. That's a bad sign, indicating that you tried to
6079 * recover from an online backup but never called pg_backup_stop(), or
6080 * you didn't archive all the WAL needed.
6081 */
6083 {
6085 ereport(FATAL,
6086 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6087 errmsg("WAL ends before end of online backup"),
6088 errhint("All WAL generated while online backup was taken must be available at recovery.")));
6089 else
6090 ereport(FATAL,
6091 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6092 errmsg("WAL ends before consistent recovery point")));
6093 }
6094 }
6095
6096 /*
6097 * Reset unlogged relations to the contents of their INIT fork. This is
6098 * done AFTER recovery is complete so as to include any unlogged relations
6099 * created during recovery, but BEFORE recovery is marked as having
6100 * completed successfully. Otherwise we'd not retry if any of the post
6101 * end-of-recovery steps fail.
6102 */
6103 if (InRecovery)
6104 ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
6105
6106 /*
6107 * Pre-scan prepared transactions to find out the range of XIDs present.
6108 * This information is not quite needed yet, but it is positioned here so
6109 * as potential problems are detected before any on-disk change is done.
6110 */
6111 oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
6112
6113 /*
6114 * Allow ordinary WAL segment creation before possibly switching to a new
6115 * timeline, which creates a new segment, and after the last ReadRecord().
6116 */
6117 SetInstallXLogFileSegmentActive();
6118
6119 /*
6120 * Consider whether we need to assign a new timeline ID.
6121 *
6122 * If we did archive recovery, we always assign a new ID. This handles a
6123 * couple of issues. If we stopped short of the end of WAL during
6124 * recovery, then we are clearly generating a new timeline and must assign
6125 * it a unique new ID. Even if we ran to the end, modifying the current
6126 * last segment is problematic because it may result in trying to
6127 * overwrite an already-archived copy of that segment, and we encourage
6128 * DBAs to make their archive_commands reject that. We can dodge the
6129 * problem by making the new active segment have a new timeline ID.
6130 *
6131 * In a normal crash recovery, we can just extend the timeline we were in.
6132 */
6133 newTLI = endOfRecoveryInfo->lastRecTLI;
6134 if (ArchiveRecoveryRequested)
6135 {
6136 newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
6137 ereport(LOG,
6138 (errmsg("selected new timeline ID: %u", newTLI)));
6139
6140 /*
6141 * Make a writable copy of the last WAL segment. (Note that we also
6142 * have a copy of the last block of the old WAL in
6143 * endOfRecovery->lastPage; we will use that below.)
6144 */
6145 XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
6146
6147 /*
6148 * Remove the signal files out of the way, so that we don't
6149 * accidentally re-enter archive recovery mode in a subsequent crash.
6150 */
6151 if (endOfRecoveryInfo->standby_signal_file_found)
6153
6154 if (endOfRecoveryInfo->recovery_signal_file_found)
6156
6157 /*
6158 * Write the timeline history file, and have it archived. After this
6159 * point (or rather, as soon as the file is archived), the timeline
6160 * will appear as "taken" in the WAL archive and to any standby
6161 * servers. If we crash before actually switching to the new
6162 * timeline, standby servers will nevertheless think that we switched
6163 * to the new timeline, and will try to connect to the new timeline.
6164 * To minimize the window for that, try to do as little as possible
6165 * between here and writing the end-of-recovery record.
6166 */
6168 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6169
6170 ereport(LOG,
6171 (errmsg("archive recovery complete")));
6172 }
6173
6174 /* Save the selected TimeLineID in shared memory, too */
6176 XLogCtl->InsertTimeLineID = newTLI;
6177 XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
6179
6180 /*
6181 * Actually, if WAL ended in an incomplete record, skip the parts that
6182 * made it through and start writing after the portion that persisted.
6183 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6184 * we'll do as soon as we're open for writing new WAL.)
6185 */
6187 {
6188 /*
6189 * We should only have a missingContrecPtr if we're not switching to a
6190 * new timeline. When a timeline switch occurs, WAL is copied from the
6191 * old timeline to the new only up to the end of the last complete
6192 * record, so there can't be an incomplete WAL record that we need to
6193 * disregard.
6194 */
6195 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6197 EndOfLog = missingContrecPtr;
6198 }
6199
6200 /*
6201 * Prepare to write WAL starting at EndOfLog location, and init xlog
6202 * buffer cache using the block containing the last record from the
6203 * previous incarnation.
6204 */
6205 Insert = &XLogCtl->Insert;
6206 Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6207 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6208
6209 /*
6210 * Tricky point here: lastPage contains the *last* block that the LastRec
6211 * record spans, not the one it starts in. The last block is indeed the
6212 * one we want to use.
6213 */
6214 if (EndOfLog % XLOG_BLCKSZ != 0)
6215 {
6216 char *page;
6217 int len;
6218 int firstIdx;
6219
6220 firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6221 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6222 Assert(len < XLOG_BLCKSZ);
6223
6224 /* Copy the valid part of the last block, and zero the rest */
6225 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6226 memcpy(page, endOfRecoveryInfo->lastPage, len);
6227 memset(page + len, 0, XLOG_BLCKSZ - len);
6228
6229 pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6230 pg_atomic_write_u64(&XLogCtl->InitializedUpTo, endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6231 XLogCtl->InitializedFrom = endOfRecoveryInfo->lastPageBeginPtr;
6232 }
6233 else
6234 {
6235 /*
6236 * There is no partial block to copy. Just set InitializedUpTo, and
6237 * let the first attempt to insert a log record to initialize the next
6238 * buffer.
6239 */
6241 XLogCtl->InitializedFrom = EndOfLog;
6242 }
6244
6245 /*
6246 * Update local and shared status. This is OK to do without any locks
6247 * because no other process can be reading or writing WAL yet.
6248 */
6249 LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6253 XLogCtl->LogwrtRqst.Write = EndOfLog;
6254 XLogCtl->LogwrtRqst.Flush = EndOfLog;
6255
6256 /*
6257 * Preallocate additional log files, if wanted.
6258 */
6259 PreallocXlogFiles(EndOfLog, newTLI);
6260
6261 /*
6262 * Okay, we're officially UP.
6263 */
6264 InRecovery = false;
6265
6266 /* start the archive_timeout timer and LSN running */
6267 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6268 XLogCtl->lastSegSwitchLSN = EndOfLog;
6269
6270 /* also initialize latestCompletedXid, to nextXid - 1 */
6271 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6274 LWLockRelease(ProcArrayLock);
6275
6276 /*
6277 * Start up subtrans, if not already done for hot standby. (commit
6278 * timestamps are started below, if necessary.)
6279 */
6281 StartupSUBTRANS(oldestActiveXID);
6282
6283 /*
6284 * Perform end of recovery actions for any SLRUs that need it.
6285 */
6286 TrimCLOG();
6287 TrimMultiXact();
6288
6289 /*
6290 * Reload shared-memory state for prepared transactions. This needs to
6291 * happen before renaming the last partial segment of the old timeline as
6292 * it may be possible that we have to recover some transactions from it.
6293 */
6295
6296 /* Shut down xlogreader */
6298
6299 /* Enable WAL writes for this backend only. */
6301
6302 /* If necessary, write overwrite-contrecord before doing anything else */
6304 {
6307 }
6308
6309 /*
6310 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6311 * record before resource manager writes cleanup WAL records or checkpoint
6312 * record is written.
6313 */
6314 Insert->fullPageWrites = lastFullPageWrites;
6316
6317 /*
6318 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6319 */
6320 if (performedWalRecovery)
6321 promoted = PerformRecoveryXLogAction();
6322
6323 /*
6324 * If any of the critical GUCs have changed, log them before we allow
6325 * backends to write WAL.
6326 */
6328
6329 /* If this is archive recovery, perform post-recovery cleanup actions. */
6330 if (ArchiveRecoveryRequested)
6331 CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6332
6333 /*
6334 * Local WAL inserts enabled, so it's time to finish initialization of
6335 * commit timestamp.
6336 */
6338
6339 /*
6340 * All done with end-of-recovery actions.
6341 *
6342 * Now allow backends to write WAL and update the control file status in
6343 * consequence. SharedRecoveryState, that controls if backends can write
6344 * WAL, is updated while holding ControlFileLock to prevent other backends
6345 * to look at an inconsistent state of the control file in shared memory.
6346 * There is still a small window during which backends can write WAL and
6347 * the control file is still referring to a system not in DB_IN_PRODUCTION
6348 * state while looking at the on-disk control file.
6349 *
6350 * Also, we use info_lck to update SharedRecoveryState to ensure that
6351 * there are no race conditions concerning visibility of other recent
6352 * updates to shared memory.
6353 */
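6354 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6355 ControlFile->state = DB_IN_PRODUCTION;
6356
6357 SpinLockAcquire(&XLogCtl->info_lck);
6358 XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
6359 SpinLockRelease(&XLogCtl->info_lck);
6360
6361 UpdateControlFile();
6362 LWLockRelease(ControlFileLock);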
6363
6364 /*
6365 * Shutdown the recovery environment. This must occur after
6366 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6367 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6368 * any session building a snapshot will not rely on KnownAssignedXids as
6369 * RecoveryInProgress() would return false at this stage. This is
6370 * particularly critical for prepared 2PC transactions, that would still
6371 * need to be included in snapshots once recovery has ended.
6372 */
6373 if (standbyState != STANDBY_DISABLED)
6374 ShutdownRecoveryTransactionEnvironment();
6375
6376 /*
6377 * If there were cascading standby servers connected to us, nudge any wal
6378 * sender processes to notice that we've been promoted.
6379 */
6380 WalSndWakeup(true, true);
6381
6382 /*
6383 * If this was a promotion, request an (online) checkpoint now. This isn't
6384 * required for consistency, but the last restartpoint might be far back,
6385 * and in case of a crash, recovering from it might take a longer than is
6386 * appropriate now that we're not in standby mode anymore.
6387 */
6388 if (promoted)
6389 RequestCheckpoint(CHECKPOINT_FORCE);
6390}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:494
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:307
uint32 TransactionId
Definition: c.h:623
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
void SyncDataDirectory(void)
Definition: fd.c:3609
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
void TrimMultiXact(void)
Definition: multixact.c:2178
void StartupMultiXact(void)
Definition: multixact.c:2153
void StartupReplicationOrigin(void)
Definition: origin.c:699
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:504
void pgstat_discard_stats(void)
Definition: pgstat.c:516
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6883
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:2089
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1574
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:95
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:161
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:466
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2073
void restoreTwoPhaseData(void)
Definition: twophase.c:1888
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1952
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2032
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3671
void UpdateFullPageWrites(void)
Definition: xlog.c:8354
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5384
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4238
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7628
static void XLogReportParameters(void)
Definition: xlog.c:8291
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6472
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5474
static bool lastFullPageWrites
Definition: xlog.c:217
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5399
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5570
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3971
#define TABLESPACE_MAP_OLD
Definition: xlog.h:307
#define TABLESPACE_MAP
Definition: xlog.h:306
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:302
#define BACKUP_LABEL_OLD
Definition: xlog.h:304
#define BACKUP_LABEL_FILE
Definition: xlog.h:303
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:301
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:139
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:379
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:378
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:518
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:123
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert(), AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedFrom, XLogCtlData::InitializedUpTo, XLogCtlData::InitializeReserved, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), 
ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_read_u64(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), 
StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow)
static

Definition at line 5384 of file xlog.c.

5385{
5386 char *buf = palloc(128);
5387
5388 pg_strftime(buf, 128,
5389 "%Y-%m-%d %H:%M:%S %Z",
5390 pg_localtime(&tnow, log_timezone));
5391
5392 return buf;
5393}
static char * buf
Definition: pg_test_fsync.c:72
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, log_timezone, palloc(), pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6397 of file xlog.c.

6398{
6399 /* initialize minRecoveryPoint to this record */
6400 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6402 if (ControlFile->minRecoveryPoint < EndRecPtr)
6403 {
6404 ControlFile->minRecoveryPoint = EndRecPtr;
6405 ControlFile->minRecoveryPointTLI = replayTLI;
6406 }
6407 /* update local copy */
6410
6411 /*
6412 * The startup process can update its local copy of minRecoveryPoint from
6413 * this point.
6414 */
6416
6418
6419 /*
6420 * We update SharedRecoveryState while holding the lock on ControlFileLock
6421 * so both states are consistent in shared memory.
6422 */
6426
6427 LWLockRelease(ControlFileLock);
6428}
static bool updateMinRecoveryPoint
Definition: xlog.c:659

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 7012 of file xlog.c.

7013{
7014 /*
7015 * The status is reported only for end-of-recovery and shutdown
7016 * checkpoints or shutdown restartpoints. Updating the ps display is
7017 * useful in those situations as it may not be possible to rely on
7018 * pg_stat_activity to see the status of the checkpointer or the startup
7019 * process.
7020 */
7022 return;
7023
7024 if (reset)
7025 set_ps_display("");
7026 else
7027 {
7028 char activitymsg[128];
7029
7030 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
7031 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
7032 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
7033 restartpoint ? "restartpoint" : "checkpoint");
7034 set_ps_display(activitymsg);
7035 }
7036}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6974 of file xlog.c.

6975{
6976 /*
6977 * To estimate the number of segments consumed between checkpoints, keep a
6978 * moving average of the amount of WAL generated in previous checkpoint
6979 * cycles. However, if the load is bursty, with quiet periods and busy
6980 * periods, we want to cater for the peak load. So instead of a plain
6981 * moving average, let the average decline slowly if the previous cycle
6982 * used less WAL than estimated, but bump it up immediately if it used
6983 * more.
6984 *
6985 * When checkpoints are triggered by max_wal_size, this should converge to
6986 * CheckpointSegments * wal_segment_size.
6987 *
6988 * Note: This doesn't pay any attention to what caused the checkpoint.
6989 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6990 * starting a base backup, are counted the same as those created
6991 * automatically. The slow-decline will largely mask them out, if they are
6992 * not frequent. If they are frequent, it seems reasonable to count them
6993 * in as any others; if you issue a manual checkpoint every 5 minutes and
6994 * never let a timed checkpoint happen, it makes sense to base the
6995 * preallocation on that 5 minute interval rather than whatever
6996 * checkpoint_timeout is set to.
6997 */
6998 PrevCheckPointDistance = nbytes;
6999 if (CheckPointDistanceEstimate < nbytes)
7001 else
7003 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
7004}

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4725 of file xlog.c.

4726{
4728}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8354 of file xlog.c.

8355{
8357 bool recoveryInProgress;
8358
8359 /*
8360 * Do nothing if full_page_writes has not been changed.
8361 *
8362 * It's safe to check the shared full_page_writes without the lock,
8363 * because we assume that there is no concurrently running process which
8364 * can update it.
8365 */
8366 if (fullPageWrites == Insert->fullPageWrites)
8367 return;
8368
8369 /*
8370 * Perform this outside critical section so that the WAL insert
8371 * initialization done by RecoveryInProgress() doesn't trigger an
8372 * assertion failure.
8373 */
8374 recoveryInProgress = RecoveryInProgress();
8375
8377
8378 /*
8379 * It's always safe to take full page images, even when not strictly
8380 * required, but not the other way round. So if we're setting full_page_writes
8381 * to true, first set it true and then write the WAL record. If we're
8382 * setting it to false, first write the WAL record and then set the global
8383 * flag.
8384 */
8385 if (fullPageWrites)
8386 {
8388 Insert->fullPageWrites = true;
8390 }
8391
8392 /*
8393 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8394 * full_page_writes during archive recovery, if required.
8395 */
8396 if (XLogStandbyInfoActive() && !recoveryInProgress)
8397 {
8399 XLogRegisterData(&fullPageWrites, sizeof(bool));
8400
8401 XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8402 }
8403
8404 if (!fullPageWrites)
8405 {
8407 Insert->fullPageWrites = false;
8409 }
8411}
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3951 of file xlog.c.

3952{
3953 uint32 tli;
3954 XLogSegNo segno;
3955
3957
3959 if (segno > XLogCtl->lastRemovedSegNo)
3960 XLogCtl->lastRemovedSegNo = segno;
3962}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2843 of file xlog.c.

2844{
2845 /* Quick check using our local copy of the variable */
2846 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2847 return;
2848
2849 /*
2850 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2851 * i.e., we're doing crash recovery. We never modify the control file's
2852 * value in that case, so we can short-circuit future checks here too. The
2853 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2854 * updated until crash recovery finishes. We only do this for the startup
2855 * process as it should not update its own reference of minRecoveryPoint
2856 * until it has finished crash recovery to make sure that all WAL
2857 * available is replayed in this case. This also saves from extra locks
2858 * taken on the control file from the startup process.
2859 */
2861 {
2862 updateMinRecoveryPoint = false;
2863 return;
2864 }
2865
2866 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2867
2868 /* update local copy */
2871
2873 updateMinRecoveryPoint = false;
2874 else if (force || LocalMinRecoveryPoint < lsn)
2875 {
2876 XLogRecPtr newMinRecoveryPoint;
2877 TimeLineID newMinRecoveryPointTLI;
2878
2879 /*
2880 * To avoid having to update the control file too often, we update it
2881 * all the way to the last record being replayed, even though 'lsn'
2882 * would suffice for correctness. This also allows the 'force' case
2883 * to not need a valid 'lsn' value.
2884 *
2885 * Another important reason for doing it this way is that the passed
2886 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2887 * the caller got it from a corrupted heap page. Accepting such a
2888 * value as the min recovery point would prevent us from coming up at
2889 * all. Instead, we just log a warning and continue with recovery.
2890 * (See also the comments about corrupt LSNs in XLogFlush.)
2891 */
2892 newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2893 if (!force && newMinRecoveryPoint < lsn)
2894 elog(WARNING,
2895 "xlog min recovery request %X/%X is past current point %X/%X",
2896 LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2897
2898 /* update control file */
2899 if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2900 {
2901 ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2902 ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2904 LocalMinRecoveryPoint = newMinRecoveryPoint;
2905 LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2906
2908 (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
2909 LSN_FORMAT_ARGS(newMinRecoveryPoint),
2910 newMinRecoveryPointTLI)));
2911 }
2912 }
2913 LWLockRelease(ControlFileLock);
2914}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4238 of file xlog.c.

4239{
4240 char path[MAXPGPATH];
4241 struct stat stat_buf;
4242
4243 /* Check for pg_wal; if it doesn't exist, error out */
4244 if (stat(XLOGDIR, &stat_buf) != 0 ||
4245 !S_ISDIR(stat_buf.st_mode))
4246 ereport(FATAL,
4248 errmsg("required WAL directory \"%s\" does not exist",
4249 XLOGDIR)));
4250
4251 /* Check for archive_status */
4252 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4253 if (stat(path, &stat_buf) == 0)
4254 {
4255 /* Check for weird cases where it exists but isn't a directory */
4256 if (!S_ISDIR(stat_buf.st_mode))
4257 ereport(FATAL,
4259 errmsg("required WAL directory \"%s\" does not exist",
4260 path)));
4261 }
4262 else
4263 {
4264 ereport(LOG,
4265 (errmsg("creating missing WAL directory \"%s\"", path)));
4266 if (MakePGDirectory(path) < 0)
4267 ereport(FATAL,
4269 errmsg("could not create missing directory \"%s\": %m",
4270 path)));
4271 }
4272
4273 /* Check for summaries */
4274 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4275 if (stat(path, &stat_buf) == 0)
4276 {
4277 /* Check for weird cases where it exists but isn't a directory */
4278 if (!S_ISDIR(stat_buf.st_mode))
4279 ereport(FATAL,
4280 (errmsg("required WAL directory \"%s\" does not exist",
4281 path)));
4282 }
4283 else
4284 {
4285 ereport(LOG,
4286 (errmsg("creating missing WAL directory \"%s\"", path)));
4287 if (MakePGDirectory(path) < 0)
4288 ereport(FATAL,
4289 (errmsg("could not create missing directory \"%s\": %m",
4290 path)));
4291 }
4292}
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3978
#define S_ISDIR(m)
Definition: win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1515 of file xlog.c.

1516{
1517 uint64 bytepos;
1518 XLogRecPtr inserted;
1519 XLogRecPtr reservedUpto;
1520 XLogRecPtr finishedUpto;
1522 int i;
1523
1524 if (MyProc == NULL)
1525 elog(PANIC, "cannot wait without a PGPROC structure");
1526
1527 /*
1528 * Check if there's any work to do. Use a barrier to ensure we get the
1529 * freshest value.
1530 */
1532 if (upto <= inserted)
1533 return inserted;
1534
1535 /* Read the current insert position */
1536 SpinLockAcquire(&Insert->insertpos_lck);
1537 bytepos = Insert->CurrBytePos;
1538 SpinLockRelease(&Insert->insertpos_lck);
1539 reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1540
1541 /*
1542 * No-one should request to flush a piece of WAL that hasn't even been
1543 * reserved yet. However, it can happen if there is a block with a bogus
1544 * LSN on disk, for example. XLogFlush checks for that situation and
1545 * complains, but only after the flush. Here we just assume that to mean
1546 * that all WAL that has been reserved needs to be finished. In this
1547 * corner-case, the return value can be smaller than 'upto' argument.
1548 */
1549 if (upto > reservedUpto)
1550 {
1551 ereport(LOG,
1552 (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1553 LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
1554 upto = reservedUpto;
1555 }
1556
1557 /*
1558 * Loop through all the locks, sleeping on any in-progress insert older
1559 * than 'upto'.
1560 *
1561 * finishedUpto is our return value, indicating the point up to which all
1562 * the WAL insertions have been finished. Initialize it to the head of
1563 * reserved WAL, and as we iterate through the insertion locks, back it
1564 * out for any insertion that's still in progress.
1565 */
1566 finishedUpto = reservedUpto;
1567 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1568 {
1569 XLogRecPtr insertingat = InvalidXLogRecPtr;
1570
1571 do
1572 {
1573 /*
1574 * See if this insertion is in progress. LWLockWaitForVar will
1575 * wait for the lock to be released, or for the 'value' to be set
1576 * by a LWLockUpdateVar call. When a lock is initially acquired,
1577 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1578 * know where it's inserting yet. We will have to wait for it. If
1579 * it's a small insertion, the record will most likely fit on the
1580 * same page and the inserter will release the lock without ever
1581 * calling LWLockUpdateVar. But if it has to sleep, it will
1582 * advertise the insertion point with LWLockUpdateVar before
1583 * sleeping.
1584 *
1585 * In this loop we are only waiting for insertions that started
1586 * before WaitXLogInsertionsToFinish was called. The lack of
1587 * memory barriers in the loop means that we might see locks as
1588 * "unused" that have since become used. This is fine because
1589 * they only can be used for later insertions that we would not
1590 * want to wait on anyway. Not taking a lock to acquire the
1591 * current insertingAt value means that we might see older
1592 * insertingAt values. This is also fine, because if we read a
1593 * value too old, we will add ourselves to the wait queue, which
1594 * contains atomic operations.
1595 */
1596 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1598 insertingat, &insertingat))
1599 {
1600 /* the lock was free, so no insertion in progress */
1601 insertingat = InvalidXLogRecPtr;
1602 break;
1603 }
1604
1605 /*
1606 * This insertion is still in progress. Have to wait, unless the
1607 * inserter has proceeded past 'upto'.
1608 */
1609 } while (insertingat < upto);
1610
1611 if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1612 finishedUpto = insertingat;
1613 }
1614
1615 /*
1616 * Advance the limit we know to have been inserted and return the freshest
1617 * value we know of, which might be beyond what we requested if somebody
1618 * is concurrently doing this with an 'upto' pointer ahead of us.
1619 */
1621 finishedUpto);
1622
1623 return finishedUpto;
1624}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition: atomics.h:585
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1598
PGPROC * MyProc
Definition: proc.c:67
pg_atomic_uint64 insertingAt
Definition: xlog.c:365

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1382 of file xlog.c.

1383{
1384 bool immed;
1385
1386 /*
1387 * It doesn't matter which of the WAL insertion locks we acquire, so try
1388 * the one we used last time. If the system isn't particularly busy, it's
1389 * a good bet that it's still available, and it's good to have some
1390 * affinity to a particular lock so that you don't unnecessarily bounce
1391 * cache lines between processes when there's no contention.
1392 *
1393 * If this is the first time through in this backend, pick a lock
1394 * (semi-)randomly. This allows the locks to be used evenly if you have a
1395 * lot of very short connections.
1396 */
1397 static int lockToTry = -1;
1398
1399 if (lockToTry == -1)
1400 lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1401 MyLockNo = lockToTry;
1402
1403 /*
1404 * The insertingAt value is initially set to 0, as we don't know our
1405 * insert location yet.
1406 */
1408 if (!immed)
1409 {
1410 /*
1411 * If we couldn't get the lock immediately, try another lock next
1412 * time. On a system with more insertion locks than concurrent
1413 * inserters, this causes all the inserters to eventually migrate to a
1414 * lock that no-one else is using. On a system with more inserters
1415 * than locks, it still helps to distribute the inserters evenly
1416 * across the locks.
1417 */
1418 lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1419 }
1420}
ProcNumber MyProcNumber
Definition: globals.c:91
static int MyLockNo
Definition: xlog.c:662

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1427 of file xlog.c.

1428{
1429 int i;
1430
1431 /*
1432 * When holding all the locks, all but the last lock's insertingAt
1433 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1434 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1435 */
1436 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1437 {
1442 }
1443 /* Variable value reset to 0 at release */
1445
1446 holdingAllLocks = true;
1447}
#define PG_UINT64_MAX
Definition: c.h:564
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1734
static bool holdingAllLocks
Definition: xlog.c:663

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1456 of file xlog.c.

1457{
1458 if (holdingAllLocks)
1459 {
1460 int i;
1461
1462 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1465 0);
1466
1467 holdingAllLocks = false;
1468 }
1469 else
1470 {
1473 0);
1474 }
1475}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1931

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

/*
 * Publish 'insertingAt' as this backend's current insertion position in the
 * insertingAt variable of the lock it holds.
 *
 * When all locks are held, only the last lock's variable is updated; the
 * others were set to PG_UINT64_MAX when the locks were acquired.
 *
 * NOTE(review): the LWLock member is spelled ".l.lock" here; confirm that
 * name against the WALInsertLock declaration.
 */
static void
WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
{
    if (holdingAllLocks)
    {
        /*
         * We use the last lock to mark our actual position, see comments in
         * WALInsertLockAcquireExclusive.
         */
        LWLockUpdateVar(&WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.lock,
                        &WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.insertingAt,
                        insertingAt);
    }
    else
        LWLockUpdateVar(&WALInsertLocks[MyLockNo].l.lock,
                        &WALInsertLocks[MyLockNo].l.insertingAt,
                        insertingAt);
}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

/*
 * Copy up to 'count' bytes of WAL, starting at 'startptr', from the
 * in-memory WAL buffers into 'dstbuf', without taking any lock.
 *
 * dstbuf   - destination; must have room for 'count' bytes
 * startptr - first WAL position to read; must be valid
 * count    - number of bytes requested
 * tli      - timeline the caller wants to read
 *
 * Returns the number of bytes actually copied, which may be less than
 * 'count' (including zero) if a needed page is not, or is no longer, present
 * in the buffers.  Returns 0 immediately during recovery or when 'tli' is
 * not the current WAL insertion timeline.  Raises an ERROR if the request
 * extends past the WAL inserted so far.
 */
Size
WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count,
                   TimeLineID tli)
{
    char       *pdst = dstbuf;
    XLogRecPtr  recptr = startptr;
    XLogRecPtr  inserted;
    Size        nbytes = count;

    if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
        return 0;

    Assert(!XLogRecPtrIsInvalid(startptr));

    /*
     * Caller should ensure that the requested data has been inserted into WAL
     * buffers before we try to read it.
     */
    inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
    if (startptr + count > inserted)
        ereport(ERROR,
                errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
                       LSN_FORMAT_ARGS(startptr + count),
                       LSN_FORMAT_ARGS(inserted)));

    /*
     * Loop through the buffers without a lock. For each buffer, atomically
     * read and verify the end pointer, then copy the data out, and finally
     * re-read and re-verify the end pointer.
     *
     * Once a page is evicted, it never returns to the WAL buffers, so if the
     * end pointer matches the expected end pointer before and after we copy
     * the data, then the right page must have been present during the data
     * copy. Read barriers are necessary to ensure that the data copy actually
     * happens between the two verification steps.
     *
     * If either verification fails, we simply terminate the loop and return
     * with the data that had been already copied out successfully.
     */
    while (nbytes > 0)
    {
        uint32      offset = recptr % XLOG_BLCKSZ;
        int         idx = XLogRecPtrToBufIdx(recptr);
        XLogRecPtr  expectedEndPtr;
        XLogRecPtr  endptr;
        const char *page;
        const char *psrc;
        Size        npagebytes;

        /*
         * Calculate the end pointer we expect in the xlblocks array if the
         * correct page is present.
         */
        expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);

        /*
         * First verification step: check that the correct page is present in
         * the WAL buffers.
         */
        endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
        if (expectedEndPtr != endptr)
            break;

        /*
         * The correct page is present (or was at the time the endptr was
         * read; must re-verify later). Calculate pointer to source data and
         * determine how much data to read from this page.
         */
        page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
        psrc = page + offset;
        npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);

        /*
         * Ensure that the data copy and the first verification step are not
         * reordered.
         */
        pg_read_barrier();

        /* data copy */
        memcpy(pdst, psrc, npagebytes);

        /*
         * Ensure that the data copy and the second verification step are not
         * reordered.
         */
        pg_read_barrier();

        /*
         * Second verification step: check that the page we read from wasn't
         * evicted while we were copying the data.
         */
        endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
        if (expectedEndPtr != endptr)
            break;

        /* Only now is the copied chunk known to be good; account for it. */
        pdst += npagebytes;
        recptr += npagebytes;
        nbytes -= npagebytes;
    }

    Assert(pdst - dstbuf <= count);

    return pdst - dstbuf;
}
#define Min(x, y)
Definition: c.h:975
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6708

References Assert(), ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4378 of file xlog.c.

4379{
4380 int fd;
4381 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4382
4383 /*
4384 * Initialize version and compatibility-check fields
4385 */
4388
4389 ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4391
4392 ControlFile->blcksz = BLCKSZ;
4393 ControlFile->relseg_size = RELSEG_SIZE;
4394 ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4396
4399
4402
4404
4405 /*
4406 * Initialize the default 'char' signedness.
4407 *
4408 * The signedness of the char type is implementation-defined. For instance
4409 * on x86 architecture CPUs, the char data type is typically treated as
4410 * signed by default, whereas on aarch architecture CPUs, it is typically
4411 * treated as unsigned by default. In v17 or earlier, we accidentally let
4412 * C implementation signedness affect persistent data. This led to
4413 * inconsistent results when comparing char data across different
4414 * platforms.
4415 *
4416 * This flag can be used as a hint to ensure consistent behavior for
4417 * pre-v18 data files that store data sorted by the 'char' type on disk,
4418 * especially in cross-platform replication scenarios.
4419 *
4420 * Newly created database clusters unconditionally set the default char
4421 * signedness to true. pg_upgrade changes this flag for clusters that were
4422 * initialized on signedness=false platforms. As a result,
4423 * signedness=false setting will become rare over time. If we had known
4424 * about this problem during the last development cycle that forced initdb
4425 * (v8.3), we would have made all clusters signed or all clusters
4426 * unsigned. Making pg_upgrade the only source of signedness=false will
4427 * cause the population of database clusters to converge toward that
4428 * retrospective ideal.
4429 */
4431
4432 /* Contents are protected with a CRC */
4436 offsetof(ControlFileData, crc));
4438
4439 /*
4440 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4441 * the excess over sizeof(ControlFileData). This reduces the odds of
4442 * premature-EOF errors when reading pg_control. We'll still fail when we
4443 * check the contents of the file, but hopefully with a more specific
4444 * error than "couldn't read pg_control".
4445 */
4446 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4447 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4448
4450 O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4451 if (fd < 0)
4452 ereport(PANIC,
4454 errmsg("could not create file \"%s\": %m",
4456
4457 errno = 0;
4458 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4460 {
4461 /* if write didn't set errno, assume problem is no disk space */
4462 if (errno == 0)
4463 errno = ENOSPC;
4464 ereport(PANIC,
4466 errmsg("could not write to file \"%s\": %m",
4468 }
4470
4471 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4472 if (pg_fsync(fd) != 0)
4473 ereport(PANIC,
4475 errmsg("could not fsync file \"%s\": %m",
4478
4479 if (close(fd) != 0)
4480 ereport(PANIC,
4482 errmsg("could not close file \"%s\": %m",
4484}
#define FLOAT8PASSBYVAL
Definition: c.h:606
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:256

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, FLOAT8PASSBYVAL, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

/*
 * Redo routine for records of the XLOG resource manager.
 *
 * record - decoded WAL record to replay
 *
 * The record type is taken from the info bits (with XLR_INFO_MASK removed)
 * and each type is handled by its own branch below.  Several record types
 * need no action here.  Problems that make further replay impossible are
 * reported with PANIC, or with ERROR for malformed full-page-image records.
 */
void
xlog_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    XLogRecPtr  lsn = record->EndRecPtr;

    /*
     * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
     * XLOG_FPI_FOR_HINT records.
     */
    Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
           !XLogRecHasAnyBlockRefs(record));

    if (info == XLOG_NEXTOID)
    {
        Oid         nextOid;

        /*
         * We used to try to take the maximum of TransamVariables->nextOid and
         * the recorded nextOid, but that fails if the OID counter wraps
         * around.  Since no OID allocation should be happening during replay
         * anyway, better to just believe the record exactly.  We still take
         * OidGenLock while setting the variable, just in case.
         */
        memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
        LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
        TransamVariables->nextOid = nextOid;
        TransamVariables->oidCount = 0;
        LWLockRelease(OidGenLock);
    }
    else if (info == XLOG_CHECKPOINT_SHUTDOWN)
    {
        CheckPoint  checkPoint;
        TimeLineID  replayTLI;

        memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
        /* In a SHUTDOWN checkpoint, believe the counters exactly */
        LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
        TransamVariables->nextXid = checkPoint.nextXid;
        LWLockRelease(XidGenLock);
        LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
        TransamVariables->nextOid = checkPoint.nextOid;
        TransamVariables->oidCount = 0;
        LWLockRelease(OidGenLock);
        MultiXactSetNextMXact(checkPoint.nextMulti,
                              checkPoint.nextMultiOffset);

        MultiXactAdvanceOldest(checkPoint.oldestMulti,
                               checkPoint.oldestMultiDB);

        /*
         * No need to set oldestClogXid here as well; it'll be set when we
         * redo an xl_clog_truncate if it changed since initialization.
         */
        SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);

        /*
         * If we see a shutdown checkpoint while waiting for an end-of-backup
         * record, the backup was canceled and the end-of-backup record will
         * never arrive.
         */
        if (ArchiveRecoveryRequested &&
            !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
            XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
            ereport(PANIC,
                    (errmsg("online backup was canceled, recovery cannot continue")));

        /*
         * If we see a shutdown checkpoint, we know that nothing was running
         * on the primary at this point. So fake-up an empty running-xacts
         * record and use that here and now. Recover additional standby state
         * for prepared transactions.
         */
        if (standbyState >= STANDBY_INITIALIZED)
        {
            TransactionId *xids;
            int         nxids;
            TransactionId oldestActiveXID;
            TransactionId latestCompletedXid;
            RunningTransactionsData running;

            oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);

            /* Update pg_subtrans entries for any prepared transactions */
            StandbyRecoverPreparedTransactions();

            /*
             * Construct a RunningTransactions snapshot representing a shut
             * down server, with only prepared transactions still alive. We're
             * never overflowed at this point because all subxids are listed
             * with their parent prepared transactions.
             */
            running.xcnt = nxids;
            running.subxcnt = 0;
            running.subxid_status = SUBXIDS_IN_SUBTRANS;
            running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
            running.oldestRunningXid = oldestActiveXID;
            latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
            TransactionIdRetreat(latestCompletedXid);
            Assert(TransactionIdIsNormal(latestCompletedXid));
            running.latestCompletedXid = latestCompletedXid;
            running.xids = xids;

            ProcArrayApplyRecoveryInfo(&running);
        }

        /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
        ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
        LWLockRelease(ControlFileLock);

        /* Update shared-memory copy of checkpoint XID/epoch */
        SpinLockAcquire(&XLogCtl->info_lck);
        XLogCtl->ckptFullXid = checkPoint.nextXid;
        SpinLockRelease(&XLogCtl->info_lck);

        /*
         * We should've already switched to the new TLI before replaying this
         * record.
         */
        (void) GetCurrentReplayRecPtr(&replayTLI);
        if (checkPoint.ThisTimeLineID != replayTLI)
            ereport(PANIC,
                    (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
                            checkPoint.ThisTimeLineID, replayTLI)));

        RecoveryRestartPoint(&checkPoint, record);
    }
    else if (info == XLOG_CHECKPOINT_ONLINE)
    {
        CheckPoint  checkPoint;
        TimeLineID  replayTLI;

        memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
        /* In an ONLINE checkpoint, treat the XID counter as a minimum */
        LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
        if (FullTransactionIdPrecedes(TransamVariables->nextXid,
                                      checkPoint.nextXid))
            TransamVariables->nextXid = checkPoint.nextXid;
        LWLockRelease(XidGenLock);

        /*
         * We ignore the nextOid counter in an ONLINE checkpoint, preferring
         * to track OID assignment through XLOG_NEXTOID records.  The nextOid
         * counter is from the start of the checkpoint and might well be stale
         * compared to later XLOG_NEXTOID records.  We could try to take the
         * maximum of the nextOid counter and our latest value, but since
         * there's no particular guarantee about the speed with which the OID
         * counter wraps around, that's a risky thing to do.  In any case,
         * users of the nextOid counter are required to avoid assignment of
         * duplicates, so that a somewhat out-of-date value should be safe.
         */

        /* Handle multixact */
        MultiXactAdvanceNextMXact(checkPoint.nextMulti,
                                  checkPoint.nextMultiOffset);

        /*
         * NB: This may perform multixact truncation when replaying WAL
         * generated by an older primary.
         */
        MultiXactAdvanceOldest(checkPoint.oldestMulti,
                               checkPoint.oldestMultiDB);
        if (TransactionIdPrecedes(TransamVariables->oldestXid,
                                  checkPoint.oldestXid))
            SetTransactionIdLimit(checkPoint.oldestXid,
                                  checkPoint.oldestXidDB);
        /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
        ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
        LWLockRelease(ControlFileLock);

        /* Update shared-memory copy of checkpoint XID/epoch */
        SpinLockAcquire(&XLogCtl->info_lck);
        XLogCtl->ckptFullXid = checkPoint.nextXid;
        SpinLockRelease(&XLogCtl->info_lck);

        /* TLI should not change in an on-line checkpoint */
        (void) GetCurrentReplayRecPtr(&replayTLI);
        if (checkPoint.ThisTimeLineID != replayTLI)
            ereport(PANIC,
                    (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
                            checkPoint.ThisTimeLineID, replayTLI)));

        RecoveryRestartPoint(&checkPoint, record);
    }
    else if (info == XLOG_OVERWRITE_CONTRECORD)
    {
        /* nothing to do here, handled in xlogrecovery_redo() */
    }
    else if (info == XLOG_END_OF_RECOVERY)
    {
        xl_end_of_recovery xlrec;
        TimeLineID  replayTLI;

        memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));

        /*
         * For Hot Standby, we could treat this like a Shutdown Checkpoint,
         * but this case is rarer and harder to test, so the benefit doesn't
         * outweigh the potential extra cost of maintenance.
         */

        /*
         * We should've already switched to the new TLI before replaying this
         * record.
         */
        (void) GetCurrentReplayRecPtr(&replayTLI);
        if (xlrec.ThisTimeLineID != replayTLI)
            ereport(PANIC,
                    (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
                            xlrec.ThisTimeLineID, replayTLI)));
    }
    else if (info == XLOG_NOOP)
    {
        /* nothing to do here */
    }
    else if (info == XLOG_SWITCH)
    {
        /* nothing to do here */
    }
    else if (info == XLOG_RESTORE_POINT)
    {
        /* nothing to do here, handled in xlogrecovery.c */
    }
    else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
    {
        /*
         * XLOG_FPI records contain nothing else but one or more block
         * references. Every block reference must include a full-page image
         * even if full_page_writes was disabled when the record was generated
         * - otherwise there would be no point in this record.
         *
         * XLOG_FPI_FOR_HINT records are generated when a page needs to be
         * WAL-logged because of a hint bit update. They are only generated
         * when checksums and/or wal_log_hints are enabled. They may include
         * no full-page images if full_page_writes was disabled when they were
         * generated. In this case there is nothing to do here.
         *
         * No recovery conflicts are generated by these generic records - if a
         * resource manager needs to generate conflicts, it has to define a
         * separate WAL record type and redo routine.
         */
        for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
        {
            Buffer      buffer;

            if (!XLogRecHasBlockImage(record, block_id))
            {
                if (info == XLOG_FPI)
                    elog(ERROR, "XLOG_FPI record did not contain a full-page image");
                continue;
            }

            if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
                elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
            UnlockReleaseBuffer(buffer);
        }
    }
    else if (info == XLOG_BACKUP_END)
    {
        /* nothing to do here, handled in xlogrecovery_redo() */
    }
    else if (info == XLOG_PARAMETER_CHANGE)
    {
        xl_parameter_change xlrec;

        /* Update our copy of the parameters in pg_control */
        memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));

        /*
         * Invalidate logical slots if we are in hot standby and the primary
         * does not have a WAL level sufficient for logical decoding. No need
         * to search for potentially conflicting logical slots if standby is
         * running with wal_level lower than logical, because in that case, we
         * would have either disallowed creation of logical slots or
         * invalidated existing ones.
         */
        if (InRecovery && InHotStandby &&
            xlrec.wal_level < WAL_LEVEL_LOGICAL &&
            wal_level >= WAL_LEVEL_LOGICAL)
            InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_LEVEL,
                                               0, InvalidOid,
                                               InvalidTransactionId);

        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
        ControlFile->MaxConnections = xlrec.MaxConnections;
        ControlFile->max_worker_processes = xlrec.max_worker_processes;
        ControlFile->max_wal_senders = xlrec.max_wal_senders;
        ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
        ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
        ControlFile->wal_level = xlrec.wal_level;
        ControlFile->wal_log_hints = xlrec.wal_log_hints;

        /*
         * Update minRecoveryPoint to ensure that if recovery is aborted, we
         * recover back up to this point before allowing hot standby again.
         * This is important if the max_* settings are decreased, to ensure
         * you don't run queries against the WAL preceding the change. The
         * local copies cannot be updated as long as crash recovery is
         * happening and we expect all the WAL to be replayed.
         */
        if (InArchiveRecovery)
        {
            LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
            LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
        }
        if (LocalMinRecoveryPoint != InvalidXLogRecPtr && LocalMinRecoveryPoint < lsn)
        {
            TimeLineID  replayTLI;

            (void) GetCurrentReplayRecPtr(&replayTLI);
            ControlFile->minRecoveryPoint = lsn;
            ControlFile->minRecoveryPointTLI = replayTLI;
        }

        CommitTsParameterChange(xlrec.track_commit_timestamp,
                                ControlFile->track_commit_timestamp);
        ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;

        UpdateControlFile();
        LWLockRelease(ControlFileLock);

        /* Check to see if any parameter change gives a problem on recovery */
        CheckRequiredParameterValues();
    }
    else if (info == XLOG_FPW_CHANGE)
    {
        bool        fpw;

        memcpy(&fpw, XLogRecGetData(record), sizeof(bool));

        /*
         * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
         * do_pg_backup_start() and do_pg_backup_stop() can check whether
         * full_page_writes has been disabled during online backup.
         */
        if (!fpw)
        {
            SpinLockAcquire(&XLogCtl->info_lck);
            if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
                XLogCtl->lastFpwDisableRecPtr = record->ReadRecPtr;
            SpinLockRelease(&XLogCtl->info_lck);
        }

        /* Keep track of full_page_writes */
        lastFullPageWrites = fpw;
    }
    else if (info == XLOG_CHECKPOINT_REDO)
    {
        /* nothing to do here, just for informational purposes */
    }
}
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5320
uint8_t uint8
Definition: c.h:500
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2536
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2511
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:59
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7738
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:303
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert(), ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsInvalid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 3111 of file xlog.c.

3112{
3113 XLogwrtRqst WriteRqst;
3114 bool flexible = true;
3115 static TimestampTz lastflush;
3117 int flushblocks;
3118 TimeLineID insertTLI;
3119
3120 /* XLOG doesn't need flushing during recovery */
3121 if (RecoveryInProgress())
3122 return false;
3123
3124 /*
3125 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3126 * so we can read it without a lock.
3127 */
3128 insertTLI = XLogCtl->InsertTimeLineID;
3129
3130 /* read updated LogwrtRqst */
3132 WriteRqst = XLogCtl->LogwrtRqst;
3134
3135 /* back off to last completed page boundary */
3136 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3137
3138 /* if we have already flushed that far, consider async commit records */
3140 if (WriteRqst.Write <= LogwrtResult.Flush)
3141 {
3143 WriteRqst.Write = XLogCtl->asyncXactLSN;
3145 flexible = false; /* ensure it all gets written */
3146 }
3147
3148 /*
3149 * If already known flushed, we're done. Just need to check if we are
3150 * holding an open file handle to a logfile that's no longer in use,
3151 * preventing the file from being deleted.
3152 */
3153 if (WriteRqst.Write <= LogwrtResult.Flush)
3154 {
3155 if (openLogFile >= 0)
3156 {
3159 {
3160 XLogFileClose();
3161 }
3162 }
3163 return false;
3164 }
3165
3166 /*
3167 * Determine how far to flush WAL, based on the wal_writer_delay and
3168 * wal_writer_flush_after GUCs.
3169 *
3170 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3171 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3172 * logic is the same in both places if you change this.
3173 */
3175 flushblocks =
3176 WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3177
3178 if (WalWriterFlushAfter == 0 || lastflush == 0)
3179 {
3180 /* first call, or block based limits disabled */
3181 WriteRqst.Flush = WriteRqst.Write;
3182 lastflush = now;
3183 }
3184 else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3185 {
3186 /*
3187 * Flush the writes at least every WalWriterDelay ms. This is
3188 * important to bound the amount of time it takes for an asynchronous
3189 * commit to hit disk.
3190 */
3191 WriteRqst.Flush = WriteRqst.Write;
3192 lastflush = now;
3193 }
3194 else if (flushblocks >= WalWriterFlushAfter)
3195 {
3196 /* exceeded wal_writer_flush_after blocks, flush */
3197 WriteRqst.Flush = WriteRqst.Write;
3198 lastflush = now;
3199 }
3200 else
3201 {
3202 /* no flushing, this time round */
3203 WriteRqst.Flush = 0;
3204 }
3205
3206#ifdef WAL_DEBUG
3207 if (XLOG_DEBUG)
3208 elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3209 LSN_FORMAT_ARGS(WriteRqst.Write),
3210 LSN_FORMAT_ARGS(WriteRqst.Flush),
3213#endif
3214
3216
3217 /* now wait for any in-progress insertions to finish and get write lock */
3219 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3221 if (WriteRqst.Write > LogwrtResult.Write ||
3222 WriteRqst.Flush > LogwrtResult.Flush)
3223 {
3224 XLogWrite(WriteRqst, insertTLI, flexible);
3225 }
3226 LWLockRelease(WALWriteLock);
3227
3229
3230 /* wake up walsenders now that we've released heavily contended locks */
3232
3233 /*
3234 * Great, done. To take some work off the critical path, try to initialize
3235 * as many of the no-longer-needed WAL buffers for future use as we can.
3236 */
3237 AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3238
3239 /*
3240 * If we determined that we need to write data, but somebody else
3241 * wrote/flushed already, it should be considered as being active, to
3242 * avoid hibernating too early.
3243 */
3244 return true;
3245}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
XLogRecPtr asyncXactLSN
Definition: xlog.c:453
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:71
int WalWriterDelay
Definition: walwriter.c:70
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

/*
 * Translate a "usable byte position" (a count of WAL bytes that excludes
 * page headers) into an XLogRecPtr suitable for marking the END of a record.
 *
 * Unlike XLogBytePosToRecPtr(), a position that falls exactly on a page
 * boundary maps to the boundary itself: no page header is added in that
 * case.
 */
static XLogRecPtr
XLogBytePosToEndRecPtr(uint64 bytepos)
{
    uint64      segno = bytepos / UsableBytesInSegment;
    uint64      rest = bytepos % UsableBytesInSegment;
    uint32      offset;
    XLogRecPtr  endptr;

    if (rest < XLOG_BLCKSZ - SizeOfXLogLongPHD)
    {
        /* still on the segment's first page, which has the long header */
        offset = (rest == 0) ? 0 : rest + SizeOfXLogLongPHD;
    }
    else
    {
        uint64      pageno;

        /* step over the first page, then count whole pages after it */
        rest -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
        pageno = rest / UsableBytesInPage;
        rest %= UsableBytesInPage;

        offset = XLOG_BLCKSZ;
        offset += pageno * XLOG_BLCKSZ;

        /* only a position inside a page needs that page's short header */
        if (rest != 0)
            offset += rest + SizeOfXLogShortPHD;
    }

    XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, endptr);

    return endptr;
}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

/*
 * Translate a "usable byte position" (a count of WAL bytes that excludes
 * page headers) into an XLogRecPtr.
 *
 * The header of the page the position lands on is always added: the long
 * header on the first page of a segment, the short header on any later
 * page.
 */
static XLogRecPtr
XLogBytePosToRecPtr(uint64 bytepos)
{
    uint64      segno = bytepos / UsableBytesInSegment;
    uint64      rest = bytepos % UsableBytesInSegment;
    uint32      offset;
    XLogRecPtr  recptr;

    if (rest >= XLOG_BLCKSZ - SizeOfXLogLongPHD)
    {
        uint64      pageno;

        /* step over the first page, then count whole pages after it */
        rest -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
        pageno = rest / UsableBytesInPage;
        rest %= UsableBytesInPage;

        offset = XLOG_BLCKSZ;
        offset += pageno * XLOG_BLCKSZ + rest + SizeOfXLogShortPHD;
    }
    else
    {
        /* still on the segment's first page, which has the long header */
        offset = rest + SizeOfXLogLongPHD;
    }

    XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, recptr);

    return recptr;
}

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2423 of file xlog.c.

/*
 * Return true when new_segno is at least (CheckPointSegments - 1) segments
 * past old_segno, and false otherwise.
 */
2424{
2425 XLogSegNo old_segno;
2426
/*
 * NOTE(review): listing line 2427 is not present in this extract, so no
 * visible line assigns old_segno before it is read below.  The reference
 * list for this function names XLByteToSeg, RedoRecPtr and
 * wal_segment_size, none of which occur in the visible lines; presumably
 * the missing line derives old_segno from RedoRecPtr -- confirm against
 * xlog.c.
 */
2428
2429 if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2430 return true;
2431 return false;
2432}

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4800 of file xlog.c.

/*
 * Choose a WAL buffer count: NBuffers / 32, limited to the number of
 * XLOG_BLCKSZ blocks in one WAL segment, and then raised to at least 8.
 * The lower bound is applied last, so it wins if the two limits conflict.
 */
4801{
4802 int xbuffers;
4803
4804 xbuffers = NBuffers / 32;
4805 if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4806 xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4807 if (xbuffers < 8)
4808 xbuffers = 8;
4809 return xbuffers;
4810}

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3778 of file xlog.c.

/*
 * Close the currently open WAL segment file (openLogFile), which must be
 * open on entry, and mark it closed by setting openLogFile to -1.
 *
 * Where posix_fadvise with POSIX_FADV_DONTNEED is available, the OS is
 * first advised to drop cached pages, but only when XLogIsNeeded() is false
 * and IO_DIRECT_WAL is not set in io_direct_flags.  A failing close() is
 * reported at PANIC level.
 */
3779{
3780 Assert(openLogFile >= 0);
3781
3782 /*
3783 * WAL segment files will not be re-read in normal operation, so we advise
3784 * the OS to release any cached pages. But do not do so if WAL archiving
3785 * or streaming is active, because archiver and walsender process could
3786 * use the cache to read the WAL segment.
3787 */
3788#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3789 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3790 (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3791#endif
3792
3793 if (close(openLogFile) != 0)
3794 {
3795 char xlogfname[MAXFNAMELEN];
3796 int save_errno = errno;
3797
/*
 * NOTE(review): listing line 3798 is not present in this extract.  No
 * visible line fills xlogfname before it is passed to errmsg(); the
 * reference list names XLogFileName(), openLogTLI and openLogSegNo, so the
 * name is presumably built here.  errno is saved before and restored after
 * this point so that %m still reports the close() failure -- confirm
 * against xlog.c.
 */
3799 errno = save_errno;
3800 ereport(PANIC,
/*
 * NOTE(review): listing line 3801 is not present in this extract;
 * presumably the errcode_for_file_access() argument named in the reference
 * list -- confirm.
 */
3802 errmsg("could not close file \"%s\": %m", xlogfname)));
3803 }
3804
3805 openLogFile = -1;
/*
 * NOTE(review): listing line 3806 is not present in this extract; the
 * reference list names ReleaseExternalFD(), which does not occur in the
 * visible lines, so it is presumably called here -- confirm.
 */
3807}
void ReleaseExternalFD(void)
Definition: fd.c:1241

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3557 of file xlog.c.

/*
 * Copy a WAL segment into a new segment file, zero-filling the tail.
 *
 * destTLI, destsegno: timeline and segment number passed to
 *		InstallXLogFileSegment() for the finished copy.
 * srcTLI, srcsegno: timeline and segment number of the file read from.
 * upto: number of leading bytes taken from the source file; the rest of
 *		the new segment is meant to be written as zeros.
 *
 * The data is written block by block to XLOGDIR/xlogtemp.<pid>, fsynced,
 * and then handed to InstallXLogFileSegment().  Open, read, write and close
 * failures are reported with ereport(ERROR).
 *
 * NOTE(review): listing lines 3558-3559, 3575, 3589, 3620, 3625, 3629,
 * 3645, 3648, 3653-3654, 3656, 3660 and 3665 are not present in this
 * extract.  Judging by the surrounding ereport() calls and the reference
 * list (errcode_for_file_access, errcode, ERRCODE_DATA_CORRUPTED,
 * data_sync_elevel, pgstat_report_wait_end), they presumably hold the rest
 * of the signature, the error-code arguments, the head of the fsync
 * ereport() and the wait-end calls -- confirm against xlog.c.
 */
3560{
3561 char path[MAXPGPATH];
3562 char tmppath[MAXPGPATH];
3563 PGAlignedXLogBlock buffer;
3564 int srcfd;
3565 int fd;
3566 int nbytes;
3567
3568 /*
3569 * Open the source file
3570 */
3571 XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3572 srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3573 if (srcfd < 0)
3574 ereport(ERROR,
3576 errmsg("could not open file \"%s\": %m", path)));
3577
3578 /*
3579 * Copy into a temp file name.
3580 */
3581 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3582
3583 unlink(tmppath);
3584
3585 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3586 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3587 if (fd < 0)
3588 ereport(ERROR,
3590 errmsg("could not create file \"%s\": %m", tmppath)));
3591
3592 /*
3593 * Do the data copying.
3594 */
3595 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3596 {
3597 int nread;
3598
3599 nread = upto - nbytes;
3600
3601 /*
3602 * The part that is not read from the source file is filled with
3603 * zeros.
3604 */
/*
 * nread is a signed int and goes negative once nbytes has passed upto.
 * Compare it with a signed block size: against the bare size_t a negative
 * nread is converted to a huge unsigned value, the memset is skipped, and
 * every block beyond upto repeats the previous block instead of being
 * zero.
 */
3605 if (nread < (int) sizeof(buffer))
3606 memset(buffer.data, 0, sizeof(buffer));
3607
3608 if (nread > 0)
3609 {
3610 int r;
3611
3612 if (nread > sizeof(buffer))
3613 nread = sizeof(buffer);
3614 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3615 r = read(srcfd, buffer.data, nread);
3616 if (r != nread)
3617 {
3618 if (r < 0)
3619 ereport(ERROR,
3621 errmsg("could not read file \"%s\": %m",
3622 path)));
3623 else
3624 ereport(ERROR,
3626 errmsg("could not read file \"%s\": read %d of %zu",
3627 path, r, (Size) nread)));
3628 }
3630 }
3631 errno = 0;
3632 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3633 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3634 {
3635 int save_errno = errno;
3636
3637 /*
3638 * If we fail to make the file, delete it to release disk space
3639 */
3640 unlink(tmppath);
3641 /* if write didn't set errno, assume problem is no disk space */
3642 errno = save_errno ? save_errno : ENOSPC;
3643
3644 ereport(ERROR,
3646 errmsg("could not write to file \"%s\": %m", tmppath)));
3647 }
3649 }
3650
3651 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3652 if (pg_fsync(fd) != 0)
3655 errmsg("could not fsync file \"%s\": %m", tmppath)));
3657
3658 if (CloseTransientFile(fd) != 0)
3659 ereport(ERROR,
3661 errmsg("could not close file \"%s\": %m", tmppath)));
3662
3663 if (CloseTransientFile(srcfd) != 0)
3664 ereport(ERROR,
3666 errmsg("could not close file \"%s\": %m", path)));
3667
3668 /*
3669 * Now move the segment into place with its final name.
3670 */
3671 if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3672 elog(ERROR, "InstallXLogFileSegment should not have failed");
3673}
int CloseTransientFile(int fd)
Definition: fd.c:2871
int data_sync_elevel(int elevel)
Definition: fd.c:4001
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
char data[XLOG_BLCKSZ]
Definition: c.h:1119

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3519 of file xlog.c.

/*
 * Return an open file descriptor for WAL segment logsegno on timeline
 * logtli (which must be nonzero).
 *
 * XLogFileInitInternal() is tried first; a non-negative result is returned
 * directly.  Otherwise the path it filled in is opened with
 * BasicOpenFile(), and a failure there is reported with ereport(ERROR).
 * The flag XLogFileInitInternal() reports through its third argument is
 * ignored here.
 */
3520{
3521 bool ignore_added;
3522 char path[MAXPGPATH];
3523 int fd;
3524
3525 Assert(logtli != 0);
3526
3527 fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3528 if (fd >= 0)
3529 return fd;
3530
3531 /* Now open original target segment (might not be file I just made) */
3532 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
/*
 * NOTE(review): listing line 3533 is not present in this extract, so the
 * call above is left unterminated.  The reference list names
 * get_sync_bit() and wal_sync_method, which presumably supply the last
 * flag -- confirm against xlog.c.
 */
3534 if (fd < 0)
3535 ereport(ERROR,
/*
 * NOTE(review): listing line 3536 is not present in this extract;
 * presumably the errcode_for_file_access() argument named in the reference
 * list -- confirm.
 */
3537 errmsg("could not open file \"%s\": %m", path)));
3538 return fd;
3539}
#define O_CLOEXEC
Definition: win32_port.h:349

References Assert(), BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3331 of file xlog.c.

/*
 * Open WAL segment logsegno of timeline logtli if it already exists,
 * otherwise create and install a new segment file.
 *
 * path receives the segment's path from XLogFilePath().  *added is set to
 * false on entry and to true only if InstallXLogFileSegment() reports
 * success for a newly created file.
 *
 * Returns the descriptor from BasicOpenFile() when the existing file could
 * be opened.  Returns -1 after the creation path, whether or not the new
 * file was installed; no descriptor is kept open in that case.  Failures
 * to open (other than ENOENT), create, write, fsync or close are reported
 * with ereport(ERROR).
 *
 * The new file is built as XLOGDIR/xlogtemp.<pid>, then either zero-filled
 * (wal_init_zero) or extended by writing a single byte at offset
 * wal_segment_size - 1, then fsynced and closed before installation.
 *
 * NOTE(review): listing lines 3332, 3351, 3356, 3374, 3381, 3385, 3402,
 * 3420, 3426, 3428, 3442, 3447, 3456, 3459, 3461 and 3466 are not present
 * in this extract.  The reference list names get_sync_bit,
 * io_direct_flags, IO_DIRECT_WAL_INIT, pgstat_prepare_io_time,
 * pg_pwrite_zeros, pgstat_count_io_op_time, pgstat_report_wait_end and
 * errcode_for_file_access, none of which occur in the visible lines;
 * presumably they belong in those gaps -- confirm against xlog.c.
 */
3333{
3334 char tmppath[MAXPGPATH];
3335 XLogSegNo installed_segno;
3336 XLogSegNo max_segno;
3337 int fd;
3338 int save_errno;
3339 int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3340 instr_time io_start;
3341
3342 Assert(logtli != 0);
3343
3344 XLogFilePath(path, logtli, logsegno, wal_segment_size);
3345
3346 /*
3347 * Try to use existent file (checkpoint maker may have created it already)
3348 */
3349 *added = false;
3350 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3352 if (fd < 0)
3353 {
3354 if (errno != ENOENT)
3355 ereport(ERROR,
3357 errmsg("could not open file \"%s\": %m", path)));
3358 }
3359 else
3360 return fd;
3361
3362 /*
3363 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3364 * another process is doing the same thing. If so, we will end up
3365 * pre-creating an extra log segment. That seems OK, and better than
3366 * holding the lock throughout this lengthy process.
3367 */
3368 elog(DEBUG2, "creating and filling new WAL file");
3369
3370 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3371
3372 unlink(tmppath);
3373
/*
 * NOTE(review): the condition guarding the next statement (listing line
 * 3374) is not present in this extract; presumably a test of
 * io_direct_flags against IO_DIRECT_WAL_INIT -- confirm.
 */
3375 open_flags |= PG_O_DIRECT;
3376
3377 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3378 fd = BasicOpenFile(tmppath, open_flags);
3379 if (fd < 0)
3380 ereport(ERROR,
3382 errmsg("could not create file \"%s\": %m", tmppath)));
3383
3384 /* Measure I/O timing when initializing segment */
3386
3387 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3388 save_errno = 0;
3389 if (wal_init_zero)
3390 {
3391 ssize_t rc;
3392
3393 /*
3394 * Zero-fill the file. With this setting, we do this the hard way to
3395 * ensure that all the file space has really been allocated. On
3396 * platforms that allow "holes" in files, just seeking to the end
3397 * doesn't allocate intermediate space. This way, we know that we
3398 * have all the space and (after the fsync below) that all the
3399 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3400 * O_DSYNC will be sufficient to sync future writes to the log file.
3401 */
/*
 * NOTE(review): listing line 3402 is not present in this extract, so no
 * visible line assigns rc before it is tested; presumably the
 * pg_pwrite_zeros() call named in the reference list -- confirm.
 */
3403
3404 if (rc < 0)
3405 save_errno = errno;
3406 }
3407 else
3408 {
3409 /*
3410 * Otherwise, seeking to the end and writing a solitary byte is
3411 * enough.
3412 */
3413 errno = 0;
3414 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3415 {
3416 /* if write didn't set errno, assume no disk space */
3417 save_errno = errno ? errno : ENOSPC;
3418 }
3419 }
3421
3422 /*
3423 * A full segment worth of data is written when using wal_init_zero. One
3424 * byte is written when not using it.
3425 */
3427 io_start, 1,
3429
3430 if (save_errno)
3431 {
3432 /*
3433 * If we fail to make the file, delete it to release disk space
3434 */
3435 unlink(tmppath);
3436
3437 close(fd);
3438
3439 errno = save_errno;
3440
3441 ereport(ERROR,
3443 errmsg("could not write to file \"%s\": %m", tmppath)));
3444 }
3445
3446 /* Measure I/O timing when flushing segment */
3448
3449 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3450 if (pg_fsync(fd) != 0)
3451 {
3452 save_errno = errno;
3453 close(fd);
3454 errno = save_errno;
3455 ereport(ERROR,
3457 errmsg("could not fsync file \"%s\": %m", tmppath)));
3458 }
3460
3462 IOOP_FSYNC, io_start, 1, 0);
3463
3464 if (close(fd) != 0)
3465 ereport(ERROR,
3467 errmsg("could not close file \"%s\": %m", tmppath)));
3468
3469 /*
3470 * Now move the segment into place with its final name. Cope with
3471 * possibility that someone else has created the file while we were
3472 * filling ours: if so, use ours to pre-create a future log segment.
3473 */
3474 installed_segno = logsegno;
3475
3476 /*
3477 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3478 * that was a constant, but that was always a bit dubious: normally, at a
3479 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3480 * here, it was the offset from the insert location. We can't do the
3481 * normal XLOGfileslop calculation here because we don't have access to
3482 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3483 * CheckPointSegments.
3484 */
3485 max_segno = logsegno + CheckPointSegments;
3486 if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3487 logtli))
3488 {
3489 *added = true;
3490 elog(DEBUG2, "done creating and filling new WAL file");
3491 }
3492 else
3493 {
3494 /*
3495 * No need for any more future segments, or InstallXLogFileSegment()
3496 * failed to rename the file into place. If the rename failed, a
3497 * caller opening the file may fail.
3498 */
3499 unlink(tmppath);
3500 elog(DEBUG2, "abandoned new WAL file");
3501 }
3502
3503 return -1;
3504}
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:709
@ IOCONTEXT_INIT
Definition: pgstat.h:285
@ IOOP_WRITE
Definition: pgstat.h:313
#define pg_pwrite
Definition: port.h:227
bool wal_init_zero
Definition: xlog.c:127

References Assert(), BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3757 of file xlog.c.

/*
 * Open the WAL segment file for segno on timeline tli and return its file
 * descriptor.  The path comes from XLogFilePath(); a failed open is
 * reported at PANIC level.
 */
3758{
3759 char path[MAXPGPATH];
3760 int fd;
3761
3762 XLogFilePath(path, tli, segno, wal_segment_size);
3763
3764 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
/*
 * NOTE(review): listing line 3765 is not present in this extract, so the
 * call above is left unterminated.  The reference list names
 * get_sync_bit() and wal_sync_method, which presumably supply the last
 * flag -- confirm against xlog.c.
 */
3766 if (fd < 0)
3767 ereport(PANIC,
/*
 * NOTE(review): listing line 3768 is not present in this extract;
 * presumably the errcode_for_file_access() argument named in the reference
 * list -- confirm.
 */
3769 errmsg("could not open file \"%s\": %m", path)));
3770
3771 return fd;
3772}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2373 of file xlog.c.

/*
 * Compute the segment number returned as recycleSegNo for a checkpoint
 * whose redo pointer is lastredoptr.
 *
 * The result is the ceiling of (lastredoptr + distance) divided by a value
 * on a line missing from this extract, clamped to the range
 * [minSegNo, maxSegNo].  The upper clamp is applied last.
 *
 * NOTE(review): listing lines 2386, 2388, 2398 and 2403 are not present in
 * this extract.  As shown, the minSegNo and maxSegNo expressions are
 * unterminated, distance is never assigned before being scaled, and the
 * divisor of the ceil() expression is missing.  The reference list names
 * ConvertToXSegs, min_wal_size_mb, max_wal_size_mb,
 * CheckPointCompletionTarget and CheckPointDistanceEstimate, which
 * presumably appear on those lines -- confirm against xlog.c.
 */
2374{
2375 XLogSegNo minSegNo;
2376 XLogSegNo maxSegNo;
2377 double distance;
2378 XLogSegNo recycleSegNo;
2379
2380 /*
2381 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2382 * correspond to. Always recycle enough segments to meet the minimum, and
2383 * remove enough segments to stay below the maximum.
2384 */
2385 minSegNo = lastredoptr / wal_segment_size +
2387 maxSegNo = lastredoptr / wal_segment_size +
2389
2390 /*
2391 * Between those limits, recycle enough segments to get us through to the
2392 * estimated end of next checkpoint.
2393 *
2394 * To estimate where the next checkpoint will finish, assume that the
2395 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2396 * every checkpoint.
2397 */
2399 /* add 10% for good measure. */
2400 distance *= 1.10;
2401
2402 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2404
2405 if (recycleSegNo < minSegNo)
2406 recycleSegNo = minSegNo;
2407 if (recycleSegNo > maxSegNo)
2408 recycleSegNo = maxSegNo;
2409
2410 return recycleSegNo;
2411}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2923 of file xlog.c.

/*
 * Make sure WAL is flushed at least as far as record.
 *
 * If XLogInsertAllowed() is false, nothing is flushed; instead
 * UpdateMinRecoveryPoint(record, false) is called and the function
 * returns.  If record is already at or below LogwrtResult.Flush, it
 * returns immediately.
 *
 * Otherwise it loops: wait for in-flight insertions up to the request
 * point, try to take WALWriteLock with LWLockAcquireOrWait(), and recheck
 * LogwrtResult.Flush whenever the lock was not obtained or has just been
 * obtained.  Once the lock is held and the request is still unsatisfied,
 * it optionally delays (CommitDelay), then calls XLogWrite() with both the
 * write and flush targets set to the insert position reported by
 * WaitXLogInsertionsToFinish(), and releases the lock.
 *
 * If LogwrtResult.Flush is still below record afterwards, an ERROR is
 * raised with elog().
 *
 * NOTE(review): listing lines 2950-2951, 2954, 2976, 2984, 2987, 3008,
 * 3025, 3027, 3053, 3056 and 3083 are not present in this extract.  The
 * reference list names START_CRIT_SECTION, END_CRIT_SECTION,
 * RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, info_lck,
 * MinimumActiveBackends, pg_usleep, WalSndWakeupProcessRequests and
 * RecoveryInProgress, none of which occur in the visible statements;
 * presumably they belong in those gaps -- confirm against xlog.c.
 */
2924{
2925 XLogRecPtr WriteRqstPtr;
2926 XLogwrtRqst WriteRqst;
2927 TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2928
2929 /*
2930 * During REDO, we are reading not writing WAL. Therefore, instead of
2931 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2932 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2933 * to act this way too, and because when it tries to write the
2934 * end-of-recovery checkpoint, it should indeed flush.
2935 */
2936 if (!XLogInsertAllowed())
2937 {
2938 UpdateMinRecoveryPoint(record, false);
2939 return;
2940 }
2941
2942 /* Quick exit if already known flushed */
2943 if (record <= LogwrtResult.Flush)
2944 return;
2945
2946#ifdef WAL_DEBUG
2947 if (XLOG_DEBUG)
2948 elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2949 LSN_FORMAT_ARGS(record),
2952#endif
2953
2955
2956 /*
2957 * Since fsync is usually a horribly expensive operation, we try to
2958 * piggyback as much data as we can on each fsync: if we see any more data
2959 * entered into the xlog buffer, we'll write and fsync that too, so that
2960 * the final value of LogwrtResult.Flush is as large as possible. This
2961 * gives us some chance of avoiding another fsync immediately after.
2962 */
2963
2964 /* initialize to given target; may increase below */
2965 WriteRqstPtr = record;
2966
2967 /*
2968 * Now wait until we get the write lock, or someone else does the flush
2969 * for us.
2970 */
2971 for (;;)
2972 {
2973 XLogRecPtr insertpos;
2974
2975 /* done already? */
2977 if (record <= LogwrtResult.Flush)
2978 break;
2979
2980 /*
2981 * Before actually performing the write, wait for all in-flight
2982 * insertions to the pages we're about to write to finish.
2983 */
2985 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2986 WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2988 insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2989
2990 /*
2991 * Try to get the write lock. If we can't get it immediately, wait
2992 * until it's released, and recheck if we still need to do the flush
2993 * or if the backend that held the lock did it for us already. This
2994 * helps to maintain a good rate of group committing when the system
2995 * is bottlenecked by the speed of fsyncing.
2996 */
2997 if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2998 {
2999 /*
3000 * The lock is now free, but we didn't acquire it yet. Before we
3001 * do, loop back to check if someone else flushed the record for
3002 * us already.
3003 */
3004 continue;
3005 }
3006
3007 /* Got the lock; recheck whether request is satisfied */
3009 if (record <= LogwrtResult.Flush)
3010 {
3011 LWLockRelease(WALWriteLock);
3012 break;
3013 }
3014
3015 /*
3016 * Sleep before flush! By adding a delay here, we may give further
3017 * backends the opportunity to join the backlog of group commit
3018 * followers; this can significantly improve transaction throughput,
3019 * at the risk of increasing transaction latency.
3020 *
3021 * We do not sleep if enableFsync is not turned on, nor if there are
3022 * fewer than CommitSiblings other backends with active transactions.
3023 */
3024 if (CommitDelay > 0 && enableFsync &&
3026 {
3028
3029 /*
3030 * Re-check how far we can now flush the WAL. It's generally not
3031 * safe to call WaitXLogInsertionsToFinish while holding
3032 * WALWriteLock, because an in-progress insertion might need to
3033 * also grab WALWriteLock to make progress. But we know that all
3034 * the insertions up to insertpos have already finished, because
3035 * that's what the earlier WaitXLogInsertionsToFinish() returned.
3036 * We're only calling it again to allow insertpos to be moved
3037 * further forward, not to actually wait for anyone.
3038 */
3039 insertpos = WaitXLogInsertionsToFinish(insertpos);
3040 }
3041
3042 /* try to write/flush later additions to XLOG as well */
3043 WriteRqst.Write = insertpos;
3044 WriteRqst.Flush = insertpos;
3045
3046 XLogWrite(WriteRqst, insertTLI, false);
3047
3048 LWLockRelease(WALWriteLock);
3049 /* done */
3050 break;
3051 }
3052
3054
3055 /* wake up walsenders now that we've released heavily contended locks */
3057
3058 /*
3059 * If we still haven't flushed to the request point then we have a
3060 * problem; most likely, the requested flush point is past end of XLOG.
3061 * This has been seen to occur when a disk page has a corrupted LSN.
3062 *
3063 * Formerly we treated this as a PANIC condition, but that hurts the
3064 * system's robustness rather than helping it: we do not want to take down
3065 * the whole system due to corruption on one data page. In particular, if
3066 * the bad page is encountered again during recovery then we would be
3067 * unable to restart the database at all! (This scenario actually
3068 * happened in the field several times with 7.1 releases.) As of 8.4, bad
3069 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
3070 * the only time we can reach here during recovery is while flushing the
3071 * end-of-recovery checkpoint record, and we don't expect that to have a
3072 * bad LSN.
3073 *
3074 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
3075 * since xact.c calls this routine inside a critical section. However,
3076 * calls from bufmgr.c are not within critical sections and so we will not
3077 * force a restart for a bad LSN on a data page.
3078 */
3079 if (LogwrtResult.Flush < record)
3080 elog(ERROR,
3081 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
3082 LSN_FORMAT_ARGS(record),
3084}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1410
bool MinimumActiveBackends(int min)
Definition: procarray.c:3546
int CommitDelay
Definition: xlog.c:132
int CommitSiblings
Definition: xlog.c:133
bool XLogInsertAllowed(void)
Definition: xlog.c:6577

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), finish_sync_worker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3897 of file xlog.c.

/*
 * Return a copy of XLogCtl->lastRemovedSegNo.
 *
 * NOTE(review): listing lines 3901 and 3903, on either side of the read,
 * are not present in this extract.  The reference list names
 * SpinLockAcquire, SpinLockRelease and info_lck, so the read is presumably
 * bracketed by that spinlock -- confirm against xlog.c.
 */
3898{
3899 XLogSegNo lastRemovedSegNo;
3900
3902 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3904
3905 return lastRemovedSegNo;
3906}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3913 of file xlog.c.

/*
 * Scan XLOGDIR and return the lowest segment number among the WAL segment
 * files that belong to timeline tli.
 *
 * Directory entries that fail IsXLogFileName() or whose parsed timeline
 * differs from tli are skipped.  oldest_segno starts at 0 and 0 also marks
 * that nothing has been recorded yet, so 0 is returned when no matching
 * file is found.
 */
3914{
3915 DIR *xldir;
3916 struct dirent *xlde;
3917 XLogSegNo oldest_segno = 0;
3918
3919 xldir = AllocateDir(XLOGDIR);
3920 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3921 {
3922 TimeLineID file_tli;
3923 XLogSegNo file_segno;
3924
3925 /* Ignore files that are not XLOG segments. */
3926 if (!IsXLogFileName(xlde->d_name))
3927 continue;
3928
3929 /* Parse filename to get TLI and segno. */
3930 XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
/*
 * NOTE(review): listing line 3931 is not present in this extract, so the
 * call above is left unterminated; the reference list names
 * wal_segment_size, presumably its last argument -- confirm.
 */
3932
3933 /* Ignore anything that's not from the TLI of interest. */
3934 if (tli != file_tli)
3935 continue;
3936
3937 /* If it's the oldest so far, update oldest_segno. */
3938 if (oldest_segno == 0 || file_segno < oldest_segno)
3939 oldest_segno = file_segno;
3940 }
3941
3942 FreeDir(xldir);
3943 return oldest_segno;
3944}

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2822 of file xlog.c.

/*
 * Return the value held in retval.
 *
 * NOTE(review): listing lines 2826-2828 are not present in this extract, so
 * no visible line assigns retval.  The reference list names XLogCtl,
 * replicationSlotMinLSN, info_lck, SpinLockAcquire and SpinLockRelease, so
 * the missing lines presumably copy XLogCtl->replicationSlotMinLSN while
 * holding that spinlock -- confirm against xlog.c.
 */
2823{
2824 XLogRecPtr retval;
2825
2829
2830 return retval;
2831}
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:454

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5399 of file xlog.c.

/*
 * Prepare the first WAL segment of timeline newTLI, given that the old
 * timeline endTLI ended at endOfLog.  endTLI and newTLI must differ.
 *
 * endLogSegNo is computed with XLByteToPrevSeg() and startLogSegNo with
 * XLByteToSeg().  When they are equal, the old segment is copied onto the
 * new timeline with XLogFileCopy(); otherwise a fresh segment is created
 * with XLogFileInit() and its descriptor closed at once, a close failure
 * being reported with ereport(ERROR).  Finally XLogArchiveCleanup() is
 * called on the new segment's file name.
 *
 * NOTE(review): listing lines 5411, 5438 and 5457 are not present in this
 * extract.  The reference list names UpdateMinRecoveryPoint,
 * InvalidXLogRecPtr, XLogSegmentOffset and errcode_for_file_access, none of
 * which occur in the visible statements; presumably they belong in those
 * gaps -- confirm against xlog.c.
 */
5400{
5401 char xlogfname[MAXFNAMELEN];
5402 XLogSegNo endLogSegNo;
5403 XLogSegNo startLogSegNo;
5404
5405 /* we always switch to a new timeline after archive recovery */
5406 Assert(endTLI != newTLI);
5407
5408 /*
5409 * Update min recovery point one last time.
5410 */
5412
5413 /*
5414 * Calculate the last segment on the old timeline, and the first segment
5415 * on the new timeline. If the switch happens in the middle of a segment,
5416 * they are the same, but if the switch happens exactly at a segment
5417 * boundary, startLogSegNo will be endLogSegNo + 1.
5418 */
5419 XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5420 XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5421
5422 /*
5423 * Initialize the starting WAL segment for the new timeline. If the switch
5424 * happens in the middle of a segment, copy data from the last WAL segment
5425 * of the old timeline up to the switch point, to the starting WAL segment
5426 * on the new timeline.
5427 */
5428 if (endLogSegNo == startLogSegNo)
5429 {
5430 /*
5431 * Make a copy of the file on the new timeline.
5432 *
5433 * Writing WAL isn't allowed yet, so there are no locking
5434 * considerations. But we should be just as tense as XLogFileInit to
5435 * avoid emplacing a bogus file.
5436 */
5437 XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5439 }
5440 else
5441 {
5442 /*
5443 * The switch happened at a segment boundary, so just create the next
5444 * segment on the new timeline.
5445 */
5446 int fd;
5447
5448 fd = XLogFileInit(startLogSegNo, newTLI);
5449
5450 if (close(fd) != 0)
5451 {
5452 int save_errno = errno;
5453
5454 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5455 errno = save_errno;
5456 ereport(ERROR,
5458 errmsg("could not close file \"%s\": %m", xlogfname)));
5459 }
5460 }
5461
5462 /*
5463 * Let's just make real sure there are not .ready or .done flags posted
5464 * for the new segment.
5465 */
5466 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5467 XLogArchiveCleanup(xlogfname);
5468}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3557

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6577 of file xlog.c.

6578{
6579 /*
6580 * If value is "unconditionally true" or "unconditionally false", just
6581 * return it. This provides the normal fast path once recovery is known
6582 * done.
6583 */
6584 if (LocalXLogInsertAllowed >= 0)
6585 return (bool) LocalXLogInsertAllowed;
6586
6587 /*
6588 * Else, must check to see if we're still in recovery.
6589 */
6590 if (RecoveryInProgress())
6591 return false;
6592
6593 /*
6594 * On exit from recovery, reset to "unconditionally true", since there is
6595 * no need to keep checking.
6596 */
6598 return true;
6599}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 759 of file xlog.c.

764{
766 pg_crc32c rdata_crc;
767 bool inserted;
768 XLogRecord *rechdr = (XLogRecord *) rdata->data;
769 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
771 XLogRecPtr StartPos;
772 XLogRecPtr EndPos;
773 bool prevDoPageWrites = doPageWrites;
774 TimeLineID insertTLI;
775
776 /* Does this record type require special handling? */
777 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
778 {
779 if (info == XLOG_SWITCH)
781 else if (info == XLOG_CHECKPOINT_REDO)
783 }
784
785 /* we assume that all of the record header is in the first chunk */
786 Assert(rdata->len >= SizeOfXLogRecord);
787
788 /* cross-check on whether we should be here or not */
789 if (!XLogInsertAllowed())
790 elog(ERROR, "cannot make new WAL entries during recovery");
791
792 /*
793 * Given that we're not in recovery, InsertTimeLineID is set and can't
794 * change, so we can read it without a lock.
795 */
796 insertTLI = XLogCtl->InsertTimeLineID;
797
798 /*----------
799 *
800 * We have now done all the preparatory work we can without holding a
801 * lock or modifying shared state. From here on, inserting the new WAL
802 * record to the shared WAL buffer cache is a two-step process:
803 *
804 * 1. Reserve the right amount of space from the WAL. The current head of
805 * reserved space is kept in Insert->CurrBytePos, and is protected by
806 * insertpos_lck.
807 *
808 * 2. Copy the record to the reserved WAL space. This involves finding the
809 * correct WAL buffer containing the reserved space, and copying the
810 * record in place. This can be done concurrently in multiple processes.
811 *
812 * To keep track of which insertions are still in-progress, each concurrent
813 * inserter acquires an insertion lock. In addition to just indicating that
814 * an insertion is in progress, the lock tells others how far the inserter
815 * has progressed. There is a small fixed number of insertion locks,
816 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
817 * boundary, it updates the value stored in the lock to how far it has
818 * inserted, to allow the previous buffer to be flushed.
819 *
820 * Holding onto an insertion lock also protects RedoRecPtr and
821 * fullPageWrites from changing until the insertion is finished.
822 *
823 * Step 2 can usually be done completely in parallel. If the required WAL
824 * page is not initialized yet, you have to go through AdvanceXLInsertBuffer,
825 * which will ensure it is initialized. But the WAL writer tries to do that
826 * ahead of insertions to keep that from happening in the critical path.
827 *
828 *----------
829 */
831
832 if (likely(class == WALINSERT_NORMAL))
833 {
835
836 /*
837 * Check to see if my copy of RedoRecPtr is out of date. If so, may
838 * have to go back and have the caller recompute everything. This can
839 * only happen just after a checkpoint, so it's better to be slow in
840 * this case and fast otherwise.
841 *
842 * Also check to see if fullPageWrites was just turned on or there's a
843 * running backup (which forces full-page writes); if we weren't
844 * already doing full-page writes then go back and recompute.
845 *
846 * If we aren't doing full-page writes then RedoRecPtr doesn't
847 * actually affect the contents of the XLOG record, so we'll update
848 * our local copy but not force a recomputation. (If doPageWrites was
849 * just turned off, we could recompute the record without full pages,
850 * but we choose not to bother.)
851 */
852 if (RedoRecPtr != Insert->RedoRecPtr)
853 {
854 Assert(RedoRecPtr < Insert->RedoRecPtr);
855 RedoRecPtr = Insert->RedoRecPtr;
856 }
857 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
858
859 if (doPageWrites &&
860 (!prevDoPageWrites ||
861 (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
862 {
863 /*
864 * Oops, some buffer now needs to be backed up that the caller
865 * didn't back up. Start over.
866 */
869 return InvalidXLogRecPtr;
870 }
871
872 /*
873 * Reserve space for the record in the WAL. This also sets the xl_prev
874 * pointer.
875 */
876 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
877 &rechdr->xl_prev);
878
879 /* Normal records are always inserted. */
880 inserted = true;
881 }
882 else if (class == WALINSERT_SPECIAL_SWITCH)
883 {
884 /*
885 * In order to insert an XLOG_SWITCH record, we need to hold all of
886 * the WAL insertion locks, not just one, so that no one else can
887 * begin inserting a record until we've figured out how much space
888 * remains in the current WAL segment and claimed all of it.
889 *
890 * Nonetheless, this case is simpler than the normal cases handled
891 * above, which must check for changes in doPageWrites and RedoRecPtr.
892 * Those checks are only needed for records that can contain buffer
893 * references, and an XLOG_SWITCH record never does.
894 */
895 Assert(fpw_lsn == InvalidXLogRecPtr);
897 inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
898 }
899 else
900 {
902
903 /*
904 * We need to update both the local and shared copies of RedoRecPtr,
905 * which means that we need to hold all the WAL insertion locks.
906 * However, there can't be any buffer references, so as above, we need
907 * not check RedoRecPtr before inserting the record; we just need to
908 * update it afterwards.
909 */
910 Assert(fpw_lsn == InvalidXLogRecPtr);
912 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
913 &rechdr->xl_prev);
914 RedoRecPtr = Insert->RedoRecPtr = StartPos;
915 inserted = true;
916 }
917
918 if (inserted)
919 {
920 /*
921 * Now that xl_prev has been filled in, calculate CRC of the record
922 * header.
923 */
924 rdata_crc = rechdr->xl_crc;
925 COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
926 FIN_CRC32C(rdata_crc);
927 rechdr->xl_crc = rdata_crc;
928
929 /*
930 * All the record data, including the header, is now ready to be
931 * inserted. Copy the record in the space reserved.
932 */
934 class == WALINSERT_SPECIAL_SWITCH, rdata,
935 StartPos, EndPos, insertTLI);
936
937 /*
938 * Unless record is flagged as not important, update LSN of last
939 * important record in the current slot. When holding all locks, just
940 * update the first one.
941 */
942 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
943 {
944 int lockno = holdingAllLocks ? 0 : MyLockNo;
945
946 WALInsertLocks[lockno].l.lastImportantAt = StartPos;
947 }
948 }
949 else
950 {
951 /*
952 * This was an xlog-switch record, but the current insert location was
953 * already exactly at the beginning of a segment, so there was no need
954 * to do anything.
955 */
956 }
957
958 /*
959 * Done! Let others know that we're finished.
960 */
962
964
966
967 /*
968 * Mark the top transaction id as logged (if needed) so that we do not try
969 * to log it again with the next WAL record in the current subtransaction.
970 */
971 if (topxid_included)
973
974 /*
975 * Update shared LogwrtRqst.Write, if we crossed page boundary.
976 */
977 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
978 {
980 /* advance global request to include new block(s) */
981 if (XLogCtl->LogwrtRqst.Write < EndPos)
982 XLogCtl->LogwrtRqst.Write = EndPos;
985 }
986
987 /*
988 * If this was an XLOG_SWITCH record, flush the record and the empty
989 * padding space that fills the rest of the segment, and perform
990 * end-of-segment actions (eg, notifying archiver).
991 */
992 if (class == WALINSERT_SPECIAL_SWITCH)
993 {
994 TRACE_POSTGRESQL_WAL_SWITCH();
995 XLogFlush(EndPos);
996
997 /*
998 * Even though we reserved the rest of the segment for us, which is
999 * reflected in EndPos, we return a pointer to just the end of the
1000 * xlog-switch record.
1001 */
1002 if (inserted)
1003 {
1004 EndPos = StartPos + SizeOfXLogRecord;
1005 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
1006 {
1007 uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
1008
1009 if (offset == EndPos % XLOG_BLCKSZ)
1010 EndPos += SizeOfXLogLongPHD;
1011 else
1012 EndPos += SizeOfXLogShortPHD;
1013 }
1014 }
1015 }
1016
1017#ifdef WAL_DEBUG
1018 if (XLOG_DEBUG)
1019 {
1020 static XLogReaderState *debug_reader = NULL;
1021 XLogRecord *record;
1022 DecodedXLogRecord *decoded;
1024 StringInfoData recordBuf;
1025 char *errormsg = NULL;
1026 MemoryContext oldCxt;
1027
1028 oldCxt = MemoryContextSwitchTo(walDebugCxt);
1029
1031 appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1032
1033 /*
1034 * We have to piece together the WAL record data from the XLogRecData
1035 * entries, so that we can pass it to the rm_desc function as one
1036 * contiguous chunk.
1037 */
1038 initStringInfo(&recordBuf);
1039 for (; rdata != NULL; rdata = rdata->next)
1040 appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1041
1042 /* We also need temporary space to decode the record. */
1043 record = (XLogRecord *) recordBuf.data;
1044 decoded = (DecodedXLogRecord *)
1046
1047 if (!debug_reader)
1048 debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1049 XL_ROUTINE(.page_read = NULL,
1050 .segment_open = NULL,
1051 .segment_close = NULL),
1052 NULL);
1053 if (!debug_reader)
1054 {
1055 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1056 }
1057 else if (!DecodeXLogRecord(debug_reader,
1058 decoded,
1059 record,
1060 EndPos,
1061 &errormsg))
1062 {
1063 appendStringInfo(&buf, "error decoding record: %s",
1064 errormsg ? errormsg : "no error message");
1065 }
1066 else
1067 {
1068 appendStringInfoString(&buf, " - ");
1069
1070 debug_reader->record = decoded;
1071 xlog_outdesc(&buf, debug_reader);
1072 debug_reader->record = NULL;
1073 }
1074 elog(LOG, "%s", buf.data);
1075
1076 pfree(decoded);
1077 pfree(buf.data);
1078 pfree(recordBuf.data);
1079 MemoryContextSwitchTo(oldCxt);
1080 }
1081#endif
1082
1083 /*
1084 * Update our global variables
1085 */
1086 ProcLastRecPtr = StartPos;
1087 XactLastRecEnd = EndPos;
1088
1089 /* Report WAL traffic to the instrumentation. */
1090 if (inserted)
1091 {
1092 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1094 pgWalUsage.wal_fpi += num_fpi;
1095 }
1096
1097 return EndPos;
1098}
#define likely(x)
Definition: c.h:346
#define unlikely(x)
Definition: c.h:347
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:591
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:541
XLogRecPtr XactLastRecEnd
Definition: xlog.c:254
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1236
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1119
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1175
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1672
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1639
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert(), buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3254 of file xlog.c.

3255{
3256 /*
3257 * During recovery, we don't flush WAL but update minRecoveryPoint
3258 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3259 * would need to be updated.
3260 */
3261 if (RecoveryInProgress())
3262 {
3263 /*
3264 * An invalid minRecoveryPoint means that we need to recover all the
3265 * WAL, i.e., we're doing crash recovery. We never modify the control
3266 * file's value in that case, so we can short-circuit future checks
3267 * here too. This triggers a quick exit path for the startup process,
3268 * which cannot update its local copy of minRecoveryPoint as long as
3269 * it has not replayed all WAL available when doing crash recovery.
3270 */
3272 updateMinRecoveryPoint = false;
3273
3274 /* Quick exit if already known to be updated or cannot be updated */
3276 return false;
3277
3278 /*
3279 * Update local copy of minRecoveryPoint. But if the lock is busy,
3280 * just return a conservative guess.
3281 */
3282 if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3283 return true;
3286 LWLockRelease(ControlFileLock);
3287
3288 /*
3289 * Check minRecoveryPoint for any other process than the startup
3290 * process doing crash recovery, which should not update the control
3291 * file value if crash recovery is still running.
3292 */
3294 updateMinRecoveryPoint = false;
3295
3296 /* check again */
3298 return false;
3299 else
3300 return true;
3301 }
3302
3303 /* Quick exit if already known flushed */
3304 if (record <= LogwrtResult.Flush)
3305 return false;
3306
3307 /* read LogwrtResult and update local state */
3309
3310 /* check again */
3311 if (record <= LogwrtResult.Flush)
3312 return false;
3313
3314 return true;
3315}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1353

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8211 of file xlog.c.

8212{
8214 XLogRegisterData(&nextOid, sizeof(Oid));
8215 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8216
8217 /*
8218 * We need not flush the NEXTOID record immediately, because any of the
8219 * just-allocated OIDs could only reach disk as part of a tuple insert or
8220 * update that would have its own XLOG record that must follow the NEXTOID
8221 * record. Therefore, the standard buffer LSN interlock applied to those
8222 * records will ensure no such OID reaches disk before the NEXTOID record
8223 * does.
8224 *
8225 * Note, however, that the above statement only covers state "within" the
8226 * database. When we use a generated OID as a file or directory name, we
8227 * are in a sense violating the basic WAL rule, because that filesystem
8228 * change may reach disk before the NEXTOID WAL record does. The impact
8229 * of this is that if a database crash occurs immediately afterward, we
8230 * might after restart re-generate the same OID and find that it conflicts
8231 * with the leftover file or directory. But since for safety's sake we
8232 * always loop until finding a nonconflicting filename, this poses no real
8233 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8234 */
8235}

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1952 of file xlog.c.

/*
 * Convert the WAL pointer 'ptr' into a byte position that leaves out page
 * headers.  The result is the sum of:
 *   - the segment number times UsableBytesInSegment,
 *   - the contribution of the whole pages that precede ptr's page inside
 *     that segment, and
 *   - ptr's offset within its own page minus that page's header size
 *     (SizeOfXLogLongPHD on a segment's first page, SizeOfXLogShortPHD on
 *     any other page).
 *
 * A page offset of 0 adds nothing for the current page; any other offset is
 * asserted to be at least the size of the page's header.
 *
 * NOTE(review): UsableBytesInPage / UsableBytesInSegment are defined
 * elsewhere; presumably they are the page and segment sizes net of headers --
 * confirm against their definitions.
 */
1953{
1954 uint64 fullsegs; /* segment number, filled in by XLByteToSeg below */
1955 uint32 fullpages; /* whole pages before ptr's page within the segment */
1956 uint32 offset; /* byte offset of ptr within its page */
1957 uint64 result;
1958
1959 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1960
1961 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1962 offset = ptr % XLOG_BLCKSZ;
1963
1964 if (fullpages == 0)
1965 {
 /* ptr is on the first page of its segment: only the long header applies */
1966 result = fullsegs * UsableBytesInSegment;
1967 if (offset > 0)
1968 {
1969 Assert(offset >= SizeOfXLogLongPHD);
1970 result += offset - SizeOfXLogLongPHD;
1971 }
1972 }
1973 else
1974 {
 /* ptr is on a later page: count the first page, then the pages in between */
1975 result = fullsegs * UsableBytesInSegment +
1976 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1977 (fullpages - 1) * UsableBytesInPage; /* full pages */
1978 if (offset > 0)
1979 {
1980 Assert(offset >= SizeOfXLogShortPHD);
1981 result += offset - SizeOfXLogShortPHD;
1982 }
1983 }
1984
1985 return result;
1986}

References Assert(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8291 of file xlog.c.

8292{
8301 {
8302 /*
8303 * The change in number of backend slots doesn't need to be WAL-logged
8304 * if archiving is not enabled, as you can't start archive recovery
8305 * with wal_level=minimal anyway. We don't really care about the
8306 * values in pg_control either if wal_level=minimal, but it seems better
8307 * to keep them up-to-date to avoid confusion.
8308 */
8310 {
8311 xl_parameter_change xlrec;
8312 XLogRecPtr recptr;
8313
8319 xlrec.wal_level = wal_level;
8322
8324 XLogRegisterData(&xlrec, sizeof(xlrec));
8325
8326 recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
8327 XLogFlush(recptr);
8328 }
8329
8330 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8331
8341
8342 LWLockRelease(ControlFileLock);
8343 }
8344}

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8266 of file xlog.c.

8267{
8268 XLogRecPtr RecPtr;
8269 xl_restore_point xlrec;
8270
8271 xlrec.rp_time = GetCurrentTimestamp();
8272 strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8273
8275 XLogRegisterData(&xlrec, sizeof(xl_restore_point));
8276
8277 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8278
8279 ereport(LOG,
8280 (errmsg("restore point \"%s\" created at %X/%X",
8281 rpName, LSN_FORMAT_ARGS(RecPtr))));
8282
8283 return RecPtr;
8284}
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2752 of file xlog.c.

2753{
2754 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2755 bool sleeping;
2756 bool wakeup = false;
2757 XLogRecPtr prevAsyncXactLSN;
2758
2760 sleeping = XLogCtl->WalWriterSleeping;
2761 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2762 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2763 XLogCtl->asyncXactLSN = asyncXactLSN;
2765
2766 /*
2767 * If somebody else already called this function with a more aggressive
2768 * LSN, they will have done what we needed (and perhaps more).
2769 */
2770 if (asyncXactLSN <= prevAsyncXactLSN)
2771 return;
2772
2773 /*
2774 * If the WALWriter is sleeping, kick it to make it come out of low-power
2775 * mode, so that this async commit will reach disk within the expected
2776 * amount of time. Otherwise, determine whether it has enough WAL
2777 * available to flush, the same way that XLogBackgroundFlush() does.
2778 */
2779 if (sleeping)
2780 wakeup = true;
2781 else
2782 {
2783 int flushblocks;
2784
2786
2787 flushblocks =
2788 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2789
2790 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2791 wakeup = true;
2792 }
2793
2794 if (wakeup)
2795 {
2796 volatile PROC_HDR *procglobal = ProcGlobal;
2797 ProcNumber walwriterProc = procglobal->walwriterProc;
2798
2799 if (walwriterProc != INVALID_PROC_NUMBER)
2800 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2801 }
2802}
void SetLatch(Latch *latch)
Definition: latch.c:288
#define GetPGProcByNumber(n)
Definition: proc.h:424
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
PROC_HDR * ProcGlobal
Definition: proc.c:79
Definition: proc.h:370
ProcNumber walwriterProc
Definition: proc.h:408
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:130

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 5099 of file xlog.c.

5100{
5101 bool foundCFile,
5102 foundXLog;
5103 char *allocptr;
5104 int i;
5105 ControlFileData *localControlFile;
5106
5107#ifdef WAL_DEBUG
5108
5109 /*
5110 * Create a memory context for WAL debugging that's exempt from the normal
5111 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
5112 * an allocation fails, but wal_debug is not for production use anyway.
5113 */
5114 if (walDebugCxt == NULL)
5115 {
5117 "WAL Debug",
5119 MemoryContextAllowInCriticalSection(walDebugCxt, true);
5120 }
5121#endif
5122
5123
5124 XLogCtl = (XLogCtlData *)
5125 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
5126
5127 localControlFile = ControlFile;
5129 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
5130
5131 if (foundCFile || foundXLog)
5132 {
5133 /* both should be present or neither */
5134 Assert(foundCFile && foundXLog);
5135
5136 /* Initialize local copy of WALInsertLocks */
5138
5139 if (localControlFile)
5140 pfree(localControlFile);
5141 return;
5142 }
5143 memset(XLogCtl, 0, sizeof(XLogCtlData));
5144
5145 /*
5146 * Already have read control file locally, unless in bootstrap mode. Move
5147 * contents into shared memory.
5148 */
5149 if (localControlFile)
5150 {
5151 memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
5152 pfree(localControlFile);
5153 }
5154
5155 /*
5156 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5157 * multiple of the alignment for same, so no extra alignment padding is
5158 * needed here.
5159 */
5160 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5161 XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
5162 allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
5163
5164 for (i = 0; i < XLOGbuffers; i++)
5165 {
5167 }
5168
5169 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5170 allocptr += sizeof(WALInsertLockPadded) -
5171 ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
5173 (WALInsertLockPadded *) allocptr;
5174 allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
5175
5176 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5177 {
5181 }
5182
5183 /*
5184 * Align the start of the page buffers to a full xlog block size boundary.
5185 * This simplifies some calculations in XLOG insertion. It is also
5186 * required for O_DIRECT.
5187 */
5188 allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5189 XLogCtl->pages = allocptr;
5190 memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5191
5192 /*
5193 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5194 * in additional info.)
5195 */
5199 XLogCtl->WalWriterSleeping = false;
5200
5207
5211}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
void ConditionVariableInit(ConditionVariable *cv)
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:188
MemoryContext TopMemoryContext
Definition: mcxt.c:165
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:725
#define AllocSetContextCreate
Definition: memutils.h:149
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:180
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:505
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:439
slock_t insertpos_lck
Definition: xlog.c:393
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:5049
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), ConditionVariableInit(), ControlFile, i, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::InitializedUpToCondVar, XLogCtlData::InitializeReserved, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 5049 of file xlog.c.

5050{
5051 Size size;
5052
5053 /*
5054 * If the value of wal_buffers is -1, use the preferred auto-tune value.
5055 * This isn't an amazingly clean place to do this, but we must wait till
5056 * NBuffers has received its final value, and must do it before using the
5057 * value of XLOGbuffers to do anything important.
5058 *
5059 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
5060 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
5061 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
5062 * the matter with PGC_S_OVERRIDE.
5063 */
5064 if (XLOGbuffers == -1)
5065 {
5066 char buf[32];
5067
5068 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
5069 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5071 if (XLOGbuffers == -1) /* failed to apply it? */
5072 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
5074 }
5075 Assert(XLOGbuffers > 0);
5076
5077 /* XLogCtl */
5078 size = sizeof(XLogCtlData);
5079
5080 /* WAL insertion locks, plus alignment */
5081 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
5082 /* xlblocks array */
5083 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
5084 /* extra alignment padding for XLOG I/O buffers */
5085 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
5086 /* and the buffers themselves */
5087 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
5088
5089 /*
5090 * Note: we don't count ControlFileData, it comes out of the "slop factor"
5091 * added by CreateSharedMemoryAndSemaphores. This lets us use this
5092 * routine again below to compute the actual allocation size.
5093 */
5094
5095 return size;
5096}
#define Max(x, y)
Definition: c.h:969
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_POSTMASTER
Definition: guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), Assert(), buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9650 of file xlog.c.

9651{
9653
9654 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9656 LWLockRelease(ControlFileLock);
9657}
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2447 of file xlog.c.

2448{
2449 bool ispartialpage;
2450 bool last_iteration;
2451 bool finishing_seg;
2452 int curridx;
2453 int npages;
2454 int startidx;
2455 uint32 startoffset;
2456
2457 /* We should always be inside a critical section here */
2459
2460 /*
2461 * Update local LogwrtResult (caller probably did this already, but...)
2462 */
2464
2465 /*
2466 * Since successive pages in the xlog cache are consecutively allocated,
2467 * we can usually gather multiple pages together and issue just one
2468 * write() call. npages is the number of pages we have determined can be
2469 * written together; startidx is the cache block index of the first one,
2470 * and startoffset is the file offset at which it should go. The latter
2471 * two variables are only valid when npages > 0, but we must initialize
2472 * all of them to keep the compiler quiet.
2473 */
2474 npages = 0;
2475 startidx = 0;
2476 startoffset = 0;
2477
2478 /*
2479 * Within the loop, curridx is the cache block index of the page to
2480 * consider writing. Begin at the buffer containing the next unwritten
2481 * page, or last partially written page.
2482 */
2484
2485 while (LogwrtResult.Write < WriteRqst.Write)
2486 {
2487 /*
2488 * Make sure we're not ahead of the insert process. This could happen
2489 * if we're passed a bogus WriteRqst.Write that is past the end of the
2490 * last page that's been initialized by AdvanceXLInsertBuffer.
2491 */
2492 XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2493
2494 if (LogwrtResult.Write >= EndPtr)
2495 elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
2497 LSN_FORMAT_ARGS(EndPtr));
2498
2499 /* Advance LogwrtResult.Write to end of current buffer page */
2500 LogwrtResult.Write = EndPtr;
2501 ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2502
2505 {
2506 /*
2507 * Switch to new logfile segment. We cannot have any pending
2508 * pages here (since we dump what we have at segment end).
2509 */
2510 Assert(npages == 0);
2511 if (openLogFile >= 0)
2512 XLogFileClose();
2515 openLogTLI = tli;
2516
2517 /* create/use new log file */
2520 }
2521
2522 /* Make sure we have the current logfile open */
2523 if (openLogFile < 0)
2524 {
2527 openLogTLI = tli;
2530 }
2531
2532 /* Add current page to the set of pending pages-to-dump */
2533 if (npages == 0)
2534 {
2535 /* first of group */
2536 startidx = curridx;
2537 startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2539 }
2540 npages++;
2541
2542 /*
2543 * Dump the set if this will be the last loop iteration, or if we are
2544 * at the last page of the cache area (since the next page won't be
2545 * contiguous in memory), or if we are at the end of the logfile
2546 * segment.
2547 */
2548 last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2549
2550 finishing_seg = !ispartialpage &&
2551 (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2552
2553 if (last_iteration ||
2554 curridx == XLogCtl->XLogCacheBlck ||
2555 finishing_seg)
2556 {
2557 char *from;
2558 Size nbytes;
2559 Size nleft;
2560 ssize_t written;
2562
2563 /* OK to write the page(s) */
2564 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2565 nbytes = npages * (Size) XLOG_BLCKSZ;
2566 nleft = nbytes;
2567 do
2568 {
2569 errno = 0;
2570
2571 /*
2572 * Measure I/O timing to write WAL data, for pg_stat_io.
2573 */
2575
2576 pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2577 written = pg_pwrite(openLogFile, from, nleft, startoffset);
2579
2581 IOOP_WRITE, start, 1, written);
2582
2583 if (written <= 0)
2584 {
2585 char xlogfname[MAXFNAMELEN];
2586 int save_errno;
2587
2588 if (errno == EINTR)
2589 continue;
2590
2591 save_errno = errno;
2592 XLogFileName(xlogfname, tli, openLogSegNo,
2594 errno = save_errno;
2595 ereport(PANIC,
2597 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2598 xlogfname, startoffset, nleft)));
2599 }
2600 nleft -= written;
2601 from += written;
2602 startoffset += written;
2603 } while (nleft > 0);
2604
2605 npages = 0;
2606
2607 /*
2608 * If we just wrote the whole last page of a logfile segment,
2609 * fsync the segment immediately. This avoids having to go back
2610 * and re-open prior segments when an fsync request comes along
2611 * later. Doing it here ensures that one and only one backend will
2612 * perform this fsync.
2613 *
2614 * This is also the right place to notify the Archiver that the
2615 * segment is ready to copy to archival storage, and to update the
2616 * timer for archive_timeout, and to signal for a checkpoint if
2617 * too many logfile segments have been used since the last
2618 * checkpoint.
2619 */
2620 if (finishing_seg)
2621 {
2623
2624 /* signal that we need to wake up walsenders later */
2626
2627 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2628
2629 if (XLogArchivingActive())
2631
2632 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2634
2635 /*
2636 * Request a checkpoint if we've consumed too much xlog since
2637 * the last one. For speed, we first check using the local
2638 * copy of RedoRecPtr, which might be out of date; if it looks
2639 * like a checkpoint is needed, forcibly update RedoRecPtr and
2640 * recheck.
2641 */
2643 {
2644 (void) GetRedoRecPtr();
2647 }
2648 }
2649 }
2650
2651 if (ispartialpage)
2652 {
2653 /* Only asked to write a partial page */
2654 LogwrtResult.Write = WriteRqst.Write;
2655 break;
2656 }
2657 curridx = NextBufIdx(curridx);
2658
2659 /* If flexible, break out of loop as soon as we wrote something */
2660 if (flexible && npages == 0)
2661 break;
2662 }
2663
2664 Assert(npages == 0);
2665
2666 /*
2667 * If asked to flush, do so
2668 */
2669 if (LogwrtResult.Flush < WriteRqst.Flush &&
2671 {
2672 /*
2673 * Could get here without iterating above loop, in which case we might
2674 * have no open file or the wrong one. However, we do not need to
2675 * fsync more than one file.
2676 */
2679 {
2680 if (openLogFile >= 0 &&
2683 XLogFileClose();
2684 if (openLogFile < 0)
2685 {
2688 openLogTLI = tli;
2691 }
2692
2694 }
2695
2696 /* signal that we need to wake up walsenders later */
2698
2700 }
2701
2702 /*
2703 * Update shared-memory status
2704 *
2705 * We make sure that the shared 'request' values do not fall behind the
2706 * 'result' values. This is not absolutely essential, but it saves some
2707 * code in a couple of places.
2708 */
2715
2716 /*
2717 * We write Write first, then a barrier, then Flush. When reading, the opposite must
2718 * be done (with a matching barrier in between), so that we always see a
2719 * Flush value that trails behind the Write value seen.
2720 */
2724
2725#ifdef USE_ASSERT_CHECKING
2726 {
2730
2736
2737 /* WAL written to disk is never behind WAL flushed (they may be equal) */
2738 Assert(Write >= Flush);
2739
2740 /* WAL inserted to buffers is never behind WAL written (they may be equal) */
2741 Assert(Insert >= Write);
2742 }
2743#endif
2744}
void ReserveExternalFD(void)
Definition: fd.c:1223
XLogRecPtr Flush
Definition: walreceiver.c:112
XLogRecPtr Write
Definition: walreceiver.c:111
#define WalSndWakeupRequest()
Definition: walsender.h:58
#define EINTR
Definition: win32_port.h:364
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6625
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3757
#define NextBufIdx(idx)
Definition: xlog.c:595
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8871
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2423
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert(), CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

Definition at line 191 of file xlog.c.

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 166 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 159 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

bool doPageWrites
static

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 122 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 217 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 224 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 236 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 135 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 115 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 647 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 648 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 160 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 659 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 124 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 126 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 125 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 136 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 127 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 116 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 123 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 128 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 143 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 171 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 118 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 117 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl