PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
bool check_max_slot_wal_keep_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 111 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 603 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 580 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 584 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 150 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
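Value:
do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)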
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
pg_atomic_uint64 logWriteResult
Definition: xlog.c:472
pg_atomic_uint64 logFlushResult
Definition: xlog.c:473
static XLogCtlData * XLogCtl
Definition: xlog.c:566

Definition at line 620 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 597 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 591 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 559 of file xlog.c.

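560{
561 WALINSERT_NORMAL,
562 WALINSERT_SPECIAL_SWITCH,
563 WALINSERT_SPECIAL_CHECKPOINT
564} WalInsertClass;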
WalInsertClass
Definition: xlog.c:560
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:562
@ WALINSERT_NORMAL
Definition: xlog.c:561
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:563

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1985 of file xlog.c.

1986{
1987 XLogCtlInsert *Insert = &XLogCtl->Insert;
1988 int nextidx;
1989 XLogRecPtr OldPageRqstPtr;
1990 XLogwrtRqst WriteRqst;
1991 XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1992 XLogRecPtr NewPageBeginPtr;
1993 XLogPageHeader NewPage;
1994 int npages pg_attribute_unused() = 0;
1995
1996 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
1997
1998 /*
1999 * Now that we have the lock, check if someone initialized the page
2000 * already.
2001 */
2002 while (upto >= XLogCtl->InitializedUpTo || opportunistic)
2003 {
2004 nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
2005
2006 /*
2007 * Get ending-offset of the buffer page we need to replace (this may
2008 * be zero if the buffer hasn't been used yet). Fall through if it's
2009 * already written out.
2010 */
2011 OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
2012 if (LogwrtResult.Write < OldPageRqstPtr)
2013 {
2014 /*
2015 * Nope, got work to do. If we just want to pre-initialize as much
2016 * as we can without flushing, give up now.
2017 */
2018 if (opportunistic)
2019 break;
2020
2021 /* Advance shared memory write request position */
2022 SpinLockAcquire(&XLogCtl->info_lck);
2023 if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2024 XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2025 SpinLockRelease(&XLogCtl->info_lck);
2026
2027 /*
2028 * Acquire an up-to-date LogwrtResult value and see if we still
2029 * need to write it or if someone else already did.
2030 */
2031 RefreshXLogWriteResult(LogwrtResult);
2032 if (LogwrtResult.Write < OldPageRqstPtr)
2033 {
2034 /*
2035 * Must acquire write lock. Release WALBufMappingLock first,
2036 * to make sure that all insertions that we need to wait for
2037 * can finish (up to this same position). Otherwise we risk
2038 * deadlock.
2039 */
2040 LWLockRelease(WALBufMappingLock);
2041
2042 WaitXLogInsertionsToFinish(OldPageRqstPtr);
2043
2044 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2045
2046 RefreshXLogWriteResult(LogwrtResult);
2047 if (LogwrtResult.Write >= OldPageRqstPtr)
2048 {
2049 /* OK, someone wrote it already */
2050 LWLockRelease(WALWriteLock);
2051 }
2052 else
2053 {
2054 /* Have to write it ourselves */
2055 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2056 WriteRqst.Write = OldPageRqstPtr;
2057 WriteRqst.Flush = 0;
2058 XLogWrite(WriteRqst, tli, false);
2059 LWLockRelease(WALWriteLock);
2060 pgWalUsage.wal_buffers_full++;
2061 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2062 }
2063 /* Re-acquire WALBufMappingLock and retry */
2064 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2065 continue;
2066 }
2067 }
2068
2069 /*
2070 * Now the next buffer slot is free and we can set it up to be the
2071 * next output page.
2072 */
2073 NewPageBeginPtr = XLogCtl->InitializedUpTo;
2074 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2075
2076 Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2077
2078 NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2079
2080 /*
2081 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2082 * before initializing. Otherwise, the old page may be partially
2083 * zeroed but look valid.
2084 */
2085 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], InvalidXLogRecPtr);
2086 pg_write_barrier();
2087
2088 /*
2089 * Be sure to re-zero the buffer so that bytes beyond what we've
2090 * written will look like zeroes and not valid XLOG records...
2091 */
2092 MemSet(NewPage, 0, XLOG_BLCKSZ);
2093
2094 /*
2095 * Fill the new page's header
2096 */
2097 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2098
2099 /* NewPage->xlp_info = 0; */ /* done by memset */
2100 NewPage->xlp_tli = tli;
2101 NewPage->xlp_pageaddr = NewPageBeginPtr;
2102
2103 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2104
2105 /*
2106 * If online backup is not in progress, mark the header to indicate
2107 * that WAL records beginning in this page have removable backup
2108 * blocks. This allows the WAL archiver to know whether it is safe to
2109 * compress archived WAL data by transforming full-block records into
2110 * the non-full-block format. It is sufficient to record this at the
2111 * page level because we force a page switch (in fact a segment
2112 * switch) when starting a backup, so the flag will be off before any
2113 * records can be written during the backup. At the end of a backup,
2114 * the last page will be marked as all unsafe when perhaps only part
2115 * is unsafe, but at worst the archiver would miss the opportunity to
2116 * compress a few records.
2117 */
2118 if (Insert->runningBackups == 0)
2119 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2120
2121 /*
2122 * If first page of an XLOG segment file, make it a long header.
2123 */
2124 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2125 {
2126 XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2127
2128 NewLongPage->xlp_sysid = ControlFile->system_identifier;
2129 NewLongPage->xlp_seg_size = wal_segment_size;
2130 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2131 NewPage->xlp_info |= XLP_LONG_HEADER;
2132 }
2133
2134 /*
2135 * Make sure the initialization of the page becomes visible to others
2136 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2137 * holding a lock.
2138 */
2139 pg_write_barrier();
2140
2141 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2142 XLogCtl->InitializedUpTo = NewPageEndPtr;
2143
2144 npages++;
2145 }
2146 LWLockRelease(WALBufMappingLock);
2147
2148#ifdef WAL_DEBUG
2149 if (XLOG_DEBUG && npages > 0)
2150 {
2151 elog(DEBUG1, "initialized %d pages, up to %X/%X",
2152 npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2153 }
2154#endif
2155}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
#define pg_write_barrier()
Definition: atomics.h:157
#define pg_attribute_unused()
Definition: c.h:133
#define Assert(condition)
Definition: c.h:815
#define MemSet(start, val, len)
Definition: c.h:977
size_t Size
Definition: c.h:562
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
static void Insert(File file)
Definition: fd.c:1312
WalUsage pgWalUsage
Definition: instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
uint64 system_identifier
Definition: pg_control.h:110
int64 wal_buffers_full
Definition: instrument.h:56
XLogwrtRqst LogwrtRqst
Definition: xlog.c:455
slock_t info_lck
Definition: xlog.c:553
XLogRecPtr InitializedUpTo
Definition: xlog.c:485
char * pages
Definition: xlog.c:492
pg_atomic_uint64 * xlblocks
Definition: xlog.c:493
XLogCtlInsert Insert
Definition: xlog.c:452
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:327
XLogRecPtr Flush
Definition: xlog.c:322
XLogRecPtr Write
Definition: xlog.c:321
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1504
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:620
int wal_segment_size
Definition: xlog.c:143
static XLogwrtResult LogwrtResult
Definition: xlog.c:612
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:591
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2314
static ControlFileData * ControlFile
Definition: xlog.c:574
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2198 of file xlog.c.

2199{
2200 CheckPointCompletionTarget = newval;
2201 CalculateCheckpointSegments();
2202}
double CheckPointCompletionTarget
Definition: checkpointer.c:142
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2162

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2191 of file xlog.c.

2192{
2193 max_wal_size_mb = newval;
2194 CalculateCheckpointSegments();
2195}
int max_wal_size_mb
Definition: xlog.c:114

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4778 of file xlog.c.

4779{
4780 /*
4781 * If some checks were deferred, it's possible that the checks will fail
4782 * later during InitializeWalConsistencyChecking(). But in that case, the
4783 * postmaster will exit anyway, so it's safe to proceed with the
4784 * assignment.
4785 *
4786 * Any built-in resource managers specified are assigned immediately,
4787 * which affects WAL created before shared_preload_libraries are
4788 * processed. Any custom resource managers specified won't be assigned
4789 * until after shared_preload_libraries are processed, but that's OK
4790 * because WAL for a custom resource manager can't be written before the
4791 * module is loaded anyway.
4792 */
4793 wal_consistency_checking = (bool *) extra;
4794}
bool * wal_consistency_checking
Definition: xlog.c:126

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8662 of file xlog.c.

8663{
8664 if (wal_sync_method != new_wal_sync_method)
8665 {
8666 /*
8667 * To ensure that no blocks escape unsynced, force an fsync on the
8668 * currently open log segment (if any). Also, if the open flag is
8669 * changing, close the log file so it will be reopened (with new flag
8670 * bit) at next use.
8671 */
8672 if (openLogFile >= 0)
8673 {
8674 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8675 if (pg_fsync(openLogFile) != 0)
8676 {
8677 char xlogfname[MAXFNAMELEN];
8678 int save_errno;
8679
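8680 save_errno = errno;
8681 XLogFileName(xlogfname, openLogTLI, openLogSegNo,
8682 wal_segment_size);
8683 errno = save_errno;
8684 ereport(PANIC,
8685 (errcode_for_file_access(),
8686 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8687 }
8688
8689 pgstat_report_wait_end();
8690 if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))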
8691 XLogFileClose();
8692 }
8693 }
8694}
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:385
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
static int openLogFile
Definition: xlog.c:635
static int get_sync_bit(int method)
Definition: xlog.c:8614
int wal_sync_method
Definition: xlog.c:130
static TimeLineID openLogTLI
Definition: xlog.c:637
static void XLogFileClose(void)
Definition: xlog.c:3660
static XLogSegNo openLogSegNo
Definition: xlog.c:636
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5054 of file xlog.c.

5055{
5056 CheckPoint checkPoint;
5057 char *buffer;
5058 XLogPageHeader page;
5059 XLogLongPageHeader longpage;
5060 XLogRecord *record;
5061 char *recptr;
5062 uint64 sysidentifier;
5063 struct timeval tv;
5064 pg_crc32c crc;
5065
5066 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5068
5069 /*
5070 * Select a hopefully-unique system identifier code for this installation.
5071 * We use the result of gettimeofday(), including the fractional seconds
5072 * field, as being about as unique as we can easily get. (Think not to
5073 * use random(), since it hasn't been seeded and there's no portable way
5074 * to seed it other than the system clock value...) The upper half of the
5075 * uint64 value is just the tv_sec part, while the lower half contains the
5076 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5077 * PID for a little extra uniqueness. A person knowing this encoding can
5078 * determine the initialization time of the installation, which could
5079 * perhaps be useful sometimes.
5080 */
5081 gettimeofday(&tv, NULL);
5082 sysidentifier = ((uint64) tv.tv_sec) << 32;
5083 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5084 sysidentifier |= getpid() & 0xFFF;
5085
5086 /* page buffer must be aligned suitably for O_DIRECT */
5087 buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5088 page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5089 memset(page, 0, XLOG_BLCKSZ);
5090
5091 /*
5092 * Set up information for the initial checkpoint record
5093 *
5094 * The initial checkpoint record is written to the beginning of the WAL
5095 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5096 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5097 */
5101 checkPoint.fullPageWrites = fullPageWrites;
5102 checkPoint.wal_level = wal_level;
5103 checkPoint.nextXid =
5104 FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
5105 checkPoint.nextOid = FirstGenbkiObjectId;
5106 checkPoint.nextMulti = FirstMultiXactId;
5107 checkPoint.nextMultiOffset = 0;
5109 checkPoint.oldestXidDB = Template1DbOid;
5110 checkPoint.oldestMulti = FirstMultiXactId;
5111 checkPoint.oldestMultiDB = Template1DbOid;
5114 checkPoint.time = (pg_time_t) time(NULL);
5116
5117 TransamVariables->nextXid = checkPoint.nextXid;
5118 TransamVariables->nextOid = checkPoint.nextOid;
5120 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5121 AdvanceOldestClogXid(checkPoint.oldestXid);
5122 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5123 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5125
5126 /* Set up the XLOG page header */
5127 page->xlp_magic = XLOG_PAGE_MAGIC;
5128 page->xlp_info = XLP_LONG_HEADER;
5131 longpage = (XLogLongPageHeader) page;
5132 longpage->xlp_sysid = sysidentifier;
5133 longpage->xlp_seg_size = wal_segment_size;
5134 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5135
5136 /* Insert the initial checkpoint record */
5137 recptr = ((char *) page + SizeOfXLogLongPHD);
5138 record = (XLogRecord *) recptr;
5139 record->xl_prev = 0;
5140 record->xl_xid = InvalidTransactionId;
5141 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5143 record->xl_rmid = RM_XLOG_ID;
5144 recptr += SizeOfXLogRecord;
5145 /* fill the XLogRecordDataHeaderShort struct */
5146 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5147 *(recptr++) = sizeof(checkPoint);
5148 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5149 recptr += sizeof(checkPoint);
5150 Assert(recptr - (char *) record == record->xl_tot_len);
5151
5152 INIT_CRC32C(crc);
5153 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5154 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5155 FIN_CRC32C(crc);
5156 record->xl_crc = crc;
5157
5158 /* Create first XLOG segment file */
5161
5162 /*
5163 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5164 * close the file again in a moment.
5165 */
5166
5167 /* Write the first page with the initial record */
5168 errno = 0;
5169 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5170 if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5171 {
5172 /* if write didn't set errno, assume problem is no disk space */
5173 if (errno == 0)
5174 errno = ENOSPC;
5175 ereport(PANIC,
5177 errmsg("could not write bootstrap write-ahead log file: %m")));
5178 }
5180
5181 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5182 if (pg_fsync(openLogFile) != 0)
5183 ereport(PANIC,
5185 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5187
5188 if (close(openLogFile) != 0)
5189 ereport(PANIC,
5191 errmsg("could not close bootstrap write-ahead log file: %m")));
5192
5193 openLogFile = -1;
5194
5195 /* Now create pg_control */
5196 InitControlFile(sysidentifier, data_checksum_version);
5197 ControlFile->time = checkPoint.time;
5198 ControlFile->checkPoint = checkPoint.redo;
5199 ControlFile->checkPointCopy = checkPoint;
5200
5201 /* some additional ControlFile fields are set in WriteControlFile() */
5203
5204 /* Bootstrap the commit log, too */
5205 BootStrapCLOG();
5209
5210 pfree(buffer);
5211
5212 /*
5213 * Force control file to be read - in contrast to normal processing we'd
5214 * otherwise never run the checks and GUC related initializations therein.
5215 */
5217}
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:761
uint64_t uint64
Definition: c.h:489
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2328
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2362
void BootStrapMultiXact(void)
Definition: multixact.c:2034
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3401
bool fullPageWrites
Definition: xlog.c:122
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4225
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9508
int wal_level
Definition: xlog.c:131
static void WriteControlFile(void)
Definition: xlog.c:4260
#define BootstrapTimeLineID
Definition: xlog.c:111
static void ReadControlFile(void)
Definition: xlog.c:4342
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2162 of file xlog.c.

2163{
     /*
      * Recompute the global CheckPointSegments from max_wal_size_mb and
      * wal_segment_size. Takes no arguments and returns nothing; the only
      * side effect visible here is the assignment to CheckPointSegments.
      */
2164 double target;
2165
2166 /*-------
2167 * Calculate the distance at which to trigger a checkpoint, to avoid
2168 * exceeding max_wal_size_mb. This is based on two assumptions:
2169 *
2170 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2171 * WAL for two checkpoint cycles to allow us to recover from the
2172 * secondary checkpoint if the first checkpoint failed, though we
2173 * only did this on the primary anyway, not on standby. Keeping just
2174 * one checkpoint simplifies processing and reduces disk space in
2175 * many smaller databases.)
2176 * b) during checkpoint, we consume checkpoint_completion_target *
2177 * number of segments consumed between checkpoints.
2178 *-------
2179 */
2180 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
     /*
      * NOTE(review): listing line 2181, the divisor of this expression, is
      * absent from this extract. The References list for this function names
      * CheckPointCompletionTarget, so it presumably appears there - confirm
      * against the real source.
      */
2182
2183 /* round down */
2184 CheckPointSegments = (int) target;
2185
2186 if (CheckPointSegments < 1)
     /*
      * NOTE(review): listing line 2187, the statement guarded by the test
      * above, is absent from this extract.
      */
2188}
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:603
int CheckPointSegments
Definition: xlog.c:156

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_max_slot_wal_keep_size()

bool check_max_slot_wal_keep_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2223 of file xlog.c.

2224{
     /*
      * Check hook for max_slot_wal_keep_size. While IsBinaryUpgrade is set,
      * any proposed value other than -1 is rejected: an error detail is
      * registered and false is returned. In every other case the value is
      * accepted unchanged and true is returned. The extra and source
      * arguments are not used by the visible code.
      */
2225 if (IsBinaryUpgrade && *newval != -1)
2226 {
2227 GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
2228 "max_slot_wal_keep_size");
2229 return false;
2230 }
2231
2232 return true;
2233}
bool IsBinaryUpgrade
Definition: globals.c:120
#define GUC_check_errdetail
Definition: guc.h:480

References GUC_check_errdetail, IsBinaryUpgrade, and newval.

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4658 of file xlog.c.

4659{
     /*
      * Check hook for the setting backed by XLOGbuffers. It may rewrite
      * *newval in place: a -1 request is resolved to an auto-tuned value
      * (unless XLOGbuffers itself is still -1), and anything below 4 is
      * raised to 4. Every visible path returns true, so the hook never
      * rejects a value.
      */
4660 /*
4661 * -1 indicates a request for auto-tune.
4662 */
4663 if (*newval == -1)
4664 {
4665 /*
4666 * If we haven't yet changed the boot_val default of -1, just let it
4667 * be. We'll fix it when XLOGShmemSize is called.
4668 */
4669 if (XLOGbuffers == -1)
4670 return true;
4671
4672 /* Otherwise, substitute the auto-tune value */
     /*
      * NOTE(review): listing line 4673 is absent from this extract. The
      * References list names XLOGChooseNumBuffers(), presumably the source of
      * the substituted value - confirm against the real source.
      */
4674 }
4675
4676 /*
4677 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4678 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4679 * the case, we just silently treat such values as a request for the
4680 * minimum. (We could throw an error instead, but that doesn't seem very
4681 * helpful.)
4682 */
4683 if (*newval < 4)
4684 *newval = 4;
4685
4686 return true;
4687}
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4642
int XLOGbuffers
Definition: xlog.c:117

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4693 of file xlog.c.

4694{
     /*
      * Check hook that parses *newval as a comma-separated identifier list.
      * Each element is either the keyword all or the name of a resource
      * manager; only resource managers that exist and have a non-NULL
      * rm_mask callback are eligible. Comparison is case-insensitive.
      *
      * On success, *extra receives a guc_malloc-allocated array of
      * RM_MAX_ID + 1 bools, one flag per rmid, and true is returned. On a
      * list syntax error or an unrecognized keyword, an error detail is
      * registered, the temporary string and list are freed, and false is
      * returned. The source argument is not used by the visible code.
      */
4695 char *rawstring;
4696 List *elemlist;
4697 ListCell *l;
4698 bool newwalconsistency[RM_MAX_ID + 1];
4699
4700 /* Initialize the array */
4701 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4702
4703 /* Need a modifiable copy of string */
4704 rawstring = pstrdup(*newval);
4705
4706 /* Parse string into list of identifiers */
4707 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4708 {
4709 /* syntax error in list */
4710 GUC_check_errdetail("List syntax is invalid.");
4711 pfree(rawstring);
4712 list_free(elemlist);
4713 return false;
4714 }
4715
4716 foreach(l, elemlist)
4717 {
4718 char *tok = (char *) lfirst(l);
4719 int rmid;
4720
4721 /* Check for 'all'. */
4722 if (pg_strcasecmp(tok, "all") == 0)
4723 {
4724 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4725 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4726 newwalconsistency[rmid] = true;
4727 }
4728 else
4729 {
4730 /* Check if the token matches any known resource manager. */
4731 bool found = false;
4732
4733 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4734 {
4735 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4736 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4737 {
4738 newwalconsistency[rmid] = true;
4739 found = true;
4740 break;
4741 }
4742 }
4743 if (!found)
4744 {
4745 /*
4746 * During startup, it might be a not-yet-loaded custom
4747 * resource manager. Defer checking until
4748 * InitializeWalConsistencyChecking().
4749 */
     /*
      * NOTE(review): listing lines 4750 and 4752, the condition and the
      * body of this deferral branch, are absent from this extract. The
      * References list names process_shared_preload_libraries_done and
      * check_wal_consistency_checking_deferred, which presumably appear
      * there - confirm against the real source.
      */
4751 {
4753 }
4754 else
4755 {
4756 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4757 pfree(rawstring);
4758 list_free(elemlist);
4759 return false;
4760 }
4761 }
4762 }
4763 }
4764
     /* The parsed tokens are no longer needed once the flags are set. */
4765 pfree(rawstring);
4766 list_free(elemlist);
4767
4768 /* assign new value */
4769 *extra = guc_malloc(ERROR, (RM_MAX_ID + 1) * sizeof(bool));
4770 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4771 return true;
4772}
#define ERROR
Definition: elog.h:39
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:638
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1696
bool process_shared_preload_libraries_done
Definition: miscinit.c:1835
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3432
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:166
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, ERROR, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2205 of file xlog.c.

2206{
     /*
      * Check hook for the WAL segment size. Returns false, after registering
      * an error detail, when the proposed value fails the validity test, and
      * true otherwise. Per the message below, valid sizes are powers of two
      * between 1 MB and 1 GB.
      *
      * NOTE(review): listing line 2207, the if condition itself, is absent
      * from this extract. The References list names IsValidWalSegSize and
      * newval, so the test is presumably built from those - confirm against
      * the real source.
      */
2208 {
2209 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2210 return false;
2211 }
2212
2213 return true;
2214}
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7531 of file xlog.c.

7532{
     /*
      * Perform the data-flushing portion of a checkpoint. In the visible
      * code, flags is handed to the start trace probe and to
      * CheckPointBuffers(), and checkPointRedo is handed to
      * CheckPointTwoPhase(), which is deliberately called last.
      *
      * NOTE(review): listing lines 7533-7537, 7541-7546 and 7551-7553 are
      * absent from this extract. The References list names the other
      * CheckPoint*() routines, ProcessSyncRequests(), GetCurrentTimestamp()
      * and the CheckpointStats timestamp fields, which presumably occupy
      * those lines - confirm against the real source before relying on the
      * ordering shown here.
      */
7538
7539 /* Write out all dirty data in SLRUs and the main buffer pool */
7540 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7547 CheckPointBuffers(flags);
7548
7549 /* Perform all queued up fsyncs */
7550 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7554 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
7555
7556 /* We deliberately delay 2PC checkpointing as long as possible */
7557 CheckPointTwoPhase(checkPointRedo);
7558}
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
void CheckPointBuffers(int flags)
Definition: bufmgr.c:3707
void CheckPointCLOG(void)
Definition: clog.c:937
void CheckPointCommitTs(void)
Definition: commit_ts.c:820
void CheckPointMultiXact(void)
Definition: multixact.c:2304
void CheckPointReplicationOrigin(void)
Definition: origin.c:577
void CheckPointPredicate(void)
Definition: predicate.c:1041
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:2032
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1922
TimestampTz ckpt_write_t
Definition: xlog.h:162
TimestampTz ckpt_sync_end_t
Definition: xlog.h:164
TimestampTz ckpt_sync_t
Definition: xlog.h:163
void CheckPointSUBTRANS(void)
Definition: subtrans.c:355
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1806
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5406 of file xlog.c.

5407{
     /*
      * Verify that settings recorded for the WAL being replayed are
      * compatible with this server. The first test ends in a FATAL report
      * about WAL generated with wal_level=minimal; the second block runs a
      * series of RecoveryRequiresIntParameter() checks, one per named
      * parameter. Takes no arguments and returns nothing.
      */
5408 /*
5409 * For archive recovery, the WAL must be generated with at least 'replica'
5410 * wal_level.
5411 */
     /*
      * NOTE(review): listing line 5412, the if condition for this block, is
      * absent from this extract. The References list names
      * ArchiveRecoveryRequested, ControlFileData::wal_level and
      * WAL_LEVEL_MINIMAL - confirm against the real source.
      */
5413 {
5414 ereport(FATAL,
5415 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5416 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5417 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5418 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5419 }
5420
5421 /*
5422 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5423 * must have at least as many backend slots as the primary.
5424 */
     /*
      * NOTE(review): listing line 5425, the if condition for this block, is
      * absent from this extract; the References list names
      * ArchiveRecoveryRequested and EnableHotStandby.
      */
5426 {
5427 /* We ignore autovacuum_worker_slots when we make this test. */
     /*
      * NOTE(review): each call below is cut off after its first argument;
      * the two continuation lines per call are absent from this extract.
      * The declared signature is (param_name, currValue, minValue), and the
      * References list pairs each local setting with a ControlFileData
      * field of the same name - confirm which is which against the real
      * source.
      */
5428 RecoveryRequiresIntParameter("max_connections",
5431 RecoveryRequiresIntParameter("max_worker_processes",
5434 RecoveryRequiresIntParameter("max_wal_senders",
5437 RecoveryRequiresIntParameter("max_prepared_transactions",
5440 RecoveryRequiresIntParameter("max_locks_per_transaction",
5443 }
5444}
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:142
int max_worker_processes
Definition: globals.c:143
int max_locks_per_xact
Definition: lock.c:52
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
int max_prepared_xacts
Definition: twophase.c:115
int max_wal_senders
Definition: walsender.c:121
bool EnableHotStandby
Definition: xlog.c:121
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:138
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3748 of file xlog.c.

3749{
     /*
      * Raise an ERROR if the WAL segment segno is at or before the last
      * removed segment recorded in XLogCtl; otherwise return normally.
      *
      * errno is captured on entry and restored both before the error report
      * and before a normal return, so the value seen by the caller is not
      * disturbed by anything done in here.
      */
3750 int save_errno = errno;
3751 XLogSegNo lastRemovedSegNo;
3752
     /*
      * NOTE(review): listing lines 3753 and 3755, which bracket this read,
      * are absent from this extract. The References list names
      * SpinLockAcquire, SpinLockRelease and XLogCtlData::info_lck, so the
      * read is presumably done under that spinlock - confirm.
      */
3754 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3756
3757 if (segno <= lastRemovedSegNo)
3758 {
3759 char filename[MAXFNAMELEN];
3760
     /*
      * NOTE(review): listing line 3761 is absent; the References list names
      * XLogFileName(), wal_segment_size and tli is otherwise unused here, so
      * filename is presumably filled in on that line - confirm. Listing line
      * 3764 inside the ereport is also absent; the References list names
      * errcode_for_file_access().
      */
3762 errno = save_errno;
3763 ereport(ERROR,
3765 errmsg("requested WAL segment %s has already been removed",
3766 filename)));
3767 }
3768 errno = save_errno;
3769}
static char * filename
Definition: pg_dumpall.c:123
XLogSegNo lastRemovedSegNo
Definition: xlog.c:461
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5310 of file xlog.c.

5312{
     /*
      * Housekeeping after archive recovery has switched to timeline newTLI
      * at position EndOfLog. Three steps are visible: handling of
      * recoveryEndCommand when it is non-empty, removal of old-timeline
      * segments via RemoveNonParentXlogFiles(), and, when EndOfLog falls
      * inside a segment, renaming the last segment of timeline EndOfLogTLI
      * with a .partial suffix and notifying the archiver about it.
      */
5313 /*
5314 * Execute the recovery_end_command, if any.
5315 */
5316 if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
     /*
      * NOTE(review): listing line 5317, the start of the call whose
      * remaining arguments follow, is absent from this extract. The
      * References list names ExecuteRecoveryCommand(), whose signature
      * (command, commandName, failOnSignal, wait_event_info) matches the
      * three arguments below - confirm against the real source.
      */
5318 "recovery_end_command",
5319 true,
5320 WAIT_EVENT_RECOVERY_END_COMMAND);
5321
5322 /*
5323 * We switched to a new timeline. Clean up segments on the old timeline.
5324 *
5325 * If there are any higher-numbered segments on the old timeline, remove
5326 * them. They might contain valid WAL, but they might also be
5327 * pre-allocated files containing garbage. In any case, they are not part
5328 * of the new timeline's history so we don't need them.
5329 */
5330 RemoveNonParentXlogFiles(EndOfLog, newTLI);
5331
5332 /*
5333 * If the switch happened in the middle of a segment, what to do with the
5334 * last, partial segment on the old timeline? If we don't archive it, and
5335 * the server that created the WAL never archives it either (e.g. because
5336 * it was hit by a meteor), it will never make it to the archive. That's
5337 * OK from our point of view, because the new segment that we created with
5338 * the new TLI contains all the WAL from the old timeline up to the switch
5339 * point. But if you later try to do PITR to the "missing" WAL on the old
5340 * timeline, recovery won't find it in the archive. It's physically
5341 * present in the new file with new TLI, but recovery won't look there
5342 * when it's recovering to the older timeline. On the other hand, if we
5343 * archive the partial segment, and the original server on that timeline
5344 * is still running and archives the completed version of the same segment
5345 * later, it will fail. (We used to do that in 9.4 and below, and it
5346 * caused such problems).
5347 *
5348 * As a compromise, we rename the last segment with the .partial suffix,
5349 * and archive it. Archive recovery will never try to read .partial
5350 * segments, so they will normally go unused. But in the odd PITR case,
5351 * the administrator can copy them manually to the pg_wal directory
5352 * (removing the suffix). They can be useful in debugging, too.
5353 *
5354 * If a .done or .ready file already exists for the old timeline, however,
5355 * we had already determined that the segment is complete, so we can let
5356 * it be archived normally. (In particular, if it was restored from the
5357 * archive to begin with, it's expected to have a .done file).
5358 */
5359 if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
     /*
      * NOTE(review): listing line 5360, the second half of this condition,
      * is absent from this extract. The References list names
      * XLogArchivingActive, which presumably appears there - confirm.
      */
5361 {
5362 char origfname[MAXFNAMELEN];
5363 XLogSegNo endLogSegNo;
5364
     /* Identify the segment that holds the byte just before EndOfLog. */
5365 XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5366 XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5367
5368 if (!XLogArchiveIsReadyOrDone(origfname))
5369 {
5370 char origpath[MAXPGPATH];
5371 char partialfname[MAXFNAMELEN];
5372 char partialpath[MAXPGPATH];
5373
5374 /*
5375 * If we're summarizing WAL, we can't rename the partial file
5376 * until the summarizer finishes with it, else it will fail.
5377 */
5378 if (summarize_wal)
5379 WaitForWalSummarization(EndOfLog);
5380
5381 XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5382 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5383 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5384
5385 /*
5386 * Make sure there's no .done or .ready file for the .partial
5387 * file.
5388 */
5389 XLogArchiveCleanup(partialfname);
5390
     /* Rename first, then tell the archiver about the new name. */
5391 durable_rename(origpath, partialpath, ERROR);
5392 XLogArchiveNotify(partialfname);
5393 }
5394 }
5395}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:781
#define MAXPGPATH
#define snprintf
Definition: port.h:239
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3961
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:84

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4182 of file xlog.c.

4183{
     /*
      * Walk the XLOGDIR directory and unlink selected entries for which
      * XLogArchiveCheckDone() returns true, logging each removal at DEBUG2.
      * Takes no arguments and returns nothing. The return value of unlink()
      * is not examined in the visible code.
      */
4184 DIR *xldir;
4185 struct dirent *xlde;
4186 char path[MAXPGPATH + sizeof(XLOGDIR)];
4187
4188 xldir = AllocateDir(XLOGDIR);
4189
4190 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4191 {
     /*
      * NOTE(review): listing line 4192, the filter that opens this block,
      * is absent from this extract. The References list names
      * IsBackupHistoryFileName(), which presumably restricts the loop to
      * backup history files - confirm against the real source.
      */
4193 {
4194 if (XLogArchiveCheckDone(xlde->d_name))
4195 {
4196 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4197 xlde->d_name);
4198 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4199 unlink(path);
     /*
      * NOTE(review): listing line 4200 is absent; the References list names
      * XLogArchiveCleanup(), presumably called here - confirm.
      */
4201 }
4202 }
4203 }
4204
4205 FreeDir(xldir);
4206}
#define DEBUG2
Definition: elog.h:29
int FreeDir(DIR *dir)
Definition: fd.c:2983
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2865
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2931
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1225 of file xlog.c.

1227{
     /*
      * Copy the record described by the rdata chain, write_len bytes in
      * total, into the WAL buffers beginning at StartPos on timeline tli.
      * Data that does not fit on the current page continues after the page
      * header of the next page. When isLogSwitch is set and the record does
      * not end exactly on a segment boundary, the rest of the segment up to
      * EndPos is consumed as well. Returns nothing; reports PANIC if the
      * final position differs from EndPos.
      *
      * CurrPos tracks the WAL position and currpos the matching buffer
      * address; the two are advanced together. written counts bytes copied
      * so far and feeds both xlp_rem_len and the final length assertion.
      */
1228 char *currpos;
1229 int freespace;
1230 int written;
1231 XLogRecPtr CurrPos;
1232 XLogPageHeader pagehdr;
1233
1234 /*
1235 * Get a pointer to the right place in the right WAL buffer to start
1236 * inserting to.
1237 */
1238 CurrPos = StartPos;
1239 currpos = GetXLogBuffer(CurrPos, tli);
1240 freespace = INSERT_FREESPACE(CurrPos);
1241
1242 /*
1243 * there should be enough space for at least the first field (xl_tot_len)
1244 * on this page.
1245 */
1246 Assert(freespace >= sizeof(uint32));
1247
1248 /* Copy record data */
1249 written = 0;
1250 while (rdata != NULL)
1251 {
1252 const char *rdata_data = rdata->data;
1253 int rdata_len = rdata->len;
1254
1255 while (rdata_len > freespace)
1256 {
1257 /*
1258 * Write what fits on this page, and continue on the next page.
1259 */
1260 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1261 memcpy(currpos, rdata_data, freespace);
1262 rdata_data += freespace;
1263 rdata_len -= freespace;
1264 written += freespace;
1265 CurrPos += freespace;
1266
1267 /*
1268 * Get pointer to beginning of next page, and set the xlp_rem_len
1269 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1270 *
1271 * It's safe to set the contrecord flag and xlp_rem_len without a
1272 * lock on the page. All the other flags were already set when the
1273 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1274 * only backend that needs to set the contrecord flag.
1275 */
1276 currpos = GetXLogBuffer(CurrPos, tli);
1277 pagehdr = (XLogPageHeader) currpos;
1278 pagehdr->xlp_rem_len = write_len - written;
     /*
      * NOTE(review): listing line 1279 is absent from this extract. Going
      * by the comment above and the References list (XLP_FIRST_IS_CONTRECORD,
      * XLogPageHeaderData::xlp_info), it presumably sets the contrecord
      * flag in the page header - confirm against the real source.
      */
1280
1281 /* skip over the page header */
1282 if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1283 {
1284 CurrPos += SizeOfXLogLongPHD;
1285 currpos += SizeOfXLogLongPHD;
1286 }
1287 else
1288 {
1289 CurrPos += SizeOfXLogShortPHD;
1290 currpos += SizeOfXLogShortPHD;
1291 }
1292 freespace = INSERT_FREESPACE(CurrPos);
1293 }
1294
     /* The remainder of this rdata element now fits on the current page. */
1295 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1296 memcpy(currpos, rdata_data, rdata_len);
1297 currpos += rdata_len;
1298 CurrPos += rdata_len;
1299 freespace -= rdata_len;
1300 written += rdata_len;
1301
1302 rdata = rdata->next;
1303 }
1304 Assert(written == write_len);
1305
1306 /*
1307 * If this was an xlog-switch, it's not enough to write the switch record,
1308 * we also have to consume all the remaining space in the WAL segment. We
1309 * have already reserved that space, but we need to actually fill it.
1310 */
1311 if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1312 {
1313 /* An xlog-switch record doesn't contain any data besides the header */
1314 Assert(write_len == SizeOfXLogRecord);
1315
1316 /* Assert that we did reserve the right amount of space */
     /*
      * NOTE(review): listing line 1317, presumably the assertion announced
      * by the comment above, is absent from this extract.
      */
1318
1319 /* Use up all the remaining space on the current page */
1320 CurrPos += freespace;
1321
1322 /*
1323 * Cause all remaining pages in the segment to be flushed, leaving the
1324 * XLog position where it should be, at the start of the next segment.
1325 * We do this one page at a time, to make sure we don't deadlock
1326 * against ourselves if wal_buffers < wal_segment_size.
1327 */
1328 while (CurrPos < EndPos)
1329 {
1330 /*
1331 * The minimal action to flush the page would be to call
1332 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1333 * AdvanceXLInsertBuffer(...). The page would be left initialized
1334 * mostly to zeros, except for the page header (always the short
1335 * variant, as this is never a segment's first page).
1336 *
1337 * The large vistas of zeros are good for compressibility, but the
1338 * headers interrupting them every XLOG_BLCKSZ (with values that
1339 * differ from page to page) are not. The effect varies with
1340 * compression tool, but bzip2 for instance compresses about an
1341 * order of magnitude worse if those headers are left in place.
1342 *
1343 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1344 * called in heavily-loaded circumstances as well as this lightly-
1345 * loaded one) with variant behavior, we just use GetXLogBuffer
1346 * (which itself calls the two methods we need) to get the pointer
1347 * and zero most of the page. Then we just zero the page header.
1348 */
1349 currpos = GetXLogBuffer(CurrPos, tli);
1350 MemSet(currpos, 0, SizeOfXLogShortPHD);
1351
1352 CurrPos += XLOG_BLCKSZ;
1353 }
1354 }
1355 else
1356 {
1357 /* Align the end position, so that the next record starts aligned */
1358 CurrPos = MAXALIGN64(CurrPos);
1359 }
1360
     /* Final cross-check of the space accounting against the reservation. */
1361 if (CurrPos != EndPos)
1362 ereport(PANIC,
     /*
      * NOTE(review): listing line 1363 inside this ereport is absent from
      * this extract; the References list names errcode() and
      * ERRCODE_DATA_CORRUPTED, which presumably appear there - confirm.
      */
1364 errmsg_internal("space reserved for WAL record does not match what was written"));
1365}
uint32_t uint32
Definition: c.h:488
#define MAXALIGN64(LEN)
Definition: c.h:793
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
const void * data
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:580
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1632
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert, XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 6910 of file xlog.c.

/*
 * CreateCheckPoint -- perform a checkpoint (shutdown, end-of-recovery, or
 * online) and report whether one was actually created.
 *
 * flags: bitmask of CHECKPOINT_* options; the visible code tests
 *        CHECKPOINT_END_OF_RECOVERY and CHECKPOINT_FORCE.
 *
 * Returns false when the checkpoint is skipped because the system is idle,
 * true once the checkpoint has completed.
 *
 * Errors visible in this listing:
 *   - elog(ERROR) if recovery is in progress and CHECKPOINT_END_OF_RECOVERY
 *     is not set;
 *   - ereport(PANIC) if, for a shutdown checkpoint, the computed redo
 *     pointer differs from ProcLastRecPtr after inserting the record.
 *
 * NOTE(review): this is an extracted listing, not the raw source.  Every line
 * carries its xlog.c line number as a prefix, and lines that were hyperlinked
 * in the rendered page are absent (visible as gaps in the numbering).  The
 * NOTE(review) markers below flag the gaps that affect control flow; what the
 * missing lines contain is inferred from the surrounding comments and the
 * page's References list and must be confirmed against the real xlog.c.
 */
6911{
6912 bool shutdown;
6913 CheckPoint checkPoint;
6914 XLogRecPtr recptr;
6915 XLogSegNo _logSegNo;
/* NOTE(review): line 6916 is missing; "Insert" is used below without a visible declaration, so it is presumably declared here -- confirm. */
6917 uint32 freespace;
6918 XLogRecPtr PriorRedoPtr;
6919 XLogRecPtr last_important_lsn;
6920 VirtualTransactionId *vxids;
6921 int nvxids;
6922 int oldXLogAllowed = 0;
6923
6924 /*
6925 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6926 * issued at a different time.
6927 */
/* NOTE(review): line 6928 (the "if" condition that pairs with the "else" below) is missing; presumably it tests flags for the shutdown / end-of-recovery bits -- confirm. */
6929 shutdown = true;
6930 else
6931 shutdown = false;
6932
6933 /* sanity check */
6934 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6935 elog(ERROR, "can't create a checkpoint during recovery");
6936
6937 /*
6938 * Prepare to accumulate statistics.
6939 *
6940 * Note: because it is possible for log_checkpoints to change while a
6941 * checkpoint proceeds, we always accumulate stats, even if
6942 * log_checkpoints is currently off.
6943 */
/* NOTE(review): lines 6944-6945 are missing; presumably they reset CheckpointStats and record its start time -- confirm. */
6946
6947 /*
6948 * Let smgr prepare for checkpoint; this has to happen outside the
6949 * critical section and before we determine the REDO pointer. Note that
6950 * smgr must not do anything that'd have to be undone if we decide no
6951 * checkpoint is needed.
6952 */
6954
6955 /*
6956 * Use a critical section to force system panic if we have trouble.
6957 */
/* NOTE(review): line 6958 is missing; presumably the START_CRIT_SECTION() that the comment above announces -- confirm. */
6959
6960 if (shutdown)
6961 {
6962 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
/* NOTE(review): lines 6963-6964 (the work done under ControlFileLock) are missing from this listing. */
6965 LWLockRelease(ControlFileLock);
6966 }
6967
6968 /* Begin filling in the checkpoint WAL record */
6969 MemSet(&checkPoint, 0, sizeof(checkPoint));
6970 checkPoint.time = (pg_time_t) time(NULL);
6971
6972 /*
6973 * For Hot Standby, derive the oldestActiveXid before we fix the redo
6974 * pointer. This allows us to begin accumulating changes to assemble our
6975 * starting snapshot of locks and transactions.
6976 */
6977 if (!shutdown && XLogStandbyInfoActive())
/* NOTE(review): lines 6978 and 6980 (the bodies of this if/else) are missing; presumably each assigns checkPoint.oldestActiveXid -- confirm. */
6979 else
6981
6982 /*
6983 * Get location of last important record before acquiring insert locks (as
6984 * GetLastImportantRecPtr() also locks WAL locks).
6985 */
6986 last_important_lsn = GetLastImportantRecPtr();
6987
6988 /*
6989 * If this isn't a shutdown or forced checkpoint, and if there has been no
6990 * WAL activity requiring a checkpoint, skip it. The idea here is to
6991 * avoid inserting duplicate checkpoints when the system is idle.
6992 */
/* NOTE(review): line 6993 (the start of the "if" whose condition ends on the next line) is missing. */
6994 CHECKPOINT_FORCE)) == 0)
6995 {
6996 if (last_important_lsn == ControlFile->checkPoint)
6997 {
/* NOTE(review): lines 6998-6999 are missing; presumably they leave the critical section and open the ereport() call closed on the next line -- confirm. */
7000 (errmsg_internal("checkpoint skipped because system is idle")));
7001 return false;
7002 }
7003 }
7004
7005 /*
7006 * An end-of-recovery checkpoint is created before anyone is allowed to
7007 * write WAL. To allow us to write the checkpoint record, temporarily
7008 * enable XLogInsertAllowed.
7009 */
7010 if (flags & CHECKPOINT_END_OF_RECOVERY)
7011 oldXLogAllowed = LocalSetXLogInsertAllowed();
7012
/* NOTE(review): lines 7013 and 7015 are missing; presumably they set checkPoint.ThisTimeLineID and, for end-of-recovery, checkPoint.PrevTimeLineID -- confirm. */
7014 if (flags & CHECKPOINT_END_OF_RECOVERY)
7016 else
7017 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7018
7019 /*
7020 * We must block concurrent insertions while examining insert state.
7021 */
/* NOTE(review): line 7022 is missing; presumably the exclusive WAL insertion lock is acquired here -- confirm. */
7023
7024 checkPoint.fullPageWrites = Insert->fullPageWrites;
7025 checkPoint.wal_level = wal_level;
7026
7027 if (shutdown)
7028 {
7029 XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7030
7031 /*
7032 * Compute new REDO record ptr = location of next XLOG record.
7033 *
7034 * Since this is a shutdown checkpoint, there can't be any concurrent
7035 * WAL insertion.
7036 */
7037 freespace = INSERT_FREESPACE(curInsert);
/* With no room left on the page, step past the next page's header: long at a segment start, short otherwise. */
7038 if (freespace == 0)
7039 {
7040 if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7041 curInsert += SizeOfXLogLongPHD;
7042 else
7043 curInsert += SizeOfXLogShortPHD;
7044 }
7045 checkPoint.redo = curInsert;
7046
7047 /*
7048 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7049 * this must be done while holding all the insertion locks.
7050 *
7051 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7052 * left pointing past where it really needs to point. This is okay;
7053 * the only consequence is that XLogInsert might back up whole buffers
7054 * that it didn't really need to. We can't postpone advancing
7055 * RedoRecPtr because XLogInserts that happen while we are dumping
7056 * buffers must assume that their buffer changes are not included in
7057 * the checkpoint.
7058 */
7059 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7060 }
7061
7062 /*
7063 * Now we can release the WAL insertion locks, allowing other xacts to
7064 * proceed while we are flushing disk buffers.
7065 */
/* NOTE(review): line 7066 is missing; presumably the WAL insertion lock release announced above -- confirm. */
7067
7068 /*
7069 * If this is an online checkpoint, we have not yet determined the redo
7070 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7071 * record; the LSN at which it starts becomes the new redo pointer. We
7072 * don't do this for a shutdown checkpoint, because in that case no WAL
7073 * can be written between the redo point and the insertion of the
7074 * checkpoint record itself, so the checkpoint record itself serves to
7075 * mark the redo point.
7076 */
7077 if (!shutdown)
7078 {
7079 /* Include WAL level in record for WAL summarizer's benefit. */
/* NOTE(review): lines 7080-7081 are missing; presumably they begin the insert and register the record payload -- confirm. */
7082 (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7083
7084 /*
7085 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7086 * shared memory and RedoRecPtr in backend-local memory, but we need
7087 * to copy that into the record that will be inserted when the
7088 * checkpoint is complete.
7089 */
7090 checkPoint.redo = RedoRecPtr;
7091 }
7092
7093 /* Update the info_lck-protected copy of RedoRecPtr as well */
/* NOTE(review): lines 7094 and 7096 are missing; presumably the info_lck spinlock acquire/release around the assignment -- confirm. */
7095 XLogCtl->RedoRecPtr = checkPoint.redo;
7097
7098 /*
7099 * If enabled, log checkpoint start. We postpone this until now so as not
7100 * to log anything if we decided to skip the checkpoint.
7101 */
7102 if (log_checkpoints)
7103 LogCheckpointStart(flags, false);
7104
7105 /* Update the process title */
7106 update_checkpoint_display(flags, false, false);
7107
7108 TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7109
7110 /*
7111 * Get the other info we need for the checkpoint record.
7112 *
7113 * We don't need to save oldestClogXid in the checkpoint, it only matters
7114 * for the short period in which clog is being truncated, and if we crash
7115 * during that we'll redo the clog truncation and fix up oldestClogXid
7116 * there.
7117 */
7118 LWLockAcquire(XidGenLock, LW_SHARED);
7119 checkPoint.nextXid = TransamVariables->nextXid;
7120 checkPoint.oldestXid = TransamVariables->oldestXid;
7122 LWLockRelease(XidGenLock);
7123
7124 LWLockAcquire(CommitTsLock, LW_SHARED);
/* NOTE(review): lines 7125-7126 (the fields copied under CommitTsLock) are missing from this listing. */
7127 LWLockRelease(CommitTsLock);
7128
7129 LWLockAcquire(OidGenLock, LW_SHARED);
7130 checkPoint.nextOid = TransamVariables->nextOid;
7131 if (!shutdown)
7132 checkPoint.nextOid += TransamVariables->oidCount;
7133 LWLockRelease(OidGenLock);
7134
7135 MultiXactGetCheckptMulti(shutdown,
7136 &checkPoint.nextMulti,
7137 &checkPoint.nextMultiOffset,
7138 &checkPoint.oldestMulti,
7139 &checkPoint.oldestMultiDB);
7140
7141 /*
7142 * Having constructed the checkpoint record, ensure all shmem disk buffers
7143 * and commit-log buffers are flushed to disk.
7144 *
7145 * This I/O could fail for various reasons. If so, we will fail to
7146 * complete the checkpoint, but there is no reason to force a system
7147 * panic. Accordingly, exit critical section while doing it.
7148 */
/* NOTE(review): line 7149 is missing; presumably the END_CRIT_SECTION() that the comment above announces -- confirm. */
7150
7151 /*
7152 * In some cases there are groups of actions that must all occur on one
7153 * side or the other of a checkpoint record. Before flushing the
7154 * checkpoint record we must explicitly wait for any backend currently
7155 * performing those groups of actions.
7156 *
7157 * One example is end of transaction, so we must wait for any transactions
7158 * that are currently in commit critical sections. If an xact inserted
7159 * its commit record into XLOG just before the REDO point, then a crash
7160 * restart from the REDO point would not replay that record, which means
7161 * that our flushing had better include the xact's update of pg_xact. So
7162 * we wait till he's out of his commit critical section before proceeding.
7163 * See notes in RecordTransactionCommit().
7164 *
7165 * Because we've already released the insertion locks, this test is a bit
7166 * fuzzy: it is possible that we will wait for xacts we didn't really need
7167 * to wait for. But the delay should be short and it seems better to make
7168 * checkpoint take a bit longer than to hold off insertions longer than
7169 * necessary. (In fact, the whole reason we have this issue is that xact.c
7170 * does commit record XLOG insertion and clog update as two separate steps
7171 * protected by different locks, but again that seems best on grounds of
7172 * minimizing lock contention.)
7173 *
7174 * A transaction that has not yet set delayChkptFlags when we look cannot
7175 * be at risk, since it has not inserted its commit record yet; and one
7176 * that's already cleared it is not at risk either, since it's done fixing
7177 * clog and we will correctly flush the update below. So we cannot miss
7178 * any xacts we need to wait for.
7179 */
/* NOTE(review): line 7180 is missing; vxids and nvxids are used below, so presumably they are obtained here (start-delay variant) -- confirm. */
7181 if (nvxids > 0)
7182 {
7183 do
7184 {
7185 /*
7186 * Keep absorbing fsync requests while we wait. There could even
7187 * be a deadlock if we don't, if the process that prevents the
7188 * checkpoint is trying to add a request to the queue.
7189 */
7191
7192 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7193 pg_usleep(10000L); /* wait for 10 msec */
/* NOTE(review): line 7196 (the final argument and closing of the loop condition) is missing. */
7195 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7197 }
7198 pfree(vxids);
7199
7200 CheckPointGuts(checkPoint.redo, flags);
7201
/* NOTE(review): line 7202 is missing; presumably vxids/nvxids are fetched again (completion-delay variant) before this second wait loop -- confirm. */
7203 if (nvxids > 0)
7204 {
7205 do
7206 {
7208
7209 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7210 pg_usleep(10000L); /* wait for 10 msec */
7212 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7214 }
7215 pfree(vxids);
7216
7217 /*
7218 * Take a snapshot of running transactions and write this to WAL. This
7219 * allows us to reconstruct the state of running transactions during
7220 * archive recovery, if required. Skip, if this info disabled.
7221 *
7222 * If we are shutting down, or Startup process is completing crash
7223 * recovery we don't need to write running xact data.
7224 */
7225 if (!shutdown && XLogStandbyInfoActive())
/* NOTE(review): line 7226 (the body of this "if") and line 7228 are missing; 7228 presumably re-enters the critical section -- confirm. */
7227
7229
7230 /*
7231 * Now insert the checkpoint record into XLOG.
7232 */
7234 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7235 recptr = XLogInsert(RM_XLOG_ID,
7236 shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
/* NOTE(review): line 7237 (the other arm of the conditional and the end of the call) is missing. */
7238
7239 XLogFlush(recptr);
7240
7241 /*
7242 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7243 * overwritten at next startup. No-one should even try, this just allows
7244 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7245 * to just temporarily disable writing until the system has exited
7246 * recovery.
7247 */
7248 if (shutdown)
7249 {
7250 if (flags & CHECKPOINT_END_OF_RECOVERY)
7251 LocalXLogInsertAllowed = oldXLogAllowed;
7252 else
7253 LocalXLogInsertAllowed = 0; /* never again write WAL */
7254 }
7255
7256 /*
7257 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7258 * = end of actual checkpoint record.
7259 */
7260 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7261 ereport(PANIC,
7262 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7263
7264 /*
7265 * Remember the prior checkpoint's redo ptr for
7266 * UpdateCheckPointDistanceEstimate()
7267 */
7268 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7269
7270 /*
7271 * Update the control file.
7272 */
7273 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7274 if (shutdown)
/* NOTE(review): lines 7275-7276 are missing; the "if (shutdown)" above does NOT govern the assignment on line 7277 in the real source -- confirm against xlog.c. */
7277 ControlFile->checkPointCopy = checkPoint;
7278 /* crash recovery should always recover to the end of WAL */
7281
7282 /*
7283 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7284 * unused on non-shutdown checkpoints, but seems useful to store it always
7285 * for debugging purposes.
7286 */
7288
7290 LWLockRelease(ControlFileLock);
7291
7292 /* Update shared-memory copy of checkpoint XID/epoch */
7294 XLogCtl->ckptFullXid = checkPoint.nextXid;
7296
7297 /*
7298 * We are now done with critical updates; no need for system panic if we
7299 * have trouble while fooling with old log segments.
7300 */
/* NOTE(review): line 7301 is missing; presumably the END_CRIT_SECTION() that the comment above announces -- confirm. */
7302
7303 /*
7304 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7305 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7306 * where (a) we're not inside of a critical section and (b) we can be
7307 * certain that the relevant record has been flushed to disk, which must
7308 * happen before it can be summarized.
7309 *
7310 * If this is a shutdown checkpoint, then this happens reasonably
7311 * promptly: we've only just inserted and flushed the
7312 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7313 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7314 * record was written before we began flushing data to disk, and that
7315 * could be many minutes ago at this point. However, we don't XLogFlush()
7316 * after inserting that record, so we're not guaranteed that it's on disk
7317 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7318 * record.
7319 */
7321
7322 /*
7323 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7324 */
7326
7327 /*
7328 * Update the average distance between checkpoints if the prior checkpoint
7329 * exists.
7330 */
7331 if (PriorRedoPtr != InvalidXLogRecPtr)
/* NOTE(review): line 7332 (the body of this "if") is missing from this listing. */
7333
7334 /*
7335 * Delete old log files, those no longer needed for last checkpoint to
7336 * prevent the disk holding the xlog from growing full.
7337 */
/* NOTE(review): lines 7338, 7340 and 7342 are missing; _logSegNo is presumably initialised on 7338, and 7340/7342 open and close the "if" whose arguments appear on 7341 -- confirm. */
7339 KeepLogSeg(recptr, &_logSegNo);
7341 _logSegNo, InvalidOid,
7343 {
7344 /*
7345 * Some slots have been invalidated; recalculate the old-segment
7346 * horizon, starting again from RedoRecPtr.
7347 */
7349 KeepLogSeg(recptr, &_logSegNo);
7350 }
7351 _logSegNo--;
7352 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7353 checkPoint.ThisTimeLineID);
7354
7355 /*
7356 * Make more log segments if needed. (Do this after recycling old log
7357 * segments, since that may supply some of the needed files.)
7358 */
7359 if (!shutdown)
7360 PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7361
7362 /*
7363 * Truncate pg_subtrans if possible. We can throw away all data before
7364 * the oldest XMIN of any running transaction. No future transaction will
7365 * attempt to reference any pg_subtrans entry older than that (see Asserts
7366 * in subtrans.c). During recovery, though, we mustn't do this because
7367 * StartupSUBTRANS hasn't been called yet.
7368 */
7369 if (!RecoveryInProgress())
/* NOTE(review): line 7370 (the body of this "if") is missing from this listing. */
7371
7372 /* Real work is done; log and update stats. */
7373 LogCheckpointEnd(false);
7374
7375 /* Reset the process title */
7376 update_checkpoint_display(flags, false, true);
7377
7378 TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7379 NBuffers,
/* NOTE(review): lines 7380-7382 (the remaining trace arguments) are missing from this listing. */
7383
7384 return true;
7385}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:476
void AbsorbSyncRequests(void)
int NBuffers
Definition: globals.c:141
@ LW_SHARED
Definition: lwlock.h:115
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2282
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:37
#define DELAY_CHKPT_START
Definition: proc.h:119
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2880
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3089
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3043
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1976
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:55
@ RS_INVAL_IDLE_TIMEOUT
Definition: slot.h:61
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1281
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:457
TimeLineID InsertTimeLineID
Definition: xlog.c:509
XLogRecPtr RedoRecPtr
Definition: xlog.c:456
TimeLineID PrevTimeLineID
Definition: xlog.c:510
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:464
XLogRecPtr RedoRecPtr
Definition: xlog.c:430
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:253
bool RecoveryInProgress(void)
Definition: xlog.c:6355
static void WALInsertLockRelease(void)
Definition: xlog.c:1445
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1858
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1416
static void UpdateControlFile(void)
Definition: xlog.c:4580
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3886
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6670
static XLogRecPtr RedoRecPtr
Definition: xlog.c:273
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6702
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3711
bool log_checkpoints
Definition: xlog.c:129
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7974
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6443
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6577
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6807
static int LocalXLogInsertAllowed
Definition: xlog.c:236
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2805
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7531
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6845
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7396 of file xlog.c.

/*
 * CreateEndOfRecoveryRecord -- insert and flush an XLOG_END_OF_RECOVERY
 * record, then advance ControlFile->minRecoveryPoint to the value returned
 * by XLogInsert() while holding ControlFileLock.
 *
 * Raises elog(ERROR) if called when recovery is not in progress.
 *
 * NOTE(review): this is an extracted listing; each line carries its xlog.c
 * line number and some numbered lines are absent (gaps in the numbering).
 * What the missing lines contain must be confirmed against the real xlog.c.
 */
7397{
7398 xl_end_of_recovery xlrec;
7399 XLogRecPtr recptr;
7400
7401 /* sanity check */
7402 if (!RecoveryInProgress())
7403 elog(ERROR, "can only be used to end recovery");
7404
7405 xlrec.end_time = GetCurrentTimestamp();
7406 xlrec.wal_level = wal_level;
7407
/* NOTE(review): lines 7408-7411, 7413 and 7415 are missing; presumably they fill in the timeline fields of xlrec, enter a critical section and begin the WAL insert -- confirm. */
7412
7414
7416 XLogRegisterData(&xlrec, sizeof(xl_end_of_recovery));
7417 recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7418
/* Flush WAL up to recptr before the control file is updated below. */
7419 XLogFlush(recptr);
7420
7421 /*
7422 * Update the control file so that crash recovery can follow the timeline
7423 * changes to this point.
7424 */
7425 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7426 ControlFile->minRecoveryPoint = recptr;
/* NOTE(review): lines 7427-7428 are missing; presumably they set the matching timeline field and write the control file -- confirm. */
7429 LWLockRelease(ControlFileLock);
7430
/* NOTE(review): line 7431 is missing; presumably it ends the critical section -- confirm. */
7432}
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), wal_level, xl_end_of_recovery::wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7461 of file xlog.c.

/*
 * CreateOverwriteContrecordRecord -- insert and flush an
 * XLOG_OVERWRITE_CONTRECORD record as the first record on the page that
 * starts at pagePtr, and return the value returned by XLogInsert().
 *
 * aborted_lsn: stored in the record as overwritten_lsn.
 * pagePtr:     start of the target WAL page; must be a multiple of
 *              XLOG_BLCKSZ.
 * newTLI:      timeline passed to GetXLogBuffer() for that page.
 *
 * Raises elog(ERROR) if recovery is not in progress, if pagePtr is not
 * page-aligned, if the current insert position is not immediately after the
 * page header, or if the record ends up somewhere other than that position.
 *
 * NOTE(review): this is an extracted listing; each line carries its xlog.c
 * line number and some numbered lines are absent (gaps in the numbering).
 * What the missing lines contain must be confirmed against the real xlog.c.
 */
7463{
/* NOTE(review): line 7464 is missing; "xlrec" is used below without a visible declaration, so it is presumably declared here -- confirm. */
7465 XLogRecPtr recptr;
7466 XLogPageHeader pagehdr;
7467 XLogRecPtr startPos;
7468
7469 /* sanity checks */
7470 if (!RecoveryInProgress())
7471 elog(ERROR, "can only be used at end of recovery");
7472 if (pagePtr % XLOG_BLCKSZ != 0)
7473 elog(ERROR, "invalid position for missing continuation record %X/%X",
7474 LSN_FORMAT_ARGS(pagePtr));
7475
7476 /* The current WAL insert position should be right after the page header */
7477 startPos = pagePtr;
/* A page at the start of a segment has the long header; every other page has the short one. */
7478 if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7479 startPos += SizeOfXLogLongPHD;
7480 else
7481 startPos += SizeOfXLogShortPHD;
7482 recptr = GetXLogInsertRecPtr();
7483 if (recptr != startPos)
7484 elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
7485 LSN_FORMAT_ARGS(recptr));
7486
/* NOTE(review): line 7487 is missing; presumably a critical section starts here -- confirm. */
7488
7489 /*
7490 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7491 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7492 *
7493 * No other backend is allowed to write WAL yet, so acquiring the WAL
7494 * insertion lock is just pro forma.
7495 */
/* NOTE(review): lines 7496 and 7498-7499 are missing; presumably the insertion lock acquire, the flag assignment on pagehdr and the lock release -- confirm. */
7497 pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7500
7501 /*
7502 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7503 * page. We know it becomes the first record, because no other backend is
7504 * allowed to write WAL yet.
7505 */
/* NOTE(review): lines 7506 and 7508-7509 are missing; presumably they begin the insert, set the record's timestamp and register xlrec -- confirm. */
7507 xlrec.overwritten_lsn = aborted_lsn;
7510 recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7511
7512 /* check that the record was inserted to the right place */
7513 if (ProcLastRecPtr != startPos)
7514 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
/* NOTE(review): line 7515 (the argument list closing the elog call above) is missing. */
7516
7517 XLogFlush(recptr);
7518
/* NOTE(review): line 7519 is missing; presumably the critical section ends here -- confirm. */
7520
7521 return recptr;
7522}
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
static void WALInsertLockAcquire(void)
Definition: xlog.c:1371
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9459
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7612 of file xlog.c.

/*
 * CreateRestartPoint -- establish a restartpoint from the last checkpoint
 * record saved in XLogCtl, and report whether one was created.
 *
 * flags: bitmask of CHECKPOINT_* options; the visible code tests
 *        CHECKPOINT_IS_SHUTDOWN and passes flags on to CheckPointGuts(),
 *        LogCheckpointStart() and update_checkpoint_display().
 *
 * Returns false when recovery has already ended, or when the saved
 * checkpoint is invalid or its redo pointer is not newer than the one in
 * the control file; returns true once the restartpoint has completed.
 *
 * NOTE(review): this is an extracted listing; each line carries its xlog.c
 * line number and lines that were hyperlinked in the rendered page are
 * absent (gaps in the numbering).  The NOTE(review) markers below flag the
 * gaps that affect control flow; what the missing lines contain must be
 * confirmed against the real xlog.c.
 */
7613{
7614 XLogRecPtr lastCheckPointRecPtr;
7615 XLogRecPtr lastCheckPointEndPtr;
7616 CheckPoint lastCheckPoint;
7617 XLogRecPtr PriorRedoPtr;
7618 XLogRecPtr receivePtr;
7619 XLogRecPtr replayPtr;
7620 TimeLineID replayTLI;
7621 XLogRecPtr endptr;
7622 XLogSegNo _logSegNo;
7623 TimestampTz xtime;
7624
7625 /* Concurrent checkpoint/restartpoint cannot happen */
/* NOTE(review): line 7626 is missing; presumably an Assert enforcing the statement above -- confirm. */
7627
7628 /* Get a local copy of the last safe checkpoint record. */
/* NOTE(review): lines 7629 and 7633 are missing; presumably a spinlock acquire/release around these three reads -- confirm. */
7630 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7631 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7632 lastCheckPoint = XLogCtl->lastCheckPoint;
7634
7635 /*
7636 * Check that we're still in recovery mode. It's ok if we exit recovery
7637 * mode after this check, the restart point is valid anyway.
7638 */
7639 if (!RecoveryInProgress())
7640 {
/* NOTE(review): line 7641 (the start of the ereport call closed on the next line) is missing. */
7642 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7643 return false;
7644 }
7645
7646 /*
7647 * If the last checkpoint record we've replayed is already our last
7648 * restartpoint, we can't perform a new restart point. We still update
7649 * minRecoveryPoint in that case, so that if this is a shutdown restart
7650 * point, we won't start up earlier than before. That's not strictly
7651 * necessary, but when hot standby is enabled, it would be rather weird if
7652 * the database opened up for read-only connections at a point-in-time
7653 * before the last shutdown. Such time travel is still possible in case of
7654 * immediate shutdown, though.
7655 *
7656 * We don't explicitly advance minRecoveryPoint when we do create a
7657 * restartpoint. It's assumed that flushing the buffers will do that as a
7658 * side-effect.
7659 */
7660 if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7661 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7662 {
/* NOTE(review): line 7663 (the start of the ereport call) and line 7667 are missing; 7667 presumably performs the minRecoveryPoint update described above -- confirm. */
7664 (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7665 LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7666
7668 if (flags & CHECKPOINT_IS_SHUTDOWN)
7669 {
7670 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
/* NOTE(review): lines 7671-7672 (the work done under ControlFileLock) are missing from this listing. */
7673 LWLockRelease(ControlFileLock);
7674 }
7675 return false;
7676 }
7677
7678 /*
7679 * Update the shared RedoRecPtr so that the startup process can calculate
7680 * the number of segments replayed since last restartpoint, and request a
7681 * restartpoint if it exceeds CheckPointSegments.
7682 *
7683 * Like in CreateCheckPoint(), hold off insertions to update it, although
7684 * during recovery this is just pro forma, because no WAL insertions are
7685 * happening.
7686 */
/* NOTE(review): lines 7687 and 7689 are missing; presumably the WAL insertion lock acquire/release around this assignment -- confirm. */
7688 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7690
7691 /* Also update the info_lck-protected copy */
/* NOTE(review): lines 7692 and 7694 are missing; presumably the info_lck spinlock acquire/release -- confirm. */
7693 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7695
7696 /*
7697 * Prepare to accumulate statistics.
7698 *
7699 * Note: because it is possible for log_checkpoints to change while a
7700 * checkpoint proceeds, we always accumulate stats, even if
7701 * log_checkpoints is currently off.
7702 */
/* NOTE(review): lines 7703-7704 are missing; presumably they reset CheckpointStats and record its start time -- confirm. */
7705
7706 if (log_checkpoints)
7707 LogCheckpointStart(flags, true);
7708
7709 /* Update the process title */
7710 update_checkpoint_display(flags, true, false);
7711
7712 CheckPointGuts(lastCheckPoint.redo, flags);
7713
7714 /*
7715 * This location needs to be after CheckPointGuts() to ensure that some
7716 * work has already happened during this checkpoint.
7717 */
7718 INJECTION_POINT("create-restart-point");
7719
7720 /*
7721 * Remember the prior checkpoint's redo ptr for
7722 * UpdateCheckPointDistanceEstimate()
7723 */
7724 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7725
7726 /*
7727 * Update pg_control, using current time. Check that it still shows an
7728 * older checkpoint, else do nothing; this is a quick hack to make sure
7729 * nothing really bad happens if somehow we get here after the
7730 * end-of-recovery checkpoint.
7731 */
7732 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7733 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7734 {
7735 /*
7736 * Update the checkpoint information. We do this even if the cluster
7737 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7738 * segments recycled below.
7739 */
7740 ControlFile->checkPoint = lastCheckPointRecPtr;
7741 ControlFile->checkPointCopy = lastCheckPoint;
7742
7743 /*
7744 * Ensure minRecoveryPoint is past the checkpoint record and update it
7745 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7746 * this will have happened already while writing out dirty buffers,
7747 * but not necessarily - e.g. because no buffers were dirtied. We do
7748 * this because a backup performed in recovery uses minRecoveryPoint
7749 * to determine which WAL files must be included in the backup, and
7750 * the file (or files) containing the checkpoint record must be
7751 * included, at a minimum. Note that for an ordinary restart of
7752 * recovery there's no value in having the minimum recovery point any
7753 * earlier than this anyway, because redo will begin just after the
7754 * checkpoint record.
7755 */
/* NOTE(review): line 7756 (the "if" that opens the brace below) is missing; per the comment above it presumably tests for DB_IN_ARCHIVE_RECOVERY -- confirm. */
7757 {
7758 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7759 {
7760 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7762
7763 /* update local copy */
7766 }
7767 if (flags & CHECKPOINT_IS_SHUTDOWN)
/* NOTE(review): line 7768 (the body of this "if") and line 7770 are missing from this listing. */
7769 }
7771 }
7772 LWLockRelease(ControlFileLock);
7773
7774 /*
7775 * Update the average distance between checkpoints/restartpoints if the
7776 * prior checkpoint exists.
7777 */
7778 if (PriorRedoPtr != InvalidXLogRecPtr)
/* NOTE(review): line 7779 (the body of this "if") is missing from this listing. */
7780
7781 /*
7782 * Delete old log files, those no longer needed for last restartpoint to
7783 * prevent the disk holding the xlog from growing full.
7784 */
/* NOTE(review): line 7785 is missing; _logSegNo is used below, so it is presumably initialised here -- confirm. */
7786
7787 /*
7788 * Retreat _logSegNo using the current end of xlog replayed or received,
7789 * whichever is later.
7790 */
7791 receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7792 replayPtr = GetXLogReplayRecPtr(&replayTLI);
/* endptr is the later of the received and replayed positions. */
7793 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7794 KeepLogSeg(endptr, &_logSegNo);
/* NOTE(review): lines 7795 and 7797 (opening and closing of the "if" whose arguments appear on 7796) and line 7803 are missing. */
7796 _logSegNo, InvalidOid,
7798 {
7799 /*
7800 * Some slots have been invalidated; recalculate the old-segment
7801 * horizon, starting again from RedoRecPtr.
7802 */
7804 KeepLogSeg(endptr, &_logSegNo);
7805 }
7806 _logSegNo--;
7807
7808 /*
7809 * Try to recycle segments on a useful timeline. If we've been promoted
7810 * since the beginning of this restartpoint, use the new timeline chosen
7811 * at end of recovery. If we're still in recovery, use the timeline we're
7812 * currently replaying.
7813 *
7814 * There is no guarantee that the WAL segments will be useful on the
7815 * current timeline; if recovery proceeds to a new timeline right after
7816 * this, the pre-allocated WAL segments on this timeline will not be used,
7817 * and will go wasted until recycled on the next restartpoint. We'll live
7818 * with that.
7819 */
7820 if (!RecoveryInProgress())
7821 replayTLI = XLogCtl->InsertTimeLineID;
7822
7823 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7824
7825 /*
7826 * Make more log segments if needed. (Do this after recycling old log
7827 * segments, since that may supply some of the needed files.)
7828 */
7829 PreallocXlogFiles(endptr, replayTLI);
7830
7831 /*
7832 * Truncate pg_subtrans if possible. We can throw away all data before
7833 * the oldest XMIN of any running transaction. No future transaction will
7834 * attempt to reference any pg_subtrans entry older than that (see Asserts
7835 * in subtrans.c). When hot standby is disabled, though, we mustn't do
7836 * this because StartupSUBTRANS hasn't been called yet.
7837 */
7838 if (EnableHotStandby)
/* NOTE(review): line 7839 (the body of this "if") is missing from this listing. */
7840
7841 /* Real work is done; log and update stats. */
7842 LogCheckpointEnd(true);
7843
7844 /* Reset the process title */
7845 update_checkpoint_display(flags, true, true);
7846
7847 xtime = GetLatestXTime();
/* NOTE(review): line 7848 (the start of the ereport call, including its log level) is missing. */
7849 (errmsg("recovery restart point at %X/%X",
7850 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7851 xtime ? errdetail("Last completed transaction was at log time %s.",
7852 timestamptz_to_str(xtime)) : 0));
7853
7854 /*
7855 * Finally, execute archive_cleanup_command, if any.
7856 */
7857 if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
/* NOTE(review): line 7858 (the call whose remaining arguments follow) is missing. */
7859 "archive_cleanup_command",
7860 false,
7861 WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7862
7863 return true;
7864}
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1861
int64 TimestampTz
Definition: timestamp.h:39
#define LOG
Definition: elog.h:31
bool IsUnderPostmaster
Definition: globals.c:119
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:362
BackendType MyBackendType
Definition: miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:545
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:543
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:544
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2725
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:646
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:647
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:85
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

/*
 * do_pg_abort_backup: take the system out of backup mode after a failure.
 *
 * (Defined at line 9418 of xlog.c.)
 *
 * The (code, arg) signature is that of a cleanup callback; 'code' is not
 * used.  'arg' is a boolean Datum:
 *
 *	true  - we are being called while do_pg_backup_start() is still in
 *			progress.  runningBackups has already been incremented, but
 *			sessionBackupState has not been touched yet.
 *	false - we are being called because the backend is going away.  Nothing
 *			is done unless sessionBackupState says a backup is running, and
 *			a WARNING is emitted when one is aborted this way.
 */
void
do_pg_abort_backup(int code, Datum arg)
{
	bool		during_backup_start = DatumGetBool(arg);

	/* If called during backup start, there shouldn't be one already running */
	Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);

	if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
	{
		/*
		 * The counter and the session state are changed together while all
		 * WAL insertion locks are held.
		 */
		WALInsertLockAcquireExclusive();
		Assert(XLogCtl->Insert.runningBackups > 0);
		XLogCtl->Insert.runningBackups--;

		sessionBackupState = SESSION_BACKUP_NONE;
		WALInsertLockRelease();

		if (!during_backup_start)
			ereport(WARNING,
					errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
	}
}
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
int runningBackups
Definition: xlog.c:438
static SessionBackupState sessionBackupState
Definition: xlog.c:391
@ SESSION_BACKUP_NONE
Definition: xlog.h:287

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

/*
 * do_pg_backup_start: put the server into backup mode and gather what a
 * base backup needs in order to start.
 *
 * (Defined at line 8816 of xlog.c.)
 *
 * backupidstr	 - backup label; ERROR if longer than MAXPGPATH bytes.
 * fast			 - if true, the starting checkpoint is requested with
 *				   CHECKPOINT_IMMEDIATE.
 * tablespaces	 - if not NULL, a tablespaceinfo for every tablespace found
 *				   under PG_TBLSPC_DIR is appended to this list.
 * state		 - backup state to fill in (name, checkpointloc, startpoint,
 *				   starttli, starttime, started_in_recovery); must not be
 *				   NULL.
 * tblspcmapfile - receives one "<dirname> <escaped link target>" line for
 *				   every symlinked tablespace.
 *
 * On success sessionBackupState becomes SESSION_BACKUP_RUNNING.  If an error
 * is raised after runningBackups has been incremented, do_pg_abort_backup()
 * is invoked to undo the increment.
 */
void
do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
				   BackupState *state, StringInfo tblspcmapfile)
{
	bool		backup_started_in_recovery;

	Assert(state != NULL);
	backup_started_in_recovery = RecoveryInProgress();

	/*
	 * During recovery, we don't need to check WAL level. Because, if WAL
	 * level is not sufficient, it's impossible to get here during recovery.
	 */
	if (!backup_started_in_recovery && !XLogIsNeeded())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("WAL level not sufficient for making an online backup"),
				 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));

	if (strlen(backupidstr) > MAXPGPATH)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("backup label too long (max %d bytes)",
						MAXPGPATH)));

	strlcpy(state->name, backupidstr, sizeof(state->name));

	/*
	 * Mark backup active in shared memory.  We must do full-page WAL writes
	 * during an on-line backup even if not doing so at other times, because
	 * it's quite possible for the backup dump to obtain a "torn" (partially
	 * written) copy of a database page if it reads the page concurrently with
	 * our write to the same page.  This can be fixed as long as the first
	 * write to the page in the WAL sequence is a full-page write. Hence, we
	 * increment runningBackups then force a CHECKPOINT, to ensure there are
	 * no dirty pages in shared memory that might get dumped while the backup
	 * is in progress without having a corresponding WAL record.  (Once the
	 * backup is complete, we need not force full-page writes anymore, since
	 * we expect that any pages not modified during the backup interval must
	 * have been correctly captured by the backup.)
	 *
	 * Note that forcing full-page writes has no effect during an online
	 * backup from the standby.
	 *
	 * We must hold all the insertion locks to change the value of
	 * runningBackups, to ensure adequate interlocking against
	 * XLogInsertRecord().
	 */
	WALInsertLockAcquireExclusive();
	XLogCtl->Insert.runningBackups++;
	WALInsertLockRelease();

	/*
	 * Ensure we decrement runningBackups if we fail below. NB -- for this to
	 * work correctly, it is critical that sessionBackupState is only updated
	 * after this block is over.
	 *
	 * The callback decodes its argument with DatumGetBool(), so hand it a
	 * boolean Datum.
	 */
	PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true));
	{
		bool		gotUniqueStartpoint = false;
		DIR		   *tblspcdir;
		struct dirent *de;
		tablespaceinfo *ti;
		int			datadirpathlen;

		/*
		 * Force an XLOG file switch before the checkpoint, to ensure that the
		 * WAL segment the checkpoint is written to doesn't contain pages with
		 * old timeline IDs.  That would otherwise happen if you called
		 * pg_backup_start() right after restoring from a PITR archive: the
		 * first WAL segment containing the startup checkpoint has pages in
		 * the beginning with the old timeline ID.  That can cause trouble at
		 * recovery: we won't have a history file covering the old timeline if
		 * pg_wal directory was not included in the base backup and the WAL
		 * archive was cleared too before starting the backup.
		 *
		 * This also ensures that we have emitted a WAL page header that has
		 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
		 * Therefore, if a WAL archiver (such as pglesslog) is trying to
		 * compress out removable backup blocks, it won't remove any that
		 * occur after this point.
		 *
		 * During recovery, we skip forcing XLOG file switch, which means that
		 * the backup taken during recovery is not available for the special
		 * recovery case described above.
		 */
		if (!backup_started_in_recovery)
			RequestXLogSwitch(false);

		do
		{
			bool		checkpointfpw;

			/*
			 * Force a CHECKPOINT.  Aside from being necessary to prevent torn
			 * page problems, this guarantees that two successive backup runs
			 * will have different checkpoint positions and hence different
			 * history file names, even if nothing happened in between.
			 *
			 * During recovery, establish a restartpoint if possible. We use
			 * the last restartpoint as the backup starting checkpoint. This
			 * means that two successive backup runs can have same checkpoint
			 * positions.
			 *
			 * Since the fact that we are executing do_pg_backup_start()
			 * during recovery means that checkpointer is running, we can use
			 * RequestCheckpoint() to establish a restartpoint.
			 *
			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
			 * passing fast = true).  Otherwise this can take awhile.
			 */
			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
							  (fast ? CHECKPOINT_IMMEDIATE : 0));

			/*
			 * Now we need to fetch the checkpoint record location, and also
			 * its REDO pointer.  The oldest point in WAL that would be needed
			 * to restore starting from the checkpoint is precisely the REDO
			 * pointer.
			 */
			LWLockAcquire(ControlFileLock, LW_SHARED);
			state->checkpointloc = ControlFile->checkPoint;
			state->startpoint = ControlFile->checkPointCopy.redo;
			state->starttli = ControlFile->checkPointCopy.ThisTimeLineID;
			checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
			LWLockRelease(ControlFileLock);

			if (backup_started_in_recovery)
			{
				XLogRecPtr	recptr;

				/*
				 * Check to see if all WAL replayed during online backup
				 * (i.e., since last restartpoint used as backup starting
				 * checkpoint) contain full-page writes.
				 */
				SpinLockAcquire(&XLogCtl->info_lck);
				recptr = XLogCtl->lastFpwDisableRecPtr;
				SpinLockRelease(&XLogCtl->info_lck);

				if (!checkpointfpw || state->startpoint <= recptr)
					ereport(ERROR,
							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
							 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
									"since last restartpoint"),
							 errhint("This means that the backup being taken on the standby "
									 "is corrupt and should not be used. "
									 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
									 "and then try an online backup again.")));

				/*
				 * During recovery, since we don't use the end-of-backup WAL
				 * record and don't write the backup history file, the
				 * starting WAL location doesn't need to be unique. This means
				 * that two base backups started at the same time might use
				 * the same checkpoint as starting locations.
				 */
				gotUniqueStartpoint = true;
			}

			/*
			 * If two base backups are started at the same time (in WAL sender
			 * processes), we need to make sure that they use different
			 * checkpoints as starting locations, because we use the starting
			 * WAL location as a unique identifier for the base backup in the
			 * end-of-backup WAL record and when we write the backup history
			 * file. Perhaps it would be better generate a separate unique ID
			 * for each backup instead of forcing another checkpoint, but
			 * taking a checkpoint right after another is not that expensive
			 * either because only few buffers have been dirtied yet.
			 */
			WALInsertLockAcquireExclusive();
			if (XLogCtl->Insert.lastBackupStart < state->startpoint)
			{
				XLogCtl->Insert.lastBackupStart = state->startpoint;
				gotUniqueStartpoint = true;
			}
			WALInsertLockRelease();
		} while (!gotUniqueStartpoint);

		/*
		 * Construct tablespace_map file.
		 */
		datadirpathlen = strlen(DataDir);

		/* Collect information about all tablespaces */
		tblspcdir = AllocateDir(PG_TBLSPC_DIR);
		while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
		{
			char		fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
			char		linkpath[MAXPGPATH];
			char	   *relpath = NULL;
			char	   *s;
			PGFileType	de_type;
			char	   *badp;
			unsigned long parsed;
			Oid			tsoid;

			/*
			 * Try to parse the directory name as an unsigned integer.
			 *
			 * Tablespace directories should be positive integers that can be
			 * represented in 32 bits, with no leading zeroes or trailing
			 * garbage. If we come across a name that doesn't meet those
			 * criteria, skip it.
			 *
			 * The first character must be a digit 1-9: that rules out leading
			 * zeroes as well as the whitespace and sign characters strtoul()
			 * would otherwise accept.
			 */
			if (de->d_name[0] < '1' || de->d_name[0] > '9')
				continue;
			errno = 0;
			parsed = strtoul(de->d_name, &badp, 10);
			if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
				continue;

			/*
			 * unsigned long can be wider than Oid; reject values that would
			 * be silently truncated by the assignment.
			 */
			tsoid = (Oid) parsed;
			if ((unsigned long) tsoid != parsed)
				continue;

			snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);

			de_type = get_dirent_type(fullpath, de, false, ERROR);

			if (de_type == PGFILETYPE_LNK)
			{
				StringInfoData escapedpath;
				int			rllen;

				rllen = readlink(fullpath, linkpath, sizeof(linkpath));
				if (rllen < 0)
				{
					ereport(WARNING,
							(errmsg("could not read symbolic link \"%s\": %m",
									fullpath)));
					continue;
				}
				else if (rllen >= sizeof(linkpath))
				{
					ereport(WARNING,
							(errmsg("symbolic link \"%s\" target is too long",
									fullpath)));
					continue;
				}
				linkpath[rllen] = '\0';

				/*
				 * Relpath holds the relative path of the tablespace directory
				 * when it's located within PGDATA, or NULL if it's located
				 * elsewhere.
				 */
				if (rllen > datadirpathlen &&
					strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
					IS_DIR_SEP(linkpath[datadirpathlen]))
					relpath = pstrdup(linkpath + datadirpathlen + 1);

				/*
				 * Add a backslash-escaped version of the link path to the
				 * tablespace map file.
				 */
				initStringInfo(&escapedpath);
				for (s = linkpath; *s; s++)
				{
					if (*s == '\n' || *s == '\r' || *s == '\\')
						appendStringInfoChar(&escapedpath, '\\');
					appendStringInfoChar(&escapedpath, *s);
				}
				appendStringInfo(tblspcmapfile, "%s %s\n",
								 de->d_name, escapedpath.data);
				pfree(escapedpath.data);
			}
			else if (de_type == PGFILETYPE_DIR)
			{
				/*
				 * It's possible to use allow_in_place_tablespaces to create
				 * directories directly under pg_tblspc, for testing purposes
				 * only.
				 *
				 * In this case, we store a relative path rather than an
				 * absolute path into the tablespaceinfo.
				 */
				snprintf(linkpath, sizeof(linkpath), "%s/%s",
						 PG_TBLSPC_DIR, de->d_name);
				relpath = pstrdup(linkpath);
			}
			else
			{
				/* Skip any other file type that appears here. */
				continue;
			}

			ti = palloc(sizeof(tablespaceinfo));
			ti->oid = tsoid;
			ti->path = pstrdup(linkpath);
			ti->rpath = relpath;
			ti->size = -1;

			if (tablespaces)
				*tablespaces = lappend(*tablespaces, ti);
		}
		FreeDir(tblspcdir);

		state->starttime = (pg_time_t) time(NULL);
	}
	PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true));

	state->started_in_recovery = backup_started_in_recovery;

	/*
	 * Mark that the start phase has correctly finished for the backup.
	 */
	sessionBackupState = SESSION_BACKUP_RUNNING;
}
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:995
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:526
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:70
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:32
#define relpath(rlocator, forknum)
Definition: relpath.h:102
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:551
XLogRecPtr lastBackupStart
Definition: xlog.c:439
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8081
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9418
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:288
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

/*
 * do_pg_backup_stop: take the server out of backup mode and finish the
 * backup described by 'state'.
 *
 * (Defined at line 9144 of xlog.c.)
 *
 * state		  - backup state filled in by do_pg_backup_start(); must not
 *					be NULL.  stoppoint and stoptli are set here; stoptime is
 *					set only when not in recovery.
 * waitforarchive - if true and WAL archiving applies to this server, do not
 *					return until the last WAL segment of the backup and the
 *					backup history file are no longer waiting to be archived.
 *
 * Outside recovery this writes an XLOG_BACKUP_END record, forces a WAL
 * segment switch and writes the backup history file.  During recovery none
 * of that happens; the control file's minimum recovery point is reported as
 * the stop location instead.
 *
 * Raises ERROR if the WAL level is insufficient, if a standby was promoted
 * while the backup was running, if WAL without full-page writes was replayed
 * during the backup, or if the history file cannot be written.
 */
void
do_pg_backup_stop(BackupState *state, bool waitforarchive)
{
	bool		backup_stopped_in_recovery = false;
	char		histfilepath[MAXPGPATH];
	char		lastxlogfilename[MAXFNAMELEN];
	char		histfilename[MAXFNAMELEN];
	XLogSegNo	_logSegNo;
	FILE	   *fp;
	int			seconds_before_warning;
	int			waits = 0;
	bool		reported_waiting = false;

	Assert(state != NULL);

	backup_stopped_in_recovery = RecoveryInProgress();

	/*
	 * During recovery, we don't need to check WAL level. Because, if WAL
	 * level is not sufficient, it's impossible to get here during recovery.
	 */
	if (!backup_stopped_in_recovery && !XLogIsNeeded())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("WAL level not sufficient for making an online backup"),
				 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));

	/*
	 * OK to update backup counter and session-level lock.
	 *
	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
	 * otherwise they can be updated inconsistently, which might cause
	 * do_pg_abort_backup() to fail.
	 */
	WALInsertLockAcquireExclusive();

	/*
	 * It is expected that each do_pg_backup_start() call is matched by
	 * exactly one do_pg_backup_stop() call.
	 */
	Assert(XLogCtl->Insert.runningBackups > 0);
	XLogCtl->Insert.runningBackups--;

	/*
	 * Clean up session-level lock.
	 *
	 * You might think that WALInsertLockRelease() can be called before
	 * cleaning up session-level lock because session-level lock doesn't need
	 * to be protected with WAL insertion lock. But since
	 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
	 * cleaned up before it.
	 */
	sessionBackupState = SESSION_BACKUP_NONE;

	WALInsertLockRelease();

	/*
	 * If we are taking an online backup from the standby, we confirm that the
	 * standby has not been promoted during the backup.
	 */
	if (state->started_in_recovery && !backup_stopped_in_recovery)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("the standby was promoted during online backup"),
				 errhint("This means that the backup being taken is corrupt "
						 "and should not be used. "
						 "Try taking another online backup.")));

	/*
	 * During recovery, we don't write an end-of-backup record. We assume that
	 * pg_control was backed up last and its minimum recovery point can be
	 * available as the backup end location. Since we don't have an
	 * end-of-backup record, we use the pg_control value to check whether
	 * we've reached the end of backup when starting recovery from this
	 * backup. We have no way of checking if pg_control wasn't backed up last
	 * however.
	 *
	 * We don't force a switch to new WAL file but it is still possible to
	 * wait for all the required files to be archived if waitforarchive is
	 * true. This is okay if we use the backup to start a standby and fetch
	 * the missing WAL using streaming replication. But in the case of an
	 * archive recovery, a user should set waitforarchive to true and wait for
	 * them to be archived to ensure that all the required files are
	 * available.
	 *
	 * We return the current minimum recovery point as the backup end
	 * location.  Note that it can be greater than the exact backup end
	 * location if the minimum recovery point is updated after the backup of
	 * pg_control.  This is harmless for current uses.
	 *
	 * XXX currently a backup history file is for informational and debug
	 * purposes only. It's not essential for an online backup. Furthermore,
	 * even if it's created, it will not be archived during recovery because
	 * an archiver is not invoked. So it doesn't seem worthwhile to write a
	 * backup history file during recovery.
	 */
	if (backup_stopped_in_recovery)
	{
		XLogRecPtr	recptr;

		/*
		 * Check to see if all WAL replayed during online backup contain
		 * full-page writes.
		 */
		SpinLockAcquire(&XLogCtl->info_lck);
		recptr = XLogCtl->lastFpwDisableRecPtr;
		SpinLockRelease(&XLogCtl->info_lck);

		if (state->startpoint <= recptr)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
							"during online backup"),
					 errhint("This means that the backup being taken on the standby "
							 "is corrupt and should not be used. "
							 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
							 "and then try an online backup again.")));


		LWLockAcquire(ControlFileLock, LW_SHARED);
		state->stoppoint = ControlFile->minRecoveryPoint;
		state->stoptli = ControlFile->minRecoveryPointTLI;
		LWLockRelease(ControlFileLock);
	}
	else
	{
		char	   *history_file;

		/*
		 * Write the backup-end xlog record
		 */
		XLogBeginInsert();
		XLogRegisterData(&state->startpoint,
						 sizeof(state->startpoint));
		state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);

		/*
		 * Given that we're not in recovery, InsertTimeLineID is set and can't
		 * change, so we can read it without a lock.
		 */
		state->stoptli = XLogCtl->InsertTimeLineID;

		/*
		 * Force a switch to a new xlog segment file, so that the backup is
		 * valid as soon as archiver moves out the current segment file.
		 */
		RequestXLogSwitch(false);

		state->stoptime = (pg_time_t) time(NULL);

		/*
		 * Write the backup history file
		 */
		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
		BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
							  state->startpoint, wal_segment_size);
		fp = AllocateFile(histfilepath, "w");
		if (!fp)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create file \"%s\": %m",
							histfilepath)));

		/* Build and save the contents of the backup history file */
		history_file = build_backup_content(state, true);
		fprintf(fp, "%s", history_file);
		pfree(history_file);

		if (fflush(fp) || ferror(fp) || FreeFile(fp))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write file \"%s\": %m",
							histfilepath)));

		/*
		 * Clean out any no-longer-needed history files.  As a side effect,
		 * this will post a .ready file for the newly created history file,
		 * notifying the archiver that history file may be archived
		 * immediately.
		 */
		CleanupBackupHistory();
	}

	/*
	 * If archiving is enabled, wait for all the required WAL files to be
	 * archived before returning. If archiving isn't enabled, the required WAL
	 * needs to be transported via streaming replication (hopefully with
	 * wal_keep_size set high enough), or some more exotic mechanism like
	 * polling and copying files from pg_wal with script. We have no knowledge
	 * of those mechanisms, so it's up to the user to ensure that he gets all
	 * the required WAL.
	 *
	 * We wait until both the last WAL file filled during backup and the
	 * history file have been archived, and assume that the alphabetic sorting
	 * property of the WAL files ensures any earlier WAL files are safely
	 * archived as well.
	 *
	 * We wait forever, since archive_command is supposed to work and we
	 * assume the admin wanted his backup to work completely. If you don't
	 * wish to wait, then either waitforarchive should be passed in as false,
	 * or you can set statement_timeout.  Also, some notices are issued to
	 * clue in anyone who might be doing this interactively.
	 */

	if (waitforarchive &&
		((!backup_stopped_in_recovery && XLogArchivingActive()) ||
		 (backup_stopped_in_recovery && XLogArchivingAlways())))
	{
		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
		XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
					 wal_segment_size);

		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
		BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
							  state->startpoint, wal_segment_size);

		seconds_before_warning = 60;
		waits = 0;

		/* Poll roughly once per second; 'waits' counts the iterations. */
		while (XLogArchiveIsBusy(lastxlogfilename) ||
			   XLogArchiveIsBusy(histfilename))
		{
			CHECK_FOR_INTERRUPTS();

			if (!reported_waiting && waits > 5)
			{
				ereport(NOTICE,
						(errmsg("base backup done, waiting for required WAL segments to be archived")));
				reported_waiting = true;
			}

			(void) WaitLatch(MyLatch,
							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
							 1000L,
							 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
			ResetLatch(MyLatch);

			if (++waits >= seconds_before_warning)
			{
				seconds_before_warning *= 2;	/* This wraps in >10 years... */
				ereport(WARNING,
						(errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
								waits),
						 errhint("Check that your \"archive_command\" is executing properly.  "
								 "You can safely cancel this backup, "
								 "but the database backup will not be usable without all the WAL segments.")));
			}
		}

		ereport(NOTICE,
				(errmsg("all required WAL segments have been archived")));
	}
	else if (waitforarchive)
		ereport(NOTICE,
				(errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
}
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
#define NOTICE
Definition: elog.h:35
int FreeFile(FILE *file)
Definition: fd.c:2803
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2605
struct Latch * MyLatch
Definition: globals.c:62
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
static void CleanupBackupHistory(void)
Definition: xlog.c:4182
#define XLogArchivingAlways()
Definition: xlog.h:102
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9125 of file xlog.c.

9126{
9127 return sessionBackupState;
9128}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8614 of file xlog.c.

8615{
8616 int o_direct_flag = 0;
8617
8618 /*
8619 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8620 * written by walreceiver is normally read by the startup process soon
8621 * after it's written. Also, walreceiver performs unaligned writes, which
8622 * don't work with O_DIRECT, so it is required for correctness too.
8623 */
8625 o_direct_flag = PG_O_DIRECT;
8626
8627 /* If fsync is disabled, never open in sync mode */
8628 if (!enableFsync)
8629 return o_direct_flag;
8630
8631 switch (method)
8632 {
8633 /*
8634 * enum values for all sync options are defined even if they are
8635 * not supported on the current platform. But if not, they are
8636 * not included in the enum option array, and therefore will never
8637 * be seen here.
8638 */
8642 return o_direct_flag;
8643#ifdef O_SYNC
8645 return O_SYNC | o_direct_flag;
8646#endif
8647#ifdef O_DSYNC
8649 return O_DSYNC | o_direct_flag;
8650#endif
8651 default:
8652 /* can't happen (unless we are out of sync with option array) */
8653 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8654 return 0; /* silence warning */
8655 }
8656}
int io_direct_flags
Definition: fd.c:167
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:128
#define AmWalReceiverProcess()
Definition: miscadmin.h:389
#define O_DSYNC
Definition: win32_port.h:342
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4880 of file xlog.c.

4881{
4882 return ControlFile->wal_level;
4883}

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4625 of file xlog.c.

4626{
4628}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:522

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool *  doPageWrites_p 
)

Definition at line 6488 of file xlog.c.

6489{
6490 *RedoRecPtr_p = RedoRecPtr;
6491 *doPageWrites_p = doPageWrites;
6492}
static bool doPageWrites
Definition: xlog.c:286

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6503 of file xlog.c.

6504{
6505 XLogRecPtr recptr;
6506
6507 SpinLockAcquire(&XLogCtl->info_lck);
6508 recptr = XLogCtl->LogwrtRqst.Write;
6509 SpinLockRelease(&XLogCtl->info_lck);
6510
6511 return recptr;
6512}

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6577 of file xlog.c.

6578{
6579 XLogRecPtr res = InvalidXLogRecPtr;
6580 int i;
6581
6582 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6583 {
6584 XLogRecPtr last_important;
6585
6586 /*
6587 * Need to take a lock to prevent torn reads of the LSN, which are
6588 * possible on some of the supported platforms. WAL insert locks only
6589 * support exclusive mode, so we have to use that.
6590 */
6591 LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6592 last_important = WALInsertLocks[i].l.lastImportantAt;
6593 LWLockRelease(&WALInsertLocks[i].l.lock);
6594
6595 if (res < last_important)
6596 res = last_important;
6597 }
6598
6599 return res;
6600}
int i
Definition: isn.c:72
XLogRecPtr lastImportantAt
Definition: xlog.c:371
WALInsertLock l
Definition: xlog.c:383
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:569
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:150

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6606 of file xlog.c.

6607{
6608 pg_time_t result;
6609
6610 /* Need WALWriteLock, but shared lock is sufficient */
6611 LWLockAcquire(WALWriteLock, LW_SHARED);
6612 result = XLogCtl->lastSegSwitchTime;
6613 *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6614 LWLockRelease(WALWriteLock);
6615
6616 return result;
6617}
pg_time_t lastSegSwitchTime
Definition: xlog.c:467
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:468

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4599 of file xlog.c.

4600{
4601 Assert(ControlFile != NULL);
4602 return ControlFile->mock_authentication_nonce;
4603}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6391 of file xlog.c.

6392{
6393 RecoveryState retval;
6394
6395 SpinLockAcquire(&XLogCtl->info_lck);
6396 retval = XLogCtl->SharedRecoveryState;
6397 SpinLockRelease(&XLogCtl->info_lck);
6398
6399 return retval;
6400}
RecoveryState
Definition: xlog.h:90

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6458 of file xlog.c.

6459{
6460 XLogRecPtr ptr;
6461
6462 /*
6463 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6464 * grabbed a WAL insertion lock to read the authoritative value in
6465 * Insert->RedoRecPtr, someone might update it just after we've released
6466 * the lock.
6467 */
6468 SpinLockAcquire(&XLogCtl->info_lck);
6469 ptr = XLogCtl->RedoRecPtr;
6470 SpinLockRelease(&XLogCtl->info_lck);
6471
6472 if (RedoRecPtr < ptr)
6473 RedoRecPtr = ptr;
6474
6475 return RedoRecPtr;
6476}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), pgstat_before_server_shutdown(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7890 of file xlog.c.

7891{
7892 XLogRecPtr currpos; /* current write LSN */
7893 XLogSegNo currSeg; /* segid of currpos */
7894 XLogSegNo targetSeg; /* segid of targetLSN */
7895 XLogSegNo oldestSeg; /* actual oldest segid */
7896 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7897 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7898 uint64 keepSegs;
7899
7900 /*
7901 * slot does not reserve WAL. Either deactivated, or has never been active
7902 */
7903 if (XLogRecPtrIsInvalid(targetLSN))
7904 return WALAVAIL_INVALID_LSN;
7905
7906 /*
7907 * Calculate the oldest segment currently reserved by all slots,
7908 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7909 * oldestSlotSeg to the current segment.
7910 */
7911 currpos = GetXLogWriteRecPtr();
7912 XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7913 KeepLogSeg(currpos, &oldestSlotSeg);
7914
7915 /*
7916 * Find the oldest extant segment file. We get 1 until a checkpoint removes
7917 * the first WAL segment file since startup, which can make the status
7918 * wrong under certain abnormal conditions, but that does no actual harm.
7919 */
7920 oldestSeg = XLogGetLastRemovedSegno() + 1;
7921
7922 /* calculate oldest segment by max_wal_size */
7923 XLByteToSeg(currpos, currSeg, wal_segment_size);
7924 keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7925
7926 if (currSeg > keepSegs)
7927 oldestSegMaxWalSize = currSeg - keepSegs;
7928 else
7929 oldestSegMaxWalSize = 1;
7930
7931 /* the segment we care about */
7932 XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7933
7934 /*
7935 * No point in returning reserved or extended status values if the
7936 * targetSeg is known to be lost.
7937 */
7938 if (targetSeg >= oldestSlotSeg)
7939 {
7940 /* show "reserved" when targetSeg is within max_wal_size */
7941 if (targetSeg >= oldestSegMaxWalSize)
7942 return WALAVAIL_RESERVED;
7943
7944 /* being retained by slots exceeding max_wal_size */
7945 return WALAVAIL_EXTENDED;
7946 }
7947
7948 /* WAL segments are no longer retained but haven't been removed yet */
7949 if (targetSeg >= oldestSeg)
7950 return WALAVAIL_UNRESERVED;
7951
7952 /* Definitely lost */
7953 return WALAVAIL_REMOVED;
7954}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3779
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9475
@ WALAVAIL_REMOVED
Definition: xlog.h:194
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6541 of file xlog.c.

6542{
6543 Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6544
6545 /* Since the value can't be changing, no lock is required. */
6546 return XLogCtl->InsertTimeLineID;
6547}

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6557 of file xlog.c.

6558{
6559 TimeLineID insertTLI;
6560
6561 SpinLockAcquire(&XLogCtl->info_lck);
6562 insertTLI = XLogCtl->InsertTimeLineID;
6563 SpinLockRelease(&XLogCtl->info_lck);
6564
6565 return insertTLI;
6566}

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1632 of file xlog.c.

1633{
1634 int idx;
1635 XLogRecPtr endptr;
1636 static uint64 cachedPage = 0;
1637 static char *cachedPos = NULL;
1638 XLogRecPtr expectedEndPtr;
1639
1640 /*
1641 * Fast path for the common case that we need to access again the same
1642 * page as last time.
1643 */
1644 if (ptr / XLOG_BLCKSZ == cachedPage)
1645 {
1646 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1647 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1648 return cachedPos + ptr % XLOG_BLCKSZ;
1649 }
1650
1651 /*
1652 * The XLog buffer cache is organized so that a page is always loaded to a
1653 * particular buffer. That way we can easily calculate the buffer a given
1654 * page must be loaded into, from the XLogRecPtr alone.
1655 */
1656 idx = XLogRecPtrToBufIdx(ptr);
1657
1658 /*
1659 * See what page is loaded in the buffer at the moment. It could be the
1660 * page we're looking for, or something older. It can't be anything newer
1661 * - that would imply the page we're looking for has already been written
1662 * out to disk and evicted, and the caller is responsible for making sure
1663 * that doesn't happen.
1664 *
1665 * We don't hold a lock while we read the value. If someone is just about
1666 * to initialize or has just initialized the page, it's possible that we
1667 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1668 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1669 * we're looking for.
1670 */
1671 expectedEndPtr = ptr;
1672 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1673
1674 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1675 if (expectedEndPtr != endptr)
1676 {
1677 XLogRecPtr initializedUpto;
1678
1679 /*
1680 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1681 * know how far we're finished with inserting the record.
1682 *
1683 * NB: If 'ptr' points to just after the page header, advertise a
1684 * position at the beginning of the page rather than 'ptr' itself. If
1685 * there are no other insertions running, someone might try to flush
1686 * up to our advertised location. If we advertised a position after
1687 * the page header, someone might try to flush the page header, even
1688 * though the page might not actually be initialized yet. As the first
1689 * inserter on the page, we are effectively responsible for making
1690 * sure that it's initialized before we let insertingAt move past
1691 * the page header.
1692 */
1693 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1694 XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1695 initializedUpto = ptr - SizeOfXLogShortPHD;
1696 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1697 XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1698 initializedUpto = ptr - SizeOfXLogLongPHD;
1699 else
1700 initializedUpto = ptr;
1701
1702 WALInsertLockUpdateInsertingAt(initializedUpto);
1703
1704 AdvanceXLInsertBuffer(ptr, tli, false);
1705 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1706
1707 if (expectedEndPtr != endptr)
1708 elog(PANIC, "could not find WAL buffer for %X/%X",
1709 LSN_FORMAT_ARGS(ptr));
1710 }
1711 else
1712 {
1713 /*
1714 * Make sure the initialization of the page is visible to us, and
1715 * won't arrive later to overwrite the WAL data we write on the page.
1716 */
1717 pg_memory_barrier();
1718 }
1719
1720 /*
1721 * Found the buffer holding this page. Return a pointer to the right
1722 * offset within the page.
1723 */
1724 cachedPage = ptr / XLOG_BLCKSZ;
1725 cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1726
1727 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1728 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1729
1730 return cachedPos + ptr % XLOG_BLCKSZ;
1731}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_memory_barrier()
Definition: atomics.h:143
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1471
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1985

References AdvanceXLInsertBuffer(), Assert, elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9459 of file xlog.c.

9460{
9461 XLogCtlInsert *Insert = &XLogCtl->Insert;
9462 uint64 current_bytepos;
9463
9464 SpinLockAcquire(&Insert->insertpos_lck);
9465 current_bytepos = Insert->CurrBytePos;
9466 SpinLockRelease(&Insert->insertpos_lck);
9467
9468 return XLogBytePosToRecPtr(current_bytepos);
9469}

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4225 of file xlog.c.

4226{
4227 char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4228
4229 /*
4230 * Generate a random nonce. This is used for authentication requests that
4231 * will fail because the user does not exist. The nonce is used to create
4232 * a genuine-looking password challenge for the non-existent user, in lieu
4233 * of an actual stored password.
4234 */
4235 if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4236 ereport(PANIC,
4237 (errcode(ERRCODE_INTERNAL_ERROR),
4238 errmsg("could not generate secret authorization token")));
4239
4240 memset(ControlFile, 0, sizeof(ControlFileData));
4241 /* Initialize pg_control status fields */
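4242 ControlFile->system_identifier = sysidentifier;
4243 memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4244 ControlFile->state = DB_SHUTDOWNED;
4245 ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4246
4247 /* Set important parameter values for use when replaying WAL */
4248 ControlFile->MaxConnections = MaxConnections;
4249 ControlFile->max_worker_processes = max_worker_processes;
4250 ControlFile->max_wal_senders = max_wal_senders;
4251 ControlFile->max_prepared_xacts = max_prepared_xacts;
4252 ControlFile->max_locks_per_xact = max_locks_per_xact;
4253 ControlFile->wal_level = wal_level;
4254 ControlFile->wal_log_hints = wal_log_hints;
4255 ControlFile->track_commit_timestamp = track_commit_timestamp;
4256 ControlFile->data_checksum_version = data_checksum_version;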
4257}
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:185
bool wal_log_hints
Definition: xlog.c:123
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4805 of file xlog.c.

4806{
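4807 Assert(process_shared_preload_libraries_done);
4808
4809 if (check_wal_consistency_checking_deferred)
4810 {
4811 struct config_generic *guc;
4812
4813 guc = find_option("wal_consistency_checking", false, false, ERROR);
4814
4815 check_wal_consistency_checking_deferred = false;
4816
4817 set_config_option_ext("wal_consistency_checking",
4818 wal_consistency_checking_string,
4819 guc->scontext, guc->source, guc->srole,
4820 GUC_ACTION_SET, true, ERROR, false);
4821
4822 /* checking should not be deferred again */
4823 Assert(!check_wal_consistency_checking_deferred);
4824 }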
4825}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3382
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1235
@ GUC_ACTION_SET
Definition: guc.h:203
GucContext scontext
Definition: guc_tables.h:184
GucSource source
Definition: guc_tables.h:182
char * wal_consistency_checking_string
Definition: xlog.c:125

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3584 of file xlog.c.

3586{
3587 char path[MAXPGPATH];
3588 struct stat stat_buf;
3589
3590 Assert(tli != 0);
3591
3592 XLogFilePath(path, tli, *segno, wal_segment_size);
3593
3594 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3595 if (!XLogCtl->InstallXLogFileSegmentActive)
3596 {
3597 LWLockRelease(ControlFileLock);
3598 return false;
3599 }
3600
3601 if (!find_free)
3602 {
3603 /* Force installation: get rid of any pre-existing segment file */
3604 durable_unlink(path, DEBUG1);
3605 }
3606 else
3607 {
3608 /* Find a free slot to put it in */
3609 while (stat(path, &stat_buf) == 0)
3610 {
3611 if ((*segno) >= max_segno)
3612 {
3613 /* Failed to find a free slot within specified range */
3614 LWLockRelease(ControlFileLock);
3615 return false;
3616 }
3617 (*segno)++;
3618 XLogFilePath(path, tli, *segno, wal_segment_size);
3619 }
3620 }
3621
3622 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3623 if (durable_rename(tmppath, path, LOG) != 0)
3624 {
3625 LWLockRelease(ControlFileLock);
3626 /* durable_rename already emitted log message */
3627 return false;
3628 }
3629
3630 LWLockRelease(ControlFileLock);
3631
3632 return true;
3633}
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:871
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:526
#define stat
Definition: win32_port.h:274

References Assert, DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9516 of file xlog.c.

9517{
9518 bool result;
9519
9520 LWLockAcquire(ControlFileLock, LW_SHARED);
9521 result = XLogCtl->InstallXLogFileSegmentActive;
9522 LWLockRelease(ControlFileLock);
9523
9524 return result;
9525}

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8704 of file xlog.c.

8705{
8706 char *msg = NULL;
8708
8709 Assert(tli != 0);
8710
8711 /*
8712 * Quick exit if fsync is disabled or write() has already synced the WAL
8713 * file.
8714 */
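8715 if (!enableFsync ||
8716 wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8717 wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8718 return;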
8719
8720 /*
8721 * Measure I/O timing to sync the WAL file for pg_stat_io and/or
8722 * pg_stat_wal.
8723 */
8725
8726 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8727 switch (wal_sync_method)
8728 {
8729 case WAL_SYNC_METHOD_FSYNC:
8730 if (pg_fsync_no_writethrough(fd) != 0)
8731 msg = _("could not fsync file \"%s\": %m");
8732 break;
8733#ifdef HAVE_FSYNC_WRITETHROUGH
8734 case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8735 if (pg_fsync_writethrough(fd) != 0)
8736 msg = _("could not fsync write-through file \"%s\": %m");
8737 break;
8738#endif
8739 case WAL_SYNC_METHOD_FDATASYNC:
8740 if (pg_fdatasync(fd) != 0)
8741 msg = _("could not fdatasync file \"%s\": %m");
8742 break;
8743 case WAL_SYNC_METHOD_OPEN:
8744 case WAL_SYNC_METHOD_OPEN_DSYNC:
8745 /* not reachable */
8746 Assert(false);
8747 break;
8748 default:
8749 ereport(PANIC,
8750 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8751 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8752 break;
8753 }
8754
8755 /* PANIC if failed to fsync */
8756 if (msg)
8757 {
8758 char xlogfname[MAXFNAMELEN];
8759 int save_errno = errno;
8760
8761 XLogFileName(xlogfname, tli, segno, wal_segment_size);
8762 errno = save_errno;
8763 ereport(PANIC,
8764 (errcode_for_file_access(),
8765 errmsg(msg, xlogfname)));
8766 }
8767
8769
8770 /*
8771 * Increment the I/O timing and the number of times WAL files were synced.
8772 */
8774 {
8775 instr_time end;
8776
8779 }
8780
8782 start, 1, 0);
8783
8785}
bool track_io_timing
Definition: bufmgr.c:143
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:440
int pg_fdatasync(int fd)
Definition: fd.c:479
int pg_fsync_writethrough(int fd)
Definition: fd.c:460
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
@ IOOBJECT_WAL
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:287
@ IOOP_FSYNC
Definition: pgstat.h:306
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:90
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:120
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:501
PgStat_Counter wal_sync
Definition: pgstat.h:499
bool track_wal_io_timing
Definition: xlog.c:137

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_io_timing, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 7974 of file xlog.c.

7975{
7976 XLogSegNo currSegNo;
7977 XLogSegNo segno;
7978 XLogRecPtr keep;
7979
7980 XLByteToSeg(recptr, currSegNo, wal_segment_size);
7981 segno = currSegNo;
7982
7983 /*
7984 * Calculate how many segments are kept by slots first, adjusting for
7985 * max_slot_wal_keep_size.
7986 */
7987 keep = XLogGetReplicationSlotMinimumLSN();
7988 if (keep != InvalidXLogRecPtr && keep < recptr)
7989 {
7990 XLByteToSeg(keep, segno, wal_segment_size);
7991
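7992 /* Cap by max_slot_wal_keep_size ... */
7993 if (max_slot_wal_keep_size_mb >= 0)
7994 {
7995 uint64 slot_keep_segs;
7996
7997 slot_keep_segs =
7998 ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
7999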
8000 if (currSegNo - segno > slot_keep_segs)
8001 segno = currSegNo - slot_keep_segs;
8002 }
8003 }
8004
8005 /*
8006 * If WAL summarization is in use, don't remove WAL that has yet to be
8007 * summarized.
8008 */
8009 keep = GetOldestUnsummarizedLSN(NULL, NULL);
8010 if (keep != InvalidXLogRecPtr)
8011 {
8012 XLogSegNo unsummarized_segno;
8013
8014 XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
8015 if (unsummarized_segno < segno)
8016 segno = unsummarized_segno;
8017 }
8018
8019 /* but, keep at least wal_keep_size if that's set */
8020 if (wal_keep_size_mb > 0)
8021 {
8022 uint64 keep_segs;
8023
8024 keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
8025 if (currSegNo - segno < keep_segs)
8026 {
8027 /* avoid underflow, don't go below 1 */
8028 if (currSegNo <= keep_segs)
8029 segno = 1;
8030 else
8031 segno = currSegNo - keep_segs;
8032 }
8033 }
8034
8035 /* don't delete WAL segments newer than the calculated segment */
8036 if (segno < *logSegNo)
8037 *logSegNo = segno;
8038}
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition: xlog.c:116
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2704
int max_slot_wal_keep_size_mb
Definition: xlog.c:135

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4867 of file xlog.c.

4868{
4869 Assert(reset || ControlFile == NULL);
4870 ControlFile = palloc(sizeof(ControlFileData));
4871 ReadControlFile();
4872}
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6443 of file xlog.c.

6444{
6445 int oldXLogAllowed = LocalXLogInsertAllowed;
6446
6447 LocalXLogInsertAllowed = 1;
6448
6449 return oldXLogAllowed;
6450}

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6702 of file xlog.c.

6703{
6704 long write_msecs,
6705 sync_msecs,
6706 total_msecs,
6707 longest_msecs,
6708 average_msecs;
6709 uint64 average_sync_time;
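6710
6711 CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
6712
6713 write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
6714 CheckpointStats.ckpt_sync_t);
6715
6716 sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
6717 CheckpointStats.ckpt_sync_end_t);
6718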
6719 /* Accumulate checkpoint timing summary data, in milliseconds. */
6720 PendingCheckpointerStats.write_time += write_msecs;
6721 PendingCheckpointerStats.sync_time += sync_msecs;
6722
6723 /*
6724 * All of the published timing statistics are accounted for. Only
6725 * continue if a log message is to be written.
6726 */
6727 if (!log_checkpoints)
6728 return;
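6729
6730 total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
6731 CheckpointStats.ckpt_end_t);
6732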
6733 /*
6734 * Timing values returned from CheckpointStats are in microseconds.
6735 * Convert to milliseconds for consistent printing.
6736 */
6737 longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6738
6739 average_sync_time = 0;
6740 if (CheckpointStats.ckpt_sync_rels > 0)
6741 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6742 CheckpointStats.ckpt_sync_rels;
6743 average_msecs = (long) ((average_sync_time + 999) / 1000);
6744
6745 /*
6746 * ControlFileLock is not required to see ControlFile->checkPoint and
6747 * ->checkPointCopy here as we are the only updater of those variables at
6748 * this moment.
6749 */
6750 if (restartpoint)
6751 ereport(LOG,
6752 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6753 "wrote %d SLRU buffers; %d WAL file(s) added, "
6754 "%d removed, %d recycled; write=%ld.%03d s, "
6755 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6756 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6757 "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
6764 write_msecs / 1000, (int) (write_msecs % 1000),
6765 sync_msecs / 1000, (int) (sync_msecs % 1000),
6766 total_msecs / 1000, (int) (total_msecs % 1000),
6768 longest_msecs / 1000, (int) (longest_msecs % 1000),
6769 average_msecs / 1000, (int) (average_msecs % 1000),
6770 (int) (PrevCheckPointDistance / 1024.0),
6771 (int) (CheckPointDistanceEstimate / 1024.0),
6774 else
6775 ereport(LOG,
6776 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6777 "wrote %d SLRU buffers; %d WAL file(s) added, "
6778 "%d removed, %d recycled; write=%ld.%03d s, "
6779 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6780 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6781 "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
6788 write_msecs / 1000, (int) (write_msecs % 1000),
6789 sync_msecs / 1000, (int) (sync_msecs % 1000),
6790 total_msecs / 1000, (int) (total_msecs % 1000),
6792 longest_msecs / 1000, (int) (longest_msecs % 1000),
6793 average_msecs / 1000, (int) (average_msecs % 1000),
6794 (int) (PrevCheckPointDistance / 1024.0),
6795 (int) (CheckPointDistanceEstimate / 1024.0),
6798}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1756
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:176
uint64 ckpt_longest_sync
Definition: xlog.h:175
TimestampTz ckpt_end_t
Definition: xlog.h:165
int ckpt_slru_written
Definition: xlog.h:168
int ckpt_sync_rels
Definition: xlog.h:174
PgStat_Counter sync_time
Definition: pgstat.h:263
PgStat_Counter write_time
Definition: pgstat.h:262
static double CheckPointDistanceEstimate
Definition: xlog.c:159
static double PrevCheckPointDistance
Definition: xlog.c:160

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6670 of file xlog.c.

6671{
6672 if (restartpoint)
6673 ereport(LOG,
6674 /* translator: the placeholders show checkpoint options */
6675 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6676 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6677 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6678 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6679 (flags & CHECKPOINT_FORCE) ? " force" : "",
6680 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6681 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6682 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6683 (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6684 else
6685 ereport(LOG,
6686 /* translator: the placeholders show checkpoint options */
6687 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6688 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6689 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6690 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6691 (flags & CHECKPOINT_FORCE) ? " force" : "",
6692 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6693 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6694 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6695 (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6696}
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:148
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:143
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:149

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FLUSH_ALL, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6305 of file xlog.c.

/*
 * PerformRecoveryXLogAction -- choose how the end of recovery is recorded.
 *
 * Returns true when the promotion branch was taken (only that branch sets
 * "promoted"), false otherwise.
 *
 * NOTE(review): this listing omits several source lines (the embedded line
 * numbers jump 6321 -> 6324, 6335 -> 6337 and 6339 -> 6343), so the branch
 * condition and both branch bodies are only partly visible here.
 */
6306{
6307 bool promoted = false;
6308
6309 /*
6310 * Perform a checkpoint to update all our recovery activity to disk.
6311 *
6312 * Note that we write a shutdown checkpoint rather than an on-line one.
6313 * This is not particularly critical, but since we may be assigning a new
6314 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6315 * only changes in shutdown checkpoints, which allows some extra error
6316 * checking in xlog_redo.
6317 *
6318 * In promotion, only create a lightweight end-of-recovery record instead
6319 * of a full checkpoint. A checkpoint is requested later, after we're
6320 * fully out of recovery mode and already accepting queries.
6321 */
/*
 * NOTE(review): the "if" condition (source lines 6322-6323) is missing from
 * this listing.  Presumably it tests ArchiveRecoveryRequested,
 * IsUnderPostmaster and PromoteIsTriggered(), which appear in the References
 * list -- confirm against xlog.c.
 */
6324 {
6325 promoted = true;
6326
6327 /*
6328 * Insert a special WAL record to mark the end of recovery, since we
6329 * aren't doing a checkpoint. That means that the checkpointer process
6330 * may likely be in the middle of a time-smoothed restartpoint and
6331 * could continue to be for minutes after this. That sounds strange,
6332 * but the effect is roughly the same and it would be stranger to try
6333 * to come out of the restartpoint and then checkpoint. We request a
6334 * checkpoint later anyway, just for safety.
6335 */
/* NOTE(review): line 6336 is missing; presumably the CreateEndOfRecoveryRecord() call. */
6337 }
6338 else
6339 {
/*
 * NOTE(review): lines 6340-6342 are missing; presumably a RequestCheckpoint()
 * call using the CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE and
 * CHECKPOINT_WAIT flags named in the References list -- confirm.
 */
6343 }
6344
6345 return promoted;
6346}
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7396
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3711 of file xlog.c.

/*
 * PreallocXlogFiles -- create the next WAL segment ahead of time once the
 * current one is mostly used.
 *
 * endptr is the WAL position to measure from; tli is the timeline passed on
 * to XLogFileInitInternal() for the new segment.
 *
 * NOTE(review): source lines 3719 and 3731 are missing from this listing
 * (the condition of the early return and the statement executed when a
 * segment was added).  The References list suggests they involve
 * XLogCtl->InstallXLogFileSegmentActive and CheckpointStats.ckpt_segs_added
 * respectively -- confirm against xlog.c.
 */
3712{
3713 XLogSegNo _logSegNo;
3714 int lf;
3715 bool added;
3716 char path[MAXPGPATH];
3717 uint64 offset;
3718
3720 return; /* unlocked check says no */
3721
/* Segment containing the byte just before endptr, and the offset within it. */
3722 XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3723 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
/* Only act once at least 75% of that segment has been used. */
3724 if (offset >= (uint32) (0.75 * wal_segment_size))
3725 {
3726 _logSegNo++;
3727 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
/* The descriptor itself is not needed; close it if one was returned. */
3728 if (lf >= 0)
3729 close(lf);
3730 if (added)
3732 }
3733}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3213

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6268 of file xlog.c.

/*
 * ReachedEndOfBackup -- update the control file once replay has reached the
 * end of a base backup.
 *
 * EndRecPtr is the WAL position reached; minRecoveryPoint is advanced to it
 * if it is currently lower.  All of the work is done while holding
 * ControlFileLock exclusively.
 *
 * NOTE(review): source lines 6283 and 6286-6289 are missing from this
 * listing.  Judging by the existing comment and the References list they
 * presumably set minRecoveryPointTLI from tli, reset backupStartPoint /
 * backupEndPoint / backupEndRequired, and call UpdateControlFile() --
 * confirm against xlog.c.
 */
6269{
6270 /*
6271 * We have reached the end of base backup, as indicated by pg_control. The
6272 * data on disk is now consistent (unless minRecoveryPoint is further
6273 * ahead, which can happen if we crashed during previous recovery). Reset
6274 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6275 * make sure we don't allow starting up at an earlier point even if
6276 * recovery is stopped and restarted soon after this.
6277 */
6278 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6279
/* Never move minRecoveryPoint backwards. */
6280 if (ControlFile->minRecoveryPoint < EndRecPtr)
6281 {
6282 ControlFile->minRecoveryPoint = EndRecPtr;
6284 }
6285
6290
6291 LWLockRelease(ControlFileLock);
6292}
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4342 of file xlog.c.

/*
 * ReadControlFile -- load the control file into *ControlFile and verify that
 * the cluster is compatible with this server executable.
 *
 * Steps visible in this listing, in order:
 *	- open and read sizeof(ControlFileData) bytes; an open failure, a read
 *	  error or a short read is reported at PANIC level;
 *	- check the pg_control format version, then the CRC computed over the
 *	  bytes preceding the crc field; mismatches are reported at FATAL level;
 *	- compare the compile-time constants recorded in the file (catalog
 *	  version, MAXALIGN, float format, BLCKSZ, RELSEG_SIZE, XLOG_BLCKSZ,
 *	  NAMEDATALEN, INDEX_MAX_KEYS, TOAST_MAX_CHUNK_SIZE, LOBLKSIZE,
 *	  USE_FLOAT8_BYVAL) with this build; mismatches are FATAL;
 *	- validate the WAL segment size and the settings that depend on it
 *	  (reported at ERROR level), and publish "wal_segment_size" and
 *	  "data_checksums" through SetConfigOption().
 *
 * NOTE(review): the extraction dropped many source lines (see the gaps in
 * the embedded line numbers, e.g. 4351 -> 4353, 4385 -> 4387, 4421 -> 4423).
 * Among them are the open call, the errcode arguments of the file-access
 * reports, and the "if" conditions of several checks, so several ereport()
 * calls below appear without their guarding condition.  Consult xlog.c for
 * the complete code before relying on this listing.
 */
4343{
4344 pg_crc32c crc;
4345 int fd;
4346 char wal_segsz_str[20];
4347 int r;
4348
4349 /*
4350 * Read data...
4351 */
4353 O_RDWR | PG_BINARY);
4354 if (fd < 0)
4355 ereport(PANIC,
4357 errmsg("could not open file \"%s\": %m",
4359
4360 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4361 r = read(fd, ControlFile, sizeof(ControlFileData));
4362 if (r != sizeof(ControlFileData))
4363 {
/* r < 0 is a system-call failure (%m is meaningful); otherwise a short read. */
4364 if (r < 0)
4365 ereport(PANIC,
4367 errmsg("could not read file \"%s\": %m",
4369 else
4370 ereport(PANIC,
4372 errmsg("could not read file \"%s\": read %d of %zu",
4373 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4374 }
4376
4377 close(fd);
4378
4379 /*
4380 * Check for expected pg_control format version. If this is wrong, the
4381 * CRC check will likely fail because we'll be checking the wrong number
4382 * of bytes. Complaining about wrong version will probably be more
4383 * enlightening than complaining about wrong CRC.
4384 */
4385
4387 ereport(FATAL,
4388 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4389 errmsg("database files are incompatible with server"),
4390 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4391 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4394 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4395
4397 ereport(FATAL,
4398 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4399 errmsg("database files are incompatible with server"),
4400 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4401 " but the server was compiled with PG_CONTROL_VERSION %d.",
4403 errhint("It looks like you need to initdb.")));
4404
4405 /* Now check the CRC. */
4408 (char *) ControlFile,
4409 offsetof(ControlFileData, crc));
4410 FIN_CRC32C(crc);
4411
4412 if (!EQ_CRC32C(crc, ControlFile->crc))
4413 ereport(FATAL,
4414 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4415 errmsg("incorrect checksum in control file")));
4416
4417 /*
4418 * Do compatibility checking immediately. If the database isn't
4419 * compatible with the backend executable, we want to abort before we can
4420 * possibly do any damage.
4421 */
4423 ereport(FATAL,
4424 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4425 errmsg("database files are incompatible with server"),
4426 /* translator: %s is a variable name and %d is its value */
4427 errdetail("The database cluster was initialized with %s %d,"
4428 " but the server was compiled with %s %d.",
4429 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4430 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4431 errhint("It looks like you need to initdb.")));
4432 if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4433 ereport(FATAL,
4434 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4435 errmsg("database files are incompatible with server"),
4436 /* translator: %s is a variable name and %d is its value */
4437 errdetail("The database cluster was initialized with %s %d,"
4438 " but the server was compiled with %s %d.",
4439 "MAXALIGN", ControlFile->maxAlign,
4440 "MAXALIGN", MAXIMUM_ALIGNOF),
4441 errhint("It looks like you need to initdb.")));
4443 ereport(FATAL,
4444 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4445 errmsg("database files are incompatible with server"),
4446 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4447 errhint("It looks like you need to initdb.")));
4448 if (ControlFile->blcksz != BLCKSZ)
4449 ereport(FATAL,
4450 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4451 errmsg("database files are incompatible with server"),
4452 /* translator: %s is a variable name and %d is its value */
4453 errdetail("The database cluster was initialized with %s %d,"
4454 " but the server was compiled with %s %d.",
4455 "BLCKSZ", ControlFile->blcksz,
4456 "BLCKSZ", BLCKSZ),
4457 errhint("It looks like you need to recompile or initdb.")));
4458 if (ControlFile->relseg_size != RELSEG_SIZE)
4459 ereport(FATAL,
4460 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4461 errmsg("database files are incompatible with server"),
4462 /* translator: %s is a variable name and %d is its value */
4463 errdetail("The database cluster was initialized with %s %d,"
4464 " but the server was compiled with %s %d.",
4465 "RELSEG_SIZE", ControlFile->relseg_size,
4466 "RELSEG_SIZE", RELSEG_SIZE),
4467 errhint("It looks like you need to recompile or initdb.")));
4468 if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4469 ereport(FATAL,
4470 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4471 errmsg("database files are incompatible with server"),
4472 /* translator: %s is a variable name and %d is its value */
4473 errdetail("The database cluster was initialized with %s %d,"
4474 " but the server was compiled with %s %d.",
4475 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4476 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4477 errhint("It looks like you need to recompile or initdb.")));
4479 ereport(FATAL,
4480 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4481 errmsg("database files are incompatible with server"),
4482 /* translator: %s is a variable name and %d is its value */
4483 errdetail("The database cluster was initialized with %s %d,"
4484 " but the server was compiled with %s %d.",
4485 "NAMEDATALEN", ControlFile->nameDataLen,
4486 "NAMEDATALEN", NAMEDATALEN),
4487 errhint("It looks like you need to recompile or initdb.")));
4489 ereport(FATAL,
4490 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4491 errmsg("database files are incompatible with server"),
4492 /* translator: %s is a variable name and %d is its value */
4493 errdetail("The database cluster was initialized with %s %d,"
4494 " but the server was compiled with %s %d.",
4495 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4496 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4497 errhint("It looks like you need to recompile or initdb.")));
4499 ereport(FATAL,
4500 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4501 errmsg("database files are incompatible with server"),
4502 /* translator: %s is a variable name and %d is its value */
4503 errdetail("The database cluster was initialized with %s %d,"
4504 " but the server was compiled with %s %d.",
4505 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4506 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4507 errhint("It looks like you need to recompile or initdb.")));
4509 ereport(FATAL,
4510 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4511 errmsg("database files are incompatible with server"),
4512 /* translator: %s is a variable name and %d is its value */
4513 errdetail("The database cluster was initialized with %s %d,"
4514 " but the server was compiled with %s %d.",
4515 "LOBLKSIZE", ControlFile->loblksize,
4516 "LOBLKSIZE", (int) LOBLKSIZE),
4517 errhint("It looks like you need to recompile or initdb.")));
4518
/* float8ByVal must agree with how this executable was compiled. */
4519#ifdef USE_FLOAT8_BYVAL
4520 if (ControlFile->float8ByVal != true)
4521 ereport(FATAL,
4522 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4523 errmsg("database files are incompatible with server"),
4524 errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
4525 " but the server was compiled with USE_FLOAT8_BYVAL."),
4526 errhint("It looks like you need to recompile or initdb.")));
4527#else
4528 if (ControlFile->float8ByVal != false)
4529 ereport(FATAL,
4530 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4531 errmsg("database files are incompatible with server"),
4532 errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
4533 " but the server was compiled without USE_FLOAT8_BYVAL."),
4534 errhint("It looks like you need to recompile or initdb.")));
4535#endif
4536
/*
 * NOTE(review): line 4537 is missing; presumably it sets wal_segment_size
 * from ControlFile->xlog_seg_size (both appear in the References list).
 */
4538
4540 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4541 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4542 "invalid WAL segment size in control file (%d bytes)",
4545 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4546
/* Publish the segment size as the "wal_segment_size" GUC, formatted in decimal. */
4547 snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4548 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4550
4551 /* check and update variables dependent on wal_segment_size */
4553 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4554 /* translator: both %s are GUC names */
4555 errmsg("\"%s\" must be at least twice \"%s\"",
4556 "min_wal_size", "wal_segment_size")));
4557
4559 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4560 /* translator: both %s are GUC names */
4561 errmsg("\"%s\" must be at least twice \"%s\"",
4562 "max_wal_size", "wal_segment_size")));
4563
4565 (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4567
4569
4570 /* Make the initdb settings visible as GUC variables, too */
4571 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4573}
#define PG_BINARY
Definition: c.h:1230
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1180
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1086
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:114
@ PGC_INTERNAL
Definition: guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:201
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
uint32 nameDataLen
Definition: pg_control.h:213
uint32 indexMaxKeys
Definition: pg_control.h:214
uint32 relseg_size
Definition: pg_control.h:208
uint32 catalog_version_no
Definition: pg_control.h:126
double floatFormat
Definition: pg_control.h:200
uint32 xlog_blcksz
Definition: pg_control.h:210
uint32 loblksize
Definition: pg_control.h:217
pg_crc32c crc
Definition: pg_control.h:232
uint32 toast_max_chunk_size
Definition: pg_control.h:216
#define UsableBytesInPage
Definition: xlog.c:597
bool DataChecksumsEnabled(void)
Definition: xlog.c:4609
static int UsableBytesInSegment
Definition: xlog.c:606
int min_wal_size_mb
Definition: xlog.c:115
#define XLOG_CONTROL_FILE

References BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6355 of file xlog.c.

/*
 * RecoveryInProgress -- report whether the system is still in recovery.
 *
 * Per the comments below, the shared state is consulted only until this
 * process has once seen recovery finished; after that the function returns
 * false without looking at shared memory again.
 *
 * NOTE(review): source lines 6362, 6372 and 6380 are missing from this
 * listing (the fast-path test, the read of the shared state through
 * "xlogctl", and the final return).  The References list suggests they use
 * LocalRecoveryInProgress, XLogCtlData::SharedRecoveryState and
 * RECOVERY_STATE_DONE -- confirm against xlog.c.
 */
6356{
6357 /*
6358 * We check shared state each time only until we leave recovery mode. We
6359 * can't re-enter recovery, so there's no need to keep checking after the
6360 * shared variable has once been seen false.
6361 */
6363 return false;
6364 else
6365 {
6366 /*
6367 * use volatile pointer to make sure we make a fresh read of the
6368 * shared variable.
6369 */
6370 volatile XLogCtlData *xlogctl = XLogCtl;
6371
6373
6374 /*
6375 * Note: We don't need a memory barrier when we're still in recovery.
6376 * We might exit recovery immediately after return, so the caller
6377 * can't rely on 'true' meaning that we're still in recovery anyway.
6378 */
6379
6381 }
6382}
static bool LocalRecoveryInProgress
Definition: xlog.c:224

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), 
ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7571 of file xlog.c.

/*
 * RecoveryRestartPoint -- remember a replayed checkpoint record so that a
 * restartpoint can later be performed from it.
 *
 * checkPoint is the checkpoint data taken from the record; it is copied by
 * value into XLogCtl->lastCheckPoint.  record is the reader state for that
 * record.  Nothing is recorded (only a DEBUG2 message is emitted) in the
 * early-return path below.
 *
 * NOTE(review): source lines 7580, 7593-7595 and 7597 are missing from this
 * listing.  The References list suggests the early-return test is
 * XLogHaveInvalidPages(), and that the copy is bracketed by
 * SpinLockAcquire/SpinLockRelease on info_lck and also stores
 * lastCheckPointRecPtr / lastCheckPointEndPtr from record->ReadRecPtr /
 * record->EndRecPtr -- confirm against xlog.c.
 */
7572{
7573 /*
7574 * Also refrain from creating a restartpoint if we have seen any
7575 * references to non-existent pages. Restarting recovery from the
7576 * restartpoint would not see the references, so we would lose the
7577 * cross-check that the pages belonged to a relation that was dropped
7578 * later.
7579 */
7581 {
7582 elog(DEBUG2,
7583 "could not record restart point at %X/%X because there "
7584 "are unresolved references to invalid pages",
7585 LSN_FORMAT_ARGS(checkPoint->redo));
7586 return;
7587 }
7588
7589 /*
7590 * Copy the checkpoint record to shared memory, so that checkpointer can
7591 * work out the next time it wants to perform a restartpoint.
7592 */
7596 XLogCtl->lastCheckPoint = *checkPoint;
7598}
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:235

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9445 of file xlog.c.

/*
 * register_persistent_abort_backup_handler -- one-time registration guarded
 * by a function-local static flag: the first call performs the registration
 * and sets the flag, later calls in the same process return immediately.
 *
 * NOTE(review): source line 9451, the registration itself, is missing from
 * this listing.  The References list names before_shmem_exit(),
 * do_pg_abort_backup() and DatumGetBool(), so presumably
 * do_pg_abort_backup is registered as a before_shmem_exit callback --
 * confirm against xlog.c.
 */
9446{
9447 static bool already_done = false;
9448
9449 if (already_done)
9450 return;
9452 already_done = true;
9453}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3961 of file xlog.c.

/*
 * RemoveNonParentXlogFiles -- after a timeline switch, remove or recycle WAL
 * segment files in XLOGDIR that belong to an older timeline but lie beyond
 * the segment where the switch happened.
 *
 * switchpoint is the WAL location of the switch and newTLI the timeline
 * being switched to.  Files for which XLogArchiveIsReady() returns true are
 * left in place; all others that match are handed to RemoveXlogFile().
 */
3962{
3963 DIR *xldir;
3964 struct dirent *xlde;
3965 char switchseg[MAXFNAMELEN];
3966 XLogSegNo endLogSegNo;
3967 XLogSegNo switchLogSegNo;
3968 XLogSegNo recycleSegNo;
3969
3970 /*
3971 * Initialize info about where to begin the work. This will recycle,
3972 * somewhat arbitrarily, 10 future segments.
3973 */
3974 XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3975 XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3976 recycleSegNo = endLogSegNo + 10;
3977
3978 /*
3979 * Construct a filename of the last segment to be kept.
3980 */
3981 XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3982
3983 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3984 switchseg);
3985
3986 xldir = AllocateDir(XLOGDIR);
3987
3988 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3989 {
3990 /* Ignore files that are not XLOG segments */
3991 if (!IsXLogFileName(xlde->d_name))
3992 continue;
3993
3994 /*
3995 * Remove files that are on a timeline older than the new one we're
3996 * switching to, but with a segment number >= the first segment on the
3997 * new timeline.
3998 */
/*
 * The first 8 characters of each name are compared as the timeline part
 * and the remainder as the segment part; both are plain string
 * comparisons against switchseg.
 */
3999 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4000 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4001 {
4002 /*
4003 * If the file has already been marked as .ready, however, don't
4004 * remove it yet. It should be OK to remove it - files that are
4005 * not part of our timeline history are not required for recovery
4006 * - but seems safer to let them be archived and removed later.
4007 */
4008 if (!XLogArchiveIsReady(xlde->d_name))
4009 RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
4010 }
4011 }
4012
4013 FreeDir(xldir);
4014}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4030
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3886 of file xlog.c.

/*
 * RemoveOldXlogFiles -- remove or recycle WAL segment files in XLOGDIR whose
 * segment part sorts at or before that of segment "segno".
 *
 * lastredoptr is passed to XLOGfileslop() to obtain recycleSegNo, the
 * highest segment number worth recycling into; endptr gives the segment
 * from which recycled files may be installed; insertTLI is forwarded to
 * RemoveXlogFile().  A file is only touched if XLogArchiveCheckDone()
 * returns true for it.
 *
 * NOTE(review): source lines 3915 and 3934 are missing from this listing:
 * the second half of the "not an XLOG segment" test (presumably
 * !IsPartialXLogFileName(xlde->d_name)) and the statement announced by the
 * "Update the last removed location" comment (presumably
 * UpdateLastRemovedPtr(xlde->d_name)); both names appear in the References
 * list -- confirm against xlog.c.
 */
3888{
3889 DIR *xldir;
3890 struct dirent *xlde;
3891 char lastoff[MAXFNAMELEN];
3892 XLogSegNo endlogSegNo;
3893 XLogSegNo recycleSegNo;
3894
3895 /* Initialize info about where to try to recycle to */
3896 XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3897 recycleSegNo = XLOGfileslop(lastredoptr);
3898
3899 /*
3900 * Construct a filename of the last segment to be kept. The timeline ID
3901 * doesn't matter, we ignore that in the comparison. (During recovery,
3902 * InsertTimeLineID isn't set, so we can't use that.)
3903 */
3904 XLogFileName(lastoff, 0, segno, wal_segment_size);
3905
3906 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3907 lastoff);
3908
3909 xldir = AllocateDir(XLOGDIR);
3910
3911 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3912 {
3913 /* Ignore files that are not XLOG segments */
3914 if (!IsXLogFileName(xlde->d_name) &&
3916 continue;
3917
3918 /*
3919 * We ignore the timeline part of the XLOG segment identifiers in
3920 * deciding whether a segment is still needed. This ensures that we
3921 * won't prematurely remove a segment from a parent timeline. We could
3922 * probably be a little more proactive about removing segments of
3923 * non-parent timelines, but that would be a whole lot more
3924 * complicated.
3925 *
3926 * We use the alphanumeric sorting property of the filenames to decide
3927 * which ones are earlier than the lastoff segment.
3928 */
/* Skipping 8 characters on both sides leaves the timeline part out of the comparison. */
3929 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3930 {
3931 if (XLogArchiveCheckDone(xlde->d_name))
3932 {
3933 /* Update the last removed location in shared memory first */
3935
3936 RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3937 }
3938 }
3939 }
3940
3941 FreeDir(xldir);
3942}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2240
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3833
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3853 of file xlog.c.

/*
 * RemoveTempXlogFiles -- unlink every entry in XLOGDIR whose name starts
 * with "xlogtemp.".
 *
 * Takes no arguments and returns nothing; progress is reported only through
 * DEBUG2 messages.
 */
3854{
3855 DIR *xldir;
3856 struct dirent *xlde;
3857
3858 elog(DEBUG2, "removing all temporary WAL segments");
3859
3860 xldir = AllocateDir(XLOGDIR);
3861 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3862 {
3863 char path[MAXPGPATH];
3864
/* Only names beginning with the 9-character prefix "xlogtemp." qualify. */
3865 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3866 continue;
3867
3868 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
/*
 * NOTE(review): the result of unlink() is not checked, so the "removed"
 * message below is emitted even if the removal failed.
 */
3869 unlink(path);
3870 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3871 }
3872 FreeDir(xldir);
3873}

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4030 of file xlog.c.

/*
 * RemoveXlogFile -- recycle or delete a single WAL segment file.
 *
 * segment_de is the directory entry of the file inside XLOGDIR.
 * recycleSegNo is the highest segment number worth recycling into, and
 * *endlogSegNo is the first candidate slot; it is incremented when the file
 * is successfully recycled.  insertTLI is passed through to
 * InstallXLogFileSegment().
 *
 * The file is recycled only if wal_recycle is on, a slot is still wanted,
 * XLogCtl->InstallXLogFileSegmentActive is set, the entry is a regular
 * file, and InstallXLogFileSegment() succeeds.  Otherwise it is deleted
 * with durable_unlink() (on WIN32, after first being renamed to
 * "<path>.deleted").  If the rename or the unlink fails, the function
 * returns early and XLogArchiveCleanup() is not called for the segment.
 *
 * NOTE(review): source lines 4054, 4057, 4066, 4086 and 4100 are missing
 * from this listing: the ereport() openers of the two debug messages, the
 * errcode argument of the rename failure report, and one statement at the
 * end of each branch (presumably the CheckpointStats.ckpt_segs_recycled /
 * ckpt_segs_removed counters named in the References list) -- confirm
 * against xlog.c.
 */
4033{
4034 char path[MAXPGPATH];
4035#ifdef WIN32
4036 char newpath[MAXPGPATH];
4037#endif
4038 const char *segname = segment_de->d_name;
4039
4040 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4041
4042 /*
4043 * Before deleting the file, see if it can be recycled as a future log
4044 * segment. Only recycle normal files, because we don't want to recycle
4045 * symbolic links pointing to a separate archive directory.
4046 */
4047 if (wal_recycle &&
4048 *endlogSegNo <= recycleSegNo &&
4049 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4050 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4051 InstallXLogFileSegment(endlogSegNo, path,
4052 true, recycleSegNo, insertTLI))
4053 {
4055 (errmsg_internal("recycled write-ahead log file \"%s\"",
4056 segname)));
4058 /* Needn't recheck that slot on future iterations */
4059 (*endlogSegNo)++;
4060 }
4061 else
4062 {
4063 /* No need for any more future segments, or recycling failed ... */
4064 int rc;
4065
4067 (errmsg_internal("removing write-ahead log file \"%s\"",
4068 segname)));
4069
4070#ifdef WIN32
4071
4072 /*
4073 * On Windows, if another process (e.g. another backend) holds the file
4074 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4075 * will still show up in directory listing until the last handle is
4076 * closed. To avoid confusing the lingering deleted file for a live
4077 * WAL file that needs to be archived, rename it before deleting it.
4078 *
4079 * If another process holds the file open without FILE_SHARE_DELETE
4080 * flag, rename will fail. We'll try again at the next checkpoint.
4081 */
4082 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4083 if (rename(path, newpath) != 0)
4084 {
4085 ereport(LOG,
4087 errmsg("could not rename file \"%s\": %m",
4088 path)));
4089 return;
4090 }
4091 rc = durable_unlink(newpath, LOG);
4092#else
4093 rc = durable_unlink(path, LOG);
4094#endif
4095 if (rc != 0)
4096 {
4097 /* Message already logged by durable_unlink() */
4098 return;
4099 }
4101 }
4102
/* Reached only after a successful recycle or delete. */
4103 XLogArchiveCleanup(segname);
4104}
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3584
bool wal_recycle
Definition: xlog.c:128

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8081 of file xlog.c.

/*
 * RequestXLogSwitch -- insert an XLOG_SWITCH record for the RM_XLOG_ID
 * resource manager and return the XLogRecPtr that XLogInsert() gives back.
 *
 * mark_unimportant selects an extra step before the insert.
 *
 * NOTE(review): source lines 8086 and 8089 are missing from this listing.
 * The References list suggests they are XLogBeginInsert() and
 * XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT), the latter being the body of
 * the "if" below -- confirm against xlog.c.  As displayed, the "if" appears
 * to guard the XLogInsert() call, which is an artifact of the missing line.
 */
8082{
8083 XLogRecPtr RecPtr;
8084
8085 /* XLOG SWITCH has no data */
8087
8088 if (mark_unimportant)
8090 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
8091
8092 return RecPtr;
8093}
#define XLOG_SWITCH
Definition: pg_control.h:72
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1108 of file xlog.c.

/*
 * ReserveXLogInsertLocation -- reserve space for one WAL record.
 *
 * size is the record length in bytes; it is rounded up with MAXALIGN before
 * use.  On return *StartPos and *EndPos hold the start and end of the
 * reserved range, and *PrevPtr the start of the previously reserved record,
 * all as XLogRecPtrs.
 *
 * Only the reads and updates of Insert->CurrBytePos / PrevBytePos happen
 * while insertpos_lck is held; the conversions to XLogRecPtr are done after
 * the spinlock is released.
 *
 * NOTE(review): source lines 1111 and 1119 are missing from this listing:
 * the declaration of "Insert" (presumably a pointer to XLogCtl->Insert) and
 * the assertion announced by the "should contain data" comment (presumably
 * comparing size with SizeOfXLogRecord, which appears in the References
 * list) -- confirm against xlog.c.
 */
1110{
1112 uint64 startbytepos;
1113 uint64 endbytepos;
1114 uint64 prevbytepos;
1115
1116 size = MAXALIGN(size);
1117
1118 /* All (non xlog-switch) records should contain data. */
1120
1121 /*
1122 * The duration the spinlock needs to be held is minimized by minimizing
1123 * the calculations that have to be done while holding the lock. The
1124 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1125 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1126 * page headers. The mapping between "usable" byte positions and physical
1127 * positions (XLogRecPtrs) can be done outside the locked region, and
1128 * because the usable byte position doesn't include any headers, reserving
1129 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1130 */
1131 SpinLockAcquire(&Insert->insertpos_lck);
1132
/* The new record starts at the old tip; the old tip becomes the "previous" position. */
1133 startbytepos = Insert->CurrBytePos;
1134 endbytepos = startbytepos + size;
1135 prevbytepos = Insert->PrevBytePos;
1136 Insert->CurrBytePos = endbytepos;
1137 Insert->PrevBytePos = startbytepos;
1138
1139 SpinLockRelease(&Insert->insertpos_lck);
1140
/* Note that the end position uses the separate "EndRecPtr" conversion. */
1141 *StartPos = XLogBytePosToRecPtr(startbytepos);
1142 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1143 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1144
1145 /*
1146 * Check that the conversions between "usable byte positions" and
1147 * XLogRecPtrs work consistently in both directions.
1148 */
1149 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1150 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1151 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1152}
#define MAXALIGN(LEN)
Definition: c.h:768
static pg_noinline void Size size
Definition: slab.c:607
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1898
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1941

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1164 of file xlog.c.

1165{
1167 uint64 startbytepos;
1168 uint64 endbytepos;
1169 uint64 prevbytepos;
1171 XLogRecPtr ptr;
1172 uint32 segleft;
1173
1174 /*
1175 * These calculations are a bit heavy-weight to be done while holding a
1176 * spinlock, but since we're holding all the WAL insertion locks, there
1177 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1178 * compete for it, but that's not called very frequently.
1179 */
1180 SpinLockAcquire(&Insert->insertpos_lck);
1181
1182 startbytepos = Insert->CurrBytePos;
1183
1184 ptr = XLogBytePosToEndRecPtr(startbytepos);
1185 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1186 {
1187 SpinLockRelease(&Insert->insertpos_lck);
1188 *EndPos = *StartPos = ptr;
1189 return false;
1190 }
1191
1192 endbytepos = startbytepos + size;
1193 prevbytepos = Insert->PrevBytePos;
1194
1195 *StartPos = XLogBytePosToRecPtr(startbytepos);
1196 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1197
1199 if (segleft != wal_segment_size)
1200 {
1201 /* consume the rest of the segment */
1202 *EndPos += segleft;
1203 endbytepos = XLogRecPtrToBytePos(*EndPos);
1204 }
1205 Insert->CurrBytePos = endbytepos;
1206 Insert->PrevBytePos = startbytepos;
1207
1208 SpinLockRelease(&Insert->insertpos_lck);
1209
1210 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1211
1213 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1214 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1215 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1216
1217 return true;
1218}

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9508 of file xlog.c.

9509{
9510 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9512 LWLockRelease(ControlFileLock);
9513}

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9531 of file xlog.c.

9532{
9534 XLogCtl->WalWriterSleeping = sleeping;
9536}
bool WalWriterSleeping
Definition: xlog.c:533

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4831 of file xlog.c.

4832{
4833 if (XLogArchivingActive())
4834 return XLogArchiveCommand;
4835 else
4836 return "(disabled)";
4837}
char * XLogArchiveCommand
Definition: xlog.c:120

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 4843 of file xlog.c.

4844{
4845 /*
4846 * We display the actual state based on shared memory, so that this GUC
4847 * reports up-to-date state if examined intra-query. The underlying
4848 * variable (in_hot_standby_guc) changes only when we transmit a new value
4849 * to the client.
4850 */
4851 return RecoveryInProgress() ? "on" : "off";
4852}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6623 of file xlog.c.

6624{
6625 /*
6626 * We should have an aux process resource owner to use, and we should not
6627 * be in a transaction that's installed some other resowner.
6628 */
6630 Assert(CurrentResourceOwner == NULL ||
6633
6634 /* Don't be chatty in standalone mode */
6636 (errmsg("shutting down")));
6637
6638 /*
6639 * Signal walsenders to move to stopping state.
6640 */
6642
6643 /*
6644 * Wait for WAL senders to be in stopping state. This prevents commands
6645 * from writing new WAL.
6646 */
6648
6649 if (RecoveryInProgress())
6651 else
6652 {
6653 /*
6654 * If archiving is enabled, rotate the last XLOG file so that all the
6655 * remaining records are archived (postmaster wakes up the archiver
6656 * process one more time at the end of shutdown). The checkpoint
6657 * record will go to the next XLOG file and won't be archived (yet).
6658 */
6659 if (XLogArchivingActive())
6660 RequestXLogSwitch(false);
6661
6663 }
6664}
bool IsPostmasterEnvironment
Definition: globals.c:118
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3716
void WalSndWaitStopping(void)
Definition: walsender.c:3742
bool CreateRestartPoint(int flags)
Definition: xlog.c:7612
bool CreateCheckPoint(int flags)
Definition: xlog.c:6910

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5450 of file xlog.c.

5451{
5453 CheckPoint checkPoint;
5454 bool wasShutdown;
5455 bool didCrash;
5456 bool haveTblspcMap;
5457 bool haveBackupLabel;
5458 XLogRecPtr EndOfLog;
5459 TimeLineID EndOfLogTLI;
5460 TimeLineID newTLI;
5461 bool performedWalRecovery;
5462 EndOfWalRecoveryInfo *endOfRecoveryInfo;
5465 TransactionId oldestActiveXID;
5466 bool promoted = false;
5467
5468 /*
5469 * We should have an aux process resource owner to use, and we should not
5470 * be in a transaction that's installed some other resowner.
5471 */
5473 Assert(CurrentResourceOwner == NULL ||
5476
5477 /*
5478 * Check that contents look valid.
5479 */
5481 ereport(FATAL,
5483 errmsg("control file contains invalid checkpoint location")));
5484
5485 switch (ControlFile->state)
5486 {
5487 case DB_SHUTDOWNED:
5488
5489 /*
5490 * This is the expected case, so don't be chatty in standalone
5491 * mode
5492 */
5494 (errmsg("database system was shut down at %s",
5496 break;
5497
5499 ereport(LOG,
5500 (errmsg("database system was shut down in recovery at %s",
5502 break;
5503
5504 case DB_SHUTDOWNING:
5505 ereport(LOG,
5506 (errmsg("database system shutdown was interrupted; last known up at %s",
5508 break;
5509
5511 ereport(LOG,
5512 (errmsg("database system was interrupted while in recovery at %s",
5514 errhint("This probably means that some data is corrupted and"
5515 " you will have to use the last backup for recovery.")));
5516 break;
5517
5519 ereport(LOG,
5520 (errmsg("database system was interrupted while in recovery at log time %s",
5522 errhint("If this has occurred more than once some data might be corrupted"
5523 " and you might need to choose an earlier recovery target.")));
5524 break;
5525
5526 case DB_IN_PRODUCTION:
5527 ereport(LOG,
5528 (errmsg("database system was interrupted; last known up at %s",
5530 break;
5531
5532 default:
5533 ereport(FATAL,
5535 errmsg("control file contains invalid database cluster state")));
5536 }
5537
5538 /* This is just to allow attaching to startup process with a debugger */
5539#ifdef XLOG_REPLAY_DELAY
5541 pg_usleep(60000000L);
5542#endif
5543
5544 /*
5545 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5546 * In cases where someone has performed a copy for PITR, these directories
5547 * may have been excluded and need to be re-created.
5548 */
5550
5551 /* Set up timeout handler needed to report startup progress. */
5555
5556 /*----------
5557 * If we previously crashed, perform a couple of actions:
5558 *
5559 * - The pg_wal directory may still include some temporary WAL segments
5560 * used when creating a new segment, so perform some clean up to not
5561 * bloat this path. This is done first as there is no point to sync
5562 * this temporary data.
5563 *
5564 * - There might be data which we had written, intending to fsync it, but
5565 * which we had not actually fsync'd yet. Therefore, a power failure in
5566 * the near future might cause earlier unflushed writes to be lost, even
5567 * though more recent data written to disk from here on would be
5568 * persisted. To avoid that, fsync the entire data directory.
5569 */
5572 {
5575 didCrash = true;
5576 }
5577 else
5578 didCrash = false;
5579
5580 /*
5581 * Prepare for WAL recovery if needed.
5582 *
5583 * InitWalRecovery analyzes the control file and the backup label file, if
5584 * any. It updates the in-memory ControlFile buffer according to the
5585 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5586 * It also applies the tablespace map file, if any.
5587 */
5588 InitWalRecovery(ControlFile, &wasShutdown,
5589 &haveBackupLabel, &haveTblspcMap);
5590 checkPoint = ControlFile->checkPointCopy;
5591
5592 /* initialize shared memory variables from the checkpoint record */
5593 TransamVariables->nextXid = checkPoint.nextXid;
5594 TransamVariables->nextOid = checkPoint.nextOid;
5596 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5597 AdvanceOldestClogXid(checkPoint.oldestXid);
5598 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5599 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5601 checkPoint.newestCommitTsXid);
5602 XLogCtl->ckptFullXid = checkPoint.nextXid;
5603
5604 /*
5605 * Clear out any old relcache cache files. This is *necessary* if we do
5606 * any WAL replay, since that would probably result in the cache files
5607 * being out of sync with database reality. In theory we could leave them
5608 * in place if the database had been cleanly shut down, but it seems
5609 * safest to just remove them always and let them be rebuilt during the
5610 * first backend startup. These files need to be removed from all
5611 * directories including pg_tblspc; however, the symlinks are created only
5612 * after reading the tablespace_map file in case of archive recovery from
5613 * backup, so we need to clear old relcache files here, after creating the
5614 * symlinks.
5615 */
5617
5618 /*
5619 * Initialize replication slots, before there's a chance to remove
5620 * required resources.
5621 */
5623
5624 /*
5625 * Startup logical state, needs to be setup now so we have proper data
5626 * during crash recovery.
5627 */
5629
5630 /*
5631 * Startup CLOG. This must be done after TransamVariables->nextXid has
5632 * been initialized and before we accept connections or begin WAL replay.
5633 */
5634 StartupCLOG();
5635
5636 /*
5637 * Startup MultiXact. We need to do this early to be able to replay
5638 * truncations.
5639 */
5641
5642 /*
5643 * Ditto for commit timestamps. Activate the facility if the setting is
5644 * enabled in the control file, as there should be no tracking of commit
5645 * timestamps done when the setting was disabled. This facility can be
5646 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5647 */
5650
5651 /*
5652 * Recover knowledge about replay progress of known replication partners.
5653 */
5655
5656 /*
5657 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5658 * control file. On recovery, all unlogged relations are blown away, so
5659 * the unlogged LSN counter can be reset too.
5660 */
5664 else
5667
5668 /*
5669 * Copy any missing timeline history files between 'now' and the recovery
5670 * target timeline from archive to pg_wal. While we don't need those files
5671 * ourselves - the history file of the recovery target timeline covers all
5672 * the previous timelines in the history too - a cascading standby server
5673 * might be interested in them. Or, if you archive the WAL from this
5674 * server to a different archive than the primary, it'd be good for all
5675 * the history files to get archived there after failover, so that you can
5676 * use one of the old timelines as a PITR target. Timeline history files
5677 * are small, so it's better to copy them unnecessarily than not copy them
5678 * and regret later.
5679 */
5681
5682 /*
5683 * Before running in recovery, scan pg_twophase and fill in its status to
5684 * be able to work on entries generated by redo. Doing a scan before
5685 * taking any recovery action has the merit of discarding any 2PC files that
5686 * are newer than the first record to replay, avoiding any conflicts at
5687 * replay. This also avoids any subsequent scans of the on-disk two-phase
5688 * data when doing recovery.
5689 */
5691
5692 /*
5693 * When starting with crash recovery, reset pgstat data - it might not be
5694 * valid. Otherwise restore pgstat data. It's safe to do this here,
5695 * because postmaster will not yet have started any other processes.
5696 *
5697 * NB: Restoring replication slot stats relies on slot state to have
5698 * already been restored from disk.
5699 *
5700 * TODO: With a bit of extra work we could just start with a pgstat file
5701 * associated with the checkpoint redo location we're starting from.
5702 */
5703 if (didCrash)
5705 else
5706 pgstat_restore_stats(checkPoint.redo);
5707
5709
5712
5713 /* REDO */
5714 if (InRecovery)
5715 {
5716 /* Initialize state for RecoveryInProgress() */
5720 else
5723
5724 /*
5725 * Update pg_control to show that we are recovering and to show the
5726 * selected checkpoint as the place we are starting from. We also mark
5727 * pg_control with any minimum recovery stop point obtained from a
5728 * backup history file.
5729 *
5730 * No need to hold ControlFileLock yet, we aren't up far enough.
5731 */
5733
5734 /*
5735 * If there was a backup label file, it's done its job and the info
5736 * has now been propagated into pg_control. We must get rid of the
5737 * label file so that if we crash during recovery, we'll pick up at
5738 * the latest recovery restartpoint instead of going all the way back
5739 * to the backup start point. It seems prudent though to just rename
5740 * the file out of the way rather than delete it completely.
5741 */
5742 if (haveBackupLabel)
5743 {
5744 unlink(BACKUP_LABEL_OLD);
5746 }
5747
5748 /*
5749 * If there was a tablespace_map file, it's done its job and the
5750 * symlinks have been created. We must get rid of the map file so
5751 * that if we crash during recovery, we don't create symlinks again.
5752 * It seems prudent though to just rename the file out of the way
5753 * rather than delete it completely.
5754 */
5755 if (haveTblspcMap)
5756 {
5757 unlink(TABLESPACE_MAP_OLD);
5759 }
5760
5761 /*
5762 * Initialize our local copy of minRecoveryPoint. When doing crash
5763 * recovery we want to replay up to the end of WAL. Particularly, in
5764 * the case of a promoted standby minRecoveryPoint value in the
5765 * control file is only updated after the first checkpoint. However,
5766 * if the instance crashes before the first post-recovery checkpoint
5767 * is completed then recovery will use a stale location causing the
5768 * startup process to think that there are still invalid page
5769 * references when checking for data consistency.
5770 */
5772 {
5775 }
5776 else
5777 {
5780 }
5781
5782 /* Check that the GUCs used to generate the WAL allow recovery */
5784
5785 /*
5786 * We're in recovery, so unlogged relations may be trashed and must be
5787 * reset. This should be done BEFORE allowing Hot Standby
5788 * connections, so that read-only backends don't try to read whatever
5789 * garbage is left over from before.
5790 */
5792
5793 /*
5794 * Likewise, delete any saved transaction snapshot files that got left
5795 * behind by crashed backends.
5796 */
5798
5799 /*
5800 * Initialize for Hot Standby, if enabled. We won't let backends in
5801 * yet, not until we've reached the min recovery point specified in
5802 * control file and we've established a recovery snapshot from a
5803 * running-xacts WAL record.
5804 */
5806 {
5807 TransactionId *xids;
5808 int nxids;
5809
5811 (errmsg_internal("initializing for hot standby")));
5812
5814
5815 if (wasShutdown)
5816 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5817 else
5818 oldestActiveXID = checkPoint.oldestActiveXid;
5819 Assert(TransactionIdIsValid(oldestActiveXID));
5820
5821 /* Tell procarray about the range of xids it has to deal with */
5823
5824 /*
5825 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5826 * have already been started up and other SLRUs are not maintained
5827 * during recovery and need not be started yet.
5828 */
5829 StartupSUBTRANS(oldestActiveXID);
5830
5831 /*
5832 * If we're beginning at a shutdown checkpoint, we know that
5833 * nothing was running on the primary at this point. So fake-up an
5834 * empty running-xacts record and use that here and now. Recover
5835 * additional standby state for prepared transactions.
5836 */
5837 if (wasShutdown)
5838 {
5840 TransactionId latestCompletedXid;
5841
5842 /* Update pg_subtrans entries for any prepared transactions */
5844
5845 /*
5846 * Construct a RunningTransactions snapshot representing a
5847 * shut down server, with only prepared transactions still
5848 * alive. We're never overflowed at this point because all
5849 * subxids are listed with their parent prepared transactions.
5850 */
5851 running.xcnt = nxids;
5852 running.subxcnt = 0;
5854 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5855 running.oldestRunningXid = oldestActiveXID;
5856 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5857 TransactionIdRetreat(latestCompletedXid);
5858 Assert(TransactionIdIsNormal(latestCompletedXid));
5859 running.latestCompletedXid = latestCompletedXid;
5860 running.xids = xids;
5861
5863 }
5864 }
5865
5866 /*
5867 * We're all set for replaying the WAL now. Do it.
5868 */
5870 performedWalRecovery = true;
5871 }
5872 else
5873 performedWalRecovery = false;
5874
5875 /*
5876 * Finish WAL recovery.
5877 */
5878 endOfRecoveryInfo = FinishWalRecovery();
5879 EndOfLog = endOfRecoveryInfo->endOfLog;
5880 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5881 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5882 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5883
5884 /*
5885 * Reset ps status display, so that no information related to recovery shows
5886 * up.
5887 */
5888 set_ps_display("");
5889
5890 /*
5891 * When recovering from a backup (we are in recovery, and archive recovery
5892 * was requested), complain if we did not roll forward far enough to reach
5893 * the point where the database is consistent. For regular online
5894 * backup-from-primary, that means reaching the end-of-backup WAL record
5895 * (at which point we reset backupStartPoint to be Invalid), for
5896 * backup-from-replica (which can't inject records into the WAL stream),
5897 * that point is when we reach the minRecoveryPoint in pg_control (which
5898 * we purposefully copy last when backing up from a replica). For
5899 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5900 * or snapshot-style backups (which don't), backupEndRequired will be set
5901 * to false.
5902 *
5903 * Note: it is indeed okay to look at the local variable
5904 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5905 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5906 * been advanced beyond the WAL we processed.
5907 */
5908 if (InRecovery &&
5909 (EndOfLog < LocalMinRecoveryPoint ||
5911 {
5912 /*
5913 * Ran off end of WAL before reaching end-of-backup WAL record, or
5914 * minRecoveryPoint. That's a bad sign, indicating that you tried to
5915 * recover from an online backup but never called pg_backup_stop(), or
5916 * you didn't archive all the WAL needed.
5917 */
5919 {
5921 ereport(FATAL,
5922 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5923 errmsg("WAL ends before end of online backup"),
5924 errhint("All WAL generated while online backup was taken must be available at recovery.")));
5925 else
5926 ereport(FATAL,
5927 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5928 errmsg("WAL ends before consistent recovery point")));
5929 }
5930 }
5931
5932 /*
5933 * Reset unlogged relations to the contents of their INIT fork. This is
5934 * done AFTER recovery is complete so as to include any unlogged relations
5935 * created during recovery, but BEFORE recovery is marked as having
5936 * completed successfully. Otherwise we'd not retry if any of the post
5937 * end-of-recovery steps fail.
5938 */
5939 if (InRecovery)
5941
5942 /*
5943 * Pre-scan prepared transactions to find out the range of XIDs present.
5944 * This information is not quite needed yet, but it is positioned here so
5945 * that potential problems are detected before any on-disk change is done.
5946 */
5947 oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5948
5949 /*
5950 * Allow ordinary WAL segment creation before possibly switching to a new
5951 * timeline, which creates a new segment, and after the last ReadRecord().
5952 */
5954
5955 /*
5956 * Consider whether we need to assign a new timeline ID.
5957 *
5958 * If we did archive recovery, we always assign a new ID. This handles a
5959 * couple of issues. If we stopped short of the end of WAL during
5960 * recovery, then we are clearly generating a new timeline and must assign
5961 * it a unique new ID. Even if we ran to the end, modifying the current
5962 * last segment is problematic because it may result in trying to
5963 * overwrite an already-archived copy of that segment, and we encourage
5964 * DBAs to make their archive_commands reject that. We can dodge the
5965 * problem by making the new active segment have a new timeline ID.
5966 *
5967 * In a normal crash recovery, we can just extend the timeline we were in.
5968 */
5969 newTLI = endOfRecoveryInfo->lastRecTLI;
5971 {
5973 ereport(LOG,
5974 (errmsg("selected new timeline ID: %u", newTLI)));
5975
5976 /*
5977 * Make a writable copy of the last WAL segment. (Note that we also
5978 * have a copy of the last block of the old WAL in
5979 * endOfRecoveryInfo->lastPage; we will use that below.)
5980 */
5981 XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5982
5983 /*
5984 * Remove the signal files out of the way, so that we don't
5985 * accidentally re-enter archive recovery mode in a subsequent crash.
5986 */
5987 if (endOfRecoveryInfo->standby_signal_file_found)
5989
5990 if (endOfRecoveryInfo->recovery_signal_file_found)
5992
5993 /*
5994 * Write the timeline history file, and have it archived. After this
5995 * point (or rather, as soon as the file is archived), the timeline
5996 * will appear as "taken" in the WAL archive and to any standby
5997 * servers. If we crash before actually switching to the new
5998 * timeline, standby servers will nevertheless think that we switched
5999 * to the new timeline, and will try to connect to the new timeline.
6000 * To minimize the window for that, try to do as little as possible
6001 * between here and writing the end-of-recovery record.
6002 */
6004 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6005
6006 ereport(LOG,
6007 (errmsg("archive recovery complete")));
6008 }
6009
6010 /* Save the selected TimeLineID in shared memory, too */
6012 XLogCtl->InsertTimeLineID = newTLI;
6013 XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
6015
6016 /*
6017 * Actually, if WAL ended in an incomplete record, skip the parts that
6018 * made it through and start writing after the portion that persisted.
6019 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6020 * we'll do as soon as we're open for writing new WAL.)
6021 */
6023 {
6024 /*
6025 * We should only have a missingContrecPtr if we're not switching to a
6026 * new timeline. When a timeline switch occurs, WAL is copied from the
6027 * old timeline to the new only up to the end of the last complete
6028 * record, so there can't be an incomplete WAL record that we need to
6029 * disregard.
6030 */
6031 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6033 EndOfLog = missingContrecPtr;
6034 }
6035
6036 /*
6037 * Prepare to write WAL starting at EndOfLog location, and init xlog
6038 * buffer cache using the block containing the last record from the
6039 * previous incarnation.
6040 */
6041 Insert = &XLogCtl->Insert;
6042 Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6043 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6044
6045 /*
6046 * Tricky point here: lastPage contains the *last* block that the LastRec
6047 * record spans, not the one it starts in. The last block is indeed the
6048 * one we want to use.
6049 */
6050 if (EndOfLog % XLOG_BLCKSZ != 0)
6051 {
6052 char *page;
6053 int len;
6054 int firstIdx;
6055
6056 firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6057 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6058 Assert(len < XLOG_BLCKSZ);
6059
6060 /* Copy the valid part of the last block, and zero the rest */
6061 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6062 memcpy(page, endOfRecoveryInfo->lastPage, len);
6063 memset(page + len, 0, XLOG_BLCKSZ - len);
6064
6065 pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6066 XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6067 }
6068 else
6069 {
6070 /*
6071 * There is no partial block to copy. Just set InitializedUpTo, and
6072 * let the first attempt to insert a log record to initialize the next
6073 * buffer.
6074 */
6075 XLogCtl->InitializedUpTo = EndOfLog;
6076 }
6077
6078 /*
6079 * Update local and shared status. This is OK to do without any locks
6080 * because no other process can be reading or writing WAL yet.
6081 */
6082 LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6086 XLogCtl->LogwrtRqst.Write = EndOfLog;
6087 XLogCtl->LogwrtRqst.Flush = EndOfLog;
6088
6089 /*
6090 * Preallocate additional log files, if wanted.
6091 */
6092 PreallocXlogFiles(EndOfLog, newTLI);
6093
6094 /*
6095 * Okay, we're officially UP.
6096 */
6097 InRecovery = false;
6098
6099 /* start the archive_timeout timer and LSN running */
6100 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6101 XLogCtl->lastSegSwitchLSN = EndOfLog;
6102
6103 /* also initialize latestCompletedXid, to nextXid - 1 */
6104 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6107 LWLockRelease(ProcArrayLock);
6108
6109 /*
6110 * Start up subtrans, if not already done for hot standby. (commit
6111 * timestamps are started below, if necessary.)
6112 */
6114 StartupSUBTRANS(oldestActiveXID);
6115
6116 /*
6117 * Perform end of recovery actions for any SLRUs that need it.
6118 */
6119 TrimCLOG();
6120 TrimMultiXact();
6121
6122 /*
6123 * Reload shared-memory state for prepared transactions. This needs to
6124 * happen before renaming the last partial segment of the old timeline as
6125 * it may be possible that we have to recover some transactions from it.
6126 */
6128
6129 /* Shut down xlogreader */
6131
6132 /* Enable WAL writes for this backend only. */
6134
6135 /* If necessary, write overwrite-contrecord before doing anything else */
6137 {
6140 }
6141
6142 /*
6143 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6144 * record before resource manager writes cleanup WAL records or checkpoint
6145 * record is written.
6146 */
6147 Insert->fullPageWrites = lastFullPageWrites;
6149
6150 /*
6151 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6152 */
6153 if (performedWalRecovery)
6154 promoted = PerformRecoveryXLogAction();
6155
6156 /*
6157 * If any of the critical GUCs have changed, log them before we allow
6158 * backends to write WAL.
6159 */
6161
6162 /* If this is archive recovery, perform post-recovery cleanup actions. */
6164 CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6165
6166 /*
6167 * Local WAL inserts enabled, so it's time to finish initialization of
6168 * commit timestamp.
6169 */
6171
6172 /*
6173 * All done with end-of-recovery actions.
6174 *
6175 * Now allow backends to write WAL and update the control file status in
6176 * consequence. SharedRecoveryState, that controls if backends can write
6177 * WAL, is updated while holding ControlFileLock to prevent other backends
6178 * from looking at an inconsistent state of the control file in shared memory.
6179 * There is still a small window during which backends can write WAL and
6180 * the control file is still referring to a system not in DB_IN_PRODUCTION
6181 * state while looking at the on-disk control file.
6182 *
6183 * Also, we use info_lck to update SharedRecoveryState to ensure that
6184 * there are no race conditions concerning visibility of other recent
6185 * updates to shared memory.
6186 */
6187 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6189
6193
6195 LWLockRelease(ControlFileLock);
6196
6197 /*
6198 * Shutdown the recovery environment. This must occur after
6199 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6200 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6201 * any session building a snapshot will not rely on KnownAssignedXids as
6202 * RecoveryInProgress() would return false at this stage. This is
6203 * particularly critical for prepared 2PC transactions, that would still
6204 * need to be included in snapshots once recovery has ended.
6205 */
6208
6209 /*
6210 * If there were cascading standby servers connected to us, nudge any wal
6211 * sender processes to notice that we've been promoted.
6212 */
6213 WalSndWakeup(true, true);
6214
6215 /*
6216 * If this was a promotion, request an (online) checkpoint now. This isn't
6217 * required for consistency, but the last restartpoint might be far back,
6218 * and in case of a crash, recovering from it might take longer than is
6219 * appropriate now that we're not in standby mode anymore.
6220 */
6221 if (promoted)
6223}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:494
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:609
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
void SyncDataDirectory(void)
Definition: fd.c:3567
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:466
void TrimMultiXact(void)
Definition: multixact.c:2178
void StartupMultiXact(void)
Definition: multixact.c:2153
void StartupReplicationOrigin(void)
Definition: origin.c:703
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(XLogRecPtr redo)
Definition: pgstat.c:505
void pgstat_discard_stats(void)
Definition: pgstat.c:517
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6823
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:2089
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1515
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:471
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2073
void restoreTwoPhaseData(void)
Definition: twophase.c:1888
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1952
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2032
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3637
void UpdateFullPageWrites(void)
Definition: xlog.c:8187
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5220
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4120
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7461
static void XLogReportParameters(void)
Definition: xlog.c:8124
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6305
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5310
static bool lastFullPageWrites
Definition: xlog.c:217
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5235
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5406
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3853
#define TABLESPACE_MAP_OLD
Definition: xlog.h:306
#define TABLESPACE_MAP
Definition: xlog.h:305
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:301
#define BACKUP_LABEL_OLD
Definition: xlog.h:303
#define BACKUP_LABEL_FILE
Definition: xlog.h:302
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:300
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:139
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:374
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:373
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:513
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:123
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow)
static

Definition at line 5220 of file xlog.c.

5221{
5222 char *buf = palloc(128);
5223
5224 pg_strftime(buf, 128,
5225 "%Y-%m-%d %H:%M:%S %Z",
5226 pg_localtime(&tnow, log_timezone));
5227
5228 return buf;
5229}
static char * buf
Definition: pg_test_fsync.c:72
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, log_timezone, palloc(), pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6230 of file xlog.c.

6231{
6232 /* initialize minRecoveryPoint to this record */
6233 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6235 if (ControlFile->minRecoveryPoint < EndRecPtr)
6236 {
6237 ControlFile->minRecoveryPoint = EndRecPtr;
6238 ControlFile->minRecoveryPointTLI = replayTLI;
6239 }
6240 /* update local copy */
6243
6244 /*
6245 * The startup process can update its local copy of minRecoveryPoint from
6246 * this point.
6247 */
6249
6251
6252 /*
6253 * We update SharedRecoveryState while holding the lock on ControlFileLock
6254 * so both states are consistent in shared memory.
6255 */
6259
6260 LWLockRelease(ControlFileLock);
6261}
static bool updateMinRecoveryPoint
Definition: xlog.c:648

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6845 of file xlog.c.

6846{
6847 /*
6848 * The status is reported only for end-of-recovery and shutdown
6849 * checkpoints or shutdown restartpoints. Updating the ps display is
6850 * useful in those situations as it may not be possible to rely on
6851 * pg_stat_activity to see the status of the checkpointer or the startup
6852 * process.
6853 */
6855 return;
6856
6857 if (reset)
6858 set_ps_display("");
6859 else
6860 {
6861 char activitymsg[128];
6862
6863 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6864 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6865 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6866 restartpoint ? "restartpoint" : "checkpoint");
6867 set_ps_display(activitymsg);
6868 }
6869}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6807 of file xlog.c.

6808{
6809 /*
6810 * To estimate the number of segments consumed between checkpoints, keep a
6811 * moving average of the amount of WAL generated in previous checkpoint
6812 * cycles. However, if the load is bursty, with quiet periods and busy
6813 * periods, we want to cater for the peak load. So instead of a plain
6814 * moving average, let the average decline slowly if the previous cycle
6815 * used less WAL than estimated, but bump it up immediately if it used
6816 * more.
6817 *
6818 * When checkpoints are triggered by max_wal_size, this should converge to
6819 * CheckpointSegments * wal_segment_size.
6820 *
6821 * Note: This doesn't pay any attention to what caused the checkpoint.
6822 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6823 * starting a base backup, are counted the same as those created
6824 * automatically. The slow-decline will largely mask them out, if they are
6825 * not frequent. If they are frequent, it seems reasonable to count them
6826 * in as any others; if you issue a manual checkpoint every 5 minutes and
6827 * never let a timed checkpoint happen, it makes sense to base the
6828 * preallocation on that 5 minute interval rather than whatever
6829 * checkpoint_timeout is set to.
6830 */
6831 PrevCheckPointDistance = nbytes;
6832 if (CheckPointDistanceEstimate < nbytes)
6834 else
6836 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6837}

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4580 of file xlog.c.

4581{
4583}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8187 of file xlog.c.

8188{
8190 bool recoveryInProgress;
8191
8192 /*
8193 * Do nothing if full_page_writes has not been changed.
8194 *
8195 * It's safe to check the shared full_page_writes without the lock,
8196 * because we assume that there is no concurrently running process which
8197 * can update it.
8198 */
8199 if (fullPageWrites == Insert->fullPageWrites)
8200 return;
8201
8202 /*
8203 * Perform this outside critical section so that the WAL insert
8204 * initialization done by RecoveryInProgress() doesn't trigger an
8205 * assertion failure.
8206 */
8207 recoveryInProgress = RecoveryInProgress();
8208
8210
8211 /*
8212 * It's always safe to take full page images, even when not strictly
8213 * required, but not the other way round. So if we're setting full_page_writes
8214 * to true, first set it true and then write the WAL record. If we're
8215 * setting it to false, first write the WAL record and then set the global
8216 * flag.
8217 */
8218 if (fullPageWrites)
8219 {
8221 Insert->fullPageWrites = true;
8223 }
8224
8225 /*
8226 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8227 * full_page_writes during archive recovery, if required.
8228 */
8229 if (XLogStandbyInfoActive() && !recoveryInProgress)
8230 {
8232 XLogRegisterData(&fullPageWrites, sizeof(bool));
8233
8234 XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8235 }
8236
8237 if (!fullPageWrites)
8238 {
8240 Insert->fullPageWrites = false;
8242 }
8244}
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3833 of file xlog.c.

3834{
3835 uint32 tli;
3836 XLogSegNo segno;
3837
3839
3841 if (segno > XLogCtl->lastRemovedSegNo)
3842 XLogCtl->lastRemovedSegNo = segno;
3844}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2725 of file xlog.c.

2726{
2727 /* Quick check using our local copy of the variable */
2728 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2729 return;
2730
2731 /*
2732 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2733 * i.e., we're doing crash recovery. We never modify the control file's
2734 * value in that case, so we can short-circuit future checks here too. The
2735 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2736 * updated until crash recovery finishes. We only do this for the startup
2737 * process as it should not update its own reference of minRecoveryPoint
2738 * until it has finished crash recovery to make sure that all WAL
2739 * available is replayed in this case. This also saves the startup process
2740 * from taking extra locks on the control file.
2741 */
2743 {
2744 updateMinRecoveryPoint = false;
2745 return;
2746 }
2747
2748 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2749
2750 /* update local copy */
2753
2755 updateMinRecoveryPoint = false;
2756 else if (force || LocalMinRecoveryPoint < lsn)
2757 {
2758 XLogRecPtr newMinRecoveryPoint;
2759 TimeLineID newMinRecoveryPointTLI;
2760
2761 /*
2762 * To avoid having to update the control file too often, we update it
2763 * all the way to the last record being replayed, even though 'lsn'
2764 * would suffice for correctness. This also allows the 'force' case
2765 * to not need a valid 'lsn' value.
2766 *
2767 * Another important reason for doing it this way is that the passed
2768 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2769 * the caller got it from a corrupted heap page. Accepting such a
2770 * value as the min recovery point would prevent us from coming up at
2771 * all. Instead, we just log a warning and continue with recovery.
2772 * (See also the comments about corrupt LSNs in XLogFlush.)
2773 */
2774 newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2775 if (!force && newMinRecoveryPoint < lsn)
2776 elog(WARNING,
2777 "xlog min recovery request %X/%X is past current point %X/%X",
2778 LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2779
2780 /* update control file */
2781 if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2782 {
2783 ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2784 ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2786 LocalMinRecoveryPoint = newMinRecoveryPoint;
2787 LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2788
2790 (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
2791 LSN_FORMAT_ARGS(newMinRecoveryPoint),
2792 newMinRecoveryPointTLI)));
2793 }
2794 }
2795 LWLockRelease(ControlFileLock);
2796}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4120 of file xlog.c.

4121{
4122 char path[MAXPGPATH];
4123 struct stat stat_buf;
4124
4125 /* Check for pg_wal; if it doesn't exist, error out */
4126 if (stat(XLOGDIR, &stat_buf) != 0 ||
4127 !S_ISDIR(stat_buf.st_mode))
4128 ereport(FATAL,
4130 errmsg("required WAL directory \"%s\" does not exist",
4131 XLOGDIR)));
4132
4133 /* Check for archive_status */
4134 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4135 if (stat(path, &stat_buf) == 0)
4136 {
4137 /* Check for weird cases where it exists but isn't a directory */
4138 if (!S_ISDIR(stat_buf.st_mode))
4139 ereport(FATAL,
4141 errmsg("required WAL directory \"%s\" does not exist",
4142 path)));
4143 }
4144 else
4145 {
4146 ereport(LOG,
4147 (errmsg("creating missing WAL directory \"%s\"", path)));
4148 if (MakePGDirectory(path) < 0)
4149 ereport(FATAL,
4151 errmsg("could not create missing directory \"%s\": %m",
4152 path)));
4153 }
4154
4155 /* Check for summaries */
4156 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4157 if (stat(path, &stat_buf) == 0)
4158 {
4159 /* Check for weird cases where it exists but isn't a directory */
4160 if (!S_ISDIR(stat_buf.st_mode))
4161 ereport(FATAL,
4162 (errmsg("required WAL directory \"%s\" does not exist",
4163 path)));
4164 }
4165 else
4166 {
4167 ereport(LOG,
4168 (errmsg("creating missing WAL directory \"%s\"", path)));
4169 if (MakePGDirectory(path) < 0)
4170 ereport(FATAL,
4171 (errmsg("could not create missing directory \"%s\": %m",
4172 path)));
4173 }
4174}
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3936
#define S_ISDIR(m)
Definition: win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1504 of file xlog.c.

1505{
1506 uint64 bytepos;
1507 XLogRecPtr inserted;
1508 XLogRecPtr reservedUpto;
1509 XLogRecPtr finishedUpto;
1511 int i;
1512
1513 if (MyProc == NULL)
1514 elog(PANIC, "cannot wait without a PGPROC structure");
1515
1516 /*
1517 * Check if there's any work to do. Use a barrier to ensure we get the
1518 * freshest value.
1519 */
1521 if (upto <= inserted)
1522 return inserted;
1523
1524 /* Read the current insert position */
1525 SpinLockAcquire(&Insert->insertpos_lck);
1526 bytepos = Insert->CurrBytePos;
1527 SpinLockRelease(&Insert->insertpos_lck);
1528 reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1529
1530 /*
1531 * No-one should request to flush a piece of WAL that hasn't even been
1532 * reserved yet. However, it can happen if there is a block with a bogus
1533 * LSN on disk, for example. XLogFlush checks for that situation and
1534 * complains, but only after the flush. Here we just assume that to mean
1535 * that all WAL that has been reserved needs to be finished. In this
1536 * corner-case, the return value can be smaller than the 'upto' argument.
1537 */
1538 if (upto > reservedUpto)
1539 {
1540 ereport(LOG,
1541 (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1542 LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
1543 upto = reservedUpto;
1544 }
1545
1546 /*
1547 * Loop through all the locks, sleeping on any in-progress insert older
1548 * than 'upto'.
1549 *
1550 * finishedUpto is our return value, indicating the point up to which all
1551 * the WAL insertions have been finished. Initialize it to the head of
1552 * reserved WAL, and as we iterate through the insertion locks, back it
1553 * out for any insertion that's still in progress.
1554 */
1555 finishedUpto = reservedUpto;
1556 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1557 {
1558 XLogRecPtr insertingat = InvalidXLogRecPtr;
1559
1560 do
1561 {
1562 /*
1563 * See if this insertion is in progress. LWLockWaitForVar will
1564 * wait for the lock to be released, or for the 'value' to be set
1565 * by a LWLockUpdateVar call. When a lock is initially acquired,
1566 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1567 * know where it's inserting yet. We will have to wait for it. If
1568 * it's a small insertion, the record will most likely fit on the
1569 * same page and the inserter will release the lock without ever
1570 * calling LWLockUpdateVar. But if it has to sleep, it will
1571 * advertise the insertion point with LWLockUpdateVar before
1572 * sleeping.
1573 *
1574 * In this loop we are only waiting for insertions that started
1575 * before WaitXLogInsertionsToFinish was called. The lack of
1576 * memory barriers in the loop means that we might see locks as
1577 * "unused" that have since become used. This is fine because
1578 * they only can be used for later insertions that we would not
1579 * want to wait on anyway. Not taking a lock to acquire the
1580 * current insertingAt value means that we might see older
1581 * insertingAt values. This is also fine, because if we read a
1582 * value too old, we will add ourselves to the wait queue, which
1583 * contains atomic operations.
1584 */
1585 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1587 insertingat, &insertingat))
1588 {
1589 /* the lock was free, so no insertion in progress */
1590 insertingat = InvalidXLogRecPtr;
1591 break;
1592 }
1593
1594 /*
1595 * This insertion is still in progress. Have to wait, unless the
1596 * inserter has proceeded past 'upto'.
1597 */
1598 } while (insertingat < upto);
1599
1600 if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1601 finishedUpto = insertingat;
1602 }
1603
1604 /*
1605 * Advance the limit we know to have been inserted and return the freshest
1606 * value we know of, which might be beyond what we requested if somebody
1607 * is concurrently doing this with an 'upto' pointer ahead of us.
1608 */
1610 finishedUpto);
1611
1612 return finishedUpto;
1613}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition: atomics.h:585
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1584
PGPROC * MyProc
Definition: proc.c:66
pg_atomic_uint64 insertingAt
Definition: xlog.c:370

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1371 of file xlog.c.

1372{
1373 bool immed;
1374
1375 /*
1376 * It doesn't matter which of the WAL insertion locks we acquire, so try
1377 * the one we used last time. If the system isn't particularly busy, it's
1378 * a good bet that it's still available, and it's good to have some
1379 * affinity to a particular lock so that you don't unnecessarily bounce
1380 * cache lines between processes when there's no contention.
1381 *
1382 * If this is the first time through in this backend, pick a lock
1383 * (semi-)randomly. This allows the locks to be used evenly if you have a
1384 * lot of very short connections.
1385 */
1386 static int lockToTry = -1;
1387
1388 if (lockToTry == -1)
1389 lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1390 MyLockNo = lockToTry;
1391
1392 /*
1393 * The insertingAt value is initially set to 0, as we don't know our
1394 * insert location yet.
1395 */
1397 if (!immed)
1398 {
1399 /*
1400 * If we couldn't get the lock immediately, try another lock next
1401 * time. On a system with more insertion locks than concurrent
1402 * inserters, this causes all the inserters to eventually migrate to a
1403 * lock that no-one else is using. On a system with more inserters
1404 * than locks, it still helps to distribute the inserters evenly
1405 * across the locks.
1406 */
1407 lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1408 }
1409}
ProcNumber MyProcNumber
Definition: globals.c:89
static int MyLockNo
Definition: xlog.c:651

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1416 of file xlog.c.

1417{
1418 int i;
1419
1420 /*
1421 * When holding all the locks, all but the last lock's insertingAt
1422 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1423 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1424 */
1425 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1426 {
1431 }
1432 /* Variable value reset to 0 at release */
1434
1435 holdingAllLocks = true;
1436}
#define PG_UINT64_MAX
Definition: c.h:550
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1720
static bool holdingAllLocks
Definition: xlog.c:652

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1445 of file xlog.c.

1446{
1447 if (holdingAllLocks)
1448 {
1449 int i;
1450
1451 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1454 0);
1455
1456 holdingAllLocks = false;
1457 }
1458 else
1459 {
1462 0);
1463 }
1464}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1854

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1471 of file xlog.c.

1472{
1473 if (holdingAllLocks)
1474 {
1475 /*
1476 * We use the last lock to mark our actual position, see comments in
1477 * WALInsertLockAcquireExclusive.
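1478 */
1479 LWLockUpdateVar(&WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.lock,
1480 &WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.insertingAt,
1481 insertingAt);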
1482 }
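1483 else
1484 LWLockUpdateVar(&WALInsertLocks[MyLockNo].l.lock,
1485 &WALInsertLocks[MyLockNo].l.insertingAt,
1486 insertingAt);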
1487}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1748 of file xlog.c.

1750{
1751 char *pdst = dstbuf;
1752 XLogRecPtr recptr = startptr;
1753 XLogRecPtr inserted;
1754 Size nbytes = count;
1755
1756 if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1757 return 0;
1758
1759 Assert(!XLogRecPtrIsInvalid(startptr));
1760
1761 /*
1762 * Caller should ensure that the requested data has been inserted into WAL
1763 * buffers before we try to read it.
1764 */
1765 inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
1766 if (startptr + count > inserted)
1767 ereport(ERROR,
1768 errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1769 LSN_FORMAT_ARGS(startptr + count),
1770 LSN_FORMAT_ARGS(inserted)));
1771
1772 /*
1773 * Loop through the buffers without a lock. For each buffer, atomically
1774 * read and verify the end pointer, then copy the data out, and finally
1775 * re-read and re-verify the end pointer.
1776 *
1777 * Once a page is evicted, it never returns to the WAL buffers, so if the
1778 * end pointer matches the expected end pointer before and after we copy
1779 * the data, then the right page must have been present during the data
1780 * copy. Read barriers are necessary to ensure that the data copy actually
1781 * happens between the two verification steps.
1782 *
1783 * If either verification fails, we simply terminate the loop and return
1784 * with the data that had been already copied out successfully.
1785 */
1786 while (nbytes > 0)
1787 {
1788 uint32 offset = recptr % XLOG_BLCKSZ;
1789 int idx = XLogRecPtrToBufIdx(recptr);
1790 XLogRecPtr expectedEndPtr;
1791 XLogRecPtr endptr;
1792 const char *page;
1793 const char *psrc;
1794 Size npagebytes;
1795
1796 /*
1797 * Calculate the end pointer we expect in the xlblocks array if the
1798 * correct page is present.
1799 */
1800 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1801
1802 /*
1803 * First verification step: check that the correct page is present in
1804 * the WAL buffers.
1805 */
1806 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1807 if (expectedEndPtr != endptr)
1808 break;
1809
1810 /*
1811 * The correct page is present (or was at the time the endptr was
1812 * read; must re-verify later). Calculate pointer to source data and
1813 * determine how much data to read from this page.
1814 */
1815 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1816 psrc = page + offset;
1817 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1818
1819 /*
1820 * Ensure that the data copy and the first verification step are not
1821 * reordered.
1822 */
1823 pg_read_barrier();
1824
1825 /* data copy */
1826 memcpy(pdst, psrc, npagebytes);
1827
1828 /*
1829 * Ensure that the data copy and the second verification step are not
1830 * reordered.
1831 */
1832 pg_read_barrier();
1833
1834 /*
1835 * Second verification step: check that the page we read from wasn't
1836 * evicted while we were copying the data.
1837 */
1838 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1839 if (expectedEndPtr != endptr)
1840 break;
1841
1842 pdst += npagebytes;
1843 recptr += npagebytes;
1844 nbytes -= npagebytes;
1845 }
1846
1847 Assert(pdst - dstbuf <= count);
1848
1849 return pdst - dstbuf;
1850}
#define pg_read_barrier()
Definition: atomics.h:156
#define Min(x, y)
Definition: c.h:961
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6541

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4260 of file xlog.c.

4261{
4262 int fd;
4263 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4264
4265 /*
4266 * Initialize version and compatibility-check fields
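4267 */
4268 ControlFile->pg_control_version = PG_CONTROL_VERSION;
4269 ControlFile->catalog_version_no = CATALOG_VERSION_NO;
4270
4271 ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4272 ControlFile->floatFormat = FLOATFORMAT_VALUE;
4273
4274 ControlFile->blcksz = BLCKSZ;
4275 ControlFile->relseg_size = RELSEG_SIZE;
4276 ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4277 ControlFile->xlog_seg_size = wal_segment_size;
4278
4279 ControlFile->nameDataLen = NAMEDATALEN;
4280 ControlFile->indexMaxKeys = INDEX_MAX_KEYS;
4281
4282 ControlFile->toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
4283 ControlFile->loblksize = LOBLKSIZE;
4284
4285 ControlFile->float8ByVal = FLOAT8PASSBYVAL;
4286
4287 /* Contents are protected with a CRC */
4288 INIT_CRC32C(ControlFile->crc);
4289 COMP_CRC32C(ControlFile->crc,
4290 (char *) ControlFile,
4291 offsetof(ControlFileData, crc));
4292 FIN_CRC32C(ControlFile->crc);
4293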
4294 /*
4295 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4296 * the excess over sizeof(ControlFileData). This reduces the odds of
4297 * premature-EOF errors when reading pg_control. We'll still fail when we
4298 * check the contents of the file, but hopefully with a more specific
4299 * error than "couldn't read pg_control".
4300 */
4301 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4302 memcpy(buffer, ControlFile, sizeof(ControlFileData));
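4303
4304 fd = BasicOpenFile(XLOG_CONTROL_FILE,
4305 O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4306 if (fd < 0)
4307 ereport(PANIC,
4308 (errcode_for_file_access(),
4309 errmsg("could not create file \"%s\": %m",
4310 XLOG_CONTROL_FILE)));
4311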
4312 errno = 0;
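4313 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4314 if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
4315 {
4316 /* if write didn't set errno, assume problem is no disk space */
4317 if (errno == 0)
4318 errno = ENOSPC;
4319 ereport(PANIC,
4320 (errcode_for_file_access(),
4321 errmsg("could not write to file \"%s\": %m",
4322 XLOG_CONTROL_FILE)));
4323 }
4324 pgstat_report_wait_end();
4325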
4326 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
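4327 if (pg_fsync(fd) != 0)
4328 ereport(PANIC,
4329 (errcode_for_file_access(),
4330 errmsg("could not fsync file \"%s\": %m",
4331 XLOG_CONTROL_FILE)));
4332 pgstat_report_wait_end();
4333
4334 if (close(fd) != 0)
4335 ereport(PANIC,
4336 (errcode_for_file_access(),
4337 errmsg("could not close file \"%s\": %m",
4338 XLOG_CONTROL_FILE)));
4339}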
#define FLOAT8PASSBYVAL
Definition: c.h:592
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, FLOAT8PASSBYVAL, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState record)

Definition at line 8256 of file xlog.c.

8257{
8258 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8259 XLogRecPtr lsn = record->EndRecPtr;
8260
8261 /*
8262 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8263 * XLOG_FPI_FOR_HINT records.
8264 */
8265 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8266 !XLogRecHasAnyBlockRefs(record));
8267
8268 if (info == XLOG_NEXTOID)
8269 {
8270 Oid nextOid;
8271
8272 /*
8273 * We used to try to take the maximum of TransamVariables->nextOid and
8274 * the recorded nextOid, but that fails if the OID counter wraps
8275 * around. Since no OID allocation should be happening during replay
8276 * anyway, better to just believe the record exactly. We still take
8277 * OidGenLock while setting the variable, just in case.
8278 */
8279 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8280 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8281 TransamVariables->nextOid = nextOid;
8282 TransamVariables->oidCount = 0;
8283 LWLockRelease(OidGenLock);
8284 }
8285 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8286 {
8287 CheckPoint checkPoint;
8288 TimeLineID replayTLI;
8289
8290 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8291 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8292 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8293 TransamVariables->nextXid = checkPoint.nextXid;
8294 LWLockRelease(XidGenLock);
8295 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
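8296 TransamVariables->nextOid = checkPoint.nextOid;
8297 TransamVariables->oidCount = 0;
8298 LWLockRelease(OidGenLock);
8299 MultiXactSetNextMXact(checkPoint.nextMulti,
8300 checkPoint.nextMultiOffset);
8301
8302 MultiXactAdvanceOldest(checkPoint.oldestMulti,
8303 checkPoint.oldestMultiDB);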
8304
8305 /*
8306 * No need to set oldestClogXid here as well; it'll be set when we
8307 * redo an xl_clog_truncate if it changed since initialization.
8308 */
8309 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8310
8311 /*
8312 * If we see a shutdown checkpoint while waiting for an end-of-backup
8313 * record, the backup was canceled and the end-of-backup record will
8314 * never arrive.
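8315 */
8316 if (ArchiveRecoveryRequested &&
8317 !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
8318 XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
8319 ereport(PANIC,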
8320 (errmsg("online backup was canceled, recovery cannot continue")));
8321
8322 /*
8323 * If we see a shutdown checkpoint, we know that nothing was running
8324 * on the primary at this point. So fake-up an empty running-xacts
8325 * record and use that here and now. Recover additional standby state
8326 * for prepared transactions.
8327 */
8328 if (standbyState >= STANDBY_INITIALIZED)
8329 {
8330 TransactionId *xids;
8331 int nxids;
8332 TransactionId oldestActiveXID;
8333 TransactionId latestCompletedXid;
8334 RunningTransactionsData running;
8335
8336 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8337
8338 /* Update pg_subtrans entries for any prepared transactions */
8339 StandbyRecoverPreparedTransactions();
8340
8341 /*
8342 * Construct a RunningTransactions snapshot representing a shut
8343 * down server, with only prepared transactions still alive. We're
8344 * never overflowed at this point because all subxids are listed
8345 * with their parent prepared transactions.
8346 */
8347 running.xcnt = nxids;
8348 running.subxcnt = 0;
8349 running.subxid_status = SUBXIDS_IN_SUBTRANS;
8350 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8351 running.oldestRunningXid = oldestActiveXID;
8352 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8353 TransactionIdRetreat(latestCompletedXid);
8354 Assert(TransactionIdIsNormal(latestCompletedXid));
8355 running.latestCompletedXid = latestCompletedXid;
8356 running.xids = xids;
8357
8358 ProcArrayApplyRecoveryInfo(&running);
8359 }
8360
8361 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
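8362 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8363 ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8364 LWLockRelease(ControlFileLock);
8365
8366 /* Update shared-memory copy of checkpoint XID/epoch */
8367 SpinLockAcquire(&XLogCtl->info_lck);
8368 XLogCtl->ckptFullXid = checkPoint.nextXid;
8369 SpinLockRelease(&XLogCtl->info_lck);
8370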
8371 /*
8372 * We should've already switched to the new TLI before replaying this
8373 * record.
8374 */
8375 (void) GetCurrentReplayRecPtr(&replayTLI);
8376 if (checkPoint.ThisTimeLineID != replayTLI)
8377 ereport(PANIC,
8378 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8379 checkPoint.ThisTimeLineID, replayTLI)));
8380
8381 RecoveryRestartPoint(&checkPoint, record);
8382 }
8383 else if (info == XLOG_CHECKPOINT_ONLINE)
8384 {
8385 CheckPoint checkPoint;
8386 TimeLineID replayTLI;
8387
8388 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8389 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8390 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8391 if (FullTransactionIdPrecedes(TransamVariables->nextXid,
8392 checkPoint.nextXid))
8393 TransamVariables->nextXid = checkPoint.nextXid;
8394 LWLockRelease(XidGenLock);
8395
8396 /*
8397 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8398 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8399 * counter is from the start of the checkpoint and might well be stale
8400 * compared to later XLOG_NEXTOID records. We could try to take the
8401 * maximum of the nextOid counter and our latest value, but since
8402 * there's no particular guarantee about the speed with which the OID
8403 * counter wraps around, that's a risky thing to do. In any case,
8404 * users of the nextOid counter are required to avoid assignment of
8405 * duplicates, so that a somewhat out-of-date value should be safe.
8406 */
8407
8408 /* Handle multixact */
8409 MultiXactAdvanceNextMXact(checkPoint.nextMulti,
8410 checkPoint.nextMultiOffset);
8411
8412 /*
8413 * NB: This may perform multixact truncation when replaying WAL
8414 * generated by an older primary.
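8415 */
8416 MultiXactAdvanceOldest(checkPoint.oldestMulti,
8417 checkPoint.oldestMultiDB);
8418 if (TransactionIdPrecedes(TransamVariables->oldestXid,
8419 checkPoint.oldestXid))
8420 SetTransactionIdLimit(checkPoint.oldestXid,
8421 checkPoint.oldestXidDB);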
8422 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
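8423 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8424 ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8425 LWLockRelease(ControlFileLock);
8426
8427 /* Update shared-memory copy of checkpoint XID/epoch */
8428 SpinLockAcquire(&XLogCtl->info_lck);
8429 XLogCtl->ckptFullXid = checkPoint.nextXid;
8430 SpinLockRelease(&XLogCtl->info_lck);
8431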
8432 /* TLI should not change in an on-line checkpoint */
8433 (void) GetCurrentReplayRecPtr(&replayTLI);
8434 if (checkPoint.ThisTimeLineID != replayTLI)
8435 ereport(PANIC,
8436 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8437 checkPoint.ThisTimeLineID, replayTLI)));
8438
8439 RecoveryRestartPoint(&checkPoint, record);
8440 }
8441 else if (info == XLOG_OVERWRITE_CONTRECORD)
8442 {
8443 /* nothing to do here, handled in xlogrecovery_redo() */
8444 }
8445 else if (info == XLOG_END_OF_RECOVERY)
8446 {
8447 xl_end_of_recovery xlrec;
8448 TimeLineID replayTLI;
8449
8450 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8451
8452 /*
8453 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8454 * but this case is rarer and harder to test, so the benefit doesn't
8455 * outweigh the potential extra cost of maintenance.
8456 */
8457
8458 /*
8459 * We should've already switched to the new TLI before replaying this
8460 * record.
8461 */
8462 (void) GetCurrentReplayRecPtr(&replayTLI);
8463 if (xlrec.ThisTimeLineID != replayTLI)
8464 ereport(PANIC,
8465 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8466 xlrec.ThisTimeLineID, replayTLI)));
8467 }
8468 else if (info == XLOG_NOOP)
8469 {
8470 /* nothing to do here */
8471 }
8472 else if (info == XLOG_SWITCH)
8473 {
8474 /* nothing to do here */
8475 }
8476 else if (info == XLOG_RESTORE_POINT)
8477 {
8478 /* nothing to do here, handled in xlogrecovery.c */
8479 }
8480 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8481 {
8482 /*
8483 * XLOG_FPI records contain nothing else but one or more block
8484 * references. Every block reference must include a full-page image
8485 * even if full_page_writes was disabled when the record was generated
8486 * - otherwise there would be no point in this record.
8487 *
8488 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8489 * WAL-logged because of a hint bit update. They are only generated
8490 * when checksums and/or wal_log_hints are enabled. They may include
8491 * no full-page images if full_page_writes was disabled when they were
8492 * generated. In this case there is nothing to do here.
8493 *
8494 * No recovery conflicts are generated by these generic records - if a
8495 * resource manager needs to generate conflicts, it has to define a
8496 * separate WAL record type and redo routine.
8497 */
8498 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8499 {
8500 Buffer buffer;
8501
8502 if (!XLogRecHasBlockImage(record, block_id))
8503 {
8504 if (info == XLOG_FPI)
8505 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8506 continue;
8507 }
8508
8509 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8510 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8511 UnlockReleaseBuffer(buffer);
8512 }
8513 }
8514 else if (info == XLOG_BACKUP_END)
8515 {
8516 /* nothing to do here, handled in xlogrecovery_redo() */
8517 }
8518 else if (info == XLOG_PARAMETER_CHANGE)
8519 {
8520 xl_parameter_change xlrec;
8521
8522 /* Update our copy of the parameters in pg_control */
8523 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8524
8525 /*
8526 * Invalidate logical slots if we are in hot standby and the primary
8527 * does not have a WAL level sufficient for logical decoding. No need
8528 * to search for potentially conflicting logical slots if standby is
8529 * running with wal_level lower than logical, because in that case, we
8530 * would have either disallowed creation of logical slots or
8531 * invalidated existing ones.
8532 */
8533 if (InRecovery && InHotStandby &&
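8534 xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8535 wal_level >= WAL_LEVEL_LOGICAL)
8536 InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_LEVEL,
8537 0, InvalidOid,
8538 InvalidTransactionId);
8539
8540 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8541 ControlFile->MaxConnections = xlrec.MaxConnections;
8542 ControlFile->max_worker_processes = xlrec.max_worker_processes;
8543 ControlFile->max_wal_senders = xlrec.max_wal_senders;
8544 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
8545 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
8546 ControlFile->wal_level = xlrec.wal_level;
8547 ControlFile->wal_log_hints = xlrec.wal_log_hints;
8548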
8549 /*
8550 * Update minRecoveryPoint to ensure that if recovery is aborted, we
8551 * recover back up to this point before allowing hot standby again.
8552 * This is important if the max_* settings are decreased, to ensure
8553 * you don't run queries against the WAL preceding the change. The
8554 * local copies cannot be updated as long as crash recovery is
8555 * happening and we expect all the WAL to be replayed.
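8556 */
8557 if (InArchiveRecovery)
8558 {
8559 LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
8560 LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
8561 }
8562 if (LocalMinRecoveryPoint != InvalidXLogRecPtr && LocalMinRecoveryPoint < lsn)
8563 {
8564 TimeLineID replayTLI;
8565
8566 (void) GetCurrentReplayRecPtr(&replayTLI);
8567 ControlFile->minRecoveryPoint = lsn;
8568 ControlFile->minRecoveryPointTLI = replayTLI;
8569 }
8570
8571 CommitTsParameterChange(xlrec.track_commit_timestamp,
8572 ControlFile->track_commit_timestamp);
8573 ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
8574
8575 UpdateControlFile();
8576 LWLockRelease(ControlFileLock);
8577
8578 /* Check to see if any parameter change gives a problem on recovery */
8579 CheckRequiredParameterValues();
8580 }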
8581 else if (info == XLOG_FPW_CHANGE)
8582 {
8583 bool fpw;
8584
8585 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8586
8587 /*
8588 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8589 * do_pg_backup_start() and do_pg_backup_stop() can check whether
8590 * full_page_writes has been disabled during online backup.
8591 */
8592 if (!fpw)
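8593 {
8594 SpinLockAcquire(&XLogCtl->info_lck);
8595 if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8596 XLogCtl->lastFpwDisableRecPtr = record->ReadRecPtr;
8597 SpinLockRelease(&XLogCtl->info_lck);
8598 }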
8599
8600 /* Keep track of full_page_writes */
8601 lastFullPageWrites = fpw;
8602 }
8603 else if (info == XLOG_CHECKPOINT_REDO)
8604 {
8605 /* nothing to do here, just for informational purposes */
8606 }
8607}
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4881
uint8_t uint8
Definition: c.h:486
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2536
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2511
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:59
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7571
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsInvalid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2993 of file xlog.c.

2994{
2995 XLogwrtRqst WriteRqst;
2996 bool flexible = true;
2997 static TimestampTz lastflush;
2998 TimestampTz now;
2999 int flushblocks;
3000 TimeLineID insertTLI;
3001
3002 /* XLOG doesn't need flushing during recovery */
3003 if (RecoveryInProgress())
3004 return false;
3005
3006 /*
3007 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3008 * so we can read it without a lock.
3009 */
3010 insertTLI = XLogCtl->InsertTimeLineID;
3011
3012 /* read updated LogwrtRqst */
3013 SpinLockAcquire(&XLogCtl->info_lck);
3014 WriteRqst = XLogCtl->LogwrtRqst;
3015 SpinLockRelease(&XLogCtl->info_lck);
3016
3017 /* back off to last completed page boundary */
3018 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3019
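3020 /* if we have already flushed that far, consider async commit records */
3021 RefreshXLogWriteResult(LogwrtResult);
3022 if (WriteRqst.Write <= LogwrtResult.Flush)
3023 {
3024 SpinLockAcquire(&XLogCtl->info_lck);
3025 WriteRqst.Write = XLogCtl->asyncXactLSN;
3026 SpinLockRelease(&XLogCtl->info_lck);
3027 flexible = false; /* ensure it all gets written */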
3028 }
3029
3030 /*
3031 * If already known flushed, we're done. Just need to check if we are
3032 * holding an open file handle to a logfile that's no longer in use,
3033 * preventing the file from being deleted.
3034 */
3035 if (WriteRqst.Write <= LogwrtResult.Flush)
3036 {
3037 if (openLogFile >= 0)
3038 {
3039 if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
3040 wal_segment_size))
3041 {
3042 XLogFileClose();
3043 }
3044 }
3045 return false;
3046 }
3047
3048 /*
3049 * Determine how far to flush WAL, based on the wal_writer_delay and
3050 * wal_writer_flush_after GUCs.
3051 *
3052 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3053 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3054 * logic is the same in both places if you change this.
3055 */
3056 now = GetCurrentTimestamp();
3057 flushblocks =
3058 WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3059
3060 if (WalWriterFlushAfter == 0 || lastflush == 0)
3061 {
3062 /* first call, or block based limits disabled */
3063 WriteRqst.Flush = WriteRqst.Write;
3064 lastflush = now;
3065 }
3066 else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3067 {
3068 /*
3069 * Flush the writes at least every WalWriterDelay ms. This is
3070 * important to bound the amount of time it takes for an asynchronous
3071 * commit to hit disk.
3072 */
3073 WriteRqst.Flush = WriteRqst.Write;
3074 lastflush = now;
3075 }
3076 else if (flushblocks >= WalWriterFlushAfter)
3077 {
3078 /* exceeded wal_writer_flush_after blocks, flush */
3079 WriteRqst.Flush = WriteRqst.Write;
3080 lastflush = now;
3081 }
3082 else
3083 {
3084 /* no flushing, this time round */
3085 WriteRqst.Flush = 0;
3086 }
3087
3088#ifdef WAL_DEBUG
3089 if (XLOG_DEBUG)
3090 elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3091 LSN_FORMAT_ARGS(WriteRqst.Write),
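3092 LSN_FORMAT_ARGS(WriteRqst.Flush),
3093 LSN_FORMAT_ARGS(LogwrtResult.Write),
3094 LSN_FORMAT_ARGS(LogwrtResult.Flush));
3095#endif
3096
3097 START_CRIT_SECTION();
3098
3099 /* now wait for any in-progress insertions to finish and get write lock */
3100 WaitXLogInsertionsToFinish(WriteRqst.Write);
3101 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3102 RefreshXLogWriteResult(LogwrtResult);
3103 if (WriteRqst.Write > LogwrtResult.Write ||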
3104 WriteRqst.Flush > LogwrtResult.Flush)
3105 {
3106 XLogWrite(WriteRqst, insertTLI, flexible);
3107 }
3108 LWLockRelease(WALWriteLock);
3109
3110 END_CRIT_SECTION();
3111
3112 /* wake up walsenders now that we've released heavily contended locks */
3113 WalSndWakeupProcessRequests(true, !RecoveryInProgress());
3114
3115 /*
3116 * Great, done. To take some work off the critical path, try to initialize
3117 * as many of the no-longer-needed WAL buffers for future use as we can.
3118 */
3119 AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3120
3121 /*
3122 * If we determined that we need to write data, but somebody else
3123 * wrote/flushed already, it should be considered as being active, to
3124 * avoid hibernating too early.
3125 */
3126 return true;
3127}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
XLogRecPtr asyncXactLSN
Definition: xlog.c:458
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:70
int WalWriterDelay
Definition: walwriter.c:69
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1898 of file xlog.c.

1899{
1900 uint64 fullsegs;
1901 uint64 fullpages;
1902 uint64 bytesleft;
1903 uint32 seg_offset;
1904 XLogRecPtr result;
1905
1906 fullsegs = bytepos / UsableBytesInSegment;
1907 bytesleft = bytepos % UsableBytesInSegment;
1908
1909 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1910 {
1911 /* fits on first page of segment */
1912 if (bytesleft == 0)
1913 seg_offset = 0;
1914 else
1915 seg_offset = bytesleft + SizeOfXLogLongPHD;
1916 }
1917 else
1918 {
1919 /* account for the first page on segment with long header */
1920 seg_offset = XLOG_BLCKSZ;
1921 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1922
1923 fullpages = bytesleft / UsableBytesInPage;
1924 bytesleft = bytesleft % UsableBytesInPage;
1925
1926 if (bytesleft == 0)
1927 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1928 else
1929 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1930 }
1931
1932 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1933
1934 return result;
1935}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1858 of file xlog.c.

1859{
1860 uint64 fullsegs;
1861 uint64 fullpages;
1862 uint64 bytesleft;
1863 uint32 seg_offset;
1864 XLogRecPtr result;
1865
1866 fullsegs = bytepos / UsableBytesInSegment;
1867 bytesleft = bytepos % UsableBytesInSegment;
1868
1869 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1870 {
1871 /* fits on first page of segment */
1872 seg_offset = bytesleft + SizeOfXLogLongPHD;
1873 }
1874 else
1875 {
1876 /* account for the first page on segment with long header */
1877 seg_offset = XLOG_BLCKSZ;
1878 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1879
1880 fullpages = bytesleft / UsableBytesInPage;
1881 bytesleft = bytesleft % UsableBytesInPage;
1882
1883 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1884 }
1885
1886 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1887
1888 return result;
1889}

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2290 of file xlog.c.

2291{
2292 XLogSegNo old_segno;
2293
2294 XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2295
2296 if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2297 return true;
2298 return false;
2299}

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4642 of file xlog.c.

4643{
4644 int xbuffers;
4645
4646 xbuffers = NBuffers / 32;
4647 if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4648 xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4649 if (xbuffers < 8)
4650 xbuffers = 8;
4651 return xbuffers;
4652}

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3660 of file xlog.c.

3661{
3662 Assert(openLogFile >= 0);
3663
3664 /*
3665 * WAL segment files will not be re-read in normal operation, so we advise
3666 * the OS to release any cached pages. But do not do so if WAL archiving
3667 * or streaming is active, because archiver and walsender process could
3668 * use the cache to read the WAL segment.
3669 */
3670#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3671 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3672 (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3673#endif
3674
3675 if (close(openLogFile) != 0)
3676 {
3677 char xlogfname[MAXFNAMELEN];
3678 int save_errno = errno;
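3679
3680 XLogFileName(xlogfname, openLogTLI, openLogSegNo, wal_segment_size);
3681 errno = save_errno;
3682 ereport(PANIC,
3683 (errcode_for_file_access(),
3684 errmsg("could not close file \"%s\": %m", xlogfname)));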
3685 }
3686
3687 openLogFile = -1;
3688 ReleaseExternalFD();
3689}
void ReleaseExternalFD(void)
Definition: fd.c:1238

References Assert, close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3439 of file xlog.c.

3442{
3443 char path[MAXPGPATH];
3444 char tmppath[MAXPGPATH];
3445 PGAlignedXLogBlock buffer;
3446 int srcfd;
3447 int fd;
3448 int nbytes;
3449
3450 /*
3451 * Open the source file
3452 */
3453 XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3454 srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3455 if (srcfd < 0)
3456 ereport(ERROR,
3458 errmsg("could not open file \"%s\": %m", path)));
3459
3460 /*
3461 * Copy into a temp file name.
3462 */
3463 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3464
3465 unlink(tmppath);
3466
3467 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3468 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3469 if (fd < 0)
3470 ereport(ERROR,
3472 errmsg("could not create file \"%s\": %m", tmppath)));
3473
3474 /*
3475 * Do the data copying.
3476 */
3477 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3478 {
3479 int nread;
3480
3481 nread = upto - nbytes;
3482
3483 /*
3484 * The part that is not read from the source file is filled with
3485 * zeros.
3486 */
3487 if (nread < sizeof(buffer))
3488 memset(buffer.data, 0, sizeof(buffer));
3489
3490 if (nread > 0)
3491 {
3492 int r;
3493
3494 if (nread > sizeof(buffer))
3495 nread = sizeof(buffer);
3496 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3497 r = read(srcfd, buffer.data, nread);
3498 if (r != nread)
3499 {
3500 if (r < 0)
3501 ereport(ERROR,
3503 errmsg("could not read file \"%s\": %m",
3504 path)));
3505 else
3506 ereport(ERROR,
3508 errmsg("could not read file \"%s\": read %d of %zu",
3509 path, r, (Size) nread)));
3510 }
3512 }
3513 errno = 0;
3514 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3515 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3516 {
3517 int save_errno = errno;
3518
3519 /*
3520 * If we fail to make the file, delete it to release disk space
3521 */
3522 unlink(tmppath);
3523 /* if write didn't set errno, assume problem is no disk space */
3524 errno = save_errno ? save_errno : ENOSPC;
3525
3526 ereport(ERROR,
3528 errmsg("could not write to file \"%s\": %m", tmppath)));
3529 }
3531 }
3532
3533 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3534 if (pg_fsync(fd) != 0)
3537 errmsg("could not fsync file \"%s\": %m", tmppath)));
3539
3540 if (CloseTransientFile(fd) != 0)
3541 ereport(ERROR,
3543 errmsg("could not close file \"%s\": %m", tmppath)));
3544
3545 if (CloseTransientFile(srcfd) != 0)
3546 ereport(ERROR,
3548 errmsg("could not close file \"%s\": %m", path)));
3549
3550 /*
3551 * Now move the segment into place with its final name.
3552 */
3553 if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3554 elog(ERROR, "InstallXLogFileSegment should not have failed");
3555}
int CloseTransientFile(int fd)
Definition: fd.c:2831
int data_sync_elevel(int elevel)
Definition: fd.c:3959
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2655
char data[XLOG_BLCKSZ]
Definition: c.h:1105

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3401 of file xlog.c.

3402{
3403 bool ignore_added;
3404 char path[MAXPGPATH];
3405 int fd;
3406
3407 Assert(logtli != 0);
3408
3409 fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3410 if (fd >= 0)
3411 return fd;
3412
3413 /* Now open original target segment (might not be file I just made) */
3414 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3416 if (fd < 0)
3417 ereport(ERROR,
3419 errmsg("could not open file \"%s\": %m", path)));
3420 return fd;
3421}
#define O_CLOEXEC
Definition: win32_port.h:349

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3213 of file xlog.c.

3215{
3216 char tmppath[MAXPGPATH];
3217 XLogSegNo installed_segno;
3218 XLogSegNo max_segno;
3219 int fd;
3220 int save_errno;
3221 int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3222 instr_time io_start;
3223
3224 Assert(logtli != 0);
3225
3226 XLogFilePath(path, logtli, logsegno, wal_segment_size);
3227
3228 /*
3229 * Try to use existent file (checkpoint maker may have created it already)
3230 */
3231 *added = false;
3232 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3234 if (fd < 0)
3235 {
3236 if (errno != ENOENT)
3237 ereport(ERROR,
3239 errmsg("could not open file \"%s\": %m", path)));
3240 }
3241 else
3242 return fd;
3243
3244 /*
3245 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3246 * another process is doing the same thing. If so, we will end up
3247 * pre-creating an extra log segment. That seems OK, and better than
3248 * holding the lock throughout this lengthy process.
3249 */
3250 elog(DEBUG2, "creating and filling new WAL file");
3251
3252 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3253
3254 unlink(tmppath);
3255
3257 open_flags |= PG_O_DIRECT;
3258
3259 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3260 fd = BasicOpenFile(tmppath, open_flags);
3261 if (fd < 0)
3262 ereport(ERROR,
3264 errmsg("could not create file \"%s\": %m", tmppath)));
3265
3266 /* Measure I/O timing when initializing segment */
3268
3269 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3270 save_errno = 0;
3271 if (wal_init_zero)
3272 {
3273 ssize_t rc;
3274
3275 /*
3276 * Zero-fill the file. With this setting, we do this the hard way to
3277 * ensure that all the file space has really been allocated. On
3278 * platforms that allow "holes" in files, just seeking to the end
3279 * doesn't allocate intermediate space. This way, we know that we
3280 * have all the space and (after the fsync below) that all the
3281 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3282 * O_DSYNC will be sufficient to sync future writes to the log file.
3283 */
3285
3286 if (rc < 0)
3287 save_errno = errno;
3288 }
3289 else
3290 {
3291 /*
3292 * Otherwise, seeking to the end and writing a solitary byte is
3293 * enough.
3294 */
3295 errno = 0;
3296 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3297 {
3298 /* if write didn't set errno, assume no disk space */
3299 save_errno = errno ? errno : ENOSPC;
3300 }
3301 }
3303
3304 /*
3305 * A full segment worth of data is written when using wal_init_zero. One
3306 * byte is written when not using it.
3307 */
3309 io_start, 1,
3311
3312 if (save_errno)
3313 {
3314 /*
3315 * If we fail to make the file, delete it to release disk space
3316 */
3317 unlink(tmppath);
3318
3319 close(fd);
3320
3321 errno = save_errno;
3322
3323 ereport(ERROR,
3325 errmsg("could not write to file \"%s\": %m", tmppath)));
3326 }
3327
3328 /* Measure I/O timing when flushing segment */
3330
3331 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3332 if (pg_fsync(fd) != 0)
3333 {
3334 save_errno = errno;
3335 close(fd);
3336 errno = save_errno;
3337 ereport(ERROR,
3339 errmsg("could not fsync file \"%s\": %m", tmppath)));
3340 }
3342
3344 IOOP_FSYNC, io_start, 1, 0);
3345
3346 if (close(fd) != 0)
3347 ereport(ERROR,
3349 errmsg("could not close file \"%s\": %m", tmppath)));
3350
3351 /*
3352 * Now move the segment into place with its final name. Cope with
3353 * possibility that someone else has created the file while we were
3354 * filling ours: if so, use ours to pre-create a future log segment.
3355 */
3356 installed_segno = logsegno;
3357
3358 /*
3359 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3360 * that was a constant, but that was always a bit dubious: normally, at a
3361 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3362 * here, it was the offset from the insert location. We can't do the
3363 * normal XLOGfileslop calculation here because we don't have access to
3364 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3365 * CheckPointSegments.
3366 */
3367 max_segno = logsegno + CheckPointSegments;
3368 if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3369 logtli))
3370 {
3371 *added = true;
3372 elog(DEBUG2, "done creating and filling new WAL file");
3373 }
3374 else
3375 {
3376 /*
3377 * No need for any more future segments, or InstallXLogFileSegment()
3378 * failed to rename the file into place. If the rename failed, a
3379 * caller opening the file may fail.
3380 */
3381 unlink(tmppath);
3382 elog(DEBUG2, "abandoned new WAL file");
3383 }
3384
3385 return -1;
3386}
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:688
@ IOCONTEXT_INIT
Definition: pgstat.h:286
@ IOOP_WRITE
Definition: pgstat.h:314
#define pg_pwrite
Definition: port.h:227
bool wal_init_zero
Definition: xlog.c:127

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3639 of file xlog.c.

3640{
3641 char path[MAXPGPATH];
3642 int fd;
3643
3644 XLogFilePath(path, tli, segno, wal_segment_size);
3645
3646 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3648 if (fd < 0)
3649 ereport(PANIC,
3651 errmsg("could not open file \"%s\": %m", path)));
3652
3653 return fd;
3654}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2240 of file xlog.c.

2241{
2242 XLogSegNo minSegNo;
2243 XLogSegNo maxSegNo;
2244 double distance;
2245 XLogSegNo recycleSegNo;
2246
2247 /*
2248 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2249 * correspond to. Always recycle enough segments to meet the minimum, and
2250 * remove enough segments to stay below the maximum.
2251 */
2252 minSegNo = lastredoptr / wal_segment_size +
2254 maxSegNo = lastredoptr / wal_segment_size +
2256
2257 /*
2258 * Between those limits, recycle enough segments to get us through to the
2259 * estimated end of next checkpoint.
2260 *
2261 * To estimate where the next checkpoint will finish, assume that the
2262 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2263 * every checkpoint.
2264 */
2266 /* add 10% for good measure. */
2267 distance *= 1.10;
2268
2269 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2271
2272 if (recycleSegNo < minSegNo)
2273 recycleSegNo = minSegNo;
2274 if (recycleSegNo > maxSegNo)
2275 recycleSegNo = maxSegNo;
2276
2277 return recycleSegNo;
2278}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2805 of file xlog.c.

2806{
2807 XLogRecPtr WriteRqstPtr;
2808 XLogwrtRqst WriteRqst;
2809 TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2810
2811 /*
2812 * During REDO, we are reading not writing WAL. Therefore, instead of
2813 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2814 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2815 * to act this way too, and because when it tries to write the
2816 * end-of-recovery checkpoint, it should indeed flush.
2817 */
2818 if (!XLogInsertAllowed())
2819 {
2820 UpdateMinRecoveryPoint(record, false);
2821 return;
2822 }
2823
2824 /* Quick exit if already known flushed */
2825 if (record <= LogwrtResult.Flush)
2826 return;
2827
2828#ifdef WAL_DEBUG
2829 if (XLOG_DEBUG)
2830 elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2831 LSN_FORMAT_ARGS(record),
2834#endif
2835
2837
2838 /*
2839 * Since fsync is usually a horribly expensive operation, we try to
2840 * piggyback as much data as we can on each fsync: if we see any more data
2841 * entered into the xlog buffer, we'll write and fsync that too, so that
2842 * the final value of LogwrtResult.Flush is as large as possible. This
2843 * gives us some chance of avoiding another fsync immediately after.
2844 */
2845
2846 /* initialize to given target; may increase below */
2847 WriteRqstPtr = record;
2848
2849 /*
2850 * Now wait until we get the write lock, or someone else does the flush
2851 * for us.
2852 */
2853 for (;;)
2854 {
2855 XLogRecPtr insertpos;
2856
2857 /* done already? */
2859 if (record <= LogwrtResult.Flush)
2860 break;
2861
2862 /*
2863 * Before actually performing the write, wait for all in-flight
2864 * insertions to the pages we're about to write to finish.
2865 */
2867 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2868 WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2870 insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2871
2872 /*
2873 * Try to get the write lock. If we can't get it immediately, wait
2874 * until it's released, and recheck if we still need to do the flush
2875 * or if the backend that held the lock did it for us already. This
2876 * helps to maintain a good rate of group committing when the system
2877 * is bottlenecked by the speed of fsyncing.
2878 */
2879 if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2880 {
2881 /*
2882 * The lock is now free, but we didn't acquire it yet. Before we
2883 * do, loop back to check if someone else flushed the record for
2884 * us already.
2885 */
2886 continue;
2887 }
2888
2889 /* Got the lock; recheck whether request is satisfied */
2891 if (record <= LogwrtResult.Flush)
2892 {
2893 LWLockRelease(WALWriteLock);
2894 break;
2895 }
2896
2897 /*
2898 * Sleep before flush! By adding a delay here, we may give further
2899 * backends the opportunity to join the backlog of group commit
2900 * followers; this can significantly improve transaction throughput,
2901 * at the risk of increasing transaction latency.
2902 *
2903 * We do not sleep if enableFsync is not turned on, nor if there are
2904 * fewer than CommitSiblings other backends with active transactions.
2905 */
2906 if (CommitDelay > 0 && enableFsync &&
2908 {
2910
2911 /*
2912 * Re-check how far we can now flush the WAL. It's generally not
2913 * safe to call WaitXLogInsertionsToFinish while holding
2914 * WALWriteLock, because an in-progress insertion might need to
2915 * also grab WALWriteLock to make progress. But we know that all
2916 * the insertions up to insertpos have already finished, because
2917 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2918 * We're only calling it again to allow insertpos to be moved
2919 * further forward, not to actually wait for anyone.
2920 */
2921 insertpos = WaitXLogInsertionsToFinish(insertpos);
2922 }
2923
2924 /* try to write/flush later additions to XLOG as well */
2925 WriteRqst.Write = insertpos;
2926 WriteRqst.Flush = insertpos;
2927
2928 XLogWrite(WriteRqst, insertTLI, false);
2929
2930 LWLockRelease(WALWriteLock);
2931 /* done */
2932 break;
2933 }
2934
2936
2937 /* wake up walsenders now that we've released heavily contended locks */
2939
2940 /*
2941 * If we still haven't flushed to the request point then we have a
2942 * problem; most likely, the requested flush point is past end of XLOG.
2943 * This has been seen to occur when a disk page has a corrupted LSN.
2944 *
2945 * Formerly we treated this as a PANIC condition, but that hurts the
2946 * system's robustness rather than helping it: we do not want to take down
2947 * the whole system due to corruption on one data page. In particular, if
2948 * the bad page is encountered again during recovery then we would be
2949 * unable to restart the database at all! (This scenario actually
2950 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2951 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2952 * the only time we can reach here during recovery is while flushing the
2953 * end-of-recovery checkpoint record, and we don't expect that to have a
2954 * bad LSN.
2955 *
2956 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2957 * since xact.c calls this routine inside a critical section. However,
2958 * calls from bufmgr.c are not within critical sections and so we will not
2959 * force a restart for a bad LSN on a data page.
2960 */
2961 if (LogwrtResult.Flush < record)
2962 elog(ERROR,
2963 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2964 LSN_FORMAT_ARGS(record),
2966}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1396
bool MinimumActiveBackends(int min)
Definition: procarray.c:3546
int CommitDelay
Definition: xlog.c:132
int CommitSiblings
Definition: xlog.c:133
bool XLogInsertAllowed(void)
Definition: xlog.c:6410

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3779 of file xlog.c.

3780{
3781 XLogSegNo lastRemovedSegNo;
3782
3784 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3786
3787 return lastRemovedSegNo;
3788}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3795 of file xlog.c.

3796{
3797 DIR *xldir;
3798 struct dirent *xlde;
3799 XLogSegNo oldest_segno = 0;
3800
3801 xldir = AllocateDir(XLOGDIR);
3802 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3803 {
3804 TimeLineID file_tli;
3805 XLogSegNo file_segno;
3806
3807 /* Ignore files that are not XLOG segments. */
3808 if (!IsXLogFileName(xlde->d_name))
3809 continue;
3810
3811 /* Parse filename to get TLI and segno. */
3812 XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3814
3815 /* Ignore anything that's not from the TLI of interest. */
3816 if (tli != file_tli)
3817 continue;
3818
3819 /* If it's the oldest so far, update oldest_segno. */
3820 if (oldest_segno == 0 || file_segno < oldest_segno)
3821 oldest_segno = file_segno;
3822 }
3823
3824 FreeDir(xldir);
3825 return oldest_segno;
3826}

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2704 of file xlog.c.

2705{
2706 XLogRecPtr retval;
2707
2711
2712 return retval;
2713}
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:459

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5235 of file xlog.c.

5236{
5237 char xlogfname[MAXFNAMELEN];
5238 XLogSegNo endLogSegNo;
5239 XLogSegNo startLogSegNo;
5240
5241 /* we always switch to a new timeline after archive recovery */
5242 Assert(endTLI != newTLI);
5243
5244 /*
5245 * Update min recovery point one last time.
5246 */
5248
5249 /*
5250 * Calculate the last segment on the old timeline, and the first segment
5251 * on the new timeline. If the switch happens in the middle of a segment,
5252 * they are the same, but if the switch happens exactly at a segment
5253 * boundary, startLogSegNo will be endLogSegNo + 1.
5254 */
5255 XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5256 XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5257
5258 /*
5259 * Initialize the starting WAL segment for the new timeline. If the switch
5260 * happens in the middle of a segment, copy data from the last WAL segment
5261 * of the old timeline up to the switch point, to the starting WAL segment
5262 * on the new timeline.
5263 */
5264 if (endLogSegNo == startLogSegNo)
5265 {
5266 /*
5267 * Make a copy of the file on the new timeline.
5268 *
5269 * Writing WAL isn't allowed yet, so there are no locking
5270 * considerations. But we should be just as tense as XLogFileInit to
5271 * avoid emplacing a bogus file.
5272 */
5273 XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5275 }
5276 else
5277 {
5278 /*
5279 * The switch happened at a segment boundary, so just create the next
5280 * segment on the new timeline.
5281 */
5282 int fd;
5283
5284 fd = XLogFileInit(startLogSegNo, newTLI);
5285
5286 if (close(fd) != 0)
5287 {
5288 int save_errno = errno;
5289
5290 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5291 errno = save_errno;
5292 ereport(ERROR,
5294 errmsg("could not close file \"%s\": %m", xlogfname)));
5295 }
5296 }
5297
5298 /*
5299 * Let's just make real sure there are not .ready or .done flags posted
5300 * for the new segment.
5301 */
5302 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5303 XLogArchiveCleanup(xlogfname);
5304}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3439

References Assert, close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6410 of file xlog.c.

6411{
6412 /*
6413 * If value is "unconditionally true" or "unconditionally false", just
6414 * return it. This provides the normal fast path once recovery is known
6415 * done.
6416 */
6417 if (LocalXLogInsertAllowed >= 0)
6418 return (bool) LocalXLogInsertAllowed;
6419
6420 /*
6421 * Else, must check to see if we're still in recovery.
6422 */
6423 if (RecoveryInProgress())
6424 return false;
6425
6426 /*
6427 * On exit from recovery, reset to "unconditionally true", since there is
6428 * no need to keep checking.
6429 */
6431 return true;
6432}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 748 of file xlog.c.

753{
755 pg_crc32c rdata_crc;
756 bool inserted;
757 XLogRecord *rechdr = (XLogRecord *) rdata->data;
758 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
760 XLogRecPtr StartPos;
761 XLogRecPtr EndPos;
762 bool prevDoPageWrites = doPageWrites;
763 TimeLineID insertTLI;
764
765 /* Does this record type require special handling? */
766 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
767 {
768 if (info == XLOG_SWITCH)
770 else if (info == XLOG_CHECKPOINT_REDO)
772 }
773
774 /* we assume that all of the record header is in the first chunk */
775 Assert(rdata->len >= SizeOfXLogRecord);
776
777 /* cross-check on whether we should be here or not */
778 if (!XLogInsertAllowed())
779 elog(ERROR, "cannot make new WAL entries during recovery");
780
781 /*
782 * Given that we're not in recovery, InsertTimeLineID is set and can't
783 * change, so we can read it without a lock.
784 */
785 insertTLI = XLogCtl->InsertTimeLineID;
786
787 /*----------
788 *
789 * We have now done all the preparatory work we can without holding a
790 * lock or modifying shared state. From here on, inserting the new WAL
791 * record to the shared WAL buffer cache is a two-step process:
792 *
793 * 1. Reserve the right amount of space from the WAL. The current head of
794 * reserved space is kept in Insert->CurrBytePos, and is protected by
795 * insertpos_lck.
796 *
797 * 2. Copy the record to the reserved WAL space. This involves finding the
798 * correct WAL buffer containing the reserved space, and copying the
799 * record in place. This can be done concurrently in multiple processes.
800 *
801 * To keep track of which insertions are still in-progress, each concurrent
802 * inserter acquires an insertion lock. In addition to just indicating that
803 * an insertion is in progress, the lock tells others how far the inserter
804 * has progressed. There is a small fixed number of insertion locks,
805 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
806 * boundary, it updates the value stored in the lock to the how far it has
807 * inserted, to allow the previous buffer to be flushed.
808 *
809 * Holding onto an insertion lock also protects RedoRecPtr and
810 * fullPageWrites from changing until the insertion is finished.
811 *
812 * Step 2 can usually be done completely in parallel. If the required WAL
813 * page is not initialized yet, you have to grab WALBufMappingLock to
814 * initialize it, but the WAL writer tries to do that ahead of insertions
815 * to avoid that from happening in the critical path.
816 *
817 *----------
818 */
820
821 if (likely(class == WALINSERT_NORMAL))
822 {
824
825 /*
826 * Check to see if my copy of RedoRecPtr is out of date. If so, may
827 * have to go back and have the caller recompute everything. This can
828 * only happen just after a checkpoint, so it's better to be slow in
829 * this case and fast otherwise.
830 *
831 * Also check to see if fullPageWrites was just turned on or there's a
832 * running backup (which forces full-page writes); if we weren't
833 * already doing full-page writes then go back and recompute.
834 *
835 * If we aren't doing full-page writes then RedoRecPtr doesn't
836 * actually affect the contents of the XLOG record, so we'll update
837 * our local copy but not force a recomputation. (If doPageWrites was
838 * just turned off, we could recompute the record without full pages,
839 * but we choose not to bother.)
840 */
841 if (RedoRecPtr != Insert->RedoRecPtr)
842 {
843 Assert(RedoRecPtr < Insert->RedoRecPtr);
844 RedoRecPtr = Insert->RedoRecPtr;
845 }
846 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
847
848 if (doPageWrites &&
849 (!prevDoPageWrites ||
850 (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
851 {
852 /*
853 * Oops, some buffer now needs to be backed up that the caller
854 * didn't back up. Start over.
855 */
858 return InvalidXLogRecPtr;
859 }
860
861 /*
862 * Reserve space for the record in the WAL. This also sets the xl_prev
863 * pointer.
864 */
865 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
866 &rechdr->xl_prev);
867
868 /* Normal records are always inserted. */
869 inserted = true;
870 }
871 else if (class == WALINSERT_SPECIAL_SWITCH)
872 {
873 /*
874 * In order to insert an XLOG_SWITCH record, we need to hold all of
875 * the WAL insertion locks, not just one, so that no one else can
876 * begin inserting a record until we've figured out how much space
877 * remains in the current WAL segment and claimed all of it.
878 *
879 * Nonetheless, this case is simpler than the normal cases handled
880 * below, which must check for changes in doPageWrites and RedoRecPtr.
881 * Those checks are only needed for records that can contain buffer
882 * references, and an XLOG_SWITCH record never does.
883 */
884 Assert(fpw_lsn == InvalidXLogRecPtr);
886 inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
887 }
888 else
889 {
891
892 /*
893 * We need to update both the local and shared copies of RedoRecPtr,
894 * which means that we need to hold all the WAL insertion locks.
895 * However, there can't be any buffer references, so as above, we need
896 * not check RedoRecPtr before inserting the record; we just need to
897 * update it afterwards.
898 */
899 Assert(fpw_lsn == InvalidXLogRecPtr);
901 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
902 &rechdr->xl_prev);
903 RedoRecPtr = Insert->RedoRecPtr = StartPos;
904 inserted = true;
905 }
906
907 if (inserted)
908 {
909 /*
910 * Now that xl_prev has been filled in, calculate CRC of the record
911 * header.
912 */
913 rdata_crc = rechdr->xl_crc;
914 COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
915 FIN_CRC32C(rdata_crc);
916 rechdr->xl_crc = rdata_crc;
917
918 /*
919 * All the record data, including the header, is now ready to be
920 * inserted. Copy the record in the space reserved.
921 */
923 class == WALINSERT_SPECIAL_SWITCH, rdata,
924 StartPos, EndPos, insertTLI);
925
926 /*
927 * Unless record is flagged as not important, update LSN of last
928 * important record in the current slot. When holding all locks, just
929 * update the first one.
930 */
931 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
932 {
933 int lockno = holdingAllLocks ? 0 : MyLockNo;
934
935 WALInsertLocks[lockno].l.lastImportantAt = StartPos;
936 }
937 }
938 else
939 {
940 /*
941 * This was an xlog-switch record, but the current insert location was
942 * already exactly at the beginning of a segment, so there was no need
943 * to do anything.
944 */
945 }
946
947 /*
948 * Done! Let others know that we're finished.
949 */
951
953
955
956 /*
957 * Mark top transaction id is logged (if needed) so that we should not try
958 * to log it again with the next WAL record in the current subtransaction.
959 */
960 if (topxid_included)
962
963 /*
964 * Update shared LogwrtRqst.Write, if we crossed page boundary.
965 */
966 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
967 {
969 /* advance global request to include new block(s) */
970 if (XLogCtl->LogwrtRqst.Write < EndPos)
971 XLogCtl->LogwrtRqst.Write = EndPos;
974 }
975
976 /*
977 * If this was an XLOG_SWITCH record, flush the record and the empty
978 * padding space that fills the rest of the segment, and perform
979 * end-of-segment actions (eg, notifying archiver).
980 */
981 if (class == WALINSERT_SPECIAL_SWITCH)
982 {
983 TRACE_POSTGRESQL_WAL_SWITCH();
984 XLogFlush(EndPos);
985
986 /*
987 * Even though we reserved the rest of the segment for us, which is
988 * reflected in EndPos, we return a pointer to just the end of the
989 * xlog-switch record.
990 */
991 if (inserted)
992 {
993 EndPos = StartPos + SizeOfXLogRecord;
994 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
995 {
996 uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
997
998 if (offset == EndPos % XLOG_BLCKSZ)
999 EndPos += SizeOfXLogLongPHD;
1000 else
1001 EndPos += SizeOfXLogShortPHD;
1002 }
1003 }
1004 }
1005
1006#ifdef WAL_DEBUG
1007 if (XLOG_DEBUG)
1008 {
1009 static XLogReaderState *debug_reader = NULL;
1010 XLogRecord *record;
1011 DecodedXLogRecord *decoded;
1013 StringInfoData recordBuf;
1014 char *errormsg = NULL;
1015 MemoryContext oldCxt;
1016
1017 oldCxt = MemoryContextSwitchTo(walDebugCxt);
1018
1020 appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1021
1022 /*
1023 * We have to piece together the WAL record data from the XLogRecData
1024 * entries, so that we can pass it to the rm_desc function as one
1025 * contiguous chunk.
1026 */
1027 initStringInfo(&recordBuf);
1028 for (; rdata != NULL; rdata = rdata->next)
1029 appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1030
1031 /* We also need temporary space to decode the record. */
1032 record = (XLogRecord *) recordBuf.data;
1033 decoded = (DecodedXLogRecord *)
1035
1036 if (!debug_reader)
1037 debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1038 XL_ROUTINE(.page_read = NULL,
1039 .segment_open = NULL,
1040 .segment_close = NULL),
1041 NULL);
1042 if (!debug_reader)
1043 {
1044 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1045 }
1046 else if (!DecodeXLogRecord(debug_reader,
1047 decoded,
1048 record,
1049 EndPos,
1050 &errormsg))
1051 {
1052 appendStringInfo(&buf, "error decoding record: %s",
1053 errormsg ? errormsg : "no error message");
1054 }
1055 else
1056 {
1057 appendStringInfoString(&buf, " - ");
1058
1059 debug_reader->record = decoded;
1060 xlog_outdesc(&buf, debug_reader);
1061 debug_reader->record = NULL;
1062 }
1063 elog(LOG, "%s", buf.data);
1064
1065 pfree(decoded);
1066 pfree(buf.data);
1067 pfree(recordBuf.data);
1068 MemoryContextSwitchTo(oldCxt);
1069 }
1070#endif
1071
1072 /*
1073 * Update our global variables
1074 */
1075 ProcLastRecPtr = StartPos;
1076 XactLastRecEnd = EndPos;
1077
1078 /* Report WAL traffic to the instrumentation. */
1079 if (inserted)
1080 {
1081 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1083 pgWalUsage.wal_fpi += num_fpi;
1084 }
1085
1086 return EndPos;
1087}
#define likely(x)
Definition: c.h:332
#define unlikely(x)
Definition: c.h:333
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:590
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:540
XLogRecPtr XactLastRecEnd
Definition: xlog.c:254
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1225
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1108
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1164
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1672
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1639
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3136 of file xlog.c.

3137{
3138 /*
3139 * During recovery, we don't flush WAL but update minRecoveryPoint
3140 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3141 * would need to be updated.
3142 */
3143 if (RecoveryInProgress())
3144 {
3145 /*
3146 * An invalid minRecoveryPoint means that we need to recover all the
3147 * WAL, i.e., we're doing crash recovery. We never modify the control
3148 * file's value in that case, so we can short-circuit future checks
3149 * here too. This triggers a quick exit path for the startup process,
3150 * which cannot update its local copy of minRecoveryPoint as long as
3151 * it has not replayed all WAL available when doing crash recovery.
3152 */
3153 if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3154 updateMinRecoveryPoint = false;
3155
3156 /* Quick exit if already known to be updated or cannot be updated */
3157 if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3158 return false;
3159
3160 /*
3161 * Update local copy of minRecoveryPoint. But if the lock is busy,
3162 * just return a conservative guess.
3163 */
3164 if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
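3165 return true;
3166 LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3167 LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
3168 LWLockRelease(ControlFileLock);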
3169
3170 /*
3171 * Check minRecoveryPoint for any other process than the startup
3172 * process doing crash recovery, which should not update the control
3173 * file value if crash recovery is still running.
3174 */
3175 if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3176 updateMinRecoveryPoint = false;
3177
3178 /* check again */
3179 if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3180 return false;
3181 else
3182 return true;
3183 }
3184
3185 /* Quick exit if already known flushed */
3186 if (record <= LogwrtResult.Flush)
3187 return false;
3188
3189 /* read LogwrtResult and update local state */
3190 RefreshXLogWriteResult(LogwrtResult);
3191
3192 /* check again */
3193 if (record <= LogwrtResult.Flush)
3194 return false;
3195
3196 return true;
3197}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8044 of file xlog.c.

8045{
8046 XLogBeginInsert();
8047 XLogRegisterData(&nextOid, sizeof(Oid));
8048 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8049
8050 /*
8051 * We need not flush the NEXTOID record immediately, because any of the
8052 * just-allocated OIDs could only reach disk as part of a tuple insert or
8053 * update that would have its own XLOG record that must follow the NEXTOID
8054 * record. Therefore, the standard buffer LSN interlock applied to those
8055 * records will ensure no such OID reaches disk before the NEXTOID record
8056 * does.
8057 *
8058 * Note, however, that the above statement only covers state "within" the
8059 * database. When we use a generated OID as a file or directory name, we
8060 * are in a sense violating the basic WAL rule, because that filesystem
8061 * change may reach disk before the NEXTOID WAL record does. The impact
8062 * of this is that if a database crash occurs immediately afterward, we
8063 * might after restart re-generate the same OID and find that it conflicts
8064 * with the leftover file or directory. But since for safety's sake we
8065 * always loop until finding a nonconflicting filename, this poses no real
8066 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8067 */
8068}

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1941 of file xlog.c.

1942{
1943 uint64 fullsegs;
1944 uint32 fullpages;
1945 uint32 offset;
1946 uint64 result;
1947
1948 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1949
1950 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1951 offset = ptr % XLOG_BLCKSZ;
1952
1953 if (fullpages == 0)
1954 {
1955 result = fullsegs * UsableBytesInSegment;
1956 if (offset > 0)
1957 {
1958 Assert(offset >= SizeOfXLogLongPHD);
1959 result += offset - SizeOfXLogLongPHD;
1960 }
1961 }
1962 else
1963 {
1964 result = fullsegs * UsableBytesInSegment +
1965 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1966 (fullpages - 1) * UsableBytesInPage; /* full pages */
1967 if (offset > 0)
1968 {
1969 Assert(offset >= SizeOfXLogShortPHD);
1970 result += offset - SizeOfXLogShortPHD;
1971 }
1972 }
1973
1974 return result;
1975}

References Assert, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8124 of file xlog.c.

8125{
8134 {
8135 /*
8136 * The change in number of backend slots doesn't need to be WAL-logged
8137 * if archiving is not enabled, as you can't start archive recovery
8138 * with wal_level=minimal anyway. We don't really care about the
8139 * values in pg_control either if wal_level=minimal, but seems better
8140 * to keep them up-to-date to avoid confusion.
8141 */
8143 {
8144 xl_parameter_change xlrec;
8145 XLogRecPtr recptr;
8146
8152 xlrec.wal_level = wal_level;
8155
8157 XLogRegisterData(&xlrec, sizeof(xlrec));
8158
8159 recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
8160 XLogFlush(recptr);
8161 }
8162
8163 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8164
8174
8175 LWLockRelease(ControlFileLock);
8176 }
8177}

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8099 of file xlog.c.

8100{
8101 XLogRecPtr RecPtr;
8102 xl_restore_point xlrec;
8103
8104 xlrec.rp_time = GetCurrentTimestamp();
8105 strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8106
8107 XLogBeginInsert();
8108 XLogRegisterData(&xlrec, sizeof(xl_restore_point));
8109
8110 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8111
8112 ereport(LOG,
8113 (errmsg("restore point \"%s\" created at %X/%X",
8114 rpName, LSN_FORMAT_ARGS(RecPtr))));
8115
8116 return RecPtr;
8117}
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2634 of file xlog.c.

2635{
2636 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2637 bool sleeping;
2638 bool wakeup = false;
2639 XLogRecPtr prevAsyncXactLSN;
2640
2641 SpinLockAcquire(&XLogCtl->info_lck);
2642 sleeping = XLogCtl->WalWriterSleeping;
2643 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2644 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2645 XLogCtl->asyncXactLSN = asyncXactLSN;
2646 SpinLockRelease(&XLogCtl->info_lck);
2647
2648 /*
2649 * If somebody else already called this function with a more aggressive
2650 * LSN, they will have done what we needed (and perhaps more).
2651 */
2652 if (asyncXactLSN <= prevAsyncXactLSN)
2653 return;
2654
2655 /*
2656 * If the WALWriter is sleeping, kick it to make it come out of low-power
2657 * mode, so that this async commit will reach disk within the expected
2658 * amount of time. Otherwise, determine whether it has enough WAL
2659 * available to flush, the same way that XLogBackgroundFlush() does.
2660 */
2661 if (sleeping)
2662 wakeup = true;
2663 else
2664 {
2665 int flushblocks;
2666
2667 RefreshXLogWriteResult(LogwrtResult);
2668
2669 flushblocks =
2670 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2671
2672 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2673 wakeup = true;
2674 }
2675
2676 if (wakeup)
2677 {
2678 volatile PROC_HDR *procglobal = ProcGlobal;
2679 ProcNumber walwriterProc = procglobal->walwriterProc;
2680
2681 if (walwriterProc != INVALID_PROC_NUMBER)
2682 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2683 }
2684}
void SetLatch(Latch *latch)
Definition: latch.c:632
#define GetPGProcByNumber(n)
Definition: proc.h:423
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
PROC_HDR * ProcGlobal
Definition: proc.c:78
Definition: proc.h:369
ProcNumber walwriterProc
Definition: proc.h:407
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4939 of file xlog.c.

4940{
4941 bool foundCFile,
4942 foundXLog;
4943 char *allocptr;
4944 int i;
4945 ControlFileData *localControlFile;
4946
4947#ifdef WAL_DEBUG
4948
4949 /*
4950 * Create a memory context for WAL debugging that's exempt from the normal
4951 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4952 * an allocation fails, but wal_debug is not for production use anyway.
4953 */
4954 if (walDebugCxt == NULL)
4955 {
4957 "WAL Debug",
4959 MemoryContextAllowInCriticalSection(walDebugCxt, true);
4960 }
4961#endif
4962
4963
4964 XLogCtl = (XLogCtlData *)
4965 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4966
4967 localControlFile = ControlFile;
4969 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4970
4971 if (foundCFile || foundXLog)
4972 {
4973 /* both should be present or neither */
4974 Assert(foundCFile && foundXLog);
4975
4976 /* Initialize local copy of WALInsertLocks */
4978
4979 if (localControlFile)
4980 pfree(localControlFile);
4981 return;
4982 }
4983 memset(XLogCtl, 0, sizeof(XLogCtlData));
4984
4985 /*
4986 * Already have read control file locally, unless in bootstrap mode. Move
4987 * contents into shared memory.
4988 */
4989 if (localControlFile)
4990 {
4991 memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4992 pfree(localControlFile);
4993 }
4994
4995 /*
4996 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4997 * multiple of the alignment for same, so no extra alignment padding is
4998 * needed here.
4999 */
5000 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5001 XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
5002 allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
5003
5004 for (i = 0; i < XLOGbuffers; i++)
5005 {
5007 }
5008
5009 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5010 allocptr += sizeof(WALInsertLockPadded) -
5011 ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
5013 (WALInsertLockPadded *) allocptr;
5014 allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
5015
5016 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5017 {
5021 }
5022
5023 /*
5024 * Align the start of the page buffers to a full xlog block size boundary.
5025 * This simplifies some calculations in XLOG insertion. It is also
5026 * required for O_DIRECT.
5027 */
5028 allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5029 XLogCtl->pages = allocptr;
5030 memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5031
5032 /*
5033 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5034 * in additional info.)
5035 */
5039 XLogCtl->WalWriterSleeping = false;
5040
5047}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:494
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:444
slock_t insertpos_lck
Definition: xlog.c:398
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4889
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4889 of file xlog.c.

4890{
4891 Size size;
4892
4893 /*
4894 * If the value of wal_buffers is -1, use the preferred auto-tune value.
4895 * This isn't an amazingly clean place to do this, but we must wait till
4896 * NBuffers has received its final value, and must do it before using the
4897 * value of XLOGbuffers to do anything important.
4898 *
4899 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4900 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4901 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4902 * the matter with PGC_S_OVERRIDE.
4903 */
4904 if (XLOGbuffers == -1)
4905 {
4906 char buf[32];
4907
4908 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4909 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4910 PGC_S_DYNAMIC_DEFAULT);
4911 if (XLOGbuffers == -1) /* failed to apply it? */
4912 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4913 PGC_S_OVERRIDE);
4914 }
4915 Assert(XLOGbuffers > 0);
4916
4917 /* XLogCtl */
4918 size = sizeof(XLogCtlData);
4919
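4920 /* WAL insertion locks, plus alignment */
4921 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4922 /* xlblocks array */
4923 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));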
4924 /* extra alignment padding for XLOG I/O buffers */
4925 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4926 /* and the buffers themselves */
4927 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4928
4929 /*
4930 * Note: we don't count ControlFileData, it comes out of the "slop factor"
4931 * added by CreateSharedMemoryAndSemaphores. This lets us use this
4932 * routine again below to compute the actual allocation size.
4933 */
4934
4935 return size;
4936}
#define Max(x, y)
Definition: c.h:955
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_POSTMASTER
Definition: guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9497 of file xlog.c.

9498{
9499 ShutdownWalRcv();
9500
9501 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9502 XLogCtl->InstallXLogFileSegmentActive = false;
9503 LWLockRelease(ControlFileLock);
9504}
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2314 of file xlog.c.

2315{
2316 bool ispartialpage;
2317 bool last_iteration;
2318 bool finishing_seg;
2319 int curridx;
2320 int npages;
2321 int startidx;
2322 uint32 startoffset;
2323
2324 /* We should always be inside a critical section here */
2325 Assert(CritSectionCount > 0);
2326
2327 /*
2328 * Update local LogwrtResult (caller probably did this already, but...)
2329 */
2330 RefreshXLogWriteResult(LogwrtResult);
2331
2332 /*
2333 * Since successive pages in the xlog cache are consecutively allocated,
2334 * we can usually gather multiple pages together and issue just one
2335 * write() call. npages is the number of pages we have determined can be
2336 * written together; startidx is the cache block index of the first one,
2337 * and startoffset is the file offset at which it should go. The latter
2338 * two variables are only valid when npages > 0, but we must initialize
2339 * all of them to keep the compiler quiet.
2340 */
2341 npages = 0;
2342 startidx = 0;
2343 startoffset = 0;
2344
2345 /*
2346 * Within the loop, curridx is the cache block index of the page to
2347 * consider writing. Begin at the buffer containing the next unwritten
2348 * page, or last partially written page.
2349 */
2350 curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
2351
2352 while (LogwrtResult.Write < WriteRqst.Write)
2353 {
2354 /*
2355 * Make sure we're not ahead of the insert process. This could happen
2356 * if we're passed a bogus WriteRqst.Write that is past the end of the
2357 * last page that's been initialized by AdvanceXLInsertBuffer.
2358 */
2359 XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2360
2361 if (LogwrtResult.Write >= EndPtr)
2362 elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
2363 LSN_FORMAT_ARGS(LogwrtResult.Write),
2364 LSN_FORMAT_ARGS(EndPtr));
2365
2366 /* Advance LogwrtResult.Write to end of current buffer page */
2367 LogwrtResult.Write = EndPtr;
2368 ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2369
2372 {
2373 /*
2374 * Switch to new logfile segment. We cannot have any pending
2375 * pages here (since we dump what we have at segment end).
2376 */
2377 Assert(npages == 0);
2378 if (openLogFile >= 0)
2379 XLogFileClose();
2382 openLogTLI = tli;
2383
2384 /* create/use new log file */
2387 }
2388
2389 /* Make sure we have the current logfile open */
2390 if (openLogFile < 0)
2391 {
2394 openLogTLI = tli;
2397 }
2398
2399 /* Add current page to the set of pending pages-to-dump */
2400 if (npages == 0)
2401 {
2402 /* first of group */
2403 startidx = curridx;
2404 startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2405 wal_segment_size);
2406 }
2407 npages++;
2408
2409 /*
2410 * Dump the set if this will be the last loop iteration, or if we are
2411 * at the last page of the cache area (since the next page won't be
2412 * contiguous in memory), or if we are at the end of the logfile
2413 * segment.
2414 */
2415 last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2416
2417 finishing_seg = !ispartialpage &&
2418 (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2419
2420 if (last_iteration ||
2421 curridx == XLogCtl->XLogCacheBlck ||
2422 finishing_seg)
2423 {
2424 char *from;
2425 Size nbytes;
2426 Size nleft;
2427 ssize_t written;
2429
2430 /* OK to write the page(s) */
2431 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2432 nbytes = npages * (Size) XLOG_BLCKSZ;
2433 nleft = nbytes;
2434 do
2435 {
2436 errno = 0;
2437
2438 /*
2439 * Measure I/O timing to write WAL data, for pg_stat_io and/or
2440 * pg_stat_wal.
2441 */
2443
2444 pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2445 written = pg_pwrite(openLogFile, from, nleft, startoffset);
2447
2449 IOOP_WRITE, start, 1, written);
2450
2451 /*
2452 * Increment the I/O timing and the number of times WAL data
2453 * were written out to disk.
2454 */
2456 {
2457 instr_time end;
2458
2461 }
2462
2464
2465 if (written <= 0)
2466 {
2467 char xlogfname[MAXFNAMELEN];
2468 int save_errno;
2469
2470 if (errno == EINTR)
2471 continue;
2472
2473 save_errno = errno;
2474 XLogFileName(xlogfname, tli, openLogSegNo,
2475 wal_segment_size);
2476 errno = save_errno;
2477 ereport(PANIC,
2478 (errcode_for_file_access(),
2479 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2480 xlogfname, startoffset, nleft)));
2481 }
2482 nleft -= written;
2483 from += written;
2484 startoffset += written;
2485 } while (nleft > 0);
2486
2487 npages = 0;
2488
2489 /*
2490 * If we just wrote the whole last page of a logfile segment,
2491 * fsync the segment immediately. This avoids having to go back
2492 * and re-open prior segments when an fsync request comes along
2493 * later. Doing it here ensures that one and only one backend will
2494 * perform this fsync.
2495 *
2496 * This is also the right place to notify the Archiver that the
2497 * segment is ready to copy to archival storage, and to update the
2498 * timer for archive_timeout, and to signal for a checkpoint if
2499 * too many logfile segments have been used since the last
2500 * checkpoint.
2501 */
2502 if (finishing_seg)
2503 {
2505
2506 /* signal that we need to wakeup walsenders later */
2508
2509 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2510
2511 if (XLogArchivingActive())
2513
2514 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2516
2517 /*
2518 * Request a checkpoint if we've consumed too much xlog since
2519 * the last one. For speed, we first check using the local
2520 * copy of RedoRecPtr, which might be out of date; if it looks
2521 * like a checkpoint is needed, forcibly update RedoRecPtr and
2522 * recheck.
2523 */
2525 {
2526 (void) GetRedoRecPtr();
2529 }
2530 }
2531 }
2532
2533 if (ispartialpage)
2534 {
2535 /* Only asked to write a partial page */
2536 LogwrtResult.Write = WriteRqst.Write;
2537 break;
2538 }
2539 curridx = NextBufIdx(curridx);
2540
2541 /* If flexible, break out of loop as soon as we wrote something */
2542 if (flexible && npages == 0)
2543 break;
2544 }
2545
2546 Assert(npages == 0);
2547
2548 /*
2549 * If asked to flush, do so
2550 */
2551 if (LogwrtResult.Flush < WriteRqst.Flush &&
2553 {
2554 /*
2555 * Could get here without iterating above loop, in which case we might
2556 * have no open file or the wrong one. However, we do not need to
2557 * fsync more than one file.
2558 */
2561 {
2562 if (openLogFile >= 0 &&
2565 XLogFileClose();
2566 if (openLogFile < 0)
2567 {
2570 openLogTLI = tli;
2573 }
2574
2576 }
2577
2578 /* signal that we need to wakeup walsenders later */
2580
2582 }
2583
2584 /*
2585 * Update shared-memory status
2586 *
2587 * We make sure that the shared 'request' values do not fall behind the
2588 * 'result' values. This is not absolutely essential, but it saves some
2589 * code in a couple of places.
2590 */
2597
2598 /*
2599 * We write Write first, bar, then Flush. When reading, the opposite must
2600 * be done (with a matching barrier in between), so that we always see a
2601 * Flush value that trails behind the Write value seen.
2602 */
2606
2607#ifdef USE_ASSERT_CHECKING
2608 {
2612
2618
2619 /* WAL written to disk is always ahead of WAL flushed */
2620 Assert(Write >= Flush);
2621
2622 /* WAL inserted to buffers is always ahead of WAL written */
2623 Assert(Insert >= Write);
2624 }
2625#endif
2626}
void ReserveExternalFD(void)
Definition: fd.c:1220
volatile uint32 CritSectionCount
Definition: globals.c:44
PgStat_Counter wal_write
Definition: pgstat.h:498
instr_time wal_write_time
Definition: pgstat.h:500
XLogRecPtr Flush
Definition: walreceiver.c:111
XLogRecPtr Write
Definition: walreceiver.c:110
#define WalSndWakeupRequest()
Definition: walsender.h:58
#define EINTR
Definition: win32_port.h:364
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6458
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3639
#define NextBufIdx(idx)
Definition: xlog.c:584
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8704
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2290
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_io_timing, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_write, PgStat_PendingWalStats::wal_write_time, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

Definition at line 191 of file xlog.c.

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 166 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 159 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

bool doPageWrites
static

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 122 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

static bool lastFullPageWrites

Definition at line 217 of file xlog.c.

Referenced by StartupXLOG() and xlog_redo().

◆ LocalMinRecoveryPoint

static XLogRecPtr LocalMinRecoveryPoint

◆ LocalMinRecoveryPointTLI

static TimeLineID LocalMinRecoveryPointTLI

◆ LocalRecoveryInProgress

static bool LocalRecoveryInProgress = true

Definition at line 224 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

static int LocalXLogInsertAllowed = -1

Definition at line 236 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 135 of file xlog.c.

Referenced by KeepLogSeg() and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 115 of file xlog.c.

Referenced by ReadControlFile() and XLOGfileslop().

◆ MyLockNo

static int MyLockNo = 0

◆ openLogFile

static int openLogFile = -1

◆ openLogSegNo

static XLogSegNo openLogSegNo = 0

Definition at line 636 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

static TimeLineID openLogTLI = 0

Definition at line 637 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

static double PrevCheckPointDistance = 0

Definition at line 160 of file xlog.c.

Referenced by LogCheckpointEnd() and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE

◆ track_wal_io_timing

bool track_wal_io_timing = false

Definition at line 137 of file xlog.c.

Referenced by issue_xlog_fsync() and XLogWrite().

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 648 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

static int UsableBytesInSegment

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 124 of file xlog.c.

Referenced by XLogCompressBackupBlock() and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 126 of file xlog.c.

Referenced by assign_wal_consistency_checking() and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 125 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 136 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 127 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 116 of file xlog.c.

Referenced by KeepLogSeg() and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 123 of file xlog.c.

Referenced by InitControlFile() and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 128 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 143 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 171 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 118 of file xlog.c.

Referenced by CheckArchiveTimeout() and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 117 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl