PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "access/xlogwait.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, uint64 fpi_bytes, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
const char * show_effective_wal_level (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
void ResetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 114 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 605 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 582 of file xlog.c.

583 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
static int fb(int x)

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 586 of file xlog.c.

587 : ((idx) + 1))
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 153 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
do { \
} while (0)
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
pg_atomic_uint64 logWriteResult
Definition xlog.c:474
pg_atomic_uint64 logFlushResult
Definition xlog.c:475
static XLogCtlData * XLogCtl
Definition xlog.c:568

Definition at line 622 of file xlog.c.

623 { \
627 } while (0)

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 599 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 593 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

◆ XLogCtlData

◆ XLogCtlInsert

◆ XLogwrtResult

◆ XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 561 of file xlog.c.

562{
WalInsertClass
Definition xlog.c:562
@ WALINSERT_SPECIAL_SWITCH
Definition xlog.c:564
@ WALINSERT_NORMAL
Definition xlog.c:563
@ WALINSERT_SPECIAL_CHECKPOINT
Definition xlog.c:565

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1991 of file xlog.c.

1992{
1994 int nextidx;
2000 int npages pg_attribute_unused() = 0;
2001
2003
2004 /*
2005 * Now that we have the lock, check if someone initialized the page
2006 * already.
2007 */
2009 {
2011
2012 /*
2013 * Get ending-offset of the buffer page we need to replace (this may
2014 * be zero if the buffer hasn't been used yet). Fall through if it's
2015 * already written out.
2016 */
2019 {
2020 /*
2021 * Nope, got work to do. If we just want to pre-initialize as much
2022 * as we can without flushing, give up now.
2023 */
2024 if (opportunistic)
2025 break;
2026
2027 /* Advance shared memory write request position */
2032
2033 /*
2034 * Acquire an up-to-date LogwrtResult value and see if we still
2035 * need to write it or if someone else already did.
2036 */
2039 {
2040 /*
2041 * Must acquire write lock. Release WALBufMappingLock first,
2042 * to make sure that all insertions that we need to wait for
2043 * can finish (up to this same position). Otherwise we risk
2044 * deadlock.
2045 */
2047
2049
2051
2054 {
2055 /* OK, someone wrote it already */
2057 }
2058 else
2059 {
2060 /* Have to write it ourselves */
2062 WriteRqst.Write = OldPageRqstPtr;
2064 XLogWrite(WriteRqst, tli, false);
2068
2069 /*
2070 * Required for the flush of pending stats WAL data, per
2071 * update of pgWalUsage.
2072 */
2073 pgstat_report_fixed = true;
2074 }
2075 /* Re-acquire WALBufMappingLock and retry */
2077 continue;
2078 }
2079 }
2080
2081 /*
2082 * Now the next buffer slot is free and we can set it up to be the
2083 * next output page.
2084 */
2087
2089
2091
2092 /*
2093 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2094 * before initializing. Otherwise, the old page may be partially
2095 * zeroed but look valid.
2096 */
2099
2100 /*
2101 * Be sure to re-zero the buffer so that bytes beyond what we've
2102 * written will look like zeroes and not valid XLOG records...
2103 */
2105
2106 /*
2107 * Fill the new page's header
2108 */
2109 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2110
2111 /* NewPage->xlp_info = 0; */ /* done by memset */
2112 NewPage->xlp_tli = tli;
2113 NewPage->xlp_pageaddr = NewPageBeginPtr;
2114
2115 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2116
2117 /*
2118 * If online backup is not in progress, mark the header to indicate
2119 * that WAL records beginning in this page have removable backup
2120 * blocks. This allows the WAL archiver to know whether it is safe to
2121 * compress archived WAL data by transforming full-block records into
2122 * the non-full-block format. It is sufficient to record this at the
2123 * page level because we force a page switch (in fact a segment
2124 * switch) when starting a backup, so the flag will be off before any
2125 * records can be written during the backup. At the end of a backup,
2126 * the last page will be marked as all unsafe when perhaps only part
2127 * is unsafe, but at worst the archiver would miss the opportunity to
2128 * compress a few records.
2129 */
2130 if (Insert->runningBackups == 0)
2131 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2132
2133 /*
2134 * If first page of an XLOG segment file, make it a long header.
2135 */
2136 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2137 {
2139
2141 NewLongPage->xlp_seg_size = wal_segment_size;
2142 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2143 NewPage->xlp_info |= XLP_LONG_HEADER;
2144 }
2145
2146 /*
2147 * Make sure the initialization of the page becomes visible to others
2148 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2149 * holding a lock.
2150 */
2152
2155
2156 npages++;
2157 }
2159
2160#ifdef WAL_DEBUG
2161 if (XLOG_DEBUG && npages > 0)
2162 {
2163 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2165 }
2166#endif
2167}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
#define pg_write_barrier()
Definition atomics.h:155
#define pg_attribute_unused()
Definition c.h:132
#define Assert(condition)
Definition c.h:873
#define MemSet(start, val, len)
Definition c.h:1013
size_t Size
Definition c.h:619
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
static void Insert(File file)
Definition fd.c:1300
WalUsage pgWalUsage
Definition instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_EXCLUSIVE
Definition lwlock.h:112
bool pgstat_report_fixed
Definition pgstat.c:218
#define SpinLockRelease(lock)
Definition spin.h:61
#define SpinLockAcquire(lock)
Definition spin.h:59
uint64 system_identifier
Definition pg_control.h:112
int64 wal_buffers_full
Definition instrument.h:57
XLogwrtRqst LogwrtRqst
Definition xlog.c:458
slock_t info_lck
Definition xlog.c:555
XLogRecPtr InitializedUpTo
Definition xlog.c:487
char * pages
Definition xlog.c:494
pg_atomic_uint64 * xlblocks
Definition xlog.c:495
XLogCtlInsert Insert
Definition xlog.c:455
XLogRecPtr Write
Definition xlog.c:330
XLogRecPtr Write
Definition xlog.c:324
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition xlog.c:1510
#define RefreshXLogWriteResult(_target)
Definition xlog.c:622
int wal_segment_size
Definition xlog.c:146
static XLogwrtResult LogwrtResult
Definition xlog.c:614
#define XLogRecPtrToBufIdx(recptr)
Definition xlog.c:593
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition xlog.c:2307
static ControlFileData * ControlFile
Definition xlog.c:576
XLogLongPageHeaderData * XLogLongPageHeader
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
#define XLP_LONG_HEADER
#define XLP_BKP_REMOVABLE
#define XLOG_PAGE_MAGIC
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, fb(), XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, and XLogLongPageHeaderData::xlp_sysid.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void * extra 
)

Definition at line 2210 of file xlog.c.

2211{
2214}
double CheckPointCompletionTarget
#define newval
static void CalculateCheckpointSegments(void)
Definition xlog.c:2174

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void * extra 
)

Definition at line 2203 of file xlog.c.

2204{
2207}
int max_wal_size_mb
Definition xlog.c:117

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char * newval,
void * extra 
)

Definition at line 4831 of file xlog.c.

4832{
4833 /*
4834 * If some checks were deferred, it's possible that the checks will fail
4835 * later during InitializeWalConsistencyChecking(). But in that case, the
4836 * postmaster will exit anyway, so it's safe to proceed with the
4837 * assignment.
4838 *
4839 * Any built-in resource managers specified are assigned immediately,
4840 * which affects WAL created before shared_preload_libraries are
4841 * processed. Any custom resource managers specified won't be assigned
4842 * until after shared_preload_libraries are processed, but that's OK
4843 * because WAL for a custom resource manager can't be written before the
4844 * module is loaded anyway.
4845 */
4847}
bool * wal_consistency_checking
Definition xlog.c:129

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void * extra 
)

Definition at line 8816 of file xlog.c.

8817{
8819 {
8820 /*
8821 * To ensure that no blocks escape unsynced, force an fsync on the
8822 * currently open log segment (if any). Also, if the open flag is
8823 * changing, close the log file so it will be reopened (with new flag
8824 * bit) at next use.
8825 */
8826 if (openLogFile >= 0)
8827 {
8829 if (pg_fsync(openLogFile) != 0)
8830 {
8831 char xlogfname[MAXFNAMELEN];
8832 int save_errno;
8833
8834 save_errno = errno;
8837 errno = save_errno;
8838 ereport(PANIC,
8840 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8841 }
8842
8845 XLogFileClose();
8846 }
8847 }
8848}
int errcode_for_file_access(void)
Definition elog.c:886
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define PANIC
Definition elog.h:42
#define ereport(elevel,...)
Definition elog.h:150
int pg_fsync(int fd)
Definition fd.c:389
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
static int openLogFile
Definition xlog.c:637
static int get_sync_bit(int method)
Definition xlog.c:8768
int wal_sync_method
Definition xlog.c:133
static TimeLineID openLogTLI
Definition xlog.c:639
static void XLogFileClose(void)
Definition xlog.c:3676
static XLogSegNo openLogSegNo
Definition xlog.c:638
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), fb(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5126 of file xlog.c.

5127{
5128 CheckPoint checkPoint;
5129 PGAlignedXLogBlock buffer;
5130 XLogPageHeader page;
5132 XLogRecord *record;
5133 char *recptr;
5134 uint64 sysidentifier;
5135 struct timeval tv;
5136 pg_crc32c crc;
5137
5138 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5140
5141 /*
5142 * Select a hopefully-unique system identifier code for this installation.
5143 * We use the result of gettimeofday(), including the fractional seconds
5144 * field, as being about as unique as we can easily get. (Think not to
5145 * use random(), since it hasn't been seeded and there's no portable way
5146 * to seed it other than the system clock value...) The upper half of the
5147 * uint64 value is just the tv_sec part, while the lower half contains the
5148 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5149 * PID for a little extra uniqueness. A person knowing this encoding can
5150 * determine the initialization time of the installation, which could
5151 * perhaps be useful sometimes.
5152 */
5153 gettimeofday(&tv, NULL);
5154 sysidentifier = ((uint64) tv.tv_sec) << 32;
5155 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5156 sysidentifier |= getpid() & 0xFFF;
5157
5158 memset(&buffer, 0, sizeof buffer);
5159 page = (XLogPageHeader) &buffer;
5160
5161 /*
5162 * Set up information for the initial checkpoint record
5163 *
5164 * The initial checkpoint record is written to the beginning of the WAL
5165 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5166 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5167 */
5171 checkPoint.fullPageWrites = fullPageWrites;
5173 checkPoint.wal_level = wal_level;
5174 checkPoint.nextXid =
5176 checkPoint.nextOid = FirstGenbkiObjectId;
5177 checkPoint.nextMulti = FirstMultiXactId;
5178 checkPoint.nextMultiOffset = 1;
5180 checkPoint.oldestXidDB = Template1DbOid;
5181 checkPoint.oldestMulti = FirstMultiXactId;
5182 checkPoint.oldestMultiDB = Template1DbOid;
5185 checkPoint.time = (pg_time_t) time(NULL);
5187
5188 TransamVariables->nextXid = checkPoint.nextXid;
5189 TransamVariables->nextOid = checkPoint.nextOid;
5191 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5192 AdvanceOldestClogXid(checkPoint.oldestXid);
5193 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5194 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5196
5197 /* Set up the XLOG page header */
5198 page->xlp_magic = XLOG_PAGE_MAGIC;
5199 page->xlp_info = XLP_LONG_HEADER;
5203 longpage->xlp_sysid = sysidentifier;
5204 longpage->xlp_seg_size = wal_segment_size;
5205 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5206
5207 /* Insert the initial checkpoint record */
5208 recptr = ((char *) page + SizeOfXLogLongPHD);
5209 record = (XLogRecord *) recptr;
5210 record->xl_prev = InvalidXLogRecPtr;
5211 record->xl_xid = InvalidTransactionId;
5212 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5214 record->xl_rmid = RM_XLOG_ID;
5216 /* fill the XLogRecordDataHeaderShort struct */
5217 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5218 *(recptr++) = sizeof(checkPoint);
5219 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5220 recptr += sizeof(checkPoint);
5221 Assert(recptr - (char *) record == record->xl_tot_len);
5222
5224 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5225 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5226 FIN_CRC32C(crc);
5227 record->xl_crc = crc;
5228
5229 /* Create first XLOG segment file */
5232
5233 /*
5234 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5235 * close the file again in a moment.
5236 */
5237
5238 /* Write the first page with the initial record */
5239 errno = 0;
5241 if (write(openLogFile, &buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5242 {
5243 /* if write didn't set errno, assume problem is no disk space */
5244 if (errno == 0)
5245 errno = ENOSPC;
5246 ereport(PANIC,
5248 errmsg("could not write bootstrap write-ahead log file: %m")));
5249 }
5251
5253 if (pg_fsync(openLogFile) != 0)
5254 ereport(PANIC,
5256 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5258
5259 if (close(openLogFile) != 0)
5260 ereport(PANIC,
5262 errmsg("could not close bootstrap write-ahead log file: %m")));
5263
5264 openLogFile = -1;
5265
5266 /* Now create pg_control */
5267 InitControlFile(sysidentifier, data_checksum_version);
5268 ControlFile->time = checkPoint.time;
5269 ControlFile->checkPoint = checkPoint.redo;
5270 ControlFile->checkPointCopy = checkPoint;
5271
5272 /* some additional ControlFile fields are set in WriteControlFile() */
5274
5275 /* Bootstrap the commit log, too */
5276 BootStrapCLOG();
5280
5281 /*
5282 * Force control file to be read - in contrast to normal processing we'd
5283 * otherwise never run the checks and GUC related initializations therein.
5284 */
5286}
uint64_t uint64
Definition c.h:547
void BootStrapCLOG(void)
Definition clog.c:832
void BootStrapCommitTs(void)
Definition commit_ts.c:594
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition commit_ts.c:887
#define close(a)
Definition win32.h:12
#define write(a, b, c)
Definition win32.h:14
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:1992
void BootStrapMultiXact(void)
Definition multixact.c:1793
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2014
#define FirstMultiXactId
Definition multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:69
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:158
return crc
int64 pg_time_t
Definition pgtime.h:23
Oid oldestMultiDB
Definition pg_control.h:52
MultiXactId oldestMulti
Definition pg_control.h:51
MultiXactOffset nextMultiOffset
Definition pg_control.h:48
TransactionId newestCommitTsXid
Definition pg_control.h:56
TransactionId oldestXid
Definition pg_control.h:49
TimeLineID PrevTimeLineID
Definition pg_control.h:40
TimeLineID ThisTimeLineID
Definition pg_control.h:39
TransactionId oldestActiveXid
Definition pg_control.h:65
bool fullPageWrites
Definition pg_control.h:42
MultiXactId nextMulti
Definition pg_control.h:47
FullTransactionId nextXid
Definition pg_control.h:45
TransactionId oldestCommitTsXid
Definition pg_control.h:54
pg_time_t time
Definition pg_control.h:53
int wal_level
Definition pg_control.h:43
bool logicalDecodingEnabled
Definition pg_control.h:44
XLogRecPtr redo
Definition pg_control.h:37
Oid oldestXidDB
Definition pg_control.h:50
CheckPoint checkPointCopy
Definition pg_control.h:137
pg_time_t time
Definition pg_control.h:134
XLogRecPtr checkPoint
Definition pg_control.h:135
FullTransactionId nextXid
Definition transam.h:220
XLogRecPtr xlp_pageaddr
XLogRecPtr xl_prev
Definition xlogrecord.h:45
uint8 xl_info
Definition xlogrecord.h:46
uint32 xl_tot_len
Definition xlogrecord.h:43
TransactionId xl_xid
Definition xlogrecord.h:44
RmgrId xl_rmid
Definition xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition subtrans.c:269
#define InvalidTransactionId
Definition transam.h:31
#define FirstGenbkiObjectId
Definition transam.h:195
#define FirstNormalTransactionId
Definition transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition varsup.c:355
TransamVariablesData * TransamVariables
Definition varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition xlog.c:3417
bool fullPageWrites
Definition xlog.c:125
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition xlog.c:4241
void SetInstallXLogFileSegmentActive(void)
Definition xlog.c:9646
int wal_level
Definition xlog.c:134
static void WriteControlFile(void)
Definition xlog.c:4276
#define BootstrapTimeLineID
Definition xlog.c:114
static void ReadControlFile(void)
Definition xlog.c:4386
@ WAL_LEVEL_LOGICAL
Definition xlog.h:77
#define SizeOfXLogLongPHD
#define SizeOfXLogRecordDataHeaderShort
Definition xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition xlogrecord.h:241
#define SizeOfXLogRecord
Definition xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), fb(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, InvalidXLogRecPtr, CheckPoint::logicalDecodingEnabled, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, wal_level, CheckPoint::wal_level, WAL_LEVEL_LOGICAL, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogPageHeaderData::xlp_tli, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2174 of file xlog.c.

2175{
2176 double target;
2177
2178 /*-------
2179 * Calculate the distance at which to trigger a checkpoint, to avoid
2180 * exceeding max_wal_size_mb. This is based on two assumptions:
2181 *
2182 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2183 * WAL for two checkpoint cycles to allow us to recover from the
2184 * secondary checkpoint if the first checkpoint failed, though we
2185 * only did this on the primary anyway, not on standby. Keeping just
2186 * one checkpoint simplifies processing and reduces disk space in
2187 * many smaller databases.)
2188 * b) during checkpoint, we consume checkpoint_completion_target *
2189 * number of segments consumed between checkpoints.
2190 *-------
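2191 */
2192 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2193 (1.0 + CheckPointCompletionTarget);
2194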
2195 /* round down */
2196 CheckPointSegments = (int) target;
2197
2198 if (CheckPointSegments < 1)
2199 CheckPointSegments = 1;
2200}
#define ConvertToXSegs(x, segsize)
Definition xlog.c:605
int CheckPointSegments
Definition xlog.c:159

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, fb(), max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

bool check_wal_buffers ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 4709 of file xlog.c.

4710{
4711 /*
4712 * -1 indicates a request for auto-tune.
4713 */
4714 if (*newval == -1)
4715 {
4716 /*
4717 * If we haven't yet changed the boot_val default of -1, just let it
4718 * be. We'll fix it when XLOGShmemSize is called.
4719 */
4720 if (XLOGbuffers == -1)
4721 return true;
4722
4723 /* Otherwise, substitute the auto-tune value */
4724 *newval = XLOGChooseNumBuffers();
4725 }
4726
4727 /*
4728 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4729 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4730 * the case, we just silently treat such values as a request for the
4731 * minimum. (We could throw an error instead, but that doesn't seem very
4732 * helpful.)
4733 */
4734 if (*newval < 4)
4735 *newval = 4;
4736
4737 return true;
4738}
static int XLOGChooseNumBuffers(void)
Definition xlog.c:4693
int XLOGbuffers
Definition xlog.c:120

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4744 of file xlog.c.

4745{
4746 char *rawstring;
4747 List *elemlist;
4748 ListCell *l;
4749 bool newwalconsistency[RM_MAX_ID + 1];
4750
4751 /* Initialize the array */
4752 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4753
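4754 /* Need a modifiable copy of string */
4755 rawstring = pstrdup(*newval);
4756
4757 /* Parse string into list of identifiers */
4758 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4759 {
4760 /* syntax error in list */
4761 GUC_check_errdetail("List syntax is invalid.");
4762 pfree(rawstring);
4763 list_free(elemlist);
4764 return false;
4765 }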
4766
4767 foreach(l, elemlist)
4768 {
4769 char *tok = (char *) lfirst(l);
4770 int rmid;
4771
4772 /* Check for 'all'. */
4773 if (pg_strcasecmp(tok, "all") == 0)
4774 {
4775 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4776 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4777 newwalconsistency[rmid] = true;
4778 }
4779 else
4780 {
4781 /* Check if the token matches any known resource manager. */
4782 bool found = false;
4783
4784 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4785 {
4786 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4787 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4788 {
4789 newwalconsistency[rmid] = true;
4790 found = true;
4791 break;
4792 }
4793 }
4794 if (!found)
4795 {
4796 /*
4797 * During startup, it might be a not-yet-loaded custom
4798 * resource manager. Defer checking until
4799 * InitializeWalConsistencyChecking().
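4800 */
4801 if (!process_shared_preload_libraries_done)
4802 {
4803 check_wal_consistency_checking_deferred = true;
4804 }
4805 else
4806 {
4807 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4808 pfree(rawstring);
4809 list_free(elemlist);
4810 return false;
4811 }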
4812 }
4813 }
4814 }
4815
4816 pfree(rawstring);
4817 list_free(elemlist);
4818
4819 /* assign new value */
4820 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
4821 if (!*extra)
4822 return false;
4823 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4824 return true;
4825}
#define LOG
Definition elog.h:31
void * guc_malloc(int elevel, size_t size)
Definition guc.c:636
#define GUC_check_errdetail
Definition guc.h:505
void list_free(List *list)
Definition list.c:1546
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
bool process_shared_preload_libraries_done
Definition miscinit.c:1787
#define lfirst(lc)
Definition pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
#define RM_MAX_ID
Definition rmgr.h:33
Definition pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition varlena.c:2730
static bool check_wal_consistency_checking_deferred
Definition xlog.c:169
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, fb(), GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 2217 of file xlog.c.

2218{
2219 if (!IsValidWalSegSize(*newval))
2220 {
2221 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2222 return false;
2223 }
2224
2225 return true;
2226}
#define IsValidWalSegSize(size)

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7641 of file xlog.c.

7642{
7648
7649 /* Write out all dirty data in SLRUs and the main buffer pool */
7657 CheckPointBuffers(flags);
7658
7659 /* Perform all queued up fsyncs */
7665
7666 /* We deliberately delay 2PC checkpointing as long as possible */
7668}
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
void CheckPointBuffers(int flags)
Definition bufmgr.c:4342
void CheckPointCLOG(void)
Definition clog.c:903
void CheckPointCommitTs(void)
Definition commit_ts.c:794
void CheckPointMultiXact(void)
Definition multixact.c:1968
void CheckPointReplicationOrigin(void)
Definition origin.c:604
void CheckPointPredicate(void)
Definition predicate.c:1041
void CheckPointRelationMap(void)
Definition relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
void CheckPointReplicationSlots(bool is_shutdown)
Definition slot.c:2300
void CheckPointSnapBuild(void)
Definition snapbuild.c:1969
TimestampTz ckpt_write_t
Definition xlog.h:173
TimestampTz ckpt_sync_end_t
Definition xlog.h:175
TimestampTz ckpt_sync_t
Definition xlog.h:174
void CheckPointSUBTRANS(void)
Definition subtrans.c:329
void ProcessSyncRequests(void)
Definition sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1822
CheckpointStatsData CheckpointStats
Definition xlog.c:212
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:150

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, fb(), GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5473 of file xlog.c.

5474{
5475 /*
5476 * For archive recovery, the WAL must be generated with at least 'replica'
5477 * wal_level.
5478 */
5480 {
5481 ereport(FATAL,
5483 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5484 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5485 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5486 }
5487
5488 /*
5489 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5490 * must have at least as many backend slots as the primary.
5491 */
5493 {
5494 /* We ignore autovacuum_worker_slots when we make this test. */
5495 RecoveryRequiresIntParameter("max_connections",
5498 RecoveryRequiresIntParameter("max_worker_processes",
5501 RecoveryRequiresIntParameter("max_wal_senders",
5504 RecoveryRequiresIntParameter("max_prepared_transactions",
5507 RecoveryRequiresIntParameter("max_locks_per_transaction",
5510 }
5511}
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errhint(const char *fmt,...)
Definition elog.c:1330
int errcode(int sqlerrcode)
Definition elog.c:863
#define FATAL
Definition elog.h:41
int MaxConnections
Definition globals.c:143
int max_worker_processes
Definition globals.c:144
int max_locks_per_xact
Definition lock.c:53
int max_prepared_xacts
Definition twophase.c:116
int max_wal_senders
Definition walsender.c:129
bool EnableHotStandby
Definition xlog.c:124
@ WAL_LEVEL_MINIMAL
Definition xlog.h:75
bool ArchiveRecoveryRequested
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, fb(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3764 of file xlog.c.

3765{
3766 int save_errno = errno;
3767 XLogSegNo lastRemovedSegNo;
3768
3769 SpinLockAcquire(&XLogCtl->info_lck);
3770 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3771 SpinLockRelease(&XLogCtl->info_lck);
3772
3773 if (segno <= lastRemovedSegNo)
3774 {
3775 char filename[MAXFNAMELEN];
3776
3777 XLogFileName(filename, tli, segno, wal_segment_size);
3778 errno = save_errno;
3779 ereport(ERROR,
3780 (errcode_for_file_access(),
3781 errmsg("requested WAL segment %s has already been removed",
3782 filename)));
3783 }
3784 errno = save_errno;
3785}
#define ERROR
Definition elog.h:39
static char * filename
Definition pg_dumpall.c:120
XLogSegNo lastRemovedSegNo
Definition xlog.c:463
uint64 XLogSegNo
Definition xlogdefs.h:52

References ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5377 of file xlog.c.

5379{
5380 /*
5381 * Execute the recovery_end_command, if any.
5382 */
5385 "recovery_end_command",
5386 true,
5388
5389 /*
5390 * We switched to a new timeline. Clean up segments on the old timeline.
5391 *
5392 * If there are any higher-numbered segments on the old timeline, remove
5393 * them. They might contain valid WAL, but they might also be
5394 * pre-allocated files containing garbage. In any case, they are not part
5395 * of the new timeline's history so we don't need them.
5396 */
5398
5399 /*
5400 * If the switch happened in the middle of a segment, what to do with the
5401 * last, partial segment on the old timeline? If we don't archive it, and
5402 * the server that created the WAL never archives it either (e.g. because
5403 * it was hit by a meteor), it will never make it to the archive. That's
5404 * OK from our point of view, because the new segment that we created with
5405 * the new TLI contains all the WAL from the old timeline up to the switch
5406 * point. But if you later try to do PITR to the "missing" WAL on the old
5407 * timeline, recovery won't find it in the archive. It's physically
5408 * present in the new file with new TLI, but recovery won't look there
5409 * when it's recovering to the older timeline. On the other hand, if we
5410 * archive the partial segment, and the original server on that timeline
5411 * is still running and archives the completed version of the same segment
5412 * later, it will fail. (We used to do that in 9.4 and below, and it
5413 * caused such problems).
5414 *
5415 * As a compromise, we rename the last segment with the .partial suffix,
5416 * and archive it. Archive recovery will never try to read .partial
5417 * segments, so they will normally go unused. But in the odd PITR case,
5418 * the administrator can copy them manually to the pg_wal directory
5419 * (removing the suffix). They can be useful in debugging, too.
5420 *
5421 * If a .done or .ready file already exists for the old timeline, however,
5422 * we had already determined that the segment is complete, so we can let
5423 * it be archived normally. (In particular, if it was restored from the
5424 * archive to begin with, it's expected to have a .done file).
5425 */
5428 {
5429 char origfname[MAXFNAMELEN];
5431
5434
5436 {
5437 char origpath[MAXPGPATH];
5439 char partialpath[MAXPGPATH];
5440
5441 /*
5442 * If we're summarizing WAL, we can't rename the partial file
5443 * until the summarizer finishes with it, else it will fail.
5444 */
5445 if (summarize_wal)
5447
5449 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5450 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5451
5452 /*
5453 * Make sure there's no .done or .ready file for the .partial
5454 * file.
5455 */
5457
5460 }
5461 }
5462}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:782
#define MAXPGPATH
#define snprintf
Definition port.h:260
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition xlog.c:3977
#define XLogArchivingActive()
Definition xlog.h:101
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
void XLogArchiveNotify(const char *xlog)
void XLogArchiveCleanup(const char *xlog)
char * recoveryEndCommand

References durable_rename(), ERROR, ExecuteRecoveryCommand(), fb(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4198 of file xlog.c.

4199{
4200 DIR *xldir;
4201 struct dirent *xlde;
4202 char path[MAXPGPATH + sizeof(XLOGDIR)];
4203
4204 xldir = AllocateDir(XLOGDIR);
4205
4206 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4207 {
4208 if (IsBackupHistoryFileName(xlde->d_name))
4209 {
4210 if (XLogArchiveCheckDone(xlde->d_name))
4211 {
4212 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4213 xlde->d_name);
4214 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4215 unlink(path);
4216 XLogArchiveCleanup(xlde->d_name);
4217 }
4218 }
4219 }
4220
4221 FreeDir(xldir);
4222}
#define DEBUG2
Definition elog.h:29
int FreeDir(DIR *dir)
Definition fd.c:3008
DIR * AllocateDir(const char *dirname)
Definition fd.c:2890
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2956
Definition dirent.c:26
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1231 of file xlog.c.

1233{
1234 char *currpos;
1235 int freespace;
1236 int written;
1239
1240 /*
1241 * Get a pointer to the right place in the right WAL buffer to start
1242 * inserting to.
1243 */
1244 CurrPos = StartPos;
1245 currpos = GetXLogBuffer(CurrPos, tli);
1246 freespace = INSERT_FREESPACE(CurrPos);
1247
1248 /*
1249 * there should be enough space for at least the first field (xl_tot_len)
1250 * on this page.
1251 */
1252 Assert(freespace >= sizeof(uint32));
1253
1254 /* Copy record data */
1255 written = 0;
1256 while (rdata != NULL)
1257 {
1258 const char *rdata_data = rdata->data;
1259 int rdata_len = rdata->len;
1260
1261 while (rdata_len > freespace)
1262 {
1263 /*
1264 * Write what fits on this page, and continue on the next page.
1265 */
1266 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1267 memcpy(currpos, rdata_data, freespace);
1268 rdata_data += freespace;
1269 rdata_len -= freespace;
1270 written += freespace;
1271 CurrPos += freespace;
1272
1273 /*
1274 * Get pointer to beginning of next page, and set the xlp_rem_len
1275 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1276 *
1277 * It's safe to set the contrecord flag and xlp_rem_len without a
1278 * lock on the page. All the other flags were already set when the
1279 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1280 * only backend that needs to set the contrecord flag.
1281 */
1282 currpos = GetXLogBuffer(CurrPos, tli);
1283 pagehdr = (XLogPageHeader) currpos;
1284 pagehdr->xlp_rem_len = write_len - written;
1285 pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1286
1287 /* skip over the page header */
1289 {
1291 currpos += SizeOfXLogLongPHD;
1292 }
1293 else
1294 {
1296 currpos += SizeOfXLogShortPHD;
1297 }
1298 freespace = INSERT_FREESPACE(CurrPos);
1299 }
1300
1301 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1302 memcpy(currpos, rdata_data, rdata_len);
1303 currpos += rdata_len;
1304 CurrPos += rdata_len;
1305 freespace -= rdata_len;
1306 written += rdata_len;
1307
1308 rdata = rdata->next;
1309 }
1311
1312 /*
1313 * If this was an xlog-switch, it's not enough to write the switch record,
1314 * we also have to consume all the remaining space in the WAL segment. We
1315 * have already reserved that space, but we need to actually fill it.
1316 */
1318 {
1319 /* An xlog-switch record doesn't contain any data besides the header */
1321
1322 /* Assert that we did reserve the right amount of space */
1324
1325 /* Use up all the remaining space on the current page */
1326 CurrPos += freespace;
1327
1328 /*
1329 * Cause all remaining pages in the segment to be flushed, leaving the
1330 * XLog position where it should be, at the start of the next segment.
1331 * We do this one page at a time, to make sure we don't deadlock
1332 * against ourselves if wal_buffers < wal_segment_size.
1333 */
1334 while (CurrPos < EndPos)
1335 {
1336 /*
1337 * The minimal action to flush the page would be to call
1338 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1339 * AdvanceXLInsertBuffer(...). The page would be left initialized
1340 * mostly to zeros, except for the page header (always the short
1341 * variant, as this is never a segment's first page).
1342 *
1343 * The large vistas of zeros are good for compressibility, but the
1344 * headers interrupting them every XLOG_BLCKSZ (with values that
1345 * differ from page to page) are not. The effect varies with
1346 * compression tool, but bzip2 for instance compresses about an
1347 * order of magnitude worse if those headers are left in place.
1348 *
1349 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1350 * called in heavily-loaded circumstances as well as this lightly-
1351 * loaded one) with variant behavior, we just use GetXLogBuffer
1352 * (which itself calls the two methods we need) to get the pointer
1353 * and zero most of the page. Then we just zero the page header.
1354 */
1355 currpos = GetXLogBuffer(CurrPos, tli);
1356 MemSet(currpos, 0, SizeOfXLogShortPHD);
1357
1359 }
1360 }
1361 else
1362 {
1363 /* Align the end position, so that the next record starts aligned */
1365 }
1366
1367 if (CurrPos != EndPos)
1368 ereport(PANIC,
1370 errmsg_internal("space reserved for WAL record does not match what was written"));
1371}
uint32_t uint32
Definition c.h:546
#define MAXALIGN64(LEN)
Definition c.h:851
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
#define ERRCODE_DATA_CORRUPTED
#define INSERT_FREESPACE(endptr)
Definition xlog.c:582
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition xlog.c:1638
#define XLP_FIRST_IS_CONTRECORD
#define SizeOfXLogShortPHD

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), fb(), GetXLogBuffer(), INSERT_FREESPACE, MAXALIGN64, MemSet, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, and XLP_FIRST_IS_CONTRECORD.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 7015 of file xlog.c.

7016{
7017 bool shutdown;
7018 CheckPoint checkPoint;
7022 uint32 freespace;
7026 int nvxids;
7027 int oldXLogAllowed = 0;
7028
7029 /*
7030 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
7031 * issued at a different time.
7032 */
7034 shutdown = true;
7035 else
7036 shutdown = false;
7037
7038 /* sanity check */
7039 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
7040 elog(ERROR, "can't create a checkpoint during recovery");
7041
7042 /*
7043 * Prepare to accumulate statistics.
7044 *
7045 * Note: because it is possible for log_checkpoints to change while a
7046 * checkpoint proceeds, we always accumulate stats, even if
7047 * log_checkpoints is currently off.
7048 */
7051
7052 /*
7053 * Let smgr prepare for checkpoint; this has to happen outside the
7054 * critical section and before we determine the REDO pointer. Note that
7055 * smgr must not do anything that'd have to be undone if we decide no
7056 * checkpoint is needed.
7057 */
7059
7060 /* Run these points outside the critical section. */
7061 INJECTION_POINT("create-checkpoint-initial", NULL);
7062 INJECTION_POINT_LOAD("create-checkpoint-run");
7063
7064 /*
7065 * Use a critical section to force system panic if we have trouble.
7066 */
7068
7069 if (shutdown)
7070 {
7075 }
7076
7077 /* Begin filling in the checkpoint WAL record */
7078 MemSet(&checkPoint, 0, sizeof(checkPoint));
7079 checkPoint.time = (pg_time_t) time(NULL);
7080
7081 /*
7082 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7083 * pointer. This allows us to begin accumulating changes to assemble our
7084 * starting snapshot of locks and transactions.
7085 */
7087 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7088 else
7090
7091 /*
7092 * Get location of last important record before acquiring insert locks (as
7093 * GetLastImportantRecPtr() also locks WAL locks).
7094 */
7096
7097 /*
7098 * If this isn't a shutdown or forced checkpoint, and if there has been no
7099 * WAL activity requiring a checkpoint, skip it. The idea here is to
7100 * avoid inserting duplicate checkpoints when the system is idle.
7101 */
7103 CHECKPOINT_FORCE)) == 0)
7104 {
7106 {
7109 (errmsg_internal("checkpoint skipped because system is idle")));
7110 return false;
7111 }
7112 }
7113
7114 /*
7115 * An end-of-recovery checkpoint is created before anyone is allowed to
7116 * write WAL. To allow us to write the checkpoint record, temporarily
7117 * enable XLogInsertAllowed.
7118 */
7119 if (flags & CHECKPOINT_END_OF_RECOVERY)
7121
7123 if (flags & CHECKPOINT_END_OF_RECOVERY)
7125 else
7126 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7127
7128 /*
7129 * We must block concurrent insertions while examining insert state.
7130 */
7132
7133 checkPoint.fullPageWrites = Insert->fullPageWrites;
7134 checkPoint.wal_level = wal_level;
7135
7136 if (shutdown)
7137 {
7139
7140 /*
7141 * Compute new REDO record ptr = location of next XLOG record.
7142 *
7143 * Since this is a shutdown checkpoint, there can't be any concurrent
7144 * WAL insertion.
7145 */
7146 freespace = INSERT_FREESPACE(curInsert);
7147 if (freespace == 0)
7148 {
7151 else
7153 }
7154 checkPoint.redo = curInsert;
7155
7156 /*
7157 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7158 * this must be done while holding all the insertion locks.
7159 *
7160 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7161 * left pointing past where it really needs to point. This is okay;
7162 * the only consequence is that XLogInsert might back up whole buffers
7163 * that it didn't really need to. We can't postpone advancing
7164 * RedoRecPtr because XLogInserts that happen while we are dumping
7165 * buffers must assume that their buffer changes are not included in
7166 * the checkpoint.
7167 */
7168 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7169 }
7170
7171 /*
7172 * Now we can release the WAL insertion locks, allowing other xacts to
7173 * proceed while we are flushing disk buffers.
7174 */
7176
7177 /*
7178 * If this is an online checkpoint, we have not yet determined the redo
7179 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7180 * record; the LSN at which it starts becomes the new redo pointer. We
7181 * don't do this for a shutdown checkpoint, because in that case no WAL
7182 * can be written between the redo point and the insertion of the
7183 * checkpoint record itself, so the checkpoint record itself serves to
7184 * mark the redo point.
7185 */
7186 if (!shutdown)
7187 {
7188 /* Include WAL level in record for WAL summarizer's benefit. */
7192
7193 /*
7194 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7195 * shared memory and RedoRecPtr in backend-local memory, but we need
7196 * to copy that into the record that will be inserted when the
7197 * checkpoint is complete.
7198 */
7199 checkPoint.redo = RedoRecPtr;
7200 }
7201
7202 /* Update the info_lck-protected copy of RedoRecPtr as well */
7204 XLogCtl->RedoRecPtr = checkPoint.redo;
7206
7207 /*
7208 * If enabled, log checkpoint start. We postpone this until now so as not
7209 * to log anything if we decided to skip the checkpoint.
7210 */
7211 if (log_checkpoints)
7212 LogCheckpointStart(flags, false);
7213
7214 INJECTION_POINT_CACHED("create-checkpoint-run", NULL);
7215
7216 /* Update the process title */
7217 update_checkpoint_display(flags, false, false);
7218
7220
7221 /*
7222 * Get the other info we need for the checkpoint record.
7223 *
7224 * We don't need to save oldestClogXid in the checkpoint, it only matters
7225 * for the short period in which clog is being truncated, and if we crash
7226 * during that we'll redo the clog truncation and fix up oldestClogXid
7227 * there.
7228 */
7230 checkPoint.nextXid = TransamVariables->nextXid;
7231 checkPoint.oldestXid = TransamVariables->oldestXid;
7234
7239
7241 checkPoint.nextOid = TransamVariables->nextOid;
7242 if (!shutdown)
7243 checkPoint.nextOid += TransamVariables->oidCount;
7245
7247
7249 &checkPoint.nextMulti,
7250 &checkPoint.nextMultiOffset,
7251 &checkPoint.oldestMulti,
7252 &checkPoint.oldestMultiDB);
7253
7254 /*
7255 * Having constructed the checkpoint record, ensure all shmem disk buffers
7256 * and commit-log buffers are flushed to disk.
7257 *
7258 * This I/O could fail for various reasons. If so, we will fail to
7259 * complete the checkpoint, but there is no reason to force a system
7260 * panic. Accordingly, exit critical section while doing it.
7261 */
7263
7264 /*
7265 * In some cases there are groups of actions that must all occur on one
7266 * side or the other of a checkpoint record. Before flushing the
7267 * checkpoint record we must explicitly wait for any backend currently
7268 * performing those groups of actions.
7269 *
7270 * One example is end of transaction, so we must wait for any transactions
7271 * that are currently in commit critical sections. If an xact inserted
7272 * its commit record into XLOG just before the REDO point, then a crash
7273 * restart from the REDO point would not replay that record, which means
7274 * that our flushing had better include the xact's update of pg_xact. So
7275 * we wait until it is out of its commit critical section before proceeding.
7276 * See notes in RecordTransactionCommit().
7277 *
7278 * Because we've already released the insertion locks, this test is a bit
7279 * fuzzy: it is possible that we will wait for xacts we didn't really need
7280 * to wait for. But the delay should be short and it seems better to make
7281 * checkpoint take a bit longer than to hold off insertions longer than
7282 * necessary. (In fact, the whole reason we have this issue is that xact.c
7283 * does commit record XLOG insertion and clog update as two separate steps
7284 * protected by different locks, but again that seems best on grounds of
7285 * minimizing lock contention.)
7286 *
7287 * A transaction that has not yet set delayChkptFlags when we look cannot
7288 * be at risk, since it has not inserted its commit record yet; and one
7289 * that's already cleared it is not at risk either, since it's done fixing
7290 * clog and we will correctly flush the update below. So we cannot miss
7291 * any xacts we need to wait for.
7292 */
7294 if (nvxids > 0)
7295 {
7296 do
7297 {
7298 /*
7299 * Keep absorbing fsync requests while we wait. There could even
7300 * be a deadlock if we don't, if the process that prevents the
7301 * checkpoint is trying to add a request to the queue.
7302 */
7304
7306 pg_usleep(10000L); /* wait for 10 msec */
7310 }
7311 pfree(vxids);
7312
7313 CheckPointGuts(checkPoint.redo, flags);
7314
7316 if (nvxids > 0)
7317 {
7318 do
7319 {
7321
7323 pg_usleep(10000L); /* wait for 10 msec */
7327 }
7328 pfree(vxids);
7329
7330 /*
7331 * Take a snapshot of running transactions and write this to WAL. This
7332 * allows us to reconstruct the state of running transactions during
7333 * archive recovery, if required. Skip this if the info is disabled.
7334 *
7335 * If we are shutting down, or the startup process is completing crash
7336 * recovery, we don't need to write running xact data.
7337 */
7340
7342
7343 /*
7344 * Now insert the checkpoint record into XLOG.
7345 */
7347 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7351
7353
7354 /*
7355 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7356 * overwritten at next startup. No-one should even try, this just allows
7357 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7358 * to just temporarily disable writing until the system has exited
7359 * recovery.
7360 */
7361 if (shutdown)
7362 {
7363 if (flags & CHECKPOINT_END_OF_RECOVERY)
7365 else
7366 LocalXLogInsertAllowed = 0; /* never again write WAL */
7367 }
7368
7369 /*
7370 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7371 * = end of actual checkpoint record.
7372 */
7373 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7374 ereport(PANIC,
7375 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7376
7377 /*
7378 * Remember the prior checkpoint's redo ptr for
7379 * UpdateCheckPointDistanceEstimate()
7380 */
7382
7383 /*
7384 * Update the control file.
7385 */
7387 if (shutdown)
7390 ControlFile->checkPointCopy = checkPoint;
7391 /* crash recovery should always recover to the end of WAL */
7394
7395 /*
7396 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7397 * unused on non-shutdown checkpoints, but seems useful to store it always
7398 * for debugging purposes.
7399 */
7401
7404
7405 /*
7406 * We are now done with critical updates; no need for system panic if we
7407 * have trouble while fooling with old log segments.
7408 */
7410
7411 /*
7412 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7413 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7414 * where (a) we're not inside of a critical section and (b) we can be
7415 * certain that the relevant record has been flushed to disk, which must
7416 * happen before it can be summarized.
7417 *
7418 * If this is a shutdown checkpoint, then this happens reasonably
7419 * promptly: we've only just inserted and flushed the
7420 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7421 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7422 * record was written before we began flushing data to disk, and that
7423 * could be many minutes ago at this point. However, we don't XLogFlush()
7424 * after inserting that record, so we're not guaranteed that it's on disk
7425 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7426 * record.
7427 */
7429
7430 /*
7431 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7432 */
7434
7435 /*
7436 * Update the average distance between checkpoints if the prior checkpoint
7437 * exists.
7438 */
7441
7442 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7443
7444 /*
7445 * Delete old log files (those no longer needed for the last checkpoint) to
7446 * prevent the disk holding the xlog from filling up.
7447 */
7453 {
7454 /*
7455 * Some slots have been invalidated; recalculate the old-segment
7456 * horizon, starting again from RedoRecPtr.
7457 */
7460 }
7461 _logSegNo--;
7463 checkPoint.ThisTimeLineID);
7464
7465 /*
7466 * Make more log segments if needed. (Do this after recycling old log
7467 * segments, since that may supply some of the needed files.)
7468 */
7469 if (!shutdown)
7471
7472 /*
7473 * Truncate pg_subtrans if possible. We can throw away all data before
7474 * the oldest XMIN of any running transaction. No future transaction will
7475 * attempt to reference any pg_subtrans entry older than that (see Asserts
7476 * in subtrans.c). During recovery, though, we mustn't do this because
7477 * StartupSUBTRANS hasn't been called yet.
7478 */
7479 if (!RecoveryInProgress())
7481
7482 /* Real work is done; log and update stats. */
7483 LogCheckpointEnd(false);
7484
7485 /* Reset the process title */
7486 update_checkpoint_display(flags, false, true);
7487
7489 NBuffers,
7493
7494 return true;
7495}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:476
void AbsorbSyncRequests(void)
int NBuffers
Definition globals.c:142
#define INJECTION_POINT(name, arg)
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:204
@ LW_SHARED
Definition lwlock.h:113
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:1946
#define XLOG_CHECKPOINT_REDO
Definition pg_control.h:83
@ DB_SHUTDOWNING
Definition pg_control.h:96
@ DB_SHUTDOWNED
Definition pg_control.h:94
#define XLOG_CHECKPOINT_ONLINE
Definition pg_control.h:70
#define InvalidOid
#define DELAY_CHKPT_START
Definition proc.h:136
#define DELAY_CHKPT_COMPLETE
Definition proc.h:137
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition procarray.c:1979
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition procarray.c:3047
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition procarray.c:2830
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition procarray.c:3002
void pg_usleep(long microsec)
Definition signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition slot.c:2196
@ RS_INVAL_WAL_REMOVED
Definition slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition slot.h:68
XLogRecPtr LogStandbySnapshot(void)
Definition standby.c:1281
TimestampTz ckpt_start_t
Definition xlog.h:172
int ckpt_segs_removed
Definition xlog.h:182
int ckpt_bufs_written
Definition xlog.h:178
int ckpt_segs_recycled
Definition xlog.h:183
XLogRecPtr minRecoveryPoint
Definition pg_control.h:170
XLogRecPtr unloggedLSN
Definition pg_control.h:139
TimeLineID minRecoveryPointTLI
Definition pg_control.h:171
TransactionId oldestCommitTsXid
Definition transam.h:232
TransactionId newestCommitTsXid
Definition transam.h:233
TransactionId oldestXid
Definition transam.h:222
TimeLineID InsertTimeLineID
Definition xlog.c:511
XLogRecPtr RedoRecPtr
Definition xlog.c:459
TimeLineID PrevTimeLineID
Definition xlog.c:512
pg_atomic_uint64 unloggedLSN
Definition xlog.c:466
XLogRecPtr RedoRecPtr
Definition xlog.c:433
void TruncateSUBTRANS(TransactionId oldestXact)
Definition subtrans.c:385
void SyncPreCheckpoint(void)
Definition sync.c:177
void SyncPostCheckpoint(void)
Definition sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition xlog.c:256
bool RecoveryInProgress(void)
Definition xlog.c:6460
static void WALInsertLockRelease(void)
Definition xlog.c:1451
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition xlog.c:1864
static void WALInsertLockAcquireExclusive(void)
Definition xlog.c:1422
static void UpdateControlFile(void)
Definition xlog.c:4618
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition xlog.c:3902
static void LogCheckpointStart(int flags, bool restartpoint)
Definition xlog.c:6775
static XLogRecPtr RedoRecPtr
Definition xlog.c:276
static void LogCheckpointEnd(bool restartpoint)
Definition xlog.c:6807
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition xlog.c:3727
bool log_checkpoints
Definition xlog.c:132
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition xlog.c:8086
static int LocalSetXLogInsertAllowed(void)
Definition xlog.c:6548
XLogRecPtr GetLastImportantRecPtr(void)
Definition xlog.c:6682
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition xlog.c:6912
static int LocalXLogInsertAllowed
Definition xlog.c:239
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2783
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition xlog.c:7641
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition xlog.c:6950
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:151
#define CHECKPOINT_FORCE
Definition xlog.h:153
#define XLogStandbyInfoActive()
Definition xlog.h:125
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogBeginInsert(void)
Definition xloginsert.c:152

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, fb(), CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, INJECTION_POINT_CACHED, INJECTION_POINT_LOAD, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsLogicalDecodingEnabled(), KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), CheckPoint::logicalDecodingEnabled, LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), 
pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRecPtrIsValid, XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7506 of file xlog.c.

7507{
7510
7511 /* sanity check */
7512 if (!RecoveryInProgress())
7513 elog(ERROR, "can only be used to end recovery");
7514
7515 xlrec.end_time = GetCurrentTimestamp();
7516 xlrec.wal_level = wal_level;
7517
7519 xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7520 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7522
7524
7528
7530
7531 /*
7532 * Update the control file so that crash recovery can follow the timeline
7533 * changes to this point.
7534 */
7537 ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7540
7542}
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:78

References ControlFile, elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, UpdateControlFile(), wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7571 of file xlog.c.

7573{
7578
7579 /* sanity checks */
7580 if (!RecoveryInProgress())
7581 elog(ERROR, "can only be used at end of recovery");
7582 if (pagePtr % XLOG_BLCKSZ != 0)
7583 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7585
7586 /* The current WAL insert position should be right after the page header */
7587 startPos = pagePtr;
7590 else
7593 if (recptr != startPos)
7594 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
7596
7598
7599 /*
7600 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7601 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7602 *
7603 * No other backend is allowed to write WAL yet, so acquiring the WAL
7604 * insertion lock is just pro forma.
7605 */
7610
7611 /*
7612 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7613 * page. We know it becomes the first record, because no other backend is
7614 * allowed to write WAL yet.
7615 */
7617 xlrec.overwritten_lsn = aborted_lsn;
7618 xlrec.overwrite_time = GetCurrentTimestamp();
7621
7622 /* check that the record was inserted to the right place */
7623 if (ProcLastRecPtr != startPos)
7624 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
7626
7628
7630
7631 return recptr;
7632}
#define XLOG_OVERWRITE_CONTRECORD
Definition pg_control.h:82
static void WALInsertLockAcquire(void)
Definition xlog.c:1377
XLogRecPtr GetXLogInsertRecPtr(void)
Definition xlog.c:9598
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD

References elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLP_FIRST_IS_OVERWRITE_CONTRECORD.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7721 of file xlog.c.

7722{
7723 XLogRecPtr lastCheckPointRecPtr;
7724 XLogRecPtr lastCheckPointEndPtr;
7725 CheckPoint lastCheckPoint;
7729 TimeLineID replayTLI;
7730 XLogRecPtr endptr;
7733
7734 /* Concurrent checkpoint/restartpoint cannot happen */
7736
7737 /* Get a local copy of the last safe checkpoint record. */
7739 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7740 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7741 lastCheckPoint = XLogCtl->lastCheckPoint;
7743
7744 /*
7745 * Check that we're still in recovery mode. It's ok if we exit recovery
7746 * mode after this check, the restart point is valid anyway.
7747 */
7748 if (!RecoveryInProgress())
7749 {
7751 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7752 return false;
7753 }
7754
7755 /*
7756 * If the last checkpoint record we've replayed is already our last
7757 * restartpoint, we can't perform a new restart point. We still update
7758 * minRecoveryPoint in that case, so that if this is a shutdown restart
7759 * point, we won't start up earlier than before. That's not strictly
7760 * necessary, but when hot standby is enabled, it would be rather weird if
7761 * the database opened up for read-only connections at a point-in-time
7762 * before the last shutdown. Such time travel is still possible in case of
7763 * immediate shutdown, though.
7764 *
7765 * We don't explicitly advance minRecoveryPoint when we do create a
7766 * restartpoint. It's assumed that flushing the buffers will do that as a
7767 * side-effect.
7768 */
7769 if (!XLogRecPtrIsValid(lastCheckPointRecPtr) ||
7770 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7771 {
7773 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
7774 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
7775
7777 if (flags & CHECKPOINT_IS_SHUTDOWN)
7778 {
7783 }
7784 return false;
7785 }
7786
7787 /*
7788 * Update the shared RedoRecPtr so that the startup process can calculate
7789 * the number of segments replayed since last restartpoint, and request a
7790 * restartpoint if it exceeds CheckPointSegments.
7791 *
7792 * Like in CreateCheckPoint(), hold off insertions to update it, although
7793 * during recovery this is just pro forma, because no WAL insertions are
7794 * happening.
7795 */
7797 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7799
7800 /* Also update the info_lck-protected copy */
7802 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7804
7805 /*
7806 * Prepare to accumulate statistics.
7807 *
7808 * Note: because it is possible for log_checkpoints to change while a
7809 * checkpoint proceeds, we always accumulate stats, even if
7810 * log_checkpoints is currently off.
7811 */
7814
7815 if (log_checkpoints)
7816 LogCheckpointStart(flags, true);
7817
7818 /* Update the process title */
7819 update_checkpoint_display(flags, true, false);
7820
7821 CheckPointGuts(lastCheckPoint.redo, flags);
7822
7823 /*
7824 * This location needs to be after CheckPointGuts() to ensure that some
7825 * work has already happened during this checkpoint.
7826 */
7827 INJECTION_POINT("create-restart-point", NULL);
7828
7829 /*
7830 * Remember the prior checkpoint's redo ptr for
7831 * UpdateCheckPointDistanceEstimate()
7832 */
7834
7835 /*
7836 * Update pg_control, using current time. Check that it still shows an
7837 * older checkpoint, else do nothing; this is a quick hack to make sure
7838 * nothing really bad happens if somehow we get here after the
7839 * end-of-recovery checkpoint.
7840 */
7842 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7843 {
7844 /*
7845 * Update the checkpoint information. We do this even if the cluster
7846 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7847 * segments recycled below.
7848 */
7849 ControlFile->checkPoint = lastCheckPointRecPtr;
7850 ControlFile->checkPointCopy = lastCheckPoint;
7851
7852 /*
7853 * Ensure minRecoveryPoint is past the checkpoint record and update it
7854 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7855 * this will have happened already while writing out dirty buffers,
7856 * but not necessarily - e.g. because no buffers were dirtied. We do
7857 * this because a backup performed in recovery uses minRecoveryPoint
7858 * to determine which WAL files must be included in the backup, and
7859 * the file (or files) containing the checkpoint record must be
7860 * included, at a minimum. Note that for an ordinary restart of
7861 * recovery there's no value in having the minimum recovery point any
7862 * earlier than this anyway, because redo will begin just after the
7863 * checkpoint record.
7864 */
7866 {
7867 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7868 {
7869 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7871
7872 /* update local copy */
7875 }
7876 if (flags & CHECKPOINT_IS_SHUTDOWN)
7878 }
7880 }
7882
7883 /*
7884 * Update the average distance between checkpoints/restartpoints if the
7885 * prior checkpoint exists.
7886 */
7889
7890 /*
7891 * Delete old log files (those no longer needed for the last restartpoint)
7892 * to prevent the disk holding the xlog from filling up.
7893 */
7895
7896 /*
7897 * Retreat _logSegNo using the current end of xlog replayed or received,
7898 * whichever is later.
7899 */
7901 replayPtr = GetXLogReplayRecPtr(&replayTLI);
7902 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7903 KeepLogSeg(endptr, &_logSegNo);
7904
7905 INJECTION_POINT("restartpoint-before-slot-invalidation", NULL);
7906
7910 {
7911 /*
7912 * Some slots have been invalidated; recalculate the old-segment
7913 * horizon, starting again from RedoRecPtr.
7914 */
7916 KeepLogSeg(endptr, &_logSegNo);
7917 }
7918 _logSegNo--;
7919
7920 /*
7921 * Try to recycle segments on a useful timeline. If we've been promoted
7922 * since the beginning of this restartpoint, use the new timeline chosen
7923 * at end of recovery. If we're still in recovery, use the timeline we're
7924 * currently replaying.
7925 *
7926 * There is no guarantee that the WAL segments will be useful on the
7927 * current timeline; if recovery proceeds to a new timeline right after
7928 * this, the pre-allocated WAL segments on this timeline will not be used,
7929 * and will go wasted until recycled on the next restartpoint. We'll live
7930 * with that.
7931 */
7932 if (!RecoveryInProgress())
7933 replayTLI = XLogCtl->InsertTimeLineID;
7934
7935 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7936
7937 /*
7938 * Make more log segments if needed. (Do this after recycling old log
7939 * segments, since that may supply some of the needed files.)
7940 */
7941 PreallocXlogFiles(endptr, replayTLI);
7942
7943 /*
7944 * Truncate pg_subtrans if possible. We can throw away all data before
7945 * the oldest XMIN of any running transaction. No future transaction will
7946 * attempt to reference any pg_subtrans entry older than that (see Asserts
7947 * in subtrans.c). When hot standby is disabled, though, we mustn't do
7948 * this because StartupSUBTRANS hasn't been called yet.
7949 */
7950 if (EnableHotStandby)
7952
7953 /* Real work is done; log and update stats. */
7954 LogCheckpointEnd(true);
7955
7956 /* Reset the process title */
7957 update_checkpoint_display(flags, true, true);
7958
7961 errmsg("recovery restart point at %X/%08X",
7962 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7963 xtime ? errdetail("Last completed transaction was at log time %s.",
7965
7966 /*
7967 * Finally, execute archive_cleanup_command, if any.
7968 */
7971 "archive_cleanup_command",
7972 false,
7974
7975 return true;
7976}
const char * timestamptz_to_str(TimestampTz t)
Definition timestamp.c:1862
int64 TimestampTz
Definition timestamp.h:39
bool IsUnderPostmaster
Definition globals.c:120
@ B_CHECKPOINTER
Definition miscadmin.h:363
BackendType MyBackendType
Definition miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition pg_control.h:98
@ DB_SHUTDOWNED_IN_RECOVERY
Definition pg_control.h:95
CheckPoint lastCheckPoint
Definition xlog.c:547
XLogRecPtr lastCheckPointRecPtr
Definition xlog.c:545
XLogRecPtr lastCheckPointEndPtr
Definition xlog.c:546
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition xlog.c:2703
static XLogRecPtr LocalMinRecoveryPoint
Definition xlog.c:648
static TimeLineID LocalMinRecoveryPointTLI
Definition xlog.c:649
uint32 TimeLineID
Definition xlogdefs.h:63
char * archiveCleanupCommand
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), fb(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsValid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9557 of file xlog.c.

9558{
9560
9561 /* If called during backup start, there shouldn't be one already running */
9563
9565 {
9569
9572
9575 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9576 }
9577}
#define WARNING
Definition elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition postgres.h:100
int runningBackups
Definition xlog.c:441
static SessionBackupState sessionBackupState
Definition xlog.c:394
@ SESSION_BACKUP_NONE
Definition xlog.h:304

References arg, Assert, DatumGetBool(), ereport, errmsg(), fb(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8955 of file xlog.c.

8957{
8959
8960 Assert(state != NULL);
8962
8963 /*
8964 * During recovery, we don't need to check the WAL level, because if the
8965 * WAL level is not sufficient, it's impossible to get here during recovery.
8966 */
8968 ereport(ERROR,
8970 errmsg("WAL level not sufficient for making an online backup"),
8971 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8972
8974 ereport(ERROR,
8976 errmsg("backup label too long (max %d bytes)",
8977 MAXPGPATH)));
8978
8979 strlcpy(state->name, backupidstr, sizeof(state->name));
8980
8981 /*
8982 * Mark backup active in shared memory. We must do full-page WAL writes
8983 * during an on-line backup even if not doing so at other times, because
8984 * it's quite possible for the backup dump to obtain a "torn" (partially
8985 * written) copy of a database page if it reads the page concurrently with
8986 * our write to the same page. This can be fixed as long as the first
8987 * write to the page in the WAL sequence is a full-page write. Hence, we
8988 * increment runningBackups then force a CHECKPOINT, to ensure there are
8989 * no dirty pages in shared memory that might get dumped while the backup
8990 * is in progress without having a corresponding WAL record. (Once the
8991 * backup is complete, we need not force full-page writes anymore, since
8992 * we expect that any pages not modified during the backup interval must
8993 * have been correctly captured by the backup.)
8994 *
8995 * Note that forcing full-page writes has no effect during an online
8996 * backup from the standby.
8997 *
8998 * We must hold all the insertion locks to change the value of
8999 * runningBackups, to ensure adequate interlocking against
9000 * XLogInsertRecord().
9001 */
9005
9006 /*
9007 * Ensure we decrement runningBackups if we fail below. NB -- for this to
9008 * work correctly, it is critical that sessionBackupState is only updated
9009 * after this block is over.
9010 */
9012 {
9013 bool gotUniqueStartpoint = false;
9014 DIR *tblspcdir;
9015 struct dirent *de;
9017 int datadirpathlen;
9018
9019 /*
9020 * Force an XLOG file switch before the checkpoint, to ensure that the
9021 * WAL segment the checkpoint is written to doesn't contain pages with
9022 * old timeline IDs. That would otherwise happen if you called
9023 * pg_backup_start() right after restoring from a PITR archive: the
9024 * first WAL segment containing the startup checkpoint has pages in
9025 * the beginning with the old timeline ID. That can cause trouble at
9026 * recovery: we won't have a history file covering the old timeline if
9027 * pg_wal directory was not included in the base backup and the WAL
9028 * archive was cleared too before starting the backup.
9029 *
9030 * This also ensures that we have emitted a WAL page header that has
9031 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
9032 * Therefore, if a WAL archiver (such as pglesslog) is trying to
9033 * compress out removable backup blocks, it won't remove any that
9034 * occur after this point.
9035 *
9036 * During recovery, we skip forcing XLOG file switch, which means that
9037 * the backup taken during recovery is not available for the special
9038 * recovery case described above.
9039 */
9041 RequestXLogSwitch(false);
9042
9043 do
9044 {
9045 bool checkpointfpw;
9046
9047 /*
9048 * Force a CHECKPOINT. Aside from being necessary to prevent torn
9049 * page problems, this guarantees that two successive backup runs
9050 * will have different checkpoint positions and hence different
9051 * history file names, even if nothing happened in between.
9052 *
9053 * During recovery, establish a restartpoint if possible. We use
9054 * the last restartpoint as the backup starting checkpoint. This
9055 * means that two successive backup runs can have same checkpoint
9056 * positions.
9057 *
9058 * Since the fact that we are executing do_pg_backup_start()
9059 * during recovery means that checkpointer is running, we can use
9060 * RequestCheckpoint() to establish a restartpoint.
9061 *
9062 * We use CHECKPOINT_FAST only if requested by user (via passing
9063 * fast = true). Otherwise this can take a while.
9064 */
9066 (fast ? CHECKPOINT_FAST : 0));
9067
9068 /*
9069 * Now we need to fetch the checkpoint record location, and also
9070 * its REDO pointer. The oldest point in WAL that would be needed
9071 * to restore starting from the checkpoint is precisely the REDO
9072 * pointer.
9073 */
9075 state->checkpointloc = ControlFile->checkPoint;
9076 state->startpoint = ControlFile->checkPointCopy.redo;
9080
9082 {
9084
9085 /*
9086 * Check to see if all WAL replayed during online backup
9087 * (i.e., since last restartpoint used as backup starting
9088 * checkpoint) contain full-page writes.
9089 */
9093
9094 if (!checkpointfpw || state->startpoint <= recptr)
9095 ereport(ERROR,
9097 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9098 "since last restartpoint"),
9099 errhint("This means that the backup being taken on the standby "
9100 "is corrupt and should not be used. "
9101 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9102 "and then try an online backup again.")));
9103
9104 /*
9105 * During recovery, since we don't use the end-of-backup WAL
9106 * record and don't write the backup history file, the
9107 * starting WAL location doesn't need to be unique. This means
9108 * that two base backups started at the same time might use
9109 * the same checkpoint as starting locations.
9110 */
9111 gotUniqueStartpoint = true;
9112 }
9113
9114 /*
9115 * If two base backups are started at the same time (in WAL sender
9116 * processes), we need to make sure that they use different
9117 * checkpoints as starting locations, because we use the starting
9118 * WAL location as a unique identifier for the base backup in the
9119 * end-of-backup WAL record and when we write the backup history
9120 * file. Perhaps it would be better to generate a separate unique ID
9121 * for each backup instead of forcing another checkpoint, but
9122 * taking a checkpoint right after another is not that expensive
9123 * either because only few buffers have been dirtied yet.
9124 */
9126 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9127 {
9128 XLogCtl->Insert.lastBackupStart = state->startpoint;
9129 gotUniqueStartpoint = true;
9130 }
9132 } while (!gotUniqueStartpoint);
9133
9134 /*
9135 * Construct tablespace_map file.
9136 */
9138
9139 /* Collect information about all tablespaces */
9141 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9142 {
9143 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9144 char linkpath[MAXPGPATH];
9145 char *relpath = NULL;
9146 char *s;
9148 char *badp;
9149 Oid tsoid;
9150
9151 /*
9152 * Try to parse the directory name as an unsigned integer.
9153 *
9154 * Tablespace directories should be positive integers that can be
9155 * represented in 32 bits, with no leading zeroes or trailing
9156 * garbage. If we come across a name that doesn't meet those
9157 * criteria, skip it.
9158 */
9159 if (de->d_name[0] < '1' || de->d_name[1] > '9')
9160 continue;
9161 errno = 0;
9162 tsoid = strtoul(de->d_name, &badp, 10);
9163 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9164 continue;
9165
9166 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9167
9168 de_type = get_dirent_type(fullpath, de, false, ERROR);
9169
9170 if (de_type == PGFILETYPE_LNK)
9171 {
9173 int rllen;
9174
9175 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9176 if (rllen < 0)
9177 {
9179 (errmsg("could not read symbolic link \"%s\": %m",
9180 fullpath)));
9181 continue;
9182 }
9183 else if (rllen >= sizeof(linkpath))
9184 {
9186 (errmsg("symbolic link \"%s\" target is too long",
9187 fullpath)));
9188 continue;
9189 }
9190 linkpath[rllen] = '\0';
9191
9192 /*
9193 * Relpath holds the relative path of the tablespace directory
9194 * when it's located within PGDATA, or NULL if it's located
9195 * elsewhere.
9196 */
9197 if (rllen > datadirpathlen &&
9201
9202 /*
9203 * Add a backslash-escaped version of the link path to the
9204 * tablespace map file.
9205 */
9207 for (s = linkpath; *s; s++)
9208 {
9209 if (*s == '\n' || *s == '\r' || *s == '\\')
9212 }
9214 de->d_name, escapedpath.data);
9215 pfree(escapedpath.data);
9216 }
9217 else if (de_type == PGFILETYPE_DIR)
9218 {
9219 /*
9220 * It's possible to use allow_in_place_tablespaces to create
9221 * directories directly under pg_tblspc, for testing purposes
9222 * only.
9223 *
9224 * In this case, we store a relative path rather than an
9225 * absolute path into the tablespaceinfo.
9226 */
9227 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9228 PG_TBLSPC_DIR, de->d_name);
9230 }
9231 else
9232 {
9233 /* Skip any other file type that appears here. */
9234 continue;
9235 }
9236
9238 ti->oid = tsoid;
9239 ti->path = pstrdup(linkpath);
9240 ti->rpath = relpath;
9241 ti->size = -1;
9242
9243 if (tablespaces)
9244 *tablespaces = lappend(*tablespaces, ti);
9245 }
9247
9248 state->starttime = (pg_time_t) time(NULL);
9249 }
9251
9252 state->started_in_recovery = backup_started_in_recovery;
9253
9254 /*
9255 * Mark that the start phase has correctly finished for the backup.
9256 */
9258}
static bool backup_started_in_recovery
Definition basebackup.c:123
void RequestCheckpoint(int flags)
#define palloc_object(type)
Definition fe_memutils.h:74
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition file_utils.c:547
PGFileType
Definition file_utils.h:19
@ PGFILETYPE_LNK
Definition file_utils.h:24
@ PGFILETYPE_DIR
Definition file_utils.h:23
char * DataDir
Definition globals.c:71
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
List * lappend(List *list, void *datum)
Definition list.c:339
#define IS_DIR_SEP(ch)
Definition port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
unsigned int Oid
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define PG_TBLSPC_DIR
Definition relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition xlog.c:553
XLogRecPtr lastBackupStart
Definition xlog.c:442
#define readlink(path, buf, size)
Definition win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition xlog.c:8195
void do_pg_abort_backup(int code, Datum arg)
Definition xlog.c:9557
@ SESSION_BACKUP_RUNNING
Definition xlog.h:305
#define CHECKPOINT_WAIT
Definition xlog.h:156
#define CHECKPOINT_FAST
Definition xlog.h:152
#define XLogIsNeeded()
Definition xlog.h:111

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, fb(), FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, palloc_object, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9283 of file xlog.c.

9284{
9285 bool backup_stopped_in_recovery = false;
9286 char histfilepath[MAXPGPATH];
9290 FILE *fp;
9292 int waits = 0;
9293 bool reported_waiting = false;
9294
9295 Assert(state != NULL);
9296
9298
9299 /*
9300 * During recovery, we don't need to check the WAL level, because if the
9301 * WAL level is not sufficient, it's impossible to get here during recovery.
9302 */
9304 ereport(ERROR,
9306 errmsg("WAL level not sufficient for making an online backup"),
9307 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9308
9309 /*
9310 * OK to update backup counter and session-level lock.
9311 *
9312 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9313 * otherwise they can be updated inconsistently, which might cause
9314 * do_pg_abort_backup() to fail.
9315 */
9317
9318 /*
9319 * It is expected that each do_pg_backup_start() call is matched by
9320 * exactly one do_pg_backup_stop() call.
9321 */
9324
9325 /*
9326 * Clean up session-level lock.
9327 *
9328 * You might think that WALInsertLockRelease() can be called before
9329 * cleaning up session-level lock because session-level lock doesn't need
9330 * to be protected with WAL insertion lock. But since
9331 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9332 * cleaned up before it.
9333 */
9335
9337
9338 /*
9339 * If we are taking an online backup from the standby, we confirm that the
9340 * standby has not been promoted during the backup.
9341 */
9342 if (state->started_in_recovery && !backup_stopped_in_recovery)
9343 ereport(ERROR,
9345 errmsg("the standby was promoted during online backup"),
9346 errhint("This means that the backup being taken is corrupt "
9347 "and should not be used. "
9348 "Try taking another online backup.")));
9349
9350 /*
9351 * During recovery, we don't write an end-of-backup record. We assume that
9352 * pg_control was backed up last and its minimum recovery point can be
9353 * available as the backup end location. Since we don't have an
9354 * end-of-backup record, we use the pg_control value to check whether
9355 * we've reached the end of backup when starting recovery from this
9356 * backup. We have no way of checking if pg_control wasn't backed up last
9357 * however.
9358 *
9359 * We don't force a switch to new WAL file but it is still possible to
9360 * wait for all the required files to be archived if waitforarchive is
9361 * true. This is okay if we use the backup to start a standby and fetch
9362 * the missing WAL using streaming replication. But in the case of an
9363 * archive recovery, a user should set waitforarchive to true and wait for
9364 * them to be archived to ensure that all the required files are
9365 * available.
9366 *
9367 * We return the current minimum recovery point as the backup end
9368 * location. Note that it can be greater than the exact backup end
9369 * location if the minimum recovery point is updated after the backup of
9370 * pg_control. This is harmless for current uses.
9371 *
9372 * XXX currently a backup history file is for informational and debug
9373 * purposes only. It's not essential for an online backup. Furthermore,
9374 * even if it's created, it will not be archived during recovery because
9375 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9376 * backup history file during recovery.
9377 */
9379 {
9381
9382 /*
9383 * Check to see if all WAL replayed during online backup contain
9384 * full-page writes.
9385 */
9389
9390 if (state->startpoint <= recptr)
9391 ereport(ERROR,
9393 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9394 "during online backup"),
9395 errhint("This means that the backup being taken on the standby "
9396 "is corrupt and should not be used. "
9397 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9398 "and then try an online backup again.")));
9399
9400
9402 state->stoppoint = ControlFile->minRecoveryPoint;
9405 }
9406 else
9407 {
9408 char *history_file;
9409
9410 /*
9411 * Write the backup-end xlog record
9412 */
9414 XLogRegisterData(&state->startpoint,
9415 sizeof(state->startpoint));
9417
9418 /*
9419 * Given that we're not in recovery, InsertTimeLineID is set and can't
9420 * change, so we can read it without a lock.
9421 */
9422 state->stoptli = XLogCtl->InsertTimeLineID;
9423
9424 /*
9425 * Force a switch to a new xlog segment file, so that the backup is
9426 * valid as soon as archiver moves out the current segment file.
9427 */
9428 RequestXLogSwitch(false);
9429
9430 state->stoptime = (pg_time_t) time(NULL);
9431
9432 /*
9433 * Write the backup history file
9434 */
9437 state->startpoint, wal_segment_size);
9438 fp = AllocateFile(histfilepath, "w");
9439 if (!fp)
9440 ereport(ERROR,
9442 errmsg("could not create file \"%s\": %m",
9443 histfilepath)));
9444
9445 /* Build and save the contents of the backup history file */
9447 fprintf(fp, "%s", history_file);
9449
9450 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9451 ereport(ERROR,
9453 errmsg("could not write file \"%s\": %m",
9454 histfilepath)));
9455
9456 /*
9457 * Clean out any no-longer-needed history files. As a side effect,
9458 * this will post a .ready file for the newly created history file,
9459 * notifying the archiver that history file may be archived
9460 * immediately.
9461 */
9463 }
9464
9465 /*
9466 * If archiving is enabled, wait for all the required WAL files to be
9467 * archived before returning. If archiving isn't enabled, the required WAL
9468 * needs to be transported via streaming replication (hopefully with
9469 * wal_keep_size set high enough), or some more exotic mechanism like
9470 * polling and copying files from pg_wal with a script. We have no knowledge
9471 * of those mechanisms, so it's up to the user to ensure that he gets all
9472 * the required WAL.
9473 *
9474 * We wait until both the last WAL file filled during backup and the
9475 * history file have been archived, and assume that the alphabetic sorting
9476 * property of the WAL files ensures any earlier WAL files are safely
9477 * archived as well.
9478 *
9479 * We wait forever, since archive_command is supposed to work and we
9480 * assume the admin wanted his backup to work completely. If you don't
9481 * wish to wait, then either waitforarchive should be passed in as false,
9482 * or you can set statement_timeout. Also, some notices are issued to
9483 * clue in anyone who might be doing this interactively.
9484 */
9485
9486 if (waitforarchive &&
9489 {
9493
9496 state->startpoint, wal_segment_size);
9497
9499 waits = 0;
9500
9503 {
9505
9506 if (!reported_waiting && waits > 5)
9507 {
9509 (errmsg("base backup done, waiting for required WAL segments to be archived")));
9510 reported_waiting = true;
9511 }
9512
9515 1000L,
9518
9520 {
9521 seconds_before_warning *= 2; /* This wraps in >10 years... */
9523 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9524 waits),
9525 errhint("Check that your \"archive_command\" is executing properly. "
9526 "You can safely cancel this backup, "
9527 "but the database backup will not be usable without all the WAL segments.")));
9528 }
9529 }
9530
9532 (errmsg("all required WAL segments have been archived")));
9533 }
9534 else if (waitforarchive)
9536 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9537}
#define fprintf(file, fmt, msg)
Definition cubescan.l:21
#define NOTICE
Definition elog.h:35
int FreeFile(FILE *file)
Definition fd.c:2826
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2627
struct Latch * MyLatch
Definition globals.c:63
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define XLOG_BACKUP_END
Definition pg_control.h:74
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static void CleanupBackupHistory(void)
Definition xlog.c:4198
#define XLogArchivingAlways()
Definition xlog.h:104
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fb(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9264 of file xlog.c.

9265{
9266 return sessionBackupState;
9267}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8768 of file xlog.c.

8769{
8770 int o_direct_flag = 0;
8771
8772 /*
8773 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8774 * written by walreceiver is normally read by the startup process soon
8775 * after it's written. Also, walreceiver performs unaligned writes, which
8776 * don't work with O_DIRECT, so it is required for correctness too.
8777 */
8780
8781 /* If fsync is disabled, never open in sync mode */
8782 if (!enableFsync)
8783 return o_direct_flag;
8784
8785 switch (method)
8786 {
8787 /*
8788 * enum values for all sync options are defined even if they are
8789 * not supported on the current platform. But if not, they are
8790 * not included in the enum option array, and therefore will never
8791 * be seen here.
8792 */
8796 return o_direct_flag;
8797#ifdef O_SYNC
8799 return O_SYNC | o_direct_flag;
8800#endif
8801#ifdef O_DSYNC
8803 return O_DSYNC | o_direct_flag;
8804#endif
8805 default:
8806 /* can't happen (unless we are out of sync with option array) */
8807 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8808 return 0; /* silence warning */
8809 }
8810}
int io_direct_flags
Definition fd.c:171
#define IO_DIRECT_WAL
Definition fd.h:55
#define PG_O_DIRECT
Definition fd.h:123
bool enableFsync
Definition globals.c:129
#define AmWalReceiverProcess()
Definition miscadmin.h:391
#define O_DSYNC
Definition win32_port.h:346
@ WAL_SYNC_METHOD_OPEN
Definition xlog.h:27
@ WAL_SYNC_METHOD_FDATASYNC
Definition xlog.h:26
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition xlog.h:28
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition xlog.h:29
@ WAL_SYNC_METHOD_FSYNC
Definition xlog.h:25

References AmWalReceiverProcess, elog, enableFsync, ERROR, fb(), io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4952 of file xlog.c.

4953{
4954 return ControlFile->wal_level;
4955}

References ControlFile, and ControlFileData::wal_level.

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4661 of file xlog.c.

4662{
4664}
bool default_char_signedness
Definition pg_control.h:232

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4676 of file xlog.c.

4677{
4679}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition atomics.h:532

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6593 of file xlog.c.

6594{
6597}
static bool doPageWrites
Definition xlog.c:289

References doPageWrites, fb(), and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6682 of file xlog.c.

6683{
6685 int i;
6686
6687 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6688 {
6690
6691 /*
6692 * Need to take a lock to prevent torn reads of the LSN, which are
6693 * possible on some of the supported platforms. WAL insert locks only
6694 * support exclusive mode, so we have to use that.
6695 */
6698 LWLockRelease(&WALInsertLocks[i].l.lock);
6699
6700 if (res < last_important)
6701 res = last_important;
6702 }
6703
6704 return res;
6705}
int i
Definition isn.c:77
XLogRecPtr lastImportantAt
Definition xlog.c:374
WALInsertLock l
Definition xlog.c:386
static WALInsertLockPadded * WALInsertLocks
Definition xlog.c:571
#define NUM_XLOGINSERT_LOCKS
Definition xlog.c:153

References fb(), i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6711 of file xlog.c.

6712{
6713 pg_time_t result;
6714
6715 /* Need WALWriteLock, but shared lock is sufficient */
6717 result = XLogCtl->lastSegSwitchTime;
6720
6721 return result;
6722}
pg_time_t lastSegSwitchTime
Definition xlog.c:469
XLogRecPtr lastSegSwitchLSN
Definition xlog.c:470

References fb(), XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4637 of file xlog.c.

4638{
4641}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition pg_control.h:239

References Assert, ControlFile, fb(), and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6496 of file xlog.c.

6497{
6498 RecoveryState retval;
6499
6501 retval = XLogCtl->SharedRecoveryState;
6503
6504 return retval;
6505}
RecoveryState
Definition xlog.h:91

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6563 of file xlog.c.

6564{
6565 XLogRecPtr ptr;
6566
6567 /*
6568 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6569 * grabbed a WAL insertion lock to read the authoritative value in
6570 * Insert->RedoRecPtr, someone might update it just after we've released
6571 * the lock.
6572 */
6574 ptr = XLogCtl->RedoRecPtr;
6576
6577 if (RedoRecPtr < ptr)
6578 RedoRecPtr = ptr;
6579
6580 return RedoRecPtr;
6581}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), reserve_wal_for_local_slot(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 8002 of file xlog.c.

8003{
8004 XLogRecPtr currpos; /* current write LSN */
8005 XLogSegNo currSeg; /* segid of currpos */
8006 XLogSegNo targetSeg; /* segid of targetLSN */
8007 XLogSegNo oldestSeg; /* actual oldest segid */
8008 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
8009 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
8011
8012 /*
8013 * slot does not reserve WAL. Either deactivated, or has never been active
8014 */
8016 return WALAVAIL_INVALID_LSN;
8017
8018 /*
8019 * Calculate the oldest segment currently reserved by all slots,
8020 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
8021 * oldestSlotSeg to the current segment.
8022 */
8023 currpos = GetXLogWriteRecPtr();
8025 KeepLogSeg(currpos, &oldestSlotSeg);
8026
8027 /*
8028 * Find the oldest extant segment file. We get 1 until checkpoint removes
8029 * the first WAL segment file since startup, which can make the reported
8030 * status wrong under certain abnormal conditions, but that is harmless.
8031 */
8033
8034 /* calculate oldest segment by max_wal_size */
8037
8038 if (currSeg > keepSegs)
8040 else
8042
8043 /* the segment we care about */
8045
8046 /*
8047 * No point in returning reserved or extended status values if the
8048 * targetSeg is known to be lost.
8049 */
8050 if (targetSeg >= oldestSlotSeg)
8051 {
8052 /* show "reserved" when targetSeg is within max_wal_size */
8054 return WALAVAIL_RESERVED;
8055
8056 /* being retained by slots exceeding max_wal_size */
8057 return WALAVAIL_EXTENDED;
8058 }
8059
8060 /* WAL segments are no longer retained but haven't been removed yet */
8061 if (targetSeg >= oldestSeg)
8062 return WALAVAIL_UNRESERVED;
8063
8064 /* Definitely lost */
8065 return WALAVAIL_REMOVED;
8066}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3795
XLogRecPtr GetXLogWriteRecPtr(void)
Definition xlog.c:9614
@ WALAVAIL_REMOVED
Definition xlog.h:205
@ WALAVAIL_RESERVED
Definition xlog.h:201
@ WALAVAIL_UNRESERVED
Definition xlog.h:204
@ WALAVAIL_EXTENDED
Definition xlog.h:202
@ WALAVAIL_INVALID_LSN
Definition xlog.h:200

References ConvertToXSegs, fb(), GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsValid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6646 of file xlog.c.

6647{
6649
6650 /* Since the value can't be changing, no lock is required. */
6651 return XLogCtl->InsertTimeLineID;
6652}

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1638 of file xlog.c.

1639{
1640 int idx;
1641 XLogRecPtr endptr;
1642 static uint64 cachedPage = 0;
1643 static char *cachedPos = NULL;
1645
1646 /*
1647 * Fast path for the common case that we need to access again the same
1648 * page as last time.
1649 */
1650 if (ptr / XLOG_BLCKSZ == cachedPage)
1651 {
1653 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1654 return cachedPos + ptr % XLOG_BLCKSZ;
1655 }
1656
1657 /*
1658 * The XLog buffer cache is organized so that a page is always loaded to a
1659 * particular buffer. That way we can easily calculate the buffer a given
1660 * page must be loaded into, from the XLogRecPtr alone.
1661 */
1662 idx = XLogRecPtrToBufIdx(ptr);
1663
1664 /*
1665 * See what page is loaded in the buffer at the moment. It could be the
1666 * page we're looking for, or something older. It can't be anything newer
1667 * - that would imply the page we're looking for has already been written
1668 * out to disk and evicted, and the caller is responsible for making sure
1669 * that doesn't happen.
1670 *
1671 * We don't hold a lock while we read the value. If someone is just about
1672 * to initialize or has just initialized the page, it's possible that we
1673 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1674 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1675 * we're looking for.
1676 */
1677 expectedEndPtr = ptr;
1679
1681 if (expectedEndPtr != endptr)
1682 {
1684
1685 /*
1686 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1687 * know how far we're finished with inserting the record.
1688 *
1689 * NB: If 'ptr' points to just after the page header, advertise a
1690 * position at the beginning of the page rather than 'ptr' itself. If
1691 * there are no other insertions running, someone might try to flush
1692 * up to our advertised location. If we advertised a position after
1693 * the page header, someone might try to flush the page header, even
1694 * though the page might actually not be initialized yet. As the first
1695 * inserter on the page, we are effectively responsible for making
1696 * sure that it's initialized, before we let insertingAt move past
1697 * the page header.
1698 */
1699 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1702 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1705 else
1706 initializedUpto = ptr;
1707
1709
1710 AdvanceXLInsertBuffer(ptr, tli, false);
1712
1713 if (expectedEndPtr != endptr)
1714 elog(PANIC, "could not find WAL buffer for %X/%08X",
1715 LSN_FORMAT_ARGS(ptr));
1716 }
1717 else
1718 {
1719 /*
1720 * Make sure the initialization of the page is visible to us, and
1721 * won't arrive later to overwrite the WAL data we write on the page.
1722 */
1724 }
1725
1726 /*
1727 * Found the buffer holding this page. Return a pointer to the right
1728 * offset within the page.
1729 */
1730 cachedPage = ptr / XLOG_BLCKSZ;
1732
1734 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1735
1736 return cachedPos + ptr % XLOG_BLCKSZ;
1737}
#define pg_memory_barrier()
Definition atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition xlog.c:1477
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition xlog.c:1991

References AdvanceXLInsertBuffer(), Assert, elog, fb(), idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4241 of file xlog.c.

4242{
4244
4245 /*
4246 * Generate a random nonce. This is used for authentication requests that
4247 * will fail because the user does not exist. The nonce is used to create
4248 * a genuine-looking password challenge for the non-existent user, in lieu
4249 * of an actual stored password.
4250 */
4252 ereport(PANIC,
4254 errmsg("could not generate secret authorization token")));
4255
4256 memset(ControlFile, 0, sizeof(ControlFileData));
4257 /* Initialize pg_control status fields */
4258 ControlFile->system_identifier = sysidentifier;
4262
4263 /* Set important parameter values for use when replaying WAL */
4272 ControlFile->data_checksum_version = data_checksum_version;
4273}
bool track_commit_timestamp
Definition commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition pg_control.h:187
bool wal_log_hints
Definition xlog.c:126
#define FirstNormalUnloggedLSN
Definition xlogdefs.h:37

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), fb(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4858 of file xlog.c.

4859{
4861
4863 {
4864 struct config_generic *guc;
4865
4866 guc = find_option("wal_consistency_checking", false, false, ERROR);
4867
4869
4870 set_config_option_ext("wal_consistency_checking",
4872 guc->scontext, guc->source, guc->srole,
4873 GUC_ACTION_SET, true, ERROR, false);
4874
4875 /* checking should not be deferred again */
4877 }
4878}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition guc.c:3256
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition guc.c:1113
@ GUC_ACTION_SET
Definition guc.h:203
char * wal_consistency_checking_string
Definition xlog.c:128

References Assert, check_wal_consistency_checking_deferred, ERROR, fb(), find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, set_config_option_ext(), and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3600 of file xlog.c.

3602{
3603 char path[MAXPGPATH];
3604 struct stat stat_buf;
3605
3606 Assert(tli != 0);
3607
3608 XLogFilePath(path, tli, *segno, wal_segment_size);
3609
3612 {
3614 return false;
3615 }
3616
3617 if (!find_free)
3618 {
3619 /* Force installation: get rid of any pre-existing segment file */
3620 durable_unlink(path, DEBUG1);
3621 }
3622 else
3623 {
3624 /* Find a free slot to put it in */
3625 while (stat(path, &stat_buf) == 0)
3626 {
3627 if ((*segno) >= max_segno)
3628 {
3629 /* Failed to find a free slot within specified range */
3631 return false;
3632 }
3633 (*segno)++;
3634 XLogFilePath(path, tli, *segno, wal_segment_size);
3635 }
3636 }
3637
3638 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3639 if (durable_rename(tmppath, path, LOG) != 0)
3640 {
3642 /* durable_rename already emitted log message */
3643 return false;
3644 }
3645
3647
3648 return true;
3649}
int durable_unlink(const char *fname, int elevel)
Definition fd.c:872
short access
bool InstallXLogFileSegmentActive
Definition xlog.c:528
#define stat
Definition win32_port.h:74

References Assert, DEBUG1, durable_rename(), durable_unlink(), fb(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9663 of file xlog.c.

9664{
9665 bool result;
9666
9670
9671 return result;
9672}

References fb(), XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8858 of file xlog.c.

8859{
8860 char *msg = NULL;
8862
8863 Assert(tli != 0);
8864
8865 /*
8866 * Quick exit if fsync is disabled or write() has already synced the WAL
8867 * file.
8868 */
8869 if (!enableFsync ||
8872 return;
8873
8874 /*
8875 * Measure I/O timing to sync the WAL file for pg_stat_io.
8876 */
8878
8880 switch (wal_sync_method)
8881 {
8883 if (pg_fsync_no_writethrough(fd) != 0)
8884 msg = _("could not fsync file \"%s\": %m");
8885 break;
8886#ifdef HAVE_FSYNC_WRITETHROUGH
8888 if (pg_fsync_writethrough(fd) != 0)
8889 msg = _("could not fsync write-through file \"%s\": %m");
8890 break;
8891#endif
8893 if (pg_fdatasync(fd) != 0)
8894 msg = _("could not fdatasync file \"%s\": %m");
8895 break;
8898 /* not reachable */
8899 Assert(false);
8900 break;
8901 default:
8902 ereport(PANIC,
8904 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8905 break;
8906 }
8907
8908 /* PANIC if failed to fsync */
8909 if (msg)
8910 {
8911 char xlogfname[MAXFNAMELEN];
8912 int save_errno = errno;
8913
8915 errno = save_errno;
8916 ereport(PANIC,
8918 errmsg(msg, xlogfname)));
8919 }
8920
8922
8924 start, 1, 0);
8925}
#define _(x)
Definition elog.c:91
int pg_fsync_no_writethrough(int fd)
Definition fd.c:441
int pg_fdatasync(int fd)
Definition fd.c:480
int pg_fsync_writethrough(int fd)
Definition fd.c:461
return str start
@ IOOBJECT_WAL
Definition pgstat.h:279
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOOP_FSYNC
Definition pgstat.h:308
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
static int fd(const char *x, int i)
bool track_wal_io_timing
Definition xlog.c:140

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fb(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 8086 of file xlog.c.

8087{
8089 XLogSegNo segno;
8091
8093 segno = currSegNo;
8094
8095 /* Calculate how many segments are kept by slots. */
8098 {
8100
8101 /*
8102 * Account for max_slot_wal_keep_size to avoid keeping more than
8103 * configured. However, don't do that during a binary upgrade: if
8104 * slots were to be invalidated because of this, it would not be
8105 * possible to preserve logical ones during the upgrade.
8106 */
8108 {
8110
8113
8114 if (currSegNo - segno > slot_keep_segs)
8115 segno = currSegNo - slot_keep_segs;
8116 }
8117 }
8118
8119 /*
8120 * If WAL summarization is in use, don't remove WAL that has yet to be
8121 * summarized.
8122 */
8125 {
8127
8129 if (unsummarized_segno < segno)
8130 segno = unsummarized_segno;
8131 }
8132
8133 /* but, keep at least wal_keep_size if that's set */
8134 if (wal_keep_size_mb > 0)
8135 {
8137
8139 if (currSegNo - segno < keep_segs)
8140 {
8141 /* avoid underflow, don't go below 1 */
8142 if (currSegNo <= keep_segs)
8143 segno = 1;
8144 else
8145 segno = currSegNo - keep_segs;
8146 }
8147 }
8148
8149 /* don't delete WAL segments newer than the calculated segment */
8150 if (segno < *logSegNo)
8151 *logSegNo = segno;
8152}
bool IsBinaryUpgrade
Definition globals.c:121
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition xlog.c:119
XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2682
int max_slot_wal_keep_size_mb
Definition xlog.c:138

References ConvertToXSegs, fb(), GetOldestUnsummarizedLSN(), IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4939 of file xlog.c.

4940{
4944}
void reset(void)

References Assert, ControlFile, fb(), palloc_object, ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6548 of file xlog.c.

6549{
6551
6553
6554 return oldXLogAllowed;
6555}

References fb(), and LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6807 of file xlog.c.

6808{
6809 long write_msecs,
6810 sync_msecs,
6815
6817
6820
6823
6824 /* Accumulate checkpoint timing summary data, in milliseconds. */
6827
6828 /*
6829 * All of the published timing statistics are accounted for. Only
6830 * continue if a log message is to be written.
6831 */
6832 if (!log_checkpoints)
6833 return;
6834
6837
6838 /*
6839 * Timing values returned from CheckpointStats are in microseconds.
6840 * Convert to milliseconds for consistent printing.
6841 */
6843
6848 average_msecs = (long) ((average_sync_time + 999) / 1000);
6849
6850 /*
6851 * ControlFileLock is not required to see ControlFile->checkPoint and
6852 * ->checkPointCopy here as we are the only updater of those variables at
6853 * this moment.
6854 */
6855 if (restartpoint)
6856 ereport(LOG,
6857 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6858 "wrote %d SLRU buffers; %d WAL file(s) added, "
6859 "%d removed, %d recycled; write=%ld.%03d s, "
6860 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6861 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6862 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6869 write_msecs / 1000, (int) (write_msecs % 1000),
6870 sync_msecs / 1000, (int) (sync_msecs % 1000),
6871 total_msecs / 1000, (int) (total_msecs % 1000),
6873 longest_msecs / 1000, (int) (longest_msecs % 1000),
6874 average_msecs / 1000, (int) (average_msecs % 1000),
6875 (int) (PrevCheckPointDistance / 1024.0),
6876 (int) (CheckPointDistanceEstimate / 1024.0),
6879 else
6880 ereport(LOG,
6881 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6882 "wrote %d SLRU buffers; %d WAL file(s) added, "
6883 "%d removed, %d recycled; write=%ld.%03d s, "
6884 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6885 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6886 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6893 write_msecs / 1000, (int) (write_msecs % 1000),
6894 sync_msecs / 1000, (int) (sync_msecs % 1000),
6895 total_msecs / 1000, (int) (total_msecs % 1000),
6897 longest_msecs / 1000, (int) (longest_msecs % 1000),
6898 average_msecs / 1000, (int) (average_msecs % 1000),
6899 (int) (PrevCheckPointDistance / 1024.0),
6900 (int) (CheckPointDistanceEstimate / 1024.0),
6903}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition timestamp.c:1757
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition xlog.h:187
uint64 ckpt_longest_sync
Definition xlog.h:186
TimestampTz ckpt_end_t
Definition xlog.h:176
int ckpt_slru_written
Definition xlog.h:179
PgStat_Counter sync_time
Definition pgstat.h:265
PgStat_Counter write_time
Definition pgstat.h:264
static double CheckPointDistanceEstimate
Definition xlog.c:162
static double PrevCheckPointDistance
Definition xlog.c:163

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), fb(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6775 of file xlog.c.

6776{
6777 if (restartpoint)
6778 ereport(LOG,
6779 /* translator: the placeholders show checkpoint options */
6780 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6781 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6782 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6783 (flags & CHECKPOINT_FAST) ? " fast" : "",
6784 (flags & CHECKPOINT_FORCE) ? " force" : "",
6785 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6786 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6787 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6788 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6789 else
6790 ereport(LOG,
6791 /* translator: the placeholders show checkpoint options */
6792 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6793 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6794 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6795 (flags & CHECKPOINT_FAST) ? " fast" : "",
6796 (flags & CHECKPOINT_FORCE) ? " force" : "",
6797 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6798 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6799 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6800 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6801}
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:154
#define CHECKPOINT_CAUSE_XLOG
Definition xlog.h:159
#define CHECKPOINT_CAUSE_TIME
Definition xlog.h:160

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), fb(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6410 of file xlog.c.

6411{
6412 bool promoted = false;
6413
6414 /*
6415 * Perform a checkpoint to update all our recovery activity to disk.
6416 *
6417 * Note that we write a shutdown checkpoint rather than an on-line one.
6418 * This is not particularly critical, but since we may be assigning a new
6419 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6420 * only changes in shutdown checkpoints, which allows some extra error
6421 * checking in xlog_redo.
6422 *
6423 * In promotion, only create a lightweight end-of-recovery record instead
6424 * of a full checkpoint. A checkpoint is requested later, after we're
6425 * fully out of recovery mode and already accepting queries.
6426 */
6429 {
6430 promoted = true;
6431
6432 /*
6433 * Insert a special WAL record to mark the end of recovery, since we
6434 * aren't doing a checkpoint. That means that the checkpointer process
6435 * may likely be in the middle of a time-smoothed restartpoint and
6436 * could continue to be for minutes after this. That sounds strange,
6437 * but the effect is roughly the same and it would be stranger to try
6438 * to come out of the restartpoint and then checkpoint. We request a
6439 * checkpoint later anyway, just for safety.
6440 */
6442 }
6443 else
6444 {
6448 }
6449
6450 return promoted;
6451}
static void CreateEndOfRecoveryRecord(void)
Definition xlog.c:7506
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), fb(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3727 of file xlog.c.

3728{
3730 int lf;
3731 bool added;
3732 char path[MAXPGPATH];
3733 uint64 offset;
3734
3736 return; /* unlocked check says no */
3737
3739 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3740 if (offset >= (uint32) (0.75 * wal_segment_size))
3741 {
3742 _logSegNo++;
3743 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3744 if (lf >= 0)
3745 close(lf);
3746 if (added)
3748 }
3749}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition xlog.c:3229

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, fb(), XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6373 of file xlog.c.

6374{
6375 /*
6376 * We have reached the end of base backup, as indicated by pg_control. The
6377 * data on disk is now consistent (unless minRecoveryPoint is further
6378 * ahead, which can happen if we crashed during previous recovery). Reset
6379 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6380 * make sure we don't allow starting up at an earlier point even if
6381 * recovery is stopped and restarted soon after this.
6382 */
6384
6385 if (ControlFile->minRecoveryPoint < EndRecPtr)
6386 {
6387 ControlFile->minRecoveryPoint = EndRecPtr;
6389 }
6390
6395
6397}
XLogRecPtr backupStartPoint
Definition pg_control.h:172
XLogRecPtr backupEndPoint
Definition pg_control.h:173

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, fb(), InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4386 of file xlog.c.

4387{
4388 pg_crc32c crc;
4389 int fd;
4390 char wal_segsz_str[20];
4391 int r;
4392
4393 /*
4394 * Read data...
4395 */
4397 O_RDWR | PG_BINARY);
4398 if (fd < 0)
4399 ereport(PANIC,
4401 errmsg("could not open file \"%s\": %m",
4403
4405 r = read(fd, ControlFile, sizeof(ControlFileData));
4406 if (r != sizeof(ControlFileData))
4407 {
4408 if (r < 0)
4409 ereport(PANIC,
4411 errmsg("could not read file \"%s\": %m",
4413 else
4414 ereport(PANIC,
4416 errmsg("could not read file \"%s\": read %d of %zu",
4417 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4418 }
4420
4421 close(fd);
4422
4423 /*
4424 * Check for expected pg_control format version. If this is wrong, the
4425 * CRC check will likely fail because we'll be checking the wrong number
4426 * of bytes. Complaining about wrong version will probably be more
4427 * enlightening than complaining about wrong CRC.
4428 */
4429
4431 ereport(FATAL,
4433 errmsg("database files are incompatible with server"),
4434 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4435 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4438 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4439
4441 ereport(FATAL,
4443 errmsg("database files are incompatible with server"),
4444 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4445 " but the server was compiled with PG_CONTROL_VERSION %d.",
4447 errhint("It looks like you need to initdb.")));
4448
4449 /* Now check the CRC. */
4454 FIN_CRC32C(crc);
4455
4456 if (!EQ_CRC32C(crc, ControlFile->crc))
4457 ereport(FATAL,
4459 errmsg("incorrect checksum in control file")));
4460
4461 /*
4462 * Do compatibility checking immediately. If the database isn't
4463 * compatible with the backend executable, we want to abort before we can
4464 * possibly do any damage.
4465 */
4467 ereport(FATAL,
4469 errmsg("database files are incompatible with server"),
4470 /* translator: %s is a variable name and %d is its value */
4471 errdetail("The database cluster was initialized with %s %d,"
4472 " but the server was compiled with %s %d.",
4473 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4474 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4475 errhint("It looks like you need to initdb.")));
4477 ereport(FATAL,
4479 errmsg("database files are incompatible with server"),
4480 /* translator: %s is a variable name and %d is its value */
4481 errdetail("The database cluster was initialized with %s %d,"
4482 " but the server was compiled with %s %d.",
4483 "MAXALIGN", ControlFile->maxAlign,
4484 "MAXALIGN", MAXIMUM_ALIGNOF),
4485 errhint("It looks like you need to initdb.")));
4487 ereport(FATAL,
4489 errmsg("database files are incompatible with server"),
4490 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4491 errhint("It looks like you need to initdb.")));
4492 if (ControlFile->blcksz != BLCKSZ)
4493 ereport(FATAL,
4495 errmsg("database files are incompatible with server"),
4496 /* translator: %s is a variable name and %d is its value */
4497 errdetail("The database cluster was initialized with %s %d,"
4498 " but the server was compiled with %s %d.",
4499 "BLCKSZ", ControlFile->blcksz,
4500 "BLCKSZ", BLCKSZ),
4501 errhint("It looks like you need to recompile or initdb.")));
4503 ereport(FATAL,
4505 errmsg("database files are incompatible with server"),
4506 /* translator: %s is a variable name and %d is its value */
4507 errdetail("The database cluster was initialized with %s %d,"
4508 " but the server was compiled with %s %d.",
4509 "RELSEG_SIZE", ControlFile->relseg_size,
4510 "RELSEG_SIZE", RELSEG_SIZE),
4511 errhint("It looks like you need to recompile or initdb.")));
4513 ereport(FATAL,
4515 errmsg("database files are incompatible with server"),
4516 /* translator: %s is a variable name and %d is its value */
4517 errdetail("The database cluster was initialized with %s %d,"
4518 " but the server was compiled with %s %d.",
4519 "SLRU_PAGES_PER_SEGMENT", ControlFile->slru_pages_per_segment,
4520 "SLRU_PAGES_PER_SEGMENT", SLRU_PAGES_PER_SEGMENT),
4521 errhint("It looks like you need to recompile or initdb.")));
4523 ereport(FATAL,
4525 errmsg("database files are incompatible with server"),
4526 /* translator: %s is a variable name and %d is its value */
4527 errdetail("The database cluster was initialized with %s %d,"
4528 " but the server was compiled with %s %d.",
4529 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4530 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4531 errhint("It looks like you need to recompile or initdb.")));
4533 ereport(FATAL,
4535 errmsg("database files are incompatible with server"),
4536 /* translator: %s is a variable name and %d is its value */
4537 errdetail("The database cluster was initialized with %s %d,"
4538 " but the server was compiled with %s %d.",
4539 "NAMEDATALEN", ControlFile->nameDataLen,
4540 "NAMEDATALEN", NAMEDATALEN),
4541 errhint("It looks like you need to recompile or initdb.")));
4543 ereport(FATAL,
4545 errmsg("database files are incompatible with server"),
4546 /* translator: %s is a variable name and %d is its value */
4547 errdetail("The database cluster was initialized with %s %d,"
4548 " but the server was compiled with %s %d.",
4549 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4550 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4551 errhint("It looks like you need to recompile or initdb.")));
4553 ereport(FATAL,
4555 errmsg("database files are incompatible with server"),
4556 /* translator: %s is a variable name and %d is its value */
4557 errdetail("The database cluster was initialized with %s %d,"
4558 " but the server was compiled with %s %d.",
4559 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4560 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4561 errhint("It looks like you need to recompile or initdb.")));
4563 ereport(FATAL,
4565 errmsg("database files are incompatible with server"),
4566 /* translator: %s is a variable name and %d is its value */
4567 errdetail("The database cluster was initialized with %s %d,"
4568 " but the server was compiled with %s %d.",
4569 "LOBLKSIZE", ControlFile->loblksize,
4570 "LOBLKSIZE", (int) LOBLKSIZE),
4571 errhint("It looks like you need to recompile or initdb.")));
4572
4573 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
4574
4576
4579 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4580 "invalid WAL segment size in control file (%d bytes)",
4583 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4584
4586 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4588
4589 /* check and update variables dependent on wal_segment_size */
4592 /* translator: both %s are GUC names */
4593 errmsg("\"%s\" must be at least twice \"%s\"",
4594 "min_wal_size", "wal_segment_size")));
4595
4598 /* translator: both %s are GUC names */
4599 errmsg("\"%s\" must be at least twice \"%s\"",
4600 "max_wal_size", "wal_segment_size")));
4601
4605
4607
4608 /* Make the initdb settings visible as GUC variables, too */
4609 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4611}
#define PG_BINARY
Definition c.h:1287
#define CATALOG_VERSION_NO
Definition catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition elog.c:1193
int BasicOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1089
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4196
@ PGC_S_DYNAMIC_DEFAULT
Definition guc.h:114
@ PGC_INTERNAL
Definition guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition heaptoast.h:84
#define read(a, b, c)
Definition win32.h:13
#define LOBLKSIZE
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define SLRU_PAGES_PER_SEGMENT
#define FLOATFORMAT_VALUE
Definition pg_control.h:203
#define PG_CONTROL_VERSION
Definition pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
uint32 pg_control_version
Definition pg_control.h:127
uint32 xlog_seg_size
Definition pg_control.h:215
uint32 slru_pages_per_segment
Definition pg_control.h:212
uint32 indexMaxKeys
Definition pg_control.h:218
uint32 catalog_version_no
Definition pg_control.h:128
pg_crc32c crc
Definition pg_control.h:242
uint32 toast_max_chunk_size
Definition pg_control.h:220
#define UsableBytesInPage
Definition xlog.c:599
bool DataChecksumsEnabled(void)
Definition xlog.c:4647
static int UsableBytesInSegment
Definition xlog.c:608
int min_wal_size_mb
Definition xlog.c:118
#define XLOG_CONTROL_FILE

References Assert, BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6460 of file xlog.c.

6461{
6462 /*
6463 * We check shared state each time only until we leave recovery mode. We
6464 * can't re-enter recovery, so there's no need to keep checking after the
6465 * shared variable has once been seen false.
6466 */
6468 return false;
6469 else
6470 {
6471 /*
6472 * use volatile pointer to make sure we make a fresh read of the
6473 * shared variable.
6474 */
6475 volatile XLogCtlData *xlogctl = XLogCtl;
6476
6477 LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6478
6479 /*
6480 * Note: We don't need a memory barrier when we're still in recovery.
6481 * We might exit recovery immediately after return, so the caller
6482 * can't rely on 'true' meaning that we're still in recovery anyway.
6483 */
6484
6486 }
6487}
static bool LocalRecoveryInProgress
Definition xlog.c:227

References fb(), LocalRecoveryInProgress, RECOVERY_STATE_DONE, and XLogCtl.

Referenced by amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), DisableLogicalDecoding(), DisableLogicalDecodingIfNecessary(), do_pg_backup_start(), do_pg_backup_stop(), EnableLogicalDecoding(), EnsureLogicalDecodingEnabled(), error_commit_ts_disabled(), ExecCheckpoint(), ExecWaitStmt(), extended_statistics_update(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_clear_extended_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), pgstat_report_replslotsync(), 
PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_effective_wal_level(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), verify_heapam(), WaitForLSN(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7681 of file xlog.c.

7682{
7683 /*
7684 * Also refrain from creating a restartpoint if we have seen any
7685 * references to non-existent pages. Restarting recovery from the
7686 * restartpoint would not see the references, so we would lose the
7687 * cross-check that the pages belonged to a relation that was dropped
7688 * later.
7689 */
7691 {
7692 elog(DEBUG2,
7693 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
7694 LSN_FORMAT_ARGS(checkPoint->redo));
7695 return;
7696 }
7697
7698 /*
7699 * Copy the checkpoint record to shared memory, so that checkpointer can
7700 * work out the next time it wants to perform a restartpoint.
7701 */
7705 XLogCtl->lastCheckPoint = *checkPoint;
7707}
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
bool XLogHaveInvalidPages(void)
Definition xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9584 of file xlog.c.

9585{
9586 static bool already_done = false;
9587
9588 if (already_done)
9589 return;
9591 already_done = true;
9592}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344

References before_shmem_exit(), BoolGetDatum(), do_pg_abort_backup(), and fb().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3977 of file xlog.c.

3978{
3979 DIR *xldir;
3980 struct dirent *xlde;
3981 char switchseg[MAXFNAMELEN];
3985
3986 /*
3987 * Initialize info about where to begin the work. This will recycle,
3988 * somewhat arbitrarily, 10 future segments.
3989 */
3993
3994 /*
3995 * Construct a filename of the last segment to be kept.
3996 */
3998
3999 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
4000 switchseg);
4001
4003
4004 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4005 {
4006 /* Ignore files that are not XLOG segments */
4007 if (!IsXLogFileName(xlde->d_name))
4008 continue;
4009
4010 /*
4011 * Remove files that are on a timeline older than the new one we're
4012 * switching to, but with a segment number >= the first segment on the
4013 * new timeline.
4014 */
4015 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4016 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4017 {
4018 /*
4019 * If the file has already been marked as .ready, however, don't
4020 * remove it yet. It should be OK to remove it - files that are
4021 * not part of our timeline history are not required for recovery
4022 * - but seems safer to let them be archived and removed later.
4023 */
4024 if (!XLogArchiveIsReady(xlde->d_name))
4026 }
4027 }
4028
4029 FreeDir(xldir);
4030}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition xlog.c:4046
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3902 of file xlog.c.

3904{
3905 DIR *xldir;
3906 struct dirent *xlde;
3907 char lastoff[MAXFNAMELEN];
3910
3911 /* Initialize info about where to try to recycle to */
3914
3915 /*
3916 * Construct a filename of the last segment to be kept. The timeline ID
3917 * doesn't matter, we ignore that in the comparison. (During recovery,
3918 * InsertTimeLineID isn't set, so we can't use that.)
3919 */
3921
3922 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3923 lastoff);
3924
3926
3927 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3928 {
3929 /* Ignore files that are not XLOG segments */
3930 if (!IsXLogFileName(xlde->d_name) &&
3931 !IsPartialXLogFileName(xlde->d_name))
3932 continue;
3933
3934 /*
3935 * We ignore the timeline part of the XLOG segment identifiers in
3936 * deciding whether a segment is still needed. This ensures that we
3937 * won't prematurely remove a segment from a parent timeline. We could
3938 * probably be a little more proactive about removing segments of
3939 * non-parent timelines, but that would be a whole lot more
3940 * complicated.
3941 *
3942 * We use the alphanumeric sorting property of the filenames to decide
3943 * which ones are earlier than the lastoff segment.
3944 */
3945 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3946 {
3947 if (XLogArchiveCheckDone(xlde->d_name))
3948 {
3949 /* Update the last removed location in shared memory first */
3950 UpdateLastRemovedPtr(xlde->d_name);
3951
3953 }
3954 }
3955 }
3956
3957 FreeDir(xldir);
3958}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition xlog.c:2233
static void UpdateLastRemovedPtr(char *filename)
Definition xlog.c:3849
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3869 of file xlog.c.

3870{
3871 DIR *xldir;
3872 struct dirent *xlde;
3873
3874 elog(DEBUG2, "removing all temporary WAL segments");
3875
3877 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3878 {
3879 char path[MAXPGPATH];
3880
3881 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3882 continue;
3883
3884 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3885 unlink(path);
3886 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3887 }
3888 FreeDir(xldir);
3889}

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4046 of file xlog.c.

4049{
4050 char path[MAXPGPATH];
4051#ifdef WIN32
4052 char newpath[MAXPGPATH];
4053#endif
4054 const char *segname = segment_de->d_name;
4055
4056 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4057
4058 /*
4059 * Before deleting the file, see if it can be recycled as a future log
4060 * segment. Only recycle normal files, because we don't want to recycle
4061 * symbolic links pointing to a separate archive directory.
4062 */
4063 if (wal_recycle &&
4065 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4066 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4068 true, recycleSegNo, insertTLI))
4069 {
4071 (errmsg_internal("recycled write-ahead log file \"%s\"",
4072 segname)));
4074 /* Needn't recheck that slot on future iterations */
4075 (*endlogSegNo)++;
4076 }
4077 else
4078 {
4079 /* No need for any more future segments, or recycling failed ... */
4080 int rc;
4081
4083 (errmsg_internal("removing write-ahead log file \"%s\"",
4084 segname)));
4085
4086#ifdef WIN32
4087
4088 /*
4089 * On Windows, if another process (e.g. another backend) holds the file
4090 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4091 * will still show up in directory listings until the last handle is
4092 * closed. To avoid mistaking the lingering deleted file for a live
4093 * WAL file that needs to be archived, rename it before deleting it.
4094 *
4095 * If another process holds the file open without FILE_SHARE_DELETE
4096 * flag, rename will fail. We'll try again at the next checkpoint.
4097 */
4098 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4099 if (rename(path, newpath) != 0)
4100 {
4101 ereport(LOG,
4103 errmsg("could not rename file \"%s\": %m",
4104 path)));
4105 return;
4106 }
4107 rc = durable_unlink(newpath, LOG);
4108#else
4109 rc = durable_unlink(path, LOG);
4110#endif
4111 if (rc != 0)
4112 {
4113 /* Message already logged by durable_unlink() */
4114 return;
4115 }
4117 }
4118
4120}
@ PGFILETYPE_REG
Definition file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition xlog.c:3600
bool wal_recycle
Definition xlog.c:131

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), fb(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8195 of file xlog.c.

8196{
8198
8199 /* XLOG SWITCH has no data */
8201
8202 if (mark_unimportant)
8205
8206 return RecPtr;
8207}
#define XLOG_SWITCH
Definition pg_control.h:73
#define XLOG_MARK_UNIMPORTANT
Definition xlog.h:166
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460

References fb(), XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1114 of file xlog.c.

1116{
1121
1122 size = MAXALIGN(size);
1123
1124 /* All (non xlog-switch) records should contain data. */
1125 Assert(size > SizeOfXLogRecord);
1126
1127 /*
1128 * The duration the spinlock needs to be held is minimized by minimizing
1129 * the calculations that have to be done while holding the lock. The
1130 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1131 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1132 * page headers. The mapping between "usable" byte positions and physical
1133 * positions (XLogRecPtrs) can be done outside the locked region, and
1134 * because the usable byte position doesn't include any headers, reserving
1135 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1136 */
1137 SpinLockAcquire(&Insert->insertpos_lck);
1138
1139 startbytepos = Insert->CurrBytePos;
1140 endbytepos = startbytepos + size;
1141 prevbytepos = Insert->PrevBytePos;
1142 Insert->CurrBytePos = endbytepos;
1143 Insert->PrevBytePos = startbytepos;
1144
1145 SpinLockRelease(&Insert->insertpos_lck);
1146
1150
1151 /*
1152 * Check that the conversions between "usable byte positions" and
1153 * XLogRecPtrs work consistently in both directions.
1154 */
1158}
#define MAXALIGN(LEN)
Definition c.h:826
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition xlog.c:1904
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition xlog.c:1947

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1170 of file xlog.c.

1171{
1177 XLogRecPtr ptr;
1179
1180 /*
1181 * These calculations are a bit heavy-weight to be done while holding a
1182 * spinlock, but since we're holding all the WAL insertion locks, there
1183 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1184 * compete for it, but that's not called very frequently.
1185 */
1186 SpinLockAcquire(&Insert->insertpos_lck);
1187
1188 startbytepos = Insert->CurrBytePos;
1189
1191 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1192 {
1193 SpinLockRelease(&Insert->insertpos_lck);
1194 *EndPos = *StartPos = ptr;
1195 return false;
1196 }
1197
1198 endbytepos = startbytepos + size;
1199 prevbytepos = Insert->PrevBytePos;
1200
1203
1206 {
1207 /* consume the rest of the segment */
1208 *EndPos += segleft;
1210 }
1211 Insert->CurrBytePos = endbytepos;
1212 Insert->PrevBytePos = startbytepos;
1213
1214 SpinLockRelease(&Insert->insertpos_lck);
1215
1217
1222
1223 return true;
1224}

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ ResetInstallXLogFileSegmentActive()

◆ SetInstallXLogFileSegmentActive()

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4884 of file xlog.c.

4885{
4886 if (XLogArchivingActive())
4887 return XLogArchiveCommand;
4888 else
4889 return "(disabled)";
4890}
char * XLogArchiveCommand
Definition xlog.c:123

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_effective_wal_level()

const char * show_effective_wal_level ( void  )

Definition at line 4911 of file xlog.c.

4912{
4914 return "minimal";
4915
4916 /*
4917 * During recovery, effective_wal_level reflects the primary's
4918 * configuration rather than the local wal_level value.
4919 */
4920 if (RecoveryInProgress())
4921 return IsXLogLogicalInfoEnabled() ? "logical" : "replica";
4922
4923 return XLogLogicalInfoActive() ? "logical" : "replica";
4924}
bool IsXLogLogicalInfoEnabled(void)
Definition logicalctl.c:220
#define XLogLogicalInfoActive()
Definition xlog.h:136

References IsXLogLogicalInfoEnabled(), RecoveryInProgress(), wal_level, WAL_LEVEL_MINIMAL, and XLogLogicalInfoActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 4896 of file xlog.c.

4897{
4898 /*
4899 * We display the actual state based on shared memory, so that this GUC
4900 * reports up-to-date state if examined intra-query. The underlying
4901 * variable (in_hot_standby_guc) changes only when we transmit a new value
4902 * to the client.
4903 */
4904 return RecoveryInProgress() ? "on" : "off";
4905}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6728 of file xlog.c.

6729{
6730 /*
6731 * We should have an aux process resource owner to use, and we should not
6732 * be in a transaction that's installed some other resowner.
6733 */
6738
6739 /* Don't be chatty in standalone mode */
6741 (errmsg("shutting down")));
6742
6743 /*
6744 * Signal walsenders to move to stopping state.
6745 */
6747
6748 /*
6749 * Wait for WAL senders to be in stopping state. This prevents commands
6750 * from writing new WAL.
6751 */
6753
6754 if (RecoveryInProgress())
6756 else
6757 {
6758 /*
6759 * If archiving is enabled, rotate the last XLOG file so that all the
6760 * remaining records are archived (postmaster wakes up the archiver
6761 * process one more time at the end of shutdown). The checkpoint
6762 * record will go to the next XLOG file and won't be archived (yet).
6763 */
6764 if (XLogArchivingActive())
6765 RequestXLogSwitch(false);
6766
6768 }
6769}
bool IsPostmasterEnvironment
Definition globals.c:119
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition resowner.c:176
void WalSndInitStopping(void)
Definition walsender.c:3889
void WalSndWaitStopping(void)
Definition walsender.c:3915
bool CreateRestartPoint(int flags)
Definition xlog.c:7721
bool CreateCheckPoint(int flags)
Definition xlog.c:7015

References Assert, AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), fb(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5517 of file xlog.c.

5518{
5520 CheckPoint checkPoint;
5521 bool wasShutdown;
5522 bool didCrash;
5523 bool haveTblspcMap;
5524 bool haveBackupLabel;
5533 bool promoted = false;
5534 char timebuf[128];
5535
5536 /*
5537 * We should have an aux process resource owner to use, and we should not
5538 * be in a transaction that's installed some other resowner.
5539 */
5544
5545 /*
5546 * Check that contents look valid.
5547 */
5549 ereport(FATAL,
5551 errmsg("control file contains invalid checkpoint location")));
5552
5553 switch (ControlFile->state)
5554 {
5555 case DB_SHUTDOWNED:
5556
5557 /*
5558 * This is the expected case, so don't be chatty in standalone
5559 * mode
5560 */
5562 (errmsg("database system was shut down at %s",
5563 str_time(ControlFile->time,
5564 timebuf, sizeof(timebuf)))));
5565 break;
5566
5568 ereport(LOG,
5569 (errmsg("database system was shut down in recovery at %s",
5571 timebuf, sizeof(timebuf)))));
5572 break;
5573
5574 case DB_SHUTDOWNING:
5575 ereport(LOG,
5576 (errmsg("database system shutdown was interrupted; last known up at %s",
5578 timebuf, sizeof(timebuf)))));
5579 break;
5580
5582 ereport(LOG,
5583 (errmsg("database system was interrupted while in recovery at %s",
5585 timebuf, sizeof(timebuf))),
5586 errhint("This probably means that some data is corrupted and"
5587 " you will have to use the last backup for recovery.")));
5588 break;
5589
5591 ereport(LOG,
5592 (errmsg("database system was interrupted while in recovery at log time %s",
5594 timebuf, sizeof(timebuf))),
5595 errhint("If this has occurred more than once some data might be corrupted"
5596 " and you might need to choose an earlier recovery target.")));
5597 break;
5598
5599 case DB_IN_PRODUCTION:
5600 ereport(LOG,
5601 (errmsg("database system was interrupted; last known up at %s",
5603 timebuf, sizeof(timebuf)))));
5604 break;
5605
5606 default:
5607 ereport(FATAL,
5609 errmsg("control file contains invalid database cluster state")));
5610 }
5611
5612 /* This is just to allow attaching to startup process with a debugger */
5613#ifdef XLOG_REPLAY_DELAY
5615 pg_usleep(60000000L);
5616#endif
5617
5618 /*
5619 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5620 * In cases where someone has performed a copy for PITR, these directories
5621 * may have been excluded and need to be re-created.
5622 */
5624
5625 /* Set up timeout handler needed to report startup progress. */
5629
5630 /*----------
5631 * If we previously crashed, perform a couple of actions:
5632 *
5633 * - The pg_wal directory may still include some temporary WAL segments
5634 * used when creating a new segment, so perform some cleanup to avoid
5635 * bloating this path. This is done first, as there is no point in
5636 * syncing this temporary data.
5637 *
5638 * - There might be data which we had written, intending to fsync it, but
5639 * which we had not actually fsync'd yet. Therefore, a power failure in
5640 * the near future might cause earlier unflushed writes to be lost, even
5641 * though more recent data written to disk from here on would be
5642 * persisted. To avoid that, fsync the entire data directory.
5643 */
5646 {
5649 didCrash = true;
5650 }
5651 else
5652 didCrash = false;
5653
5654 /*
5655 * Prepare for WAL recovery if needed.
5656 *
5657 * InitWalRecovery analyzes the control file and the backup label file, if
5658 * any. It updates the in-memory ControlFile buffer according to the
5659 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5660 * It also applies the tablespace map file, if any.
5661 */
5664 checkPoint = ControlFile->checkPointCopy;
5665
5666 /* initialize shared memory variables from the checkpoint record */
5667 TransamVariables->nextXid = checkPoint.nextXid;
5668 TransamVariables->nextOid = checkPoint.nextOid;
5670 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5671 AdvanceOldestClogXid(checkPoint.oldestXid);
5672 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5673 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5675 checkPoint.newestCommitTsXid);
5676
5677 /*
5678 * Clear out any old relcache cache files. This is *necessary* if we do
5679 * any WAL replay, since that would probably result in the cache files
5680 * being out of sync with database reality. In theory we could leave them
5681 * in place if the database had been cleanly shut down, but it seems
5682 * safest to just remove them always and let them be rebuilt during the
5683 * first backend startup. These files need to be removed from all
5684 * directories including pg_tblspc; however, the symlinks are created only
5685 * after reading the tablespace_map file in case of archive recovery from
5686 * backup, so we need to clear old relcache files here, after creating
5687 * the symlinks.
5688 */
5690
5691 /*
5692 * Initialize replication slots, before there's a chance to remove
5693 * required resources.
5694 */
5696
5697 /*
5698 * Startup the logical decoding status with the last status stored in the
5699 * checkpoint record.
5700 */
5702
5703 /*
5704 * Startup logical state, needs to be setup now so we have proper data
5705 * during crash recovery.
5706 */
5708
5709 /*
5710 * Startup CLOG. This must be done after TransamVariables->nextXid has
5711 * been initialized and before we accept connections or begin WAL replay.
5712 */
5713 StartupCLOG();
5714
5715 /*
5716 * Startup MultiXact. We need to do this early to be able to replay
5717 * truncations.
5718 */
5720
5721 /*
5722 * Ditto for commit timestamps. Activate the facility if the setting is
5723 * enabled in the control file, as there should be no tracking of commit
5724 * timestamps done when the setting was disabled. This facility can be
5725 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5726 */
5729
5730 /*
5731 * Recover knowledge about replay progress of known replication partners.
5732 */
5734
5735 /*
5736 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5737 * control file. On recovery, all unlogged relations are blown away, so
5738 * the unlogged LSN counter can be reset too.
5739 */
5743 else
5746
5747 /*
5748 * Copy any missing timeline history files between 'now' and the recovery
5749 * target timeline from archive to pg_wal. While we don't need those files
5750 * ourselves - the history file of the recovery target timeline covers all
5751 * the previous timelines in the history too - a cascading standby server
5752 * might be interested in them. Or, if you archive the WAL from this
5753 * server to a different archive than the primary, it'd be good for all
5754 * the history files to get archived there after failover, so that you can
5755 * use one of the old timelines as a PITR target. Timeline history files
5756 * are small, so it's better to copy them unnecessarily than not copy them
5757 * and regret later.
5758 */
5760
5761 /*
5762 * Before running in recovery, scan pg_twophase and fill in its status to
5763 * be able to work on entries generated by redo. Doing a scan before
5764 * taking any recovery action has the merit of discarding any 2PC files that
5765 * are newer than the first record to replay, which avoids conflicts at
5766 * replay. It also avoids any subsequent scans of the on-disk two-phase
5767 * data during recovery.
5768 */
5770
5771 /*
5772 * When starting with crash recovery, reset pgstat data - it might not be
5773 * valid. Otherwise restore pgstat data. It's safe to do this here,
5774 * because postmaster will not yet have started any other processes.
5775 *
5776 * NB: Restoring replication slot stats relies on slot state to have
5777 * already been restored from disk.
5778 *
5779 * TODO: With a bit of extra work we could just start with a pgstat file
5780 * associated with the checkpoint redo location we're starting from.
5781 */
5782 if (didCrash)
5784 else
5786
5788
5791
5792 /* REDO */
5793 if (InRecovery)
5794 {
5795 /* Initialize state for RecoveryInProgress() */
5799 else
5802
5803 /*
5804 * Update pg_control to show that we are recovering and to show the
5805 * selected checkpoint as the place we are starting from. We also mark
5806 * pg_control with any minimum recovery stop point obtained from a
5807 * backup history file.
5808 *
5809 * No need to hold ControlFileLock yet, we aren't up far enough.
5810 */
5812
5813 /*
5814 * If there was a backup label file, it's done its job and the info
5815 * has now been propagated into pg_control. We must get rid of the
5816 * label file so that if we crash during recovery, we'll pick up at
5817 * the latest recovery restartpoint instead of going all the way back
5818 * to the backup start point. It seems prudent though to just rename
5819 * the file out of the way rather than delete it completely.
5820 */
5821 if (haveBackupLabel)
5822 {
5825 }
5826
5827 /*
5828 * If there was a tablespace_map file, it's done its job and the
5829 * symlinks have been created. We must get rid of the map file so
5830 * that if we crash during recovery, we don't create symlinks again.
5831 * It seems prudent though to just rename the file out of the way
5832 * rather than delete it completely.
5833 */
5834 if (haveTblspcMap)
5835 {
5838 }
5839
5840 /*
5841 * Initialize our local copy of minRecoveryPoint. When doing crash
5842 * recovery we want to replay up to the end of WAL. Particularly, in
5843 * the case of a promoted standby minRecoveryPoint value in the
5844 * control file is only updated after the first checkpoint. However,
5845 * if the instance crashes before the first post-recovery checkpoint
5846 * is completed then recovery will use a stale location causing the
5847 * startup process to think that there are still invalid page
5848 * references when checking for data consistency.
5849 */
5851 {
5854 }
5855 else
5856 {
5859 }
5860
5861 /* Check that the GUCs used to generate the WAL allow recovery */
5863
5864 /*
5865 * We're in recovery, so unlogged relations may be trashed and must be
5866 * reset. This should be done BEFORE allowing Hot Standby
5867 * connections, so that read-only backends don't try to read whatever
5868 * garbage is left over from before.
5869 */
5871
5872 /*
5873 * Likewise, delete any saved transaction snapshot files that got left
5874 * behind by crashed backends.
5875 */
5877
5878 /*
5879 * Initialize for Hot Standby, if enabled. We won't let backends in
5880 * yet, not until we've reached the min recovery point specified in
5881 * control file and we've established a recovery snapshot from a
5882 * running-xacts WAL record.
5883 */
5885 {
5886 TransactionId *xids;
5887 int nxids;
5888
5890 (errmsg_internal("initializing for hot standby")));
5891
5893
5894 if (wasShutdown)
5896 else
5897 oldestActiveXID = checkPoint.oldestActiveXid;
5899
5900 /* Tell procarray about the range of xids it has to deal with */
5902
5903 /*
5904 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5905 * have already been started up and other SLRUs are not maintained
5906 * during recovery and need not be started yet.
5907 */
5909
5910 /*
5911 * If we're beginning at a shutdown checkpoint, we know that
5912 * nothing was running on the primary at this point. So fake-up an
5913 * empty running-xacts record and use that here and now. Recover
5914 * additional standby state for prepared transactions.
5915 */
5916 if (wasShutdown)
5917 {
5919 TransactionId latestCompletedXid;
5920
5921 /* Update pg_subtrans entries for any prepared transactions */
5923
5924 /*
5925 * Construct a RunningTransactions snapshot representing a
5926 * shut down server, with only prepared transactions still
5927 * alive. We're never overflowed at this point because all
5928 * subxids are listed with their parent prepared transactions.
5929 */
5930 running.xcnt = nxids;
5931 running.subxcnt = 0;
5933 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5935 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5936 TransactionIdRetreat(latestCompletedXid);
5937 Assert(TransactionIdIsNormal(latestCompletedXid));
5938 running.latestCompletedXid = latestCompletedXid;
5939 running.xids = xids;
5940
5942 }
5943 }
5944
5945 /*
5946 * We're all set for replaying the WAL now. Do it.
5947 */
5949 performedWalRecovery = true;
5950 }
5951 else
5952 performedWalRecovery = false;
5953
5954 /*
5955 * Finish WAL recovery.
5956 */
5958 EndOfLog = endOfRecoveryInfo->endOfLog;
5959 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5960 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5961 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5962
5963 /*
5964 * Reset ps status display, so that no information related to recovery shows
5965 * up.
5966 */
5967 set_ps_display("");
5968
5969 /*
5970 * When recovering from a backup (we are in recovery, and archive recovery
5971 * was requested), complain if we did not roll forward far enough to reach
5972 * the point where the database is consistent. For regular online
5973 * backup-from-primary, that means reaching the end-of-backup WAL record
5974 * (at which point we reset backupStartPoint to be Invalid), for
5975 * backup-from-replica (which can't inject records into the WAL stream),
5976 * that point is when we reach the minRecoveryPoint in pg_control (which
5977 * we purposefully copy last when backing up from a replica). For
5978 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5979 * or snapshot-style backups (which don't), backupEndRequired will be set
5980 * to false.
5981 *
5982 * Note: it is indeed okay to look at the local variable
5983 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5984 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5985 * been advanced beyond the WAL we processed.
5986 */
5987 if (InRecovery &&
5990 {
5991 /*
5992 * Ran off end of WAL before reaching end-of-backup WAL record, or
5993 * minRecoveryPoint. That's a bad sign, indicating that you tried to
5994 * recover from an online backup but never called pg_backup_stop(), or
5995 * you didn't archive all the WAL needed.
5996 */
5998 {
6000 ereport(FATAL,
6002 errmsg("WAL ends before end of online backup"),
6003 errhint("All WAL generated while online backup was taken must be available at recovery.")));
6004 else
6005 ereport(FATAL,
6007 errmsg("WAL ends before consistent recovery point")));
6008 }
6009 }
6010
6011 /*
6012 * Reset unlogged relations to the contents of their INIT fork. This is
6013 * done AFTER recovery is complete so as to include any unlogged relations
6014 * created during recovery, but BEFORE recovery is marked as having
6015 * completed successfully. Otherwise we'd not retry if any of the post
6016 * end-of-recovery steps fail.
6017 */
6018 if (InRecovery)
6020
6021 /*
6022 * Pre-scan prepared transactions to find out the range of XIDs present.
6023 * This information is not quite needed yet, but it is positioned here so
6024 * that potential problems are detected before any on-disk change is done.
6025 */
6027
6028 /*
6029 * Allow ordinary WAL segment creation before possibly switching to a new
6030 * timeline, which creates a new segment, and after the last ReadRecord().
6031 */
6033
6034 /*
6035 * Consider whether we need to assign a new timeline ID.
6036 *
6037 * If we did archive recovery, we always assign a new ID. This handles a
6038 * couple of issues. If we stopped short of the end of WAL during
6039 * recovery, then we are clearly generating a new timeline and must assign
6040 * it a unique new ID. Even if we ran to the end, modifying the current
6041 * last segment is problematic because it may result in trying to
6042 * overwrite an already-archived copy of that segment, and we encourage
6043 * DBAs to make their archive_commands reject that. We can dodge the
6044 * problem by making the new active segment have a new timeline ID.
6045 *
6046 * In a normal crash recovery, we can just extend the timeline we were in.
6047 */
6048 newTLI = endOfRecoveryInfo->lastRecTLI;
6050 {
6052 ereport(LOG,
6053 (errmsg("selected new timeline ID: %u", newTLI)));
6054
6055 /*
6056 * Make a writable copy of the last WAL segment. (Note that we also
6057 * have a copy of the last block of the old WAL in
6058 * endOfRecoveryInfo->lastPage; we will use that below.)
6059 */
6061
6062 /*
6063 * Remove the signal files out of the way, so that we don't
6064 * accidentally re-enter archive recovery mode in a subsequent crash.
6065 */
6066 if (endOfRecoveryInfo->standby_signal_file_found)
6068
6069 if (endOfRecoveryInfo->recovery_signal_file_found)
6071
6072 /*
6073 * Write the timeline history file, and have it archived. After this
6074 * point (or rather, as soon as the file is archived), the timeline
6075 * will appear as "taken" in the WAL archive and to any standby
6076 * servers. If we crash before actually switching to the new
6077 * timeline, standby servers will nevertheless think that we switched
6078 * to the new timeline, and will try to connect to the new timeline.
6079 * To minimize the window for that, try to do as little as possible
6080 * between here and writing the end-of-recovery record.
6081 */
6083 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6084
6085 ereport(LOG,
6086 (errmsg("archive recovery complete")));
6087 }
6088
6089 /* Save the selected TimeLineID in shared memory, too */
6094
6095 /*
6096 * Actually, if WAL ended in an incomplete record, skip the parts that
6097 * made it through and start writing after the portion that persisted.
6098 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6099 * we'll do as soon as we're open for writing new WAL.)
6100 */
6102 {
6103 /*
6104 * We should only have a missingContrecPtr if we're not switching to a
6105 * new timeline. When a timeline switch occurs, WAL is copied from the
6106 * old timeline to the new only up to the end of the last complete
6107 * record, so there can't be an incomplete WAL record that we need to
6108 * disregard.
6109 */
6110 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6113 }
6114
6115 /*
6116 * Prepare to write WAL starting at EndOfLog location, and init xlog
6117 * buffer cache using the block containing the last record from the
6118 * previous incarnation.
6119 */
6120 Insert = &XLogCtl->Insert;
6122 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6123
6124 /*
6125 * Tricky point here: lastPage contains the *last* block that the LastRec
6126 * record spans, not the one it starts in. The last block is indeed the
6127 * one we want to use.
6128 */
6129 if (EndOfLog % XLOG_BLCKSZ != 0)
6130 {
6131 char *page;
6132 int len;
6133 int firstIdx;
6134
6136 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6138
6139 /* Copy the valid part of the last block, and zero the rest */
6140 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6141 memcpy(page, endOfRecoveryInfo->lastPage, len);
6142 memset(page + len, 0, XLOG_BLCKSZ - len);
6143
6146 }
6147 else
6148 {
6149 /*
6150 * There is no partial block to copy. Just set InitializedUpTo, and
6151 * let the first attempt to insert a log record initialize the next
6152 * buffer.
6153 */
6155 }
6156
6157 /*
6158 * Update local and shared status. This is OK to do without any locks
6159 * because no other process can be reading or writing WAL yet.
6160 */
6167
6168 /*
6169 * Preallocate additional log files, if wanted.
6170 */
6172
6173 /*
6174 * Okay, we're officially UP.
6175 */
6176 InRecovery = false;
6177
6178 /* start the archive_timeout timer and LSN running */
6181
6182 /* also initialize latestCompletedXid, to nextXid - 1 */
6187
6188 /*
6189 * Start up subtrans, if not already done for hot standby. (commit
6190 * timestamps are started below, if necessary.)
6191 */
6194
6195 /*
6196 * Perform end of recovery actions for any SLRUs that need it.
6197 */
6198 TrimCLOG();
6199 TrimMultiXact();
6200
6201 /*
6202 * Reload shared-memory state for prepared transactions. This needs to
6203 * happen before renaming the last partial segment of the old timeline as
6204 * it may be possible that we have to recover some transactions from it.
6205 */
6207
6208 /* Shut down xlogreader */
6210
6211 /* Enable WAL writes for this backend only. */
6213
6214 /* If necessary, write overwrite-contrecord before doing anything else */
6216 {
6219 }
6220
6221 /*
6222 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6223 * record before resource manager writes cleanup WAL records or checkpoint
6224 * record is written.
6225 */
6226 Insert->fullPageWrites = lastFullPageWrites;
6228
6229 /*
6230 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6231 */
6234
6235 /*
6236 * If any of the critical GUCs have changed, log them before we allow
6237 * backends to write WAL.
6238 */
6240
6241 /* If this is archive recovery, perform post-recovery cleanup actions. */
6244
6245 /*
6246 * Local WAL inserts enabled, so it's time to finish initialization of
6247 * commit timestamp.
6248 */
6250
6251 /*
6252 * Update logical decoding status in shared memory and write an
6253 * XLOG_LOGICAL_DECODING_STATUS_CHANGE, if necessary.
6254 */
6256
6257 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6258 if (endOfRecoveryInfo->lastPage)
6259 pfree(endOfRecoveryInfo->lastPage);
6260 pfree(endOfRecoveryInfo->recoveryStopReason);
6262
6263 /*
6264 * All done with end-of-recovery actions.
6265 *
6266 * Now allow backends to write WAL and update the control file status in
6267 * consequence. SharedRecoveryState, that controls if backends can write
6268 * WAL, is updated while holding ControlFileLock to prevent other backends
6269 * from looking at an inconsistent state of the control file in shared memory.
6270 * There is still a small window during which backends can write WAL and
6271 * the control file is still referring to a system not in DB_IN_PRODUCTION
6272 * state while looking at the on-disk control file.
6273 *
6274 * Also, we use info_lck to update SharedRecoveryState to ensure that
6275 * there are no race conditions concerning visibility of other recent
6276 * updates to shared memory.
6277 */
6280
6284
6287
6288 /*
6289 * Wake up the checkpointer process as there might be a request to disable
6290 * logical decoding by concurrent slot drop.
6291 */
6293
6294 /*
6295 * Wake up all waiters. They need to report an error that recovery was
6296 * ended before reaching the target LSN.
6297 */
6301
6302 /*
6303 * Shutdown the recovery environment. This must occur after
6304 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6305 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6306 * any session building a snapshot will not rely on KnownAssignedXids as
6307 * RecoveryInProgress() would return false at this stage. This is
6308 * particularly critical for prepared 2PC transactions, that would still
6309 * need to be included in snapshots once recovery has ended.
6310 */
6313
6314 /*
6315 * If there were cascading standby servers connected to us, nudge any wal
6316 * sender processes to notice that we've been promoted.
6317 */
6318 WalSndWakeup(true, true);
6319
6320 /*
6321 * If this was a promotion, request an (online) checkpoint now. This isn't
6322 * required for consistency, but the last restartpoint might be far back,
6323 * and in case of a crash, recovering from it might take longer than is
6324 * appropriate now that we're not in standby mode anymore.
6325 */
6326 if (promoted)
6328}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:504
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition timeline.c:304
void startup_progress_timeout_handler(void)
Definition startup.c:302
uint32 TransactionId
Definition c.h:666
void WakeupCheckpointer(void)
void StartupCLOG(void)
Definition clog.c:843
void TrimCLOG(void)
Definition clog.c:858
void StartupCommitTs(void)
Definition commit_ts.c:608
void CompleteCommitTsInitialization(void)
Definition commit_ts.c:618
void SyncDataDirectory(void)
Definition fd.c:3593
void UpdateLogicalDecodingStatusEndOfRecovery(void)
Definition logicalctl.c:553
void StartupLogicalDecodingStatus(bool last_status)
Definition logicalctl.c:146
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
void TrimMultiXact(void)
Definition multixact.c:1834
void StartupMultiXact(void)
Definition multixact.c:1809
void StartupReplicationOrigin(void)
Definition origin.c:730
@ DB_IN_PRODUCTION
Definition pg_control.h:99
@ DB_IN_CRASH_RECOVERY
Definition pg_control.h:97
const void size_t len
void pgstat_restore_stats(void)
Definition pgstat.c:507
void pgstat_discard_stats(void)
Definition pgstat.c:519
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition procarray.c:1051
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition procarray.c:1020
static void set_ps_display(const char *activity)
Definition ps_status.h:40
void ResetUnloggedRelations(int op)
Definition reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition reinit.h:27
void RelationCacheInitFileRemove(void)
Definition relcache.c:6895
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition slot.c:2378
void DeleteAllExportedSnapshotFiles(void)
Definition snapmgr.c:1587
void InitRecoveryTransactionEnvironment(void)
Definition standby.c:95
void ShutdownRecoveryTransactionEnvironment(void)
Definition standby.c:161
@ SUBXIDS_IN_SUBTRANS
Definition standby.h:82
TransactionId oldestRunningXid
Definition standby.h:92
TransactionId nextXid
Definition standby.h:91
TransactionId latestCompletedXid
Definition standby.h:95
subxids_array_status subxid_status
Definition standby.h:90
TransactionId * xids
Definition standby.h:97
FullTransactionId latestCompletedXid
Definition transam.h:238
pg_atomic_uint64 logInsertResult
Definition xlog.c:473
uint64 PrevBytePos
Definition xlog.c:411
XLogRecPtr Flush
Definition xlog.c:325
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition subtrans.c:283
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition timeout.h:38
#define TransactionIdRetreat(dest)
Definition transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition transam.h:103
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
void RecoverPreparedTransactions(void)
Definition twophase.c:2083
void restoreTwoPhaseData(void)
Definition twophase.c:1904
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1966
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2045
void WalSndWakeup(bool physical, bool logical)
Definition walsender.c:3810
void UpdateFullPageWrites(void)
Definition xlog.c:8301
static void ValidateXLOGDirectoryStructure(void)
Definition xlog.c:4136
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition xlog.c:7571
static void XLogReportParameters(void)
Definition xlog.c:8238
static bool PerformRecoveryXLogAction(void)
Definition xlog.c:6410
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition xlog.c:5377
static bool lastFullPageWrites
Definition xlog.c:220
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition xlog.c:5302
static void CheckRequiredParameterValues(void)
Definition xlog.c:5473
static void RemoveTempXlogFiles(void)
Definition xlog.c:3869
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition xlog.c:5289
#define TABLESPACE_MAP_OLD
Definition xlog.h:323
#define TABLESPACE_MAP
Definition xlog.h:322
#define STANDBY_SIGNAL_FILE
Definition xlog.h:318
#define BACKUP_LABEL_OLD
Definition xlog.h:320
#define BACKUP_LABEL_FILE
Definition xlog.h:319
#define RECOVERY_SIGNAL_FILE
Definition xlog.h:317
@ RECOVERY_STATE_CRASH
Definition xlog.h:92
@ RECOVERY_STATE_ARCHIVE
Definition xlog.h:93
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
static XLogRecPtr abortedRecPtr
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
TimeLineID recoveryTargetTLI
HotStandbyState standbyState
Definition xlogutils.c:53
bool InRecovery
Definition xlogutils.c:50
@ STANDBY_DISABLED
Definition xlogutils.h:52
void WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
Definition xlogwait.c:317
@ WAIT_LSN_TYPE_STANDBY_REPLAY
Definition xlogwait.h:39
@ WAIT_LSN_TYPE_STANDBY_FLUSH
Definition xlogwait.h:41
@ WAIT_LSN_TYPE_STANDBY_WRITE
Definition xlogwait.h:40

References abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, fb(), findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, CheckPoint::logicalDecodingEnabled, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, 
CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupLogicalDecodingStatus(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, 
TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), ValidateXLOGDirectoryStructure(), WAIT_LSN_TYPE_STANDBY_FLUSH, WAIT_LSN_TYPE_STANDBY_REPLAY, WAIT_LSN_TYPE_STANDBY_WRITE, WaitLSNWakeup(), WakeupCheckpointer(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsValid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char * buf,
size_t  bufsize 
)
static

Definition at line 5289 of file xlog.c.

5290{
5292 "%Y-%m-%d %H:%M:%S %Z",
5294
5295 return buf;
5296}
#define bufsize
static char buf[DEFAULT_XLOG_SEG_SIZE]
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition localtime.c:1345
PGDLLIMPORT pg_tz * log_timezone
Definition pgtz.c:31

References buf, bufsize, fb(), log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6335 of file xlog.c.

6336{
6337 /* initialize minRecoveryPoint to this record */
6340 if (ControlFile->minRecoveryPoint < EndRecPtr)
6341 {
6342 ControlFile->minRecoveryPoint = EndRecPtr;
6343 ControlFile->minRecoveryPointTLI = replayTLI;
6344 }
6345 /* update local copy */
6348
6349 /*
6350 * The startup process can update its local copy of minRecoveryPoint from
6351 * this point.
6352 */
6354
6356
6357 /*
6358 * We update SharedRecoveryState while holding the lock on ControlFileLock
6359 * so both states are consistent in shared memory.
6360 */
6364
6366}
static bool updateMinRecoveryPoint
Definition xlog.c:650

References ControlFile, DB_IN_ARCHIVE_RECOVERY, fb(), XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6950 of file xlog.c.

6951{
6952 /*
6953 * The status is reported only for end-of-recovery and shutdown
6954 * checkpoints or shutdown restartpoints. Updating the ps display is
6955 * useful in those situations as it may not be possible to rely on
6956 * pg_stat_activity to see the status of the checkpointer or the startup
6957 * process.
6958 */
6960 return;
6961
6962 if (reset)
6963 set_ps_display("");
6964 else
6965 {
6966 char activitymsg[128];
6967
6968 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6969 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6970 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6971 restartpoint ? "restartpoint" : "checkpoint");
6973 }
6974}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, fb(), reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6912 of file xlog.c.

6913{
6914 /*
6915 * To estimate the number of segments consumed between checkpoints, keep a
6916 * moving average of the amount of WAL generated in previous checkpoint
6917 * cycles. However, if the load is bursty, with quiet periods and busy
6918 * periods, we want to cater for the peak load. So instead of a plain
6919 * moving average, let the average decline slowly if the previous cycle
6920 * used less WAL than estimated, but bump it up immediately if it used
6921 * more.
6922 *
6923 * When checkpoints are triggered by max_wal_size, this should converge to
6924 * CheckpointSegments * wal_segment_size.
6925 *
6926 * Note: This doesn't pay any attention to what caused the checkpoint.
6927 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6928 * starting a base backup, are counted the same as those created
6929 * automatically. The slow-decline will largely mask them out, if they are
6930 * not frequent. If they are frequent, it seems reasonable to count them
6931 * in as any others; if you issue a manual checkpoint every 5 minutes and
6932 * never let a timed checkpoint happen, it makes sense to base the
6933 * preallocation on that 5 minute interval rather than whatever
6934 * checkpoint_timeout is set to.
6935 */
6936 PrevCheckPointDistance = nbytes;
6937 if (CheckPointDistanceEstimate < nbytes)
6939 else
6941 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6942}

References CheckPointDistanceEstimate, fb(), and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4618 of file xlog.c.

4619{
4621}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8301 of file xlog.c.

8302{
8304 bool recoveryInProgress;
8305
8306 /*
8307 * Do nothing if full_page_writes has not been changed.
8308 *
8309 * It's safe to check the shared full_page_writes without the lock,
8310 * because we assume that there is no concurrently running process which
8311 * can update it.
8312 */
8313 if (fullPageWrites == Insert->fullPageWrites)
8314 return;
8315
8316 /*
8317 * Perform this outside critical section so that the WAL insert
8318 * initialization done by RecoveryInProgress() doesn't trigger an
8319 * assertion failure.
8320 */
8322
8324
8325 /*
8326 * It's always safe to take full page images, even when not strictly
8327 * required, but not the other way round. So if we're setting full_page_writes
8328 * to true, first set it true and then write the WAL record. If we're
8329 * setting it to false, first write the WAL record and then set the global
8330 * flag.
8331 */
8332 if (fullPageWrites)
8333 {
8335 Insert->fullPageWrites = true;
8337 }
8338
8339 /*
8340 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8341 * full_page_writes during archive recovery, if required.
8342 */
8344 {
8346 XLogRegisterData(&fullPageWrites, sizeof(bool));
8347
8349 }
8350
8351 if (!fullPageWrites)
8352 {
8354 Insert->fullPageWrites = false;
8356 }
8358}
#define XLOG_FPW_CHANGE
Definition pg_control.h:77

References END_CRIT_SECTION, fb(), fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char * filename)
static

Definition at line 3849 of file xlog.c.

3850{
3851 uint32 tli;
3852 XLogSegNo segno;
3853
3855
3857 if (segno > XLogCtl->lastRemovedSegNo)
3858 XLogCtl->lastRemovedSegNo = segno;
3860}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2703 of file xlog.c.

2704{
2705 /* Quick check using our local copy of the variable */
2706 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2707 return;
2708
2709 /*
2710 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2711 * i.e., we're doing crash recovery. We never modify the control file's
2712 * value in that case, so we can short-circuit future checks here too. The
2713 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2714 * updated until crash recovery finishes. We only do this for the startup
2715 * process as it should not update its own reference of minRecoveryPoint
2716 * until it has finished crash recovery to make sure that all WAL
2717 * available is replayed in this case. This also saves from extra locks
2718 * taken on the control file from the startup process.
2719 */
2721 {
2722 updateMinRecoveryPoint = false;
2723 return;
2724 }
2725
2727
2728 /* update local copy */
2731
2733 updateMinRecoveryPoint = false;
2734 else if (force || LocalMinRecoveryPoint < lsn)
2735 {
2738
2739 /*
2740 * To avoid having to update the control file too often, we update it
2741 * all the way to the last record being replayed, even though 'lsn'
2742 * would suffice for correctness. This also allows the 'force' case
2743 * to not need a valid 'lsn' value.
2744 *
2745 * Another important reason for doing it this way is that the passed
2746 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2747 * the caller got it from a corrupted heap page. Accepting such a
2748 * value as the min recovery point would prevent us from coming up at
2749 * all. Instead, we just log a warning and continue with recovery.
2750 * (See also the comments about corrupt LSNs in XLogFlush.)
2751 */
2753 if (!force && newMinRecoveryPoint < lsn)
2754 elog(WARNING,
2755 "xlog min recovery request %X/%08X is past current point %X/%08X",
2757
2758 /* update control file */
2760 {
2766
2768 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2771 }
2772 }
2774}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), fb(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsValid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4136 of file xlog.c.

4137{
4138 char path[MAXPGPATH];
4139 struct stat stat_buf;
4140
4141 /* Check for pg_wal; if it doesn't exist, error out */
4142 if (stat(XLOGDIR, &stat_buf) != 0 ||
4143 !S_ISDIR(stat_buf.st_mode))
4144 ereport(FATAL,
4146 errmsg("required WAL directory \"%s\" does not exist",
4147 XLOGDIR)));
4148
4149 /* Check for archive_status */
4150 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4151 if (stat(path, &stat_buf) == 0)
4152 {
4153 /* Check for weird cases where it exists but isn't a directory */
4154 if (!S_ISDIR(stat_buf.st_mode))
4155 ereport(FATAL,
4157 errmsg("required WAL directory \"%s\" does not exist",
4158 path)));
4159 }
4160 else
4161 {
4162 ereport(LOG,
4163 (errmsg("creating missing WAL directory \"%s\"", path)));
4164 if (MakePGDirectory(path) < 0)
4165 ereport(FATAL,
4167 errmsg("could not create missing directory \"%s\": %m",
4168 path)));
4169 }
4170
4171 /* Check for summaries */
4172 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4173 if (stat(path, &stat_buf) == 0)
4174 {
4175 /* Check for weird cases where it exists but isn't a directory */
4176 if (!S_ISDIR(stat_buf.st_mode))
4177 ereport(FATAL,
4178 (errmsg("required WAL directory \"%s\" does not exist",
4179 path)));
4180 }
4181 else
4182 {
4183 ereport(LOG,
4184 (errmsg("creating missing WAL directory \"%s\"", path)));
4185 if (MakePGDirectory(path) < 0)
4186 ereport(FATAL,
4187 (errmsg("could not create missing directory \"%s\": %m",
4188 path)));
4189 }
4190}
int MakePGDirectory(const char *directoryName)
Definition fd.c:3962
#define S_ISDIR(m)
Definition win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, fb(), LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1510 of file xlog.c.

1511{
1517 int i;
1518
1519 if (MyProc == NULL)
1520 elog(PANIC, "cannot wait without a PGPROC structure");
1521
1522 /*
1523 * Check if there's any work to do. Use a barrier to ensure we get the
1524 * freshest value.
1525 */
1527 if (upto <= inserted)
1528 return inserted;
1529
1530 /* Read the current insert position */
1531 SpinLockAcquire(&Insert->insertpos_lck);
1532 bytepos = Insert->CurrBytePos;
1533 SpinLockRelease(&Insert->insertpos_lck);
1535
1536 /*
1537 * No-one should request to flush a piece of WAL that hasn't even been
1538 * reserved yet. However, it can happen if there is a block with a bogus
1539 * LSN on disk, for example. XLogFlush checks for that situation and
1540 * complains, but only after the flush. Here we just assume that to mean
1541 * that all WAL that has been reserved needs to be finished. In this
1542 * corner-case, the return value can be smaller than 'upto' argument.
1543 */
1544 if (upto > reservedUpto)
1545 {
1546 ereport(LOG,
1547 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1550 }
1551
1552 /*
1553 * Loop through all the locks, sleeping on any in-progress insert older
1554 * than 'upto'.
1555 *
1556 * finishedUpto is our return value, indicating the point upto which all
1557 * the WAL insertions have been finished. Initialize it to the head of
1558 * reserved WAL, and as we iterate through the insertion locks, back it
1559 * out for any insertion that's still in progress.
1560 */
1562 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1563 {
1565
1566 do
1567 {
1568 /*
1569 * See if this insertion is in progress. LWLockWaitForVar will
1570 * wait for the lock to be released, or for the 'value' to be set
1571 * by a LWLockUpdateVar call. When a lock is initially acquired,
1572 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1573 * know where it's inserting yet. We will have to wait for it. If
1574 * it's a small insertion, the record will most likely fit on the
1575 * same page and the inserter will release the lock without ever
1576 * calling LWLockUpdateVar. But if it has to sleep, it will
1577 * advertise the insertion point with LWLockUpdateVar before
1578 * sleeping.
1579 *
1580 * In this loop we are only waiting for insertions that started
1581 * before WaitXLogInsertionsToFinish was called. The lack of
1582 * memory barriers in the loop means that we might see locks as
1583 * "unused" that have since become used. This is fine because
1584 * they only can be used for later insertions that we would not
1585 * want to wait on anyway. Not taking a lock to acquire the
1586 * current insertingAt value means that we might see older
1587 * insertingAt values. This is also fine, because if we read a
1588 * value too old, we will add ourselves to the wait queue, which
1589 * contains atomic operations.
1590 */
1591 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1594 {
1595 /* the lock was free, so no insertion in progress */
1597 break;
1598 }
1599
1600 /*
1601 * This insertion is still in progress. Have to wait, unless the
1602 * inserter has proceeded past 'upto'.
1603 */
1604 } while (insertingat < upto);
1605
1608 }
1609
1610 /*
1611 * Advance the limit we know to have been inserted and return the freshest
1612 * value we know of, which might be beyond what we requested if somebody
1613 * is concurrently doing this with an 'upto' pointer ahead of us.
1614 */
1616 finishedUpto);
1617
1618 return finishedUpto;
1619}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition atomics.h:595
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition lwlock.c:1592
PGPROC * MyProc
Definition proc.c:67
pg_atomic_uint64 insertingAt
Definition xlog.c:373

References elog, ereport, errmsg(), fb(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), XLogCtl, and XLogRecPtrIsValid.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1377 of file xlog.c.

1378{
1379 bool immed;
1380
1381 /*
1382 * It doesn't matter which of the WAL insertion locks we acquire, so try
1383 * the one we used last time. If the system isn't particularly busy, it's
1384 * a good bet that it's still available, and it's good to have some
1385 * affinity to a particular lock so that you don't unnecessarily bounce
1386 * cache lines between processes when there's no contention.
1387 *
1388 * If this is the first time through in this backend, pick a lock
1389 * (semi-)randomly. This allows the locks to be used evenly if you have a
1390 * lot of very short connections.
1391 */
1392 static int lockToTry = -1;
1393
1394 if (lockToTry == -1)
1397
1398 /*
1399 * The insertingAt value is initially set to 0, as we don't know our
1400 * insert location yet.
1401 */
1403 if (!immed)
1404 {
1405 /*
1406 * If we couldn't get the lock immediately, try another lock next
1407 * time. On a system with more insertion locks than concurrent
1408 * inserters, this causes all the inserters to eventually migrate to a
1409 * lock that no-one else is using. On a system with more inserters
1410 * than locks, it still helps to distribute the inserters evenly
1411 * across the locks.
1412 */
1414 }
1415}
ProcNumber MyProcNumber
Definition globals.c:90
static int MyLockNo
Definition xlog.c:653

References fb(), LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1422 of file xlog.c.

1423{
1424 int i;
1425
1426 /*
1427 * When holding all the locks, all but the last lock's insertingAt
1428 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1429 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1430 */
1431 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1432 {
1437 }
1438 /* Variable value reset to 0 at release */
1440
1441 holdingAllLocks = true;
1442}
#define PG_UINT64_MAX
Definition c.h:607
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1728
static bool holdingAllLocks
Definition xlog.c:654

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1451 of file xlog.c.

1452{
1453 if (holdingAllLocks)
1454 {
1455 int i;
1456
1457 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1460 0);
1461
1462 holdingAllLocks = false;
1463 }
1464 else
1465 {
1468 0);
1469 }
1470}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1866

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1477 of file xlog.c.

1478{
1479 if (holdingAllLocks)
1480 {
1481 /*
1482 * We use the last lock to mark our actual position, see comments in
1483 * WALInsertLockAcquireExclusive.
1484 */
1487 insertingAt);
1488 }
1489 else
1492 insertingAt);
1493}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1754 of file xlog.c.

1756{
1757 char *pdst = dstbuf;
1758 XLogRecPtr recptr = startptr;
1760 Size nbytes = count;
1761
1763 return 0;
1764
1765 Assert(XLogRecPtrIsValid(startptr));
1766
1767 /*
1768 * Caller should ensure that the requested data has been inserted into WAL
1769 * buffers before we try to read it.
1770 */
1772 if (startptr + count > inserted)
1773 ereport(ERROR,
1774 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1775 LSN_FORMAT_ARGS(startptr + count),
1777
1778 /*
1779 * Loop through the buffers without a lock. For each buffer, atomically
1780 * read and verify the end pointer, then copy the data out, and finally
1781 * re-read and re-verify the end pointer.
1782 *
1783 * Once a page is evicted, it never returns to the WAL buffers, so if the
1784 * end pointer matches the expected end pointer before and after we copy
1785 * the data, then the right page must have been present during the data
1786 * copy. Read barriers are necessary to ensure that the data copy actually
1787 * happens between the two verification steps.
1788 *
1789 * If either verification fails, we simply terminate the loop and return
1790 * with the data that had been already copied out successfully.
1791 */
1792 while (nbytes > 0)
1793 {
1794 uint32 offset = recptr % XLOG_BLCKSZ;
1797 XLogRecPtr endptr;
1798 const char *page;
1799 const char *psrc;
1801
1802 /*
1803 * Calculate the end pointer we expect in the xlblocks array if the
1804 * correct page is present.
1805 */
1806 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1807
1808 /*
1809 * First verification step: check that the correct page is present in
1810 * the WAL buffers.
1811 */
1813 if (expectedEndPtr != endptr)
1814 break;
1815
1816 /*
1817 * The correct page is present (or was at the time the endptr was
1818 * read; must re-verify later). Calculate pointer to source data and
1819 * determine how much data to read from this page.
1820 */
1821 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1822 psrc = page + offset;
1823 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1824
1825 /*
1826 * Ensure that the data copy and the first verification step are not
1827 * reordered.
1828 */
1830
1831 /* data copy */
1833
1834 /*
1835 * Ensure that the data copy and the second verification step are not
1836 * reordered.
1837 */
1839
1840 /*
1841 * Second verification step: check that the page we read from wasn't
1842 * evicted while we were copying the data.
1843 */
1845 if (expectedEndPtr != endptr)
1846 break;
1847
1848 pdst += npagebytes;
1849 recptr += npagebytes;
1850 nbytes -= npagebytes;
1851 }
1852
1853 Assert(pdst - dstbuf <= count);
1854
1855 return pdst - dstbuf;
1856}
#define pg_read_barrier()
Definition atomics.h:154
#define Min(x, y)
Definition c.h:997
TimeLineID GetWALInsertionTimeLine(void)
Definition xlog.c:6646

References Assert, ereport, errmsg(), ERROR, fb(), GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsValid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4276 of file xlog.c.

4277{
4278 int fd;
4279 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4280
4281 /*
4282 * Initialize version and compatibility-check fields
4283 */
4286
4289
4295
4298
4301
4302 ControlFile->float8ByVal = true; /* vestigial */
4303
4304 /*
4305 * Initialize the default 'char' signedness.
4306 *
4307 * The signedness of the char type is implementation-defined. For instance
4308 * on x86 architecture CPUs, the char data type is typically treated as
4309 * signed by default, whereas on aarch architecture CPUs, it is typically
4310 * treated as unsigned by default. In v17 or earlier, we accidentally let
4311 * C implementation signedness affect persistent data. This led to
4312 * inconsistent results when comparing char data across different
4313 * platforms.
4314 *
4315 * This flag can be used as a hint to ensure consistent behavior for
4316 * pre-v18 data files that store data sorted by the 'char' type on disk,
4317 * especially in cross-platform replication scenarios.
4318 *
4319 * Newly created database clusters unconditionally set the default char
4320 * signedness to true. pg_upgrade changes this flag for clusters that were
4321 * initialized on signedness=false platforms. As a result,
4322 * signedness=false setting will become rare over time. If we had known
4323 * about this problem during the last development cycle that forced initdb
4324 * (v8.3), we would have made all clusters signed or all clusters
4325 * unsigned. Making pg_upgrade the only source of signedness=false will
4326 * cause the population of database clusters to converge toward that
4327 * retrospective ideal.
4328 */
4330
4331 /* Contents are protected with a CRC */
4337
4338 /*
4339 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4340 * the excess over sizeof(ControlFileData). This reduces the odds of
4341 * premature-EOF errors when reading pg_control. We'll still fail when we
4342 * check the contents of the file, but hopefully with a more specific
4343 * error than "couldn't read pg_control".
4344 */
4345 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4346 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4347
4350 if (fd < 0)
4351 ereport(PANIC,
4353 errmsg("could not create file \"%s\": %m",
4355
4356 errno = 0;
4359 {
4360 /* if write didn't set errno, assume problem is no disk space */
4361 if (errno == 0)
4362 errno = ENOSPC;
4363 ereport(PANIC,
4365 errmsg("could not write to file \"%s\": %m",
4367 }
4369
4371 if (pg_fsync(fd) != 0)
4372 ereport(PANIC,
4374 errmsg("could not fsync file \"%s\": %m",
4377
4378 if (close(fd) != 0)
4379 ereport(PANIC,
4381 errmsg("could not close file \"%s\": %m",
4383}
#define PG_CONTROL_FILE_SIZE
Definition pg_control.h:260

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg(), fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState *record)

Definition at line 8370 of file xlog.c.

8371{
8372 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8373 XLogRecPtr lsn = record->EndRecPtr;
8374
8375 /*
8376 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8377 * XLOG_FPI_FOR_HINT records.
8378 */
8379 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8380 !XLogRecHasAnyBlockRefs(record));
8381
8382 if (info == XLOG_NEXTOID)
8383 {
8384 Oid nextOid;
8385
8386 /*
8387 * We used to try to take the maximum of TransamVariables->nextOid and
8388 * the recorded nextOid, but that fails if the OID counter wraps
8389 * around. Since no OID allocation should be happening during replay
8390 * anyway, better to just believe the record exactly. We still take
8391 * OidGenLock while setting the variable, just in case.
8392 */
8393 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8395 TransamVariables->nextOid = nextOid;
8398 }
8399 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8400 {
8401 CheckPoint checkPoint;
8402 TimeLineID replayTLI;
8403
8404 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8405 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8407 TransamVariables->nextXid = checkPoint.nextXid;
8410 TransamVariables->nextOid = checkPoint.nextOid;
8414 checkPoint.nextMultiOffset);
8415
8417 checkPoint.oldestMultiDB);
8418
8419 /*
8420 * No need to set oldestClogXid here as well; it'll be set when we
8421 * redo an xl_clog_truncate if it changed since initialization.
8422 */
8423 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8424
8425 /*
8426 * If we see a shutdown checkpoint while waiting for an end-of-backup
8427 * record, the backup was canceled and the end-of-backup record will
8428 * never arrive.
8429 */
8433 ereport(PANIC,
8434 (errmsg("online backup was canceled, recovery cannot continue")));
8435
8436 /*
8437 * If we see a shutdown checkpoint, we know that nothing was running
8438 * on the primary at this point. So fake-up an empty running-xacts
8439 * record and use that here and now. Recover additional standby state
8440 * for prepared transactions.
8441 */
8443 {
8444 TransactionId *xids;
8445 int nxids;
8447 TransactionId latestCompletedXid;
8449
8451
8452 /* Update pg_subtrans entries for any prepared transactions */
8454
8455 /*
8456 * Construct a RunningTransactions snapshot representing a shut
8457 * down server, with only prepared transactions still alive. We're
8458 * never overflowed at this point because all subxids are listed
8459 * with their parent prepared transactions.
8460 */
8461 running.xcnt = nxids;
8462 running.subxcnt = 0;
8464 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8466 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8467 TransactionIdRetreat(latestCompletedXid);
8468 Assert(TransactionIdIsNormal(latestCompletedXid));
8469 running.latestCompletedXid = latestCompletedXid;
8470 running.xids = xids;
8471
8473 }
8474
8475 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8479
8480 /*
8481 * We should've already switched to the new TLI before replaying this
8482 * record.
8483 */
8484 (void) GetCurrentReplayRecPtr(&replayTLI);
8485 if (checkPoint.ThisTimeLineID != replayTLI)
8486 ereport(PANIC,
8487 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8488 checkPoint.ThisTimeLineID, replayTLI)));
8489
8490 RecoveryRestartPoint(&checkPoint, record);
8491
8492 /*
8493 * After replaying a checkpoint record, free all smgr objects.
8494 * Otherwise we would never do so for dropped relations, as the
8495 * startup does not process shared invalidation messages or call
8496 * AtEOXact_SMgr().
8497 */
8499 }
8500 else if (info == XLOG_CHECKPOINT_ONLINE)
8501 {
8502 CheckPoint checkPoint;
8503 TimeLineID replayTLI;
8504
8505 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8506 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8509 checkPoint.nextXid))
8510 TransamVariables->nextXid = checkPoint.nextXid;
8512
8513 /*
8514 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8515 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8516 * counter is from the start of the checkpoint and might well be stale
8517 * compared to later XLOG_NEXTOID records. We could try to take the
8518 * maximum of the nextOid counter and our latest value, but since
8519 * there's no particular guarantee about the speed with which the OID
8520 * counter wraps around, that's a risky thing to do. In any case,
8521 * users of the nextOid counter are required to avoid assignment of
8522 * duplicates, so that a somewhat out-of-date value should be safe.
8523 */
8524
8525 /* Handle multixact */
8527 checkPoint.nextMultiOffset);
8528
8529 /*
8530 * NB: This may perform multixact truncation when replaying WAL
8531 * generated by an older primary.
8532 */
8534 checkPoint.oldestMultiDB);
8536 checkPoint.oldestXid))
8538 checkPoint.oldestXidDB);
8539 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8543
8544 /* TLI should not change in an on-line checkpoint */
8545 (void) GetCurrentReplayRecPtr(&replayTLI);
8546 if (checkPoint.ThisTimeLineID != replayTLI)
8547 ereport(PANIC,
8548 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8549 checkPoint.ThisTimeLineID, replayTLI)));
8550
8551 RecoveryRestartPoint(&checkPoint, record);
8552
8553 /*
8554 * After replaying a checkpoint record, free all smgr objects.
8555 * Otherwise we would never do so for dropped relations, as the
8556 * startup does not process shared invalidation messages or call
8557 * AtEOXact_SMgr().
8558 */
8560 }
8561 else if (info == XLOG_OVERWRITE_CONTRECORD)
8562 {
8563 /* nothing to do here, handled in xlogrecovery_redo() */
8564 }
8565 else if (info == XLOG_END_OF_RECOVERY)
8566 {
8568 TimeLineID replayTLI;
8569
8570 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8571
8572 /*
8573 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8574 * but this case is rarer and harder to test, so the benefit doesn't
8575 * outweigh the potential extra cost of maintenance.
8576 */
8577
8578 /*
8579 * We should've already switched to the new TLI before replaying this
8580 * record.
8581 */
8582 (void) GetCurrentReplayRecPtr(&replayTLI);
8583 if (xlrec.ThisTimeLineID != replayTLI)
8584 ereport(PANIC,
8585 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8586 xlrec.ThisTimeLineID, replayTLI)));
8587 }
8588 else if (info == XLOG_NOOP)
8589 {
8590 /* nothing to do here */
8591 }
8592 else if (info == XLOG_SWITCH)
8593 {
8594 /* nothing to do here */
8595 }
8596 else if (info == XLOG_RESTORE_POINT)
8597 {
8598 /* nothing to do here, handled in xlogrecovery.c */
8599 }
8600 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8601 {
8602 /*
8603 * XLOG_FPI records contain nothing else but one or more block
8604 * references. Every block reference must include a full-page image
8605 * even if full_page_writes was disabled when the record was generated
8606 * - otherwise there would be no point in this record.
8607 *
8608 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8609 * WAL-logged because of a hint bit update. They are only generated
8610 * when checksums and/or wal_log_hints are enabled. They may include
8611 * no full-page images if full_page_writes was disabled when they were
8612 * generated. In this case there is nothing to do here.
8613 *
8614 * No recovery conflicts are generated by these generic records - if a
8615 * resource manager needs to generate conflicts, it has to define a
8616 * separate WAL record type and redo routine.
8617 */
8618 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8619 {
8620 Buffer buffer;
8621
8622 if (!XLogRecHasBlockImage(record, block_id))
8623 {
8624 if (info == XLOG_FPI)
8625 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8626 continue;
8627 }
8628
8629 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8630 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8631 UnlockReleaseBuffer(buffer);
8632 }
8633 }
8634 else if (info == XLOG_BACKUP_END)
8635 {
8636 /* nothing to do here, handled in xlogrecovery_redo() */
8637 }
8638 else if (info == XLOG_PARAMETER_CHANGE)
8639 {
8641
8642 /* Update our copy of the parameters in pg_control */
8643 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8644
8646 ControlFile->MaxConnections = xlrec.MaxConnections;
8647 ControlFile->max_worker_processes = xlrec.max_worker_processes;
8648 ControlFile->max_wal_senders = xlrec.max_wal_senders;
8649 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
8650 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
8651 ControlFile->wal_level = xlrec.wal_level;
8652 ControlFile->wal_log_hints = xlrec.wal_log_hints;
8653
8654 /*
8655 * Update minRecoveryPoint to ensure that if recovery is aborted, we
8656 * recover back up to this point before allowing hot standby again.
8657 * This is important if the max_* settings are decreased, to ensure
8658 * you don't run queries against the WAL preceding the change. The
8659 * local copies cannot be updated as long as crash recovery is
8660 * happening and we expect all the WAL to be replayed.
8661 */
8663 {
8666 }
8668 {
8669 TimeLineID replayTLI;
8670
8671 (void) GetCurrentReplayRecPtr(&replayTLI);
8673 ControlFile->minRecoveryPointTLI = replayTLI;
8674 }
8675
8676 CommitTsParameterChange(xlrec.track_commit_timestamp,
8678 ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
8679
8682
8683 /* Check to see if any parameter change gives a problem on recovery */
8685 }
8686 else if (info == XLOG_FPW_CHANGE)
8687 {
8688 bool fpw;
8689
8690 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8691
8692 /*
8693 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8694 * do_pg_backup_start() and do_pg_backup_stop() can check whether
8695 * full_page_writes has been disabled during online backup.
8696 */
8697 if (!fpw)
8698 {
8703 }
8704
8705 /* Keep track of full_page_writes */
8707 }
8708 else if (info == XLOG_CHECKPOINT_REDO)
8709 {
8710 /* nothing to do here, just for informational purposes */
8711 }
8712 else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE)
8713 {
8714 bool status;
8715
8716 memcpy(&status, XLogRecGetData(record), sizeof(bool));
8717
8718 /*
8719 * We need to toggle the logical decoding status and update the
8720 * XLogLogicalInfo cache of processes synchronously because
8721 * XLogLogicalInfoActive() is used even during read-only queries
8722 * (e.g., via RelationIsAccessibleInLogicalDecoding()). In the
8723 * 'disable' case, it is safe to invalidate existing slots after
8724 * disabling logical decoding because logical decoding cannot process
8725 * subsequent WAL records, which may not contain logical information.
8726 */
8727 if (status)
8729 else
8731
8732 elog(DEBUG1, "update logical decoding status to %d during recovery",
8733 status);
8734
8735 if (InRecovery && InHotStandby)
8736 {
8737 if (!status)
8738 {
8739 /*
8740 * Invalidate logical slots if we are in hot standby and the
8741 * primary disabled logical decoding.
8742 */
8744 0, InvalidOid,
8746 }
8747 else if (sync_replication_slots)
8748 {
8749 /*
8750 * Signal the postmaster to launch the slotsync worker.
8751 *
8752 * XXX: For simplicity, we keep the slotsync worker running
8753 * even after logical decoding is disabled. A future
8754 * improvement can consider starting and stopping the worker
8755 * based on logical decoding status change.
8756 */
8758 }
8759 }
8760 }
8761}
int Buffer
Definition buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
uint8_t uint8
Definition c.h:544
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition commit_ts.c:640
pid_t PostmasterPid
Definition globals.c:106
void DisableLogicalDecoding(void)
Definition logicalctl.c:491
void EnableLogicalDecoding(void)
Definition logicalctl.c:340
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2191
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2164
#define XLOG_RESTORE_POINT
Definition pg_control.h:76
#define XLOG_FPI
Definition pg_control.h:80
#define XLOG_FPI_FOR_HINT
Definition pg_control.h:79
#define XLOG_NEXTOID
Definition pg_control.h:72
#define XLOG_NOOP
Definition pg_control.h:71
#define XLOG_PARAMETER_CHANGE
Definition pg_control.h:75
#define XLOG_LOGICAL_DECODING_STATUS_CHANGE
Definition pg_control.h:84
@ RS_INVAL_WAL_LEVEL
Definition slot.h:66
bool sync_replication_slots
Definition slotsync.c:117
void smgrdestroyall(void)
Definition smgr.c:386
#define FullTransactionIdPrecedes(a, b)
Definition transam.h:51
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition xlog.c:7681
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XLogRecMaxBlockId(decoder)
Definition xlogreader.h:417
#define XLogRecHasBlockImage(decoder, block_id)
Definition xlogreader.h:422
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:416
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition xlogutils.c:303
@ STANDBY_INITIALIZED
Definition xlogutils.h:53
#define InHotStandby
Definition xlogutils.h:60
@ BLK_RESTORED
Definition xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, DEBUG1, DisableLogicalDecoding(), elog, EnableLogicalDecoding(), XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, fb(), FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, kill, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::max_locks_per_xact, ControlFileData::max_prepared_xacts, ControlFileData::max_wal_senders, ControlFileData::max_worker_processes, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PostmasterPid, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SIGUSR1, smgrdestroyall(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, sync_replication_slots, CheckPoint::ThisTimeLineID, ControlFileData::track_commit_timestamp, 
TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), ControlFileData::wal_level, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_LOGICAL_DECODING_STATUS_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsValid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2988 of file xlog.c.

2989{
2991 bool flexible = true;
2992 static TimestampTz lastflush;
2994 int flushblocks;
2996
2997 /* XLOG doesn't need flushing during recovery */
2998 if (RecoveryInProgress())
2999 return false;
3000
3001 /*
3002 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3003 * so we can read it without a lock.
3004 */
3006
3007 /* read updated LogwrtRqst */
3011
3012 /* back off to last completed page boundary */
3013 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3014
3015 /* if we have already flushed that far, consider async commit records */
3017 if (WriteRqst.Write <= LogwrtResult.Flush)
3018 {
3022 flexible = false; /* ensure it all gets written */
3023 }
3024
3025 /*
3026 * If already known flushed, we're done. Just need to check if we are
3027 * holding an open file handle to a logfile that's no longer in use,
3028 * preventing the file from being deleted.
3029 */
3030 if (WriteRqst.Write <= LogwrtResult.Flush)
3031 {
3032 if (openLogFile >= 0)
3033 {
3036 {
3037 XLogFileClose();
3038 }
3039 }
3040 return false;
3041 }
3042
3043 /*
3044 * Determine how far to flush WAL, based on the wal_writer_delay and
3045 * wal_writer_flush_after GUCs.
3046 *
3047 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3048 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3049 * logic is the same in both places if you change this.
3050 */
3052 flushblocks =
3054
3055 if (WalWriterFlushAfter == 0 || lastflush == 0)
3056 {
3057 /* first call, or block based limits disabled */
3058 WriteRqst.Flush = WriteRqst.Write;
3059 lastflush = now;
3060 }
3062 {
3063 /*
3064 * Flush the writes at least every WalWriterDelay ms. This is
3065 * important to bound the amount of time it takes for an asynchronous
3066 * commit to hit disk.
3067 */
3068 WriteRqst.Flush = WriteRqst.Write;
3069 lastflush = now;
3070 }
3071 else if (flushblocks >= WalWriterFlushAfter)
3072 {
3073 /* exceeded wal_writer_flush_after blocks, flush */
3074 WriteRqst.Flush = WriteRqst.Write;
3075 lastflush = now;
3076 }
3077 else
3078 {
3079 /* no flushing, this time round */
3081 }
3082
3083#ifdef WAL_DEBUG
3084 if (XLOG_DEBUG)
3085 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3090#endif
3091
3093
3094 /* now wait for any in-progress insertions to finish and get write lock */
3098 if (WriteRqst.Write > LogwrtResult.Write ||
3100 {
3102 }
3104
3106
3107 /* wake up walsenders now that we've released heavily contended locks */
3109
3110 /*
3111 * If we flushed an LSN that someone was waiting for, notify the waiters.
3112 */
3113 if (waitLSNState &&
3117
3118 /*
3119 * Great, done. To take some work off the critical path, try to initialize
3120 * as many of the no-longer-needed WAL buffers for future use as we can.
3121 */
3123
3124 /*
3125 * If we determined that we need to write data, but somebody else
3126 * wrote/flushed already, it should be considered as being active, to
3127 * avoid hibernating too early.
3128 */
3129 return true;
3130}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
pg_atomic_uint64 minWaitedLSN[WAIT_LSN_TYPE_COUNT]
Definition xlogwait.h:85
XLogRecPtr asyncXactLSN
Definition xlog.c:460
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition walsender.h:65
int WalWriterFlushAfter
Definition walwriter.c:71
int WalWriterDelay
Definition walwriter.c:70
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
struct WaitLSNState * waitLSNState
Definition xlogwait.c:68
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, fb(), XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), WaitLSNState::minWaitedLSN, now(), openLogFile, openLogSegNo, pg_atomic_read_u64(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1904 of file xlog.c.

1905{
1910 XLogRecPtr result;
1911
1914
1916 {
1917 /* fits on first page of segment */
1918 if (bytesleft == 0)
1919 seg_offset = 0;
1920 else
1922 }
1923 else
1924 {
1925 /* account for the first page on segment with long header */
1928
1931
1932 if (bytesleft == 0)
1934 else
1936 }
1937
1939
1940 return result;
1941}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1864 of file xlog.c.

1865{
1870 XLogRecPtr result;
1871
1874
1876 {
1877 /* fits on first page of segment */
1879 }
1880 else
1881 {
1882 /* account for the first page on segment with long header */
1885
1888
1890 }
1891
1893
1894 return result;
1895}

References fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2283 of file xlog.c.

2284{
2286
2288
2290 return true;
2291 return false;
2292}

References CheckPointSegments, fb(), RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4693 of file xlog.c.

4694{
4695 int xbuffers;
4696
4697 xbuffers = NBuffers / 32;
4700 if (xbuffers < 8)
4701 xbuffers = 8;
4702 return xbuffers;
4703}

References fb(), NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3676 of file xlog.c.

3677{
3678 Assert(openLogFile >= 0);
3679
3680 /*
3681 * WAL segment files will not be re-read in normal operation, so we advise
3682 * the OS to release any cached pages. But do not do so if WAL archiving
3683 * or streaming is active, because archiver and walsender process could
3684 * use the cache to read the WAL segment.
3685 */
3686#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3687 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3689#endif
3690
3691 if (close(openLogFile) != 0)
3692 {
3693 char xlogfname[MAXFNAMELEN];
3694 int save_errno = errno;
3695
3697 errno = save_errno;
3698 ereport(PANIC,
3700 errmsg("could not close file \"%s\": %m", xlogfname)));
3701 }
3702
3703 openLogFile = -1;
3705}
void ReleaseExternalFD(void)
Definition fd.c:1224

References Assert, close, ereport, errcode_for_file_access(), errmsg(), fb(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3455 of file xlog.c.

3458{
3459 char path[MAXPGPATH];
3460 char tmppath[MAXPGPATH];
3461 PGAlignedXLogBlock buffer;
3462 int srcfd;
3463 int fd;
3464 int nbytes;
3465
3466 /*
3467 * Open the source file
3468 */
3471 if (srcfd < 0)
3472 ereport(ERROR,
3474 errmsg("could not open file \"%s\": %m", path)));
3475
3476 /*
3477 * Copy into a temp file name.
3478 */
3479 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3480
3481 unlink(tmppath);
3482
3483 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3485 if (fd < 0)
3486 ereport(ERROR,
3488 errmsg("could not create file \"%s\": %m", tmppath)));
3489
3490 /*
3491 * Do the data copying.
3492 */
3493 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3494 {
3495 int nread;
3496
3497 nread = upto - nbytes;
3498
3499 /*
3500 * The part that is not read from the source file is filled with
3501 * zeros.
3502 */
3503 if (nread < sizeof(buffer))
3504 memset(buffer.data, 0, sizeof(buffer));
3505
3506 if (nread > 0)
3507 {
3508 int r;
3509
3510 if (nread > sizeof(buffer))
3511 nread = sizeof(buffer);
3513 r = read(srcfd, buffer.data, nread);
3514 if (r != nread)
3515 {
3516 if (r < 0)
3517 ereport(ERROR,
3519 errmsg("could not read file \"%s\": %m",
3520 path)));
3521 else
3522 ereport(ERROR,
3524 errmsg("could not read file \"%s\": read %d of %zu",
3525 path, r, (Size) nread)));
3526 }
3528 }
3529 errno = 0;
3531 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3532 {
3533 int save_errno = errno;
3534
3535 /*
3536 * If we fail to make the file, delete it to release disk space
3537 */
3538 unlink(tmppath);
3539 /* if write didn't set errno, assume problem is no disk space */
3541
3542 ereport(ERROR,
3544 errmsg("could not write to file \"%s\": %m", tmppath)));
3545 }
3547 }
3548
3550 if (pg_fsync(fd) != 0)
3553 errmsg("could not fsync file \"%s\": %m", tmppath)));
3555
3556 if (CloseTransientFile(fd) != 0)
3557 ereport(ERROR,
3559 errmsg("could not close file \"%s\": %m", tmppath)));
3560
3561 if (CloseTransientFile(srcfd) != 0)
3562 ereport(ERROR,
3564 errmsg("could not close file \"%s\": %m", path)));
3565
3566 /*
3567 * Now move the segment into place with its final name.
3568 */
3570 elog(ERROR, "InstallXLogFileSegment should not have failed");
3571}
int CloseTransientFile(int fd)
Definition fd.c:2854
int data_sync_elevel(int elevel)
Definition fd.c:3985
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2677
char data[XLOG_BLCKSZ]
Definition c.h:1137

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3417 of file xlog.c.

3418{
3419 bool ignore_added;
3420 char path[MAXPGPATH];
3421 int fd;
3422
3423 Assert(logtli != 0);
3424
3426 if (fd >= 0)
3427 return fd;
3428
3429 /* Now open original target segment (might not be file I just made) */
3432 if (fd < 0)
3433 ereport(ERROR,
3435 errmsg("could not open file \"%s\": %m", path)));
3436 return fd;
3437}
#define O_CLOEXEC
Definition win32_port.h:344

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3229 of file xlog.c.

3231{
3232 char tmppath[MAXPGPATH];
3235 int fd;
3236 int save_errno;
3239
3240 Assert(logtli != 0);
3241
3243
3244 /*
3245 * Try to use an existing file (the checkpoint maker may have created it already)
3246 */
3247 *added = false;
3250 if (fd < 0)
3251 {
3252 if (errno != ENOENT)
3253 ereport(ERROR,
3255 errmsg("could not open file \"%s\": %m", path)));
3256 }
3257 else
3258 return fd;
3259
3260 /*
3261 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3262 * another process is doing the same thing. If so, we will end up
3263 * pre-creating an extra log segment. That seems OK, and better than
3264 * holding the lock throughout this lengthy process.
3265 */
3266 elog(DEBUG2, "creating and filling new WAL file");
3267
3268 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3269
3270 unlink(tmppath);
3271
3274
3275 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3277 if (fd < 0)
3278 ereport(ERROR,
3280 errmsg("could not create file \"%s\": %m", tmppath)));
3281
3282 /* Measure I/O timing when initializing segment */
3284
3286 save_errno = 0;
3287 if (wal_init_zero)
3288 {
3289 ssize_t rc;
3290
3291 /*
3292 * Zero-fill the file. With this setting, we do this the hard way to
3293 * ensure that all the file space has really been allocated. On
3294 * platforms that allow "holes" in files, just seeking to the end
3295 * doesn't allocate intermediate space. This way, we know that we
3296 * have all the space and (after the fsync below) that all the
3297 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3298 * O_DSYNC will be sufficient to sync future writes to the log file.
3299 */
3301
3302 if (rc < 0)
3303 save_errno = errno;
3304 }
3305 else
3306 {
3307 /*
3308 * Otherwise, seeking to the end and writing a solitary byte is
3309 * enough.
3310 */
3311 errno = 0;
3312 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3313 {
3314 /* if write didn't set errno, assume no disk space */
3316 }
3317 }
3319
3320 /*
3321 * A full segment worth of data is written when using wal_init_zero. One
3322 * byte is written when not using it.
3323 */
3325 io_start, 1,
3327
3328 if (save_errno)
3329 {
3330 /*
3331 * If we fail to make the file, delete it to release disk space
3332 */
3333 unlink(tmppath);
3334
3335 close(fd);
3336
3337 errno = save_errno;
3338
3339 ereport(ERROR,
3341 errmsg("could not write to file \"%s\": %m", tmppath)));
3342 }
3343
3344 /* Measure I/O timing when flushing segment */
3346
3348 if (pg_fsync(fd) != 0)
3349 {
3350 save_errno = errno;
3351 close(fd);
3352 errno = save_errno;
3353 ereport(ERROR,
3355 errmsg("could not fsync file \"%s\": %m", tmppath)));
3356 }
3358
3360 IOOP_FSYNC, io_start, 1, 0);
3361
3362 if (close(fd) != 0)
3363 ereport(ERROR,
3365 errmsg("could not close file \"%s\": %m", tmppath)));
3366
3367 /*
3368 * Now move the segment into place with its final name. Cope with
3369 * possibility that someone else has created the file while we were
3370 * filling ours: if so, use ours to pre-create a future log segment.
3371 */
3373
3374 /*
3375 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3376 * that was a constant, but that was always a bit dubious: normally, at a
3377 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3378 * here, it was the offset from the insert location. We can't do the
3379 * normal XLOGfileslop calculation here because we don't have access to
3380 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3381 * CheckPointSegments.
3382 */
3385 logtli))
3386 {
3387 *added = true;
3388 elog(DEBUG2, "done creating and filling new WAL file");
3389 }
3390 else
3391 {
3392 /*
3393 * No need for any more future segments, or InstallXLogFileSegment()
3394 * failed to rename the file into place. If the rename failed, a
3395 * caller opening the file may fail.
3396 */
3397 unlink(tmppath);
3398 elog(DEBUG2, "abandoned new WAL file");
3399 }
3400
3401 return -1;
3402}
#define IO_DIRECT_WAL_INIT
Definition fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
Definition file_utils.c:709
@ IOCONTEXT_INIT
Definition pgstat.h:288
@ IOOP_WRITE
Definition pgstat.h:316
#define pg_pwrite
Definition port.h:248
bool wal_init_zero
Definition xlog.c:130

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3655 of file xlog.c.

3656{
3657 char path[MAXPGPATH];
3658 int fd;
3659
3660 XLogFilePath(path, tli, segno, wal_segment_size);
3661
3664 if (fd < 0)
3665 ereport(PANIC,
3667 errmsg("could not open file \"%s\": %m", path)));
3668
3669 return fd;
3670}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2233 of file xlog.c.

2234{
2237 double distance;
2239
2240 /*
2241 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2242 * correspond to. Always recycle enough segments to meet the minimum, and
2243 * remove enough segments to stay below the maximum.
2244 */
2249
2250 /*
2251 * Between those limits, recycle enough segments to get us through to the
2252 * estimated end of next checkpoint.
2253 *
2254 * To estimate where the next checkpoint will finish, assume that the
2255 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2256 * every checkpoint.
2257 */
2259 /* add 10% for good measure. */
2260 distance *= 1.10;
2261
2262 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2264
2265 if (recycleSegNo < minSegNo)
2267 if (recycleSegNo > maxSegNo)
2269
2270 return recycleSegNo;
2271}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, fb(), max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2783 of file xlog.c.

2784{
2788
2789 /*
2790 * During REDO, we are reading not writing WAL. Therefore, instead of
2791 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2792 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2793 * to act this way too, and because when it tries to write the
2794 * end-of-recovery checkpoint, it should indeed flush.
2795 */
2796 if (!XLogInsertAllowed())
2797 {
2798 UpdateMinRecoveryPoint(record, false);
2799 return;
2800 }
2801
2802 /* Quick exit if already known flushed */
2803 if (record <= LogwrtResult.Flush)
2804 return;
2805
2806#ifdef WAL_DEBUG
2807 if (XLOG_DEBUG)
2808 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
2809 LSN_FORMAT_ARGS(record),
2812#endif
2813
2815
2816 /*
2817 * Since fsync is usually a horribly expensive operation, we try to
2818 * piggyback as much data as we can on each fsync: if we see any more data
2819 * entered into the xlog buffer, we'll write and fsync that too, so that
2820 * the final value of LogwrtResult.Flush is as large as possible. This
2821 * gives us some chance of avoiding another fsync immediately after.
2822 */
2823
2824 /* initialize to given target; may increase below */
2825 WriteRqstPtr = record;
2826
2827 /*
2828 * Now wait until we get the write lock, or someone else does the flush
2829 * for us.
2830 */
2831 for (;;)
2832 {
2834
2835 /* done already? */
2837 if (record <= LogwrtResult.Flush)
2838 break;
2839
2840 /*
2841 * Before actually performing the write, wait for all in-flight
2842 * insertions to the pages we're about to write to finish.
2843 */
2845 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2849
2850 /*
2851 * Try to get the write lock. If we can't get it immediately, wait
2852 * until it's released, and recheck if we still need to do the flush
2853 * or if the backend that held the lock did it for us already. This
2854 * helps to maintain a good rate of group committing when the system
2855 * is bottlenecked by the speed of fsyncing.
2856 */
2858 {
2859 /*
2860 * The lock is now free, but we didn't acquire it yet. Before we
2861 * do, loop back to check if someone else flushed the record for
2862 * us already.
2863 */
2864 continue;
2865 }
2866
2867 /* Got the lock; recheck whether request is satisfied */
2869 if (record <= LogwrtResult.Flush)
2870 {
2872 break;
2873 }
2874
2875 /*
2876 * Sleep before flush! By adding a delay here, we may give further
2877 * backends the opportunity to join the backlog of group commit
2878 * followers; this can significantly improve transaction throughput,
2879 * at the risk of increasing transaction latency.
2880 *
2881 * We do not sleep if enableFsync is not turned on, nor if there are
2882 * fewer than CommitSiblings other backends with active transactions.
2883 */
2884 if (CommitDelay > 0 && enableFsync &&
2886 {
2890
2891 /*
2892 * Re-check how far we can now flush the WAL. It's generally not
2893 * safe to call WaitXLogInsertionsToFinish while holding
2894 * WALWriteLock, because an in-progress insertion might need to
2895 * also grab WALWriteLock to make progress. But we know that all
2896 * the insertions up to insertpos have already finished, because
2897 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2898 * We're only calling it again to allow insertpos to be moved
2899 * further forward, not to actually wait for anyone.
2900 */
2902 }
2903
2904 /* try to write/flush later additions to XLOG as well */
2905 WriteRqst.Write = insertpos;
2906 WriteRqst.Flush = insertpos;
2907
2908 XLogWrite(WriteRqst, insertTLI, false);
2909
2911 /* done */
2912 break;
2913 }
2914
2916
2917 /* wake up walsenders now that we've released heavily contended locks */
2919
2920 /*
2921 * If we flushed an LSN that someone was waiting for, notify the waiters.
2922 */
2923 if (waitLSNState &&
2927
2928 /*
2929 * If we still haven't flushed to the request point then we have a
2930 * problem; most likely, the requested flush point is past end of XLOG.
2931 * This has been seen to occur when a disk page has a corrupted LSN.
2932 *
2933 * Formerly we treated this as a PANIC condition, but that hurts the
2934 * system's robustness rather than helping it: we do not want to take down
2935 * the whole system due to corruption on one data page. In particular, if
2936 * the bad page is encountered again during recovery then we would be
2937 * unable to restart the database at all! (This scenario actually
2938 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2939 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2940 * the only time we can reach here during recovery is while flushing the
2941 * end-of-recovery checkpoint record, and we don't expect that to have a
2942 * bad LSN.
2943 *
2944 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2945 * since xact.c calls this routine inside a critical section. However,
2946 * calls from bufmgr.c are not within critical sections and so we will not
2947 * force a restart for a bad LSN on a data page.
2948 */
2949 if (LogwrtResult.Flush < record)
2950 elog(ERROR,
2951 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2952 LSN_FORMAT_ARGS(record),
2954
2955 /*
2956 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2957 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2958 * remain consistent.
2959 */
2960 Assert(!XLogNeedsFlush(record));
2961}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1404
bool MinimumActiveBackends(int min)
Definition procarray.c:3495
int CommitDelay
Definition xlog.c:135
int CommitSiblings
Definition xlog.c:136
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3145
bool XLogInsertAllowed(void)
Definition xlog.c:6515

References Assert, CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, fb(), XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), WaitLSNState::minWaitedLSN, pg_atomic_read_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FinishSyncWorker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_logical_decoding_status_update_record(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3795 of file xlog.c.

3796{
3797 XLogSegNo lastRemovedSegNo;
3798
3800 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3802
3803 return lastRemovedSegNo;
3804}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3811 of file xlog.c.

3812{
3813 DIR *xldir;
3814 struct dirent *xlde;
3816
3818 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3819 {
3822
3823 /* Ignore files that are not XLOG segments. */
3824 if (!IsXLogFileName(xlde->d_name))
3825 continue;
3826
3827 /* Parse filename to get TLI and segno. */
3830
3831 /* Ignore anything that's not from the TLI of interest. */
3832 if (tli != file_tli)
3833 continue;
3834
3835 /* If it's the oldest so far, update oldest_segno. */
3836 if (oldest_segno == 0 || file_segno < oldest_segno)
3838 }
3839
3840 FreeDir(xldir);
3841 return oldest_segno;
3842}

References AllocateDir(), fb(), FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), and MaybeRemoveOldWalSummaries().

◆ XLogGetReplicationSlotMinimumLSN()

XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )

Definition at line 2682 of file xlog.c.

2683{
2684 XLogRecPtr retval;
2685
2689
2690 return retval;
2691}
XLogRecPtr replicationSlotMinLSN
Definition xlog.c:461

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg(), and reserve_wal_for_local_slot().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5302 of file xlog.c.

5303{
5304 char xlogfname[MAXFNAMELEN];
5307
5308 /* we always switch to a new timeline after archive recovery */
5309 Assert(endTLI != newTLI);
5310
5311 /*
5312 * Update min recovery point one last time.
5313 */
5315
5316 /*
5317 * Calculate the last segment on the old timeline, and the first segment
5318 * on the new timeline. If the switch happens in the middle of a segment,
5319 * they are the same, but if the switch happens exactly at a segment
5320 * boundary, startLogSegNo will be endLogSegNo + 1.
5321 */
5324
5325 /*
5326 * Initialize the starting WAL segment for the new timeline. If the switch
5327 * happens in the middle of a segment, copy data from the last WAL segment
5328 * of the old timeline up to the switch point, to the starting WAL segment
5329 * on the new timeline.
5330 */
5332 {
5333 /*
5334 * Make a copy of the file on the new timeline.
5335 *
5336 * Writing WAL isn't allowed yet, so there are no locking
5337 * considerations. But we should be just as tense as XLogFileInit to
5338 * avoid emplacing a bogus file.
5339 */
5342 }
5343 else
5344 {
5345 /*
5346 * The switch happened at a segment boundary, so just create the next
5347 * segment on the new timeline.
5348 */
5349 int fd;
5350
5352
5353 if (close(fd) != 0)
5354 {
5355 int save_errno = errno;
5356
5358 errno = save_errno;
5359 ereport(ERROR,
5361 errmsg("could not close file \"%s\": %m", xlogfname)));
5362 }
5363 }
5364
5365 /*
5366 * Let's just make real sure there are not .ready or .done flags posted
5367 * for the new segment.
5368 */
5371}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition xlog.c:3455

References Assert, close, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6515 of file xlog.c.

6516{
6517 /*
6518 * If value is "unconditionally true" or "unconditionally false", just
6519 * return it. This provides the normal fast path once recovery is known
6520 * done.
6521 */
6522 if (LocalXLogInsertAllowed >= 0)
6523 return (bool) LocalXLogInsertAllowed;
6524
6525 /*
6526 * Else, must check to see if we're still in recovery.
6527 */
6528 if (RecoveryInProgress())
6529 return false;
6530
6531 /*
6532 * On exit from recovery, reset to "unconditionally true", since there is
6533 * no need to keep checking.
6534 */
6535 LocalXLogInsertAllowed = 1;
6536 return true;
6537}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData *rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
uint64  fpi_bytes,
bool  topxid_included 
)

Definition at line 749 of file xlog.c.

755{
758 bool inserted;
759 XLogRecord *rechdr = (XLogRecord *) rdata->data;
760 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
766
767 /* Does this record type require special handling? */
768 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
769 {
770 if (info == XLOG_SWITCH)
772 else if (info == XLOG_CHECKPOINT_REDO)
774 }
775
776 /* we assume that all of the record header is in the first chunk */
778
779 /* cross-check on whether we should be here or not */
780 if (!XLogInsertAllowed())
781 elog(ERROR, "cannot make new WAL entries during recovery");
782
783 /*
784 * Given that we're not in recovery, InsertTimeLineID is set and can't
785 * change, so we can read it without a lock.
786 */
788
789 /*----------
790 *
791 * We have now done all the preparatory work we can without holding a
792 * lock or modifying shared state. From here on, inserting the new WAL
793 * record to the shared WAL buffer cache is a two-step process:
794 *
795 * 1. Reserve the right amount of space from the WAL. The current head of
796 * reserved space is kept in Insert->CurrBytePos, and is protected by
797 * insertpos_lck.
798 *
799 * 2. Copy the record to the reserved WAL space. This involves finding the
800 * correct WAL buffer containing the reserved space, and copying the
801 * record in place. This can be done concurrently in multiple processes.
802 *
803 * To keep track of which insertions are still in-progress, each concurrent
804 * inserter acquires an insertion lock. In addition to just indicating that
805 * an insertion is in progress, the lock tells others how far the inserter
806 * has progressed. There is a small fixed number of insertion locks,
807 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
808 * boundary, it updates the value stored in the lock to how far it has
809 * inserted, to allow the previous buffer to be flushed.
810 *
811 * Holding onto an insertion lock also protects RedoRecPtr and
812 * fullPageWrites from changing until the insertion is finished.
813 *
814 * Step 2 can usually be done completely in parallel. If the required WAL
815 * page is not initialized yet, you have to grab WALBufMappingLock to
816 * initialize it, but the WAL writer tries to do that ahead of insertions
817 * to avoid that from happening in the critical path.
818 *
819 *----------
820 */
822
823 if (likely(class == WALINSERT_NORMAL))
824 {
826
827 /*
828 * Check to see if my copy of RedoRecPtr is out of date. If so, may
829 * have to go back and have the caller recompute everything. This can
830 * only happen just after a checkpoint, so it's better to be slow in
831 * this case and fast otherwise.
832 *
833 * Also check to see if fullPageWrites was just turned on or there's a
834 * running backup (which forces full-page writes); if we weren't
835 * already doing full-page writes then go back and recompute.
836 *
837 * If we aren't doing full-page writes then RedoRecPtr doesn't
838 * actually affect the contents of the XLOG record, so we'll update
839 * our local copy but not force a recomputation. (If doPageWrites was
840 * just turned off, we could recompute the record without full pages,
841 * but we choose not to bother.)
842 */
843 if (RedoRecPtr != Insert->RedoRecPtr)
844 {
846 RedoRecPtr = Insert->RedoRecPtr;
847 }
848 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
849
850 if (doPageWrites &&
853 {
854 /*
855 * Oops, some buffer now needs to be backed up that the caller
856 * didn't back up. Start over.
857 */
860 return InvalidXLogRecPtr;
861 }
862
863 /*
864 * Reserve space for the record in the WAL. This also sets the xl_prev
865 * pointer.
866 */
868 &rechdr->xl_prev);
869
870 /* Normal records are always inserted. */
871 inserted = true;
872 }
873 else if (class == WALINSERT_SPECIAL_SWITCH)
874 {
875 /*
876 * In order to insert an XLOG_SWITCH record, we need to hold all of
877 * the WAL insertion locks, not just one, so that no one else can
878 * begin inserting a record until we've figured out how much space
879 * remains in the current WAL segment and claimed all of it.
880 *
881 * Nonetheless, this case is simpler than the normal cases handled
882 * below, which must check for changes in doPageWrites and RedoRecPtr.
883 * Those checks are only needed for records that can contain buffer
884 * references, and an XLOG_SWITCH record never does.
885 */
889 }
890 else
891 {
893
894 /*
895 * We need to update both the local and shared copies of RedoRecPtr,
896 * which means that we need to hold all the WAL insertion locks.
897 * However, there can't be any buffer references, so as above, we need
898 * not check RedoRecPtr before inserting the record; we just need to
899 * update it afterwards.
900 */
904 &rechdr->xl_prev);
905 RedoRecPtr = Insert->RedoRecPtr = StartPos;
906 inserted = true;
907 }
908
909 if (inserted)
910 {
911 /*
912 * Now that xl_prev has been filled in, calculate CRC of the record
913 * header.
914 */
915 rdata_crc = rechdr->xl_crc;
918 rechdr->xl_crc = rdata_crc;
919
920 /*
921 * All the record data, including the header, is now ready to be
922 * inserted. Copy the record in the space reserved.
923 */
924 CopyXLogRecordToWAL(rechdr->xl_tot_len,
927
928 /*
929 * Unless record is flagged as not important, update LSN of last
930 * important record in the current slot. When holding all locks, just
931 * update the first one.
932 */
933 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
934 {
935 int lockno = holdingAllLocks ? 0 : MyLockNo;
936
938 }
939 }
940 else
941 {
942 /*
943 * This was an xlog-switch record, but the current insert location was
944 * already exactly at the beginning of a segment, so there was no need
945 * to do anything.
946 */
947 }
948
949 /*
950 * Done! Let others know that we're finished.
951 */
953
955
957
958 /*
959 * Mark top transaction id is logged (if needed) so that we should not try
960 * to log it again with the next WAL record in the current subtransaction.
961 */
962 if (topxid_included)
964
965 /*
966 * Update shared LogwrtRqst.Write, if we crossed page boundary.
967 */
969 {
971 /* advance global request to include new block(s) */
976 }
977
978 /*
979 * If this was an XLOG_SWITCH record, flush the record and the empty
980 * padding space that fills the rest of the segment, and perform
981 * end-of-segment actions (eg, notifying archiver).
982 */
983 if (class == WALINSERT_SPECIAL_SWITCH)
984 {
987
988 /*
989 * Even though we reserved the rest of the segment for us, which is
990 * reflected in EndPos, we return a pointer to just the end of the
991 * xlog-switch record.
992 */
993 if (inserted)
994 {
997 {
999
1000 if (offset == EndPos % XLOG_BLCKSZ)
1002 else
1004 }
1005 }
1006 }
1007
1008#ifdef WAL_DEBUG
1009 if (XLOG_DEBUG)
1010 {
1012 XLogRecord *record;
1016 char *errormsg = NULL;
1018
1020
1022 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1023
1024 /*
1025 * We have to piece together the WAL record data from the XLogRecData
1026 * entries, so that we can pass it to the rm_desc function as one
1027 * contiguous chunk.
1028 */
1030 for (; rdata != NULL; rdata = rdata->next)
1032
1033 /* We also need temporary space to decode the record. */
1034 record = (XLogRecord *) recordBuf.data;
1037
1038 if (!debug_reader)
1040 XL_ROUTINE(.page_read = NULL,
1041 .segment_open = NULL,
1042 .segment_close = NULL),
1043 NULL);
1044 if (!debug_reader)
1045 {
1046 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1047 }
1049 decoded,
1050 record,
1051 EndPos,
1052 &errormsg))
1053 {
1054 appendStringInfo(&buf, "error decoding record: %s",
1055 errormsg ? errormsg : "no error message");
1056 }
1057 else
1058 {
1059 appendStringInfoString(&buf, " - ");
1060
1061 debug_reader->record = decoded;
1063 debug_reader->record = NULL;
1064 }
1065 elog(LOG, "%s", buf.data);
1066
1067 pfree(decoded);
1068 pfree(buf.data);
1069 pfree(recordBuf.data);
1071 }
1072#endif
1073
1074 /*
1075 * Update our global variables
1076 */
1079
1080 /* Report WAL traffic to the instrumentation. */
1081 if (inserted)
1082 {
1083 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1087
1088 /* Required for the flush of pending stats WAL data */
1089 pgstat_report_fixed = true;
1090 }
1091
1092 return EndPos;
1093}
#define likely(x)
Definition c.h:411
#define unlikely(x)
Definition c.h:412
void * palloc(Size size)
Definition mcxt.c:1387
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
void MarkSubxactTopXidLogged(void)
Definition xact.c:592
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition xact.c:542
XLogRecPtr XactLastRecEnd
Definition xlog.c:257
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition xlog.c:1231
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1114
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1170
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, fb(), FIN_CRC32C, holdingAllLocks, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogRecPtrIsValid, and XLogSegmentOffset.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3145 of file xlog.c.

3146{
3147 /*
3148 * During recovery, we don't flush WAL but update minRecoveryPoint
3149 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3150 * would need to be updated.
3151 *
3152 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3153 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3154 * This check should be consistent with the one in XLogFlush().
3155 */
3156 if (!XLogInsertAllowed())
3157 {
3158 /* Quick exit if already known to be updated or cannot be updated */
3160 return false;
3161
3162 /*
3163 * An invalid minRecoveryPoint means that we need to recover all the
3164 * WAL, i.e., we're doing crash recovery. We never modify the control
3165 * file's value in that case, so we can short-circuit future checks
3166 * here too. This triggers a quick exit path for the startup process,
3167 * which cannot update its local copy of minRecoveryPoint as long as
3168 * it has not replayed all WAL available when doing crash recovery.
3169 */
3171 {
3172 updateMinRecoveryPoint = false;
3173 return false;
3174 }
3175
3176 /*
3177 * Update local copy of minRecoveryPoint. But if the lock is busy,
3178 * just return a conservative guess.
3179 */
3181 return true;
3185
3186 /*
3187 * Check minRecoveryPoint for any other process than the startup
3188 * process doing crash recovery, which should not update the control
3189 * file value if crash recovery is still running.
3190 */
3192 updateMinRecoveryPoint = false;
3193
3194 /* check again */
3196 return false;
3197 else
3198 return true;
3199 }
3200
3201 /* Quick exit if already known flushed */
3202 if (record <= LogwrtResult.Flush)
3203 return false;
3204
3205 /* read LogwrtResult and update local state */
3207
3208 /* check again */
3209 if (record <= LogwrtResult.Flush)
3210 return false;
3211
3212 return true;
3213}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1347

References ControlFile, fb(), XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsValid.

Referenced by GetVictimBuffer(), SetHintBits(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8158 of file xlog.c.

8159{
8160 XLogBeginInsert();
8161 XLogRegisterData(&nextOid, sizeof(Oid));
8162 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8163
8164 /*
8165 * We need not flush the NEXTOID record immediately, because any of the
8166 * just-allocated OIDs could only reach disk as part of a tuple insert or
8167 * update that would have its own XLOG record that must follow the NEXTOID
8168 * record. Therefore, the standard buffer LSN interlock applied to those
8169 * records will ensure no such OID reaches disk before the NEXTOID record
8170 * does.
8171 *
8172 * Note, however, that the above statement only covers state "within" the
8173 * database. When we use a generated OID as a file or directory name, we
8174 * are in a sense violating the basic WAL rule, because that filesystem
8175 * change may reach disk before the NEXTOID WAL record does. The impact
8176 * of this is that if a database crash occurs immediately afterward, we
8177 * might after restart re-generate the same OID and find that it conflicts
8178 * with the leftover file or directory. But since for safety's sake we
8179 * always loop until finding a nonconflicting filename, this poses no real
8180 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8181 */
8182}

References fb(), XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1947 of file xlog.c.

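1948{
1949 uint64 fullsegs;
1950 uint32 fullpages;
1951 uint32 offset;
1952 uint64 result;
1953
1954 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1955
1956 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1957 offset = ptr % XLOG_BLCKSZ;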
1958
1959 if (fullpages == 0)
1960 {
1961 result = fullsegs * UsableBytesInSegment;
1962 if (offset > 0)
1963 {
1964 Assert(offset >= SizeOfXLogLongPHD);
1965 result += offset - SizeOfXLogLongPHD;
1966 }
1967 }
1968 else
1969 {
1970 result = fullsegs * UsableBytesInSegment +
1971 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1972 (fullpages - 1) * UsableBytesInPage; /* full pages */
1973 if (offset > 0)
1974 {
1975 Assert(offset >= SizeOfXLogShortPHD);
1976 result += offset - SizeOfXLogShortPHD;
1977 }
1978 }
1979
1980 return result;
1981}

References Assert, fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8238 of file xlog.c.

8239{
8248 {
8249 /*
8250 * The change in number of backend slots doesn't need to be WAL-logged
8251 * if archiving is not enabled, as you can't start archive recovery
8252 * with wal_level=minimal anyway. We don't really care about the
8253 * values in pg_control either if wal_level=minimal, but seems better
8254 * to keep them up-to-date to avoid confusion.
8255 */
8257 {
8260
8262 xlrec.max_worker_processes = max_worker_processes;
8263 xlrec.max_wal_senders = max_wal_senders;
8264 xlrec.max_prepared_xacts = max_prepared_xacts;
8265 xlrec.max_locks_per_xact = max_locks_per_xact;
8266 xlrec.wal_level = wal_level;
8267 xlrec.wal_log_hints = wal_log_hints;
8268 xlrec.track_commit_timestamp = track_commit_timestamp;
8269
8271 XLogRegisterData(&xlrec, sizeof(xlrec));
8272
8275 }
8276
8278
8288
8290 }
8291}

References ControlFile, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *rpName)

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2612 of file xlog.c.

2613{
2614 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2615 bool sleeping;
2616 bool wakeup = false;
2618
2622 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2623 XLogCtl->asyncXactLSN = asyncXactLSN;
2625
2626 /*
2627 * If somebody else already called this function with a more aggressive
2628 * LSN, they will have done what we needed (and perhaps more).
2629 */
2630 if (asyncXactLSN <= prevAsyncXactLSN)
2631 return;
2632
2633 /*
2634 * If the WALWriter is sleeping, kick it to make it come out of low-power
2635 * mode, so that this async commit will reach disk within the expected
2636 * amount of time. Otherwise, determine whether it has enough WAL
2637 * available to flush, the same way that XLogBackgroundFlush() does.
2638 */
2639 if (sleeping)
2640 wakeup = true;
2641 else
2642 {
2643 int flushblocks;
2644
2646
2647 flushblocks =
2649
2651 wakeup = true;
2652 }
2653
2654 if (wakeup)
2655 {
2656 volatile PROC_HDR *procglobal = ProcGlobal;
2657 ProcNumber walwriterProc = procglobal->walwriterProc;
2658
2659 if (walwriterProc != INVALID_PROC_NUMBER)
2660 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2661 }
2662}
void SetLatch(Latch *latch)
Definition latch.c:290
#define GetPGProcByNumber(n)
Definition proc.h:440
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
PROC_HDR * ProcGlobal
Definition proc.c:79
ProcNumber walwriterProc
Definition proc.h:424
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References XLogCtlData::asyncXactLSN, fb(), XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 5011 of file xlog.c.

5012{
5013 bool foundCFile,
5014 foundXLog;
5015 char *allocptr;
5016 int i;
5018
5019#ifdef WAL_DEBUG
5020
5021 /*
5022 * Create a memory context for WAL debugging that's exempt from the normal
5023 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
5024 * an allocation fails, but wal_debug is not for production use anyway.
5025 */
5026 if (walDebugCxt == NULL)
5027 {
5029 "WAL Debug",
5032 }
5033#endif
5034
5035
5036 XLogCtl = (XLogCtlData *)
5037 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
5038
5041 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
5042
5043 if (foundCFile || foundXLog)
5044 {
5045 /* both should be present or neither */
5047
5048 /* Initialize local copy of WALInsertLocks */
5050
5051 if (localControlFile)
5053 return;
5054 }
5055 memset(XLogCtl, 0, sizeof(XLogCtlData));
5056
5057 /*
5058 * Already have read control file locally, unless in bootstrap mode. Move
5059 * contents into shared memory.
5060 */
5061 if (localControlFile)
5062 {
5065 }
5066
5067 /*
5068 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5069 * multiple of the alignment for same, so no extra alignment padding is
5070 * needed here.
5071 */
5072 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5075
5076 for (i = 0; i < XLOGbuffers; i++)
5077 {
5079 }
5080
5081 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5082 allocptr += sizeof(WALInsertLockPadded) -
5087
5088 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5089 {
5093 }
5094
5095 /*
5096 * Align the start of the page buffers to a full xlog block size boundary.
5097 * This simplifies some calculations in XLOG insertion. It is also
5098 * required for O_DIRECT.
5099 */
5103
5104 /*
5105 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5106 * in additional info.)
5107 */
5111 XLogCtl->WalWriterSleeping = false;
5112
5119}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:819
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:698
MemoryContext TopMemoryContext
Definition mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition mcxt.c:743
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:378
#define SpinLockInit(lock)
Definition spin.h:57
int XLogCacheBlck
Definition xlog.c:496
WALInsertLockPadded * WALInsertLocks
Definition xlog.c:447
slock_t insertpos_lck
Definition xlog.c:401
Size XLOGShmemSize(void)
Definition xlog.c:4961

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, fb(), i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4961 of file xlog.c.

4962{
4963 Size size;
4964
4965 /*
4966 * If the value of wal_buffers is -1, use the preferred auto-tune value.
4967 * This isn't an amazingly clean place to do this, but we must wait till
4968 * NBuffers has received its final value, and must do it before using the
4969 * value of XLOGbuffers to do anything important.
4970 *
4971 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4972 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4973 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4974 * the matter with PGC_S_OVERRIDE.
4975 */
4976 if (XLOGbuffers == -1)
4977 {
4978 char buf[32];
4979
4980 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4981 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4982 PGC_S_DYNAMIC_DEFAULT);
4983 if (XLOGbuffers == -1) /* failed to apply it? */
4984 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4985 PGC_S_OVERRIDE);
4986 }
4987 Assert(XLOGbuffers > 0);
4988
4989 /* XLogCtl */
4990 size = sizeof(XLogCtlData);
4991
4992 /* WAL insertion locks, plus alignment */
4993 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4994 /* xlblocks array */
4995 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4996 /* extra alignment padding for XLOG I/O buffers */
4997 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4998 /* and the buffers themselves */
4999 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
5000
5001 /*
5002 * Note: we don't count ControlFileData, it comes out of the "slop factor"
5003 * added by CreateSharedMemoryAndSemaphores. This lets us use this
5004 * routine again below to compute the actual allocation size.
5005 */
5006
5007 return size;
5008}
#define Max(x, y)
Definition c.h:991
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_POSTMASTER
Definition guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition shmem.c:482
Size mul_size(Size s1, Size s2)
Definition shmem.c:497

References add_size(), Assert, buf, fb(), Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9636 of file xlog.c.

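9637{
9638 Assert(AmStartupProcess() || !IsUnderPostmaster);
9639
9640 ShutdownWalRcv();
9641 ResetInstallXLogFileSegmentActive();
9642}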
#define AmStartupProcess()
Definition miscadmin.h:390
void ShutdownWalRcv(void)
void ResetInstallXLogFileSegmentActive(void)
Definition xlog.c:9655

References AmStartupProcess, Assert, IsUnderPostmaster, ResetInstallXLogFileSegmentActive(), and ShutdownWalRcv().

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2307 of file xlog.c.

2308{
2309 bool ispartialpage;
2310 bool last_iteration;
2311 bool finishing_seg;
2312 int curridx;
2313 int npages;
2314 int startidx;
2316
2317 /* We should always be inside a critical section here */
2319
2320 /*
2321 * Update local LogwrtResult (caller probably did this already, but...)
2322 */
2324
2325 /*
2326 * Since successive pages in the xlog cache are consecutively allocated,
2327 * we can usually gather multiple pages together and issue just one
2328 * write() call. npages is the number of pages we have determined can be
2329 * written together; startidx is the cache block index of the first one,
2330 * and startoffset is the file offset at which it should go. The latter
2331 * two variables are only valid when npages > 0, but we must initialize
2332 * all of them to keep the compiler quiet.
2333 */
2334 npages = 0;
2335 startidx = 0;
2336 startoffset = 0;
2337
2338 /*
2339 * Within the loop, curridx is the cache block index of the page to
2340 * consider writing. Begin at the buffer containing the next unwritten
2341 * page, or last partially written page.
2342 */
2344
2345 while (LogwrtResult.Write < WriteRqst.Write)
2346 {
2347 /*
2348 * Make sure we're not ahead of the insert process. This could happen
2349 * if we're passed a bogus WriteRqst.Write that is past the end of the
2350 * last page that's been initialized by AdvanceXLInsertBuffer.
2351 */
2353
2354 if (LogwrtResult.Write >= EndPtr)
2355 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2358
2359 /* Advance LogwrtResult.Write to end of current buffer page */
2362
2365 {
2366 /*
2367 * Switch to new logfile segment. We cannot have any pending
2368 * pages here (since we dump what we have at segment end).
2369 */
2370 Assert(npages == 0);
2371 if (openLogFile >= 0)
2372 XLogFileClose();
2375 openLogTLI = tli;
2376
2377 /* create/use new log file */
2380 }
2381
2382 /* Make sure we have the current logfile open */
2383 if (openLogFile < 0)
2384 {
2387 openLogTLI = tli;
2390 }
2391
2392 /* Add current page to the set of pending pages-to-dump */
2393 if (npages == 0)
2394 {
2395 /* first of group */
2396 startidx = curridx;
2399 }
2400 npages++;
2401
2402 /*
2403 * Dump the set if this will be the last loop iteration, or if we are
2404 * at the last page of the cache area (since the next page won't be
2405 * contiguous in memory), or if we are at the end of the logfile
2406 * segment.
2407 */
2409
2412
2413 if (last_iteration ||
2416 {
2417 char *from;
2418 Size nbytes;
2419 Size nleft;
2422
2423 /* OK to write the page(s) */
2424 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2425 nbytes = npages * (Size) XLOG_BLCKSZ;
2426 nleft = nbytes;
2427 do
2428 {
2429 errno = 0;
2430
2431 /*
2432 * Measure I/O timing to write WAL data, for pg_stat_io.
2433 */
2435
2439
2441 IOOP_WRITE, start, 1, written);
2442
2443 if (written <= 0)
2444 {
2445 char xlogfname[MAXFNAMELEN];
2446 int save_errno;
2447
2448 if (errno == EINTR)
2449 continue;
2450
2451 save_errno = errno;
2454 errno = save_errno;
2455 ereport(PANIC,
2457 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2459 }
2460 nleft -= written;
2461 from += written;
2463 } while (nleft > 0);
2464
2465 npages = 0;
2466
2467 /*
2468 * If we just wrote the whole last page of a logfile segment,
2469 * fsync the segment immediately. This avoids having to go back
2470 * and re-open prior segments when an fsync request comes along
2471 * later. Doing it here ensures that one and only one backend will
2472 * perform this fsync.
2473 *
2474 * This is also the right place to notify the Archiver that the
2475 * segment is ready to copy to archival storage, and to update the
2476 * timer for archive_timeout, and to signal for a checkpoint if
2477 * too many logfile segments have been used since the last
2478 * checkpoint.
2479 */
2480 if (finishing_seg)
2481 {
2483
2484 /* signal that we need to wakeup walsenders later */
2486
2487 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2488
2489 if (XLogArchivingActive())
2491
2494
2495 /*
2496 * Request a checkpoint if we've consumed too much xlog since
2497 * the last one. For speed, we first check using the local
2498 * copy of RedoRecPtr, which might be out of date; if it looks
2499 * like a checkpoint is needed, forcibly update RedoRecPtr and
2500 * recheck.
2501 */
2503 {
2504 (void) GetRedoRecPtr();
2507 }
2508 }
2509 }
2510
2511 if (ispartialpage)
2512 {
2513 /* Only asked to write a partial page */
2515 break;
2516 }
2518
2519 /* If flexible, break out of loop as soon as we wrote something */
2520 if (flexible && npages == 0)
2521 break;
2522 }
2523
2524 Assert(npages == 0);
2525
2526 /*
2527 * If asked to flush, do so
2528 */
2529 if (LogwrtResult.Flush < WriteRqst.Flush &&
2531 {
2532 /*
2533 * Could get here without iterating above loop, in which case we might
2534 * have no open file or the wrong one. However, we do not need to
2535 * fsync more than one file.
2536 */
2539 {
2540 if (openLogFile >= 0 &&
2543 XLogFileClose();
2544 if (openLogFile < 0)
2545 {
2548 openLogTLI = tli;
2551 }
2552
2554 }
2555
2556 /* signal that we need to wakeup walsenders later */
2558
2560 }
2561
2562 /*
2563 * Update shared-memory status
2564 *
2565 * We make sure that the shared 'request' values do not fall behind the
2566 * 'result' values. This is not absolutely essential, but it saves some
2567 * code in a couple of places.
2568 */
2575
2576 /*
2577 * We write Write first, then a barrier, then Flush. When reading, the opposite must
2578 * be done (with a matching barrier in between), so that we always see a
2579 * Flush value that trails behind the Write value seen.
2580 */
2584
2585#ifdef USE_ASSERT_CHECKING
2586 {
2590
2596
2597 /* WAL written to disk is never behind WAL flushed */
2598 Assert(Write >= Flush);
2599
2600 /* WAL inserted to buffers is never behind WAL written */
2601 Assert(Insert >= Write);
2602 }
2603#endif
2604}
void ReserveExternalFD(void)
Definition fd.c:1206
volatile uint32 CritSectionCount
Definition globals.c:45
XLogRecPtr Flush
XLogRecPtr Write
#define WalSndWakeupRequest()
Definition walsender.h:58
#define EINTR
Definition win32_port.h:361
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6563
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition xlog.c:3655
#define NextBufIdx(idx)
Definition xlog.c:586
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition xlog.c:8858
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition xlog.c:2283
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), fb(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition xlog.h:68
@ ARCHIVE_MODE_OFF
Definition xlog.h:66
@ ARCHIVE_MODE_ON
Definition xlog.h:67

Definition at line 194 of file xlog.c.

194 {
195 {"always", ARCHIVE_MODE_ALWAYS, false},
196 {"on", ARCHIVE_MODE_ON, false},
197 {"off", ARCHIVE_MODE_OFF, false},
198 {"true", ARCHIVE_MODE_ON, true},
199 {"false", ARCHIVE_MODE_OFF, true},
200 {"yes", ARCHIVE_MODE_ON, true},
201 {"no", ARCHIVE_MODE_OFF, true},
202 {"1", ARCHIVE_MODE_ON, true},
203 {"0", ARCHIVE_MODE_OFF, true},
204 {NULL, 0, false}
205};

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 169 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 162 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 135 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 136 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 125 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 220 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 227 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 239 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 138 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 118 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 638 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 639 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 163 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 650 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 127 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 129 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 128 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 139 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 130 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 119 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 126 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 131 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

Definition at line 137 of file xlog.c.

Referenced by ApplyLauncherMain(), launch_sync_worker(), and WaitForWALToBecomeAvailable().

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 146 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 174 of file xlog.c.

174 {
175 {"fsync", WAL_SYNC_METHOD_FSYNC, false},
176#ifdef HAVE_FSYNC_WRITETHROUGH
177 {"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
178#endif
179 {"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
180#ifdef O_SYNC
181 {"open_sync", WAL_SYNC_METHOD_OPEN, false},
182#endif
183#ifdef O_DSYNC
184 {"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
185#endif
186 {NULL, 0, false}
187};

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 121 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 120 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl