PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "access/xlogwait.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/slotsync.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, uint64 fpi_bytes, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
const char * show_effective_wal_level (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
void ResetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 114 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 605 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 582 of file xlog.c.

583 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
static int fb(int x)

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 586 of file xlog.c.

587 : ((idx) + 1))
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 153 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
do { \
} while (0)
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
pg_atomic_uint64 logWriteResult
Definition xlog.c:474
pg_atomic_uint64 logFlushResult
Definition xlog.c:475
static XLogCtlData * XLogCtl
Definition xlog.c:568

Definition at line 622 of file xlog.c.

623 { \
627 } while (0)

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 599 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 593 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

◆ XLogCtlData

◆ XLogCtlInsert

◆ XLogwrtResult

◆ XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 561 of file xlog.c.

562{
WalInsertClass
Definition xlog.c:562
@ WALINSERT_SPECIAL_SWITCH
Definition xlog.c:564
@ WALINSERT_NORMAL
Definition xlog.c:563
@ WALINSERT_SPECIAL_CHECKPOINT
Definition xlog.c:565

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1992 of file xlog.c.

1993{
1995 int nextidx;
2001 int npages pg_attribute_unused() = 0;
2002
2004
2005 /*
2006 * Now that we have the lock, check if someone initialized the page
2007 * already.
2008 */
2010 {
2012
2013 /*
2014 * Get ending-offset of the buffer page we need to replace (this may
2015 * be zero if the buffer hasn't been used yet). Fall through if it's
2016 * already written out.
2017 */
2020 {
2021 /*
2022 * Nope, got work to do. If we just want to pre-initialize as much
2023 * as we can without flushing, give up now.
2024 */
2025 if (opportunistic)
2026 break;
2027
2028 /* Advance shared memory write request position */
2033
2034 /*
2035 * Acquire an up-to-date LogwrtResult value and see if we still
2036 * need to write it or if someone else already did.
2037 */
2040 {
2041 /*
2042 * Must acquire write lock. Release WALBufMappingLock first,
2043 * to make sure that all insertions that we need to wait for
2044 * can finish (up to this same position). Otherwise we risk
2045 * deadlock.
2046 */
2048
2050
2052
2055 {
2056 /* OK, someone wrote it already */
2058 }
2059 else
2060 {
2061 /* Have to write it ourselves */
2063 WriteRqst.Write = OldPageRqstPtr;
2064 WriteRqst.Flush = 0;
2065 XLogWrite(WriteRqst, tli, false);
2069
2070 /*
2071 * Required for the flush of pending stats WAL data, per
2072 * update of pgWalUsage.
2073 */
2074 pgstat_report_fixed = true;
2075 }
2076 /* Re-acquire WALBufMappingLock and retry */
2078 continue;
2079 }
2080 }
2081
2082 /*
2083 * Now the next buffer slot is free and we can set it up to be the
2084 * next output page.
2085 */
2088
2090
2092
2093 /*
2094 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2095 * before initializing. Otherwise, the old page may be partially
2096 * zeroed but look valid.
2097 */
2100
2101 /*
2102 * Be sure to re-zero the buffer so that bytes beyond what we've
2103 * written will look like zeroes and not valid XLOG records...
2104 */
2106
2107 /*
2108 * Fill the new page's header
2109 */
2110 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2111
2112 /* NewPage->xlp_info = 0; */ /* done by memset */
2113 NewPage->xlp_tli = tli;
2114 NewPage->xlp_pageaddr = NewPageBeginPtr;
2115
2116 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2117
2118 /*
2119 * If online backup is not in progress, mark the header to indicate
2120 * that WAL records beginning in this page have removable backup
2121 * blocks. This allows the WAL archiver to know whether it is safe to
2122 * compress archived WAL data by transforming full-block records into
2123 * the non-full-block format. It is sufficient to record this at the
2124 * page level because we force a page switch (in fact a segment
2125 * switch) when starting a backup, so the flag will be off before any
2126 * records can be written during the backup. At the end of a backup,
2127 * the last page will be marked as all unsafe when perhaps only part
2128 * is unsafe, but at worst the archiver would miss the opportunity to
2129 * compress a few records.
2130 */
2131 if (Insert->runningBackups == 0)
2132 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2133
2134 /*
2135 * If first page of an XLOG segment file, make it a long header.
2136 */
2137 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2138 {
2140
2142 NewLongPage->xlp_seg_size = wal_segment_size;
2143 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2144 NewPage->xlp_info |= XLP_LONG_HEADER;
2145 }
2146
2147 /*
2148 * Make sure the initialization of the page becomes visible to others
2149 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2150 * holding a lock.
2151 */
2153
2156
2157 npages++;
2158 }
2160
2161#ifdef WAL_DEBUG
2162 if (XLOG_DEBUG && npages > 0)
2163 {
2164 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2166 }
2167#endif
2168}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
#define pg_write_barrier()
Definition atomics.h:155
#define pg_attribute_unused()
Definition c.h:132
#define Assert(condition)
Definition c.h:883
#define MemSet(start, val, len)
Definition c.h:1023
size_t Size
Definition c.h:629
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
static void Insert(File file)
Definition fd.c:1297
WalUsage pgWalUsage
Definition instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_EXCLUSIVE
Definition lwlock.h:112
bool pgstat_report_fixed
Definition pgstat.c:218
#define SpinLockRelease(lock)
Definition spin.h:61
#define SpinLockAcquire(lock)
Definition spin.h:59
uint64 system_identifier
Definition pg_control.h:112
int64 wal_buffers_full
Definition instrument.h:57
XLogwrtRqst LogwrtRqst
Definition xlog.c:458
slock_t info_lck
Definition xlog.c:555
XLogRecPtr InitializedUpTo
Definition xlog.c:487
char * pages
Definition xlog.c:494
pg_atomic_uint64 * xlblocks
Definition xlog.c:495
XLogCtlInsert Insert
Definition xlog.c:455
XLogRecPtr Write
Definition xlog.c:330
XLogRecPtr Write
Definition xlog.c:324
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition xlog.c:1511
#define RefreshXLogWriteResult(_target)
Definition xlog.c:622
int wal_segment_size
Definition xlog.c:146
static XLogwrtResult LogwrtResult
Definition xlog.c:614
#define XLogRecPtrToBufIdx(recptr)
Definition xlog.c:593
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition xlog.c:2308
static ControlFileData * ControlFile
Definition xlog.c:576
XLogLongPageHeaderData * XLogLongPageHeader
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
#define XLP_LONG_HEADER
#define XLP_BKP_REMOVABLE
#define XLOG_PAGE_MAGIC
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, fb(), XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, and XLogLongPageHeaderData::xlp_sysid.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2211 of file xlog.c.

2212{
2215}
double CheckPointCompletionTarget
#define newval
static void CalculateCheckpointSegments(void)
Definition xlog.c:2175

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2204 of file xlog.c.

2205{
2208}
int max_wal_size_mb
Definition xlog.c:117

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4832 of file xlog.c.

4833{
4834 /*
4835 * If some checks were deferred, it's possible that the checks will fail
4836 * later during InitializeWalConsistencyChecking(). But in that case, the
4837 * postmaster will exit anyway, so it's safe to proceed with the
4838 * assignment.
4839 *
4840 * Any built-in resource managers specified are assigned immediately,
4841 * which affects WAL created before shared_preload_libraries are
4842 * processed. Any custom resource managers specified won't be assigned
4843 * until after shared_preload_libraries are processed, but that's OK
4844 * because WAL for a custom resource manager can't be written before the
4845 * module is loaded anyway.
4846 */
4848}
bool * wal_consistency_checking
Definition xlog.c:129

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8814 of file xlog.c.

8815{
8817 {
8818 /*
8819 * To ensure that no blocks escape unsynced, force an fsync on the
8820 * currently open log segment (if any). Also, if the open flag is
8821 * changing, close the log file so it will be reopened (with new flag
8822 * bit) at next use.
8823 */
8824 if (openLogFile >= 0)
8825 {
8827 if (pg_fsync(openLogFile) != 0)
8828 {
8829 char xlogfname[MAXFNAMELEN];
8830 int save_errno;
8831
8832 save_errno = errno;
8835 errno = save_errno;
8836 ereport(PANIC,
8838 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8839 }
8840
8843 XLogFileClose();
8844 }
8845 }
8846}
int errcode_for_file_access(void)
Definition elog.c:886
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define PANIC
Definition elog.h:42
#define ereport(elevel,...)
Definition elog.h:150
int pg_fsync(int fd)
Definition fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
static int openLogFile
Definition xlog.c:637
static int get_sync_bit(int method)
Definition xlog.c:8766
int wal_sync_method
Definition xlog.c:133
static TimeLineID openLogTLI
Definition xlog.c:639
static void XLogFileClose(void)
Definition xlog.c:3677
static XLogSegNo openLogSegNo
Definition xlog.c:638
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), fb(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5127 of file xlog.c.

5128{
5129 CheckPoint checkPoint;
5130 PGAlignedXLogBlock buffer;
5131 XLogPageHeader page;
5133 XLogRecord *record;
5134 char *recptr;
5135 uint64 sysidentifier;
5136 struct timeval tv;
5137 pg_crc32c crc;
5138
5139 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5141
5142 /*
5143 * Select a hopefully-unique system identifier code for this installation.
5144 * We use the result of gettimeofday(), including the fractional seconds
5145 * field, as being about as unique as we can easily get. (Think not to
5146 * use random(), since it hasn't been seeded and there's no portable way
5147 * to seed it other than the system clock value...) The upper half of the
5148 * uint64 value is just the tv_sec part, while the lower half contains the
5149 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5150 * PID for a little extra uniqueness. A person knowing this encoding can
5151 * determine the initialization time of the installation, which could
5152 * perhaps be useful sometimes.
5153 */
5154 gettimeofday(&tv, NULL);
5155 sysidentifier = ((uint64) tv.tv_sec) << 32;
5156 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5157 sysidentifier |= getpid() & 0xFFF;
5158
5159 memset(&buffer, 0, sizeof buffer);
5160 page = (XLogPageHeader) &buffer;
5161
5162 /*
5163 * Set up information for the initial checkpoint record
5164 *
5165 * The initial checkpoint record is written to the beginning of the WAL
5166 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5167 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5168 */
5172 checkPoint.fullPageWrites = fullPageWrites;
5174 checkPoint.wal_level = wal_level;
5175 checkPoint.nextXid =
5177 checkPoint.nextOid = FirstGenbkiObjectId;
5178 checkPoint.nextMulti = FirstMultiXactId;
5179 checkPoint.nextMultiOffset = 1;
5181 checkPoint.oldestXidDB = Template1DbOid;
5182 checkPoint.oldestMulti = FirstMultiXactId;
5183 checkPoint.oldestMultiDB = Template1DbOid;
5186 checkPoint.time = (pg_time_t) time(NULL);
5188
5189 TransamVariables->nextXid = checkPoint.nextXid;
5190 TransamVariables->nextOid = checkPoint.nextOid;
5192 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5193 AdvanceOldestClogXid(checkPoint.oldestXid);
5194 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5195 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5197
5198 /* Set up the XLOG page header */
5199 page->xlp_magic = XLOG_PAGE_MAGIC;
5200 page->xlp_info = XLP_LONG_HEADER;
5204 longpage->xlp_sysid = sysidentifier;
5205 longpage->xlp_seg_size = wal_segment_size;
5206 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5207
5208 /* Insert the initial checkpoint record */
5209 recptr = ((char *) page + SizeOfXLogLongPHD);
5210 record = (XLogRecord *) recptr;
5211 record->xl_prev = 0;
5212 record->xl_xid = InvalidTransactionId;
5213 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5215 record->xl_rmid = RM_XLOG_ID;
5217 /* fill the XLogRecordDataHeaderShort struct */
5218 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5219 *(recptr++) = sizeof(checkPoint);
5220 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5221 recptr += sizeof(checkPoint);
5222 Assert(recptr - (char *) record == record->xl_tot_len);
5223
5225 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5226 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5227 FIN_CRC32C(crc);
5228 record->xl_crc = crc;
5229
5230 /* Create first XLOG segment file */
5233
5234 /*
5235 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5236 * close the file again in a moment.
5237 */
5238
5239 /* Write the first page with the initial record */
5240 errno = 0;
5242 if (write(openLogFile, &buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5243 {
5244 /* if write didn't set errno, assume problem is no disk space */
5245 if (errno == 0)
5246 errno = ENOSPC;
5247 ereport(PANIC,
5249 errmsg("could not write bootstrap write-ahead log file: %m")));
5250 }
5252
5254 if (pg_fsync(openLogFile) != 0)
5255 ereport(PANIC,
5257 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5259
5260 if (close(openLogFile) != 0)
5261 ereport(PANIC,
5263 errmsg("could not close bootstrap write-ahead log file: %m")));
5264
5265 openLogFile = -1;
5266
5267 /* Now create pg_control */
5268 InitControlFile(sysidentifier, data_checksum_version);
5269 ControlFile->time = checkPoint.time;
5270 ControlFile->checkPoint = checkPoint.redo;
5271 ControlFile->checkPointCopy = checkPoint;
5272
5273 /* some additional ControlFile fields are set in WriteControlFile() */
5275
5276 /* Bootstrap the commit log, too */
5277 BootStrapCLOG();
5281
5282 /*
5283 * Force control file to be read - in contrast to normal processing we'd
5284 * otherwise never run the checks and GUC related initializations therein.
5285 */
5287}
uint64_t uint64
Definition c.h:557
void BootStrapCLOG(void)
Definition clog.c:832
void BootStrapCommitTs(void)
Definition commit_ts.c:594
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition commit_ts.c:887
#define close(a)
Definition win32.h:12
#define write(a, b, c)
Definition win32.h:14
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:1992
void BootStrapMultiXact(void)
Definition multixact.c:1793
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2014
#define FirstMultiXactId
Definition multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:69
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:158
return crc
int64 pg_time_t
Definition pgtime.h:23
Oid oldestMultiDB
Definition pg_control.h:52
MultiXactId oldestMulti
Definition pg_control.h:51
MultiXactOffset nextMultiOffset
Definition pg_control.h:48
TransactionId newestCommitTsXid
Definition pg_control.h:56
TransactionId oldestXid
Definition pg_control.h:49
TimeLineID PrevTimeLineID
Definition pg_control.h:40
TimeLineID ThisTimeLineID
Definition pg_control.h:39
TransactionId oldestActiveXid
Definition pg_control.h:65
bool fullPageWrites
Definition pg_control.h:42
MultiXactId nextMulti
Definition pg_control.h:47
FullTransactionId nextXid
Definition pg_control.h:45
TransactionId oldestCommitTsXid
Definition pg_control.h:54
pg_time_t time
Definition pg_control.h:53
int wal_level
Definition pg_control.h:43
bool logicalDecodingEnabled
Definition pg_control.h:44
XLogRecPtr redo
Definition pg_control.h:37
Oid oldestXidDB
Definition pg_control.h:50
CheckPoint checkPointCopy
Definition pg_control.h:137
pg_time_t time
Definition pg_control.h:134
XLogRecPtr checkPoint
Definition pg_control.h:135
FullTransactionId nextXid
Definition transam.h:220
XLogRecPtr xlp_pageaddr
XLogRecPtr xl_prev
Definition xlogrecord.h:45
uint8 xl_info
Definition xlogrecord.h:46
uint32 xl_tot_len
Definition xlogrecord.h:43
TransactionId xl_xid
Definition xlogrecord.h:44
RmgrId xl_rmid
Definition xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition subtrans.c:269
#define InvalidTransactionId
Definition transam.h:31
#define FirstGenbkiObjectId
Definition transam.h:195
#define FirstNormalTransactionId
Definition transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition varsup.c:355
TransamVariablesData * TransamVariables
Definition varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition xlog.c:3418
bool fullPageWrites
Definition xlog.c:125
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition xlog.c:4242
void SetInstallXLogFileSegmentActive(void)
Definition xlog.c:9644
int wal_level
Definition xlog.c:134
static void WriteControlFile(void)
Definition xlog.c:4277
#define BootstrapTimeLineID
Definition xlog.c:114
static void ReadControlFile(void)
Definition xlog.c:4387
@ WAL_LEVEL_LOGICAL
Definition xlog.h:77
#define SizeOfXLogLongPHD
#define SizeOfXLogRecordDataHeaderShort
Definition xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition xlogrecord.h:241
#define SizeOfXLogRecord
Definition xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), fb(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, CheckPoint::logicalDecodingEnabled, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, wal_level, CheckPoint::wal_level, WAL_LEVEL_LOGICAL, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogPageHeaderData::xlp_tli, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2175 of file xlog.c.

2176{
2177 double target;
2178
2179 /*-------
2180 * Calculate the distance at which to trigger a checkpoint, to avoid
2181 * exceeding max_wal_size_mb. This is based on two assumptions:
2182 *
2183 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2184 * WAL for two checkpoint cycles to allow us to recover from the
2185 * secondary checkpoint if the first checkpoint failed, though we
2186 * only did this on the primary anyway, not on standby. Keeping just
2187 * one checkpoint simplifies processing and reduces disk space in
2188 * many smaller databases.)
2189 * b) during checkpoint, we consume checkpoint_completion_target *
2190 * number of segments consumed between checkpoints.
2191 *-------
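2192 */
2193 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2194 (1.0 + CheckPointCompletionTarget);
2195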
2196 /* round down */
2197 CheckPointSegments = (int) target;
2198
2199 if (CheckPointSegments < 1)
2200 CheckPointSegments = 1;
2201}
#define ConvertToXSegs(x, segsize)
Definition xlog.c:605
int CheckPointSegments
Definition xlog.c:159

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, fb(), max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4710 of file xlog.c.

4711{
4712 /*
4713 * -1 indicates a request for auto-tune.
4714 */
4715 if (*newval == -1)
4716 {
4717 /*
4718 * If we haven't yet changed the boot_val default of -1, just let it
4719 * be. We'll fix it when XLOGShmemSize is called.
4720 */
4721 if (XLOGbuffers == -1)
4722 return true;
4723
4724 /* Otherwise, substitute the auto-tune value */
4725 *newval = XLOGChooseNumBuffers();
4726 }
4727
4728 /*
4729 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4730 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4731 * the case, we just silently treat such values as a request for the
4732 * minimum. (We could throw an error instead, but that doesn't seem very
4733 * helpful.)
4734 */
4735 if (*newval < 4)
4736 *newval = 4;
4737
4738 return true;
4739}
static int XLOGChooseNumBuffers(void)
Definition xlog.c:4694
int XLOGbuffers
Definition xlog.c:120

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4745 of file xlog.c.

4746{
4747 char *rawstring;
4748 List *elemlist;
4749 ListCell *l;
4750 bool newwalconsistency[RM_MAX_ID + 1];
4751
4752 /* Initialize the array */
4753 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4754
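4755 /* Need a modifiable copy of string */
4756 rawstring = pstrdup(*newval);
4757
4758 /* Parse string into list of identifiers */
4759 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4760 {
4761 /* syntax error in list */
4762 GUC_check_errdetail("List syntax is invalid.");
4763 pfree(rawstring);
4764 list_free(elemlist);
4765 return false;
4766 }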
4767
4768 foreach(l, elemlist)
4769 {
4770 char *tok = (char *) lfirst(l);
4771 int rmid;
4772
4773 /* Check for 'all'. */
4774 if (pg_strcasecmp(tok, "all") == 0)
4775 {
4776 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4777 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4778 newwalconsistency[rmid] = true;
4779 }
4780 else
4781 {
4782 /* Check if the token matches any known resource manager. */
4783 bool found = false;
4784
4785 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4786 {
4787 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4788 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4789 {
4790 newwalconsistency[rmid] = true;
4791 found = true;
4792 break;
4793 }
4794 }
4795 if (!found)
4796 {
4797 /*
4798 * During startup, it might be a not-yet-loaded custom
4799 * resource manager. Defer checking until
4800 * InitializeWalConsistencyChecking().
4801 */
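4802 if (!process_shared_preload_libraries_done)
4803 {
4804 check_wal_consistency_checking_deferred = true;
4805 }
4806 else
4807 {
4808 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4809 pfree(rawstring);
4810 list_free(elemlist);
4811 return false;
4812 }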
4813 }
4814 }
4815 }
4816
4817 pfree(rawstring);
4818 list_free(elemlist);
4819
4820 /* assign new value */
4821 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
4822 if (!*extra)
4823 return false;
4824 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4825 return true;
4826}
#define LOG
Definition elog.h:31
void * guc_malloc(int elevel, size_t size)
Definition guc.c:636
#define GUC_check_errdetail
Definition guc.h:505
void list_free(List *list)
Definition list.c:1546
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
bool process_shared_preload_libraries_done
Definition miscinit.c:1787
#define lfirst(lc)
Definition pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
#define RM_MAX_ID
Definition rmgr.h:33
Definition pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition varlena.c:2730
static bool check_wal_consistency_checking_deferred
Definition xlog.c:169
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, fb(), GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2218 of file xlog.c.

2219{
2220 if (!IsValidWalSegSize(*newval))
2221 {
2222 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2223 return false;
2224 }
2225
2226 return true;
2227}
#define IsValidWalSegSize(size)

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7642 of file xlog.c.

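7643{
7644 CheckPointRelationMap();
7645 CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
7646 CheckPointSnapBuild();
7647 CheckPointLogicalRewriteHeap();
7648 CheckPointReplicationOrigin();
7649
7650 /* Write out all dirty data in SLRUs and the main buffer pool */
7651 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7652 CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
7653 CheckPointCLOG();
7654 CheckPointCommitTs();
7655 CheckPointSUBTRANS();
7656 CheckPointMultiXact();
7657 CheckPointPredicate();
7658 CheckPointBuffers(flags);
7659
7660 /* Perform all queued up fsyncs */
7661 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7662 CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
7663 ProcessSyncRequests();
7664 CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
7665 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
7666
7667 /* We deliberately delay 2PC checkpointing as long as possible */
7668 CheckPointTwoPhase(checkPointRedo);
7669}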
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
void CheckPointBuffers(int flags)
Definition bufmgr.c:4342
void CheckPointCLOG(void)
Definition clog.c:903
void CheckPointCommitTs(void)
Definition commit_ts.c:794
void CheckPointMultiXact(void)
Definition multixact.c:1968
void CheckPointReplicationOrigin(void)
Definition origin.c:602
void CheckPointPredicate(void)
Definition predicate.c:1041
void CheckPointRelationMap(void)
Definition relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
void CheckPointReplicationSlots(bool is_shutdown)
Definition slot.c:2300
void CheckPointSnapBuild(void)
Definition snapbuild.c:1969
TimestampTz ckpt_write_t
Definition xlog.h:173
TimestampTz ckpt_sync_end_t
Definition xlog.h:175
TimestampTz ckpt_sync_t
Definition xlog.h:174
void CheckPointSUBTRANS(void)
Definition subtrans.c:329
void ProcessSyncRequests(void)
Definition sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1822
CheckpointStatsData CheckpointStats
Definition xlog.c:212
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:150

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, fb(), GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5474 of file xlog.c.

5475{
5476 /*
5477 * For archive recovery, the WAL must be generated with at least 'replica'
5478 * wal_level.
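5479 */
5480 if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
5481 {
5482 ereport(FATAL,
5483 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5484 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),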
5485 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5486 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5487 }
5488
5489 /*
5490 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5491 * must have at least as many backend slots as the primary.
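5492 */
5493 if (ArchiveRecoveryRequested && EnableHotStandby)
5494 {
5495 /* We ignore autovacuum_worker_slots when we make this test. */
5496 RecoveryRequiresIntParameter("max_connections",
5497 MaxConnections,
5498 ControlFile->MaxConnections);
5499 RecoveryRequiresIntParameter("max_worker_processes",
5500 max_worker_processes,
5501 ControlFile->max_worker_processes);
5502 RecoveryRequiresIntParameter("max_wal_senders",
5503 max_wal_senders,
5504 ControlFile->max_wal_senders);
5505 RecoveryRequiresIntParameter("max_prepared_transactions",
5506 max_prepared_xacts,
5507 ControlFile->max_prepared_xacts);
5508 RecoveryRequiresIntParameter("max_locks_per_transaction",
5509 max_locks_per_xact,
5510 ControlFile->max_locks_per_xact);
5511 }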
5512}
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errhint(const char *fmt,...)
Definition elog.c:1330
int errcode(int sqlerrcode)
Definition elog.c:863
#define FATAL
Definition elog.h:41
int MaxConnections
Definition globals.c:143
int max_worker_processes
Definition globals.c:144
int max_locks_per_xact
Definition lock.c:53
int max_prepared_xacts
Definition twophase.c:116
int max_wal_senders
Definition walsender.c:129
bool EnableHotStandby
Definition xlog.c:124
@ WAL_LEVEL_MINIMAL
Definition xlog.h:75
bool ArchiveRecoveryRequested
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, fb(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3765 of file xlog.c.

3766{
3767 int save_errno = errno;
3768 XLogSegNo lastRemovedSegNo;
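3769
3770 SpinLockAcquire(&XLogCtl->info_lck);
3771 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3772 SpinLockRelease(&XLogCtl->info_lck);
3773
3774 if (segno <= lastRemovedSegNo)
3775 {
3776 char filename[MAXFNAMELEN];
3777
3778 XLogFileName(filename, tli, segno, wal_segment_size);
3779 errno = save_errno;
3780 ereport(ERROR,
3781 (errcode_for_file_access(),
3782 errmsg("requested WAL segment %s has already been removed",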
3783 filename)));
3784 }
3785 errno = save_errno;
3786}
#define ERROR
Definition elog.h:39
static char * filename
Definition pg_dumpall.c:120
XLogSegNo lastRemovedSegNo
Definition xlog.c:463
uint64 XLogSegNo
Definition xlogdefs.h:52

References ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5378 of file xlog.c.

5380{
5381 /*
5382 * Execute the recovery_end_command, if any.
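5383 */
5384 if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5385 ExecuteRecoveryCommand(recoveryEndCommand,
5386 "recovery_end_command",
5387 true,
5388 WAIT_EVENT_RECOVERY_END_COMMAND);
5389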
5390 /*
5391 * We switched to a new timeline. Clean up segments on the old timeline.
5392 *
5393 * If there are any higher-numbered segments on the old timeline, remove
5394 * them. They might contain valid WAL, but they might also be
5395 * pre-allocated files containing garbage. In any case, they are not part
5396 * of the new timeline's history so we don't need them.
5397 */
5398 RemoveNonParentXlogFiles(EndOfLog, newTLI);
5399
5400 /*
5401 * If the switch happened in the middle of a segment, what to do with the
5402 * last, partial segment on the old timeline? If we don't archive it, and
5403 * the server that created the WAL never archives it either (e.g. because
5404 * it was hit by a meteor), it will never make it to the archive. That's
5405 * OK from our point of view, because the new segment that we created with
5406 * the new TLI contains all the WAL from the old timeline up to the switch
5407 * point. But if you later try to do PITR to the "missing" WAL on the old
5408 * timeline, recovery won't find it in the archive. It's physically
5409 * present in the new file with new TLI, but recovery won't look there
5410 * when it's recovering to the older timeline. On the other hand, if we
5411 * archive the partial segment, and the original server on that timeline
5412 * is still running and archives the completed version of the same segment
5413 * later, it will fail. (We used to do that in 9.4 and below, and it
5414 * caused such problems).
5415 *
5416 * As a compromise, we rename the last segment with the .partial suffix,
5417 * and archive it. Archive recovery will never try to read .partial
5418 * segments, so they will normally go unused. But in the odd PITR case,
5419 * the administrator can copy them manually to the pg_wal directory
5420 * (removing the suffix). They can be useful in debugging, too.
5421 *
5422 * If a .done or .ready file already exists for the old timeline, however,
5423 * we had already determined that the segment is complete, so we can let
5424 * it be archived normally. (In particular, if it was restored from the
5425 * archive to begin with, it's expected to have a .done file).
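5426 */
5427 if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5428 XLogArchivingActive())
5429 {
5430 char origfname[MAXFNAMELEN];
5431 XLogSegNo endLogSegNo;
5432
5433 XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5434 XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5435
5436 if (!XLogArchiveIsReadyOrDone(origfname))
5437 {
5438 char origpath[MAXPGPATH];
5439 char partialfname[MAXFNAMELEN];
5440 char partialpath[MAXPGPATH];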
5441
5442 /*
5443 * If we're summarizing WAL, we can't rename the partial file
5444 * until the summarizer finishes with it, else it will fail.
5445 */
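5446 if (summarize_wal)
5447 WaitForWalSummarization(EndOfLog);
5448
5449 XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);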
5450 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5451 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5452
5453 /*
5454 * Make sure there's no .done or .ready file for the .partial
5455 * file.
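5456 */
5457 XLogArchiveCleanup(partialfname);
5458
5459 durable_rename(origpath, partialpath, ERROR);
5460 XLogArchiveNotify(partialfname);
5461 }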
5462 }
5463}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition fd.c:779
#define MAXPGPATH
#define snprintf
Definition port.h:260
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition xlog.c:3978
#define XLogArchivingActive()
Definition xlog.h:101
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
void XLogArchiveNotify(const char *xlog)
void XLogArchiveCleanup(const char *xlog)
char * recoveryEndCommand

References durable_rename(), ERROR, ExecuteRecoveryCommand(), fb(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4199 of file xlog.c.

4200{
4201 DIR *xldir;
4202 struct dirent *xlde;
4203 char path[MAXPGPATH + sizeof(XLOGDIR)];
4204
4205 xldir = AllocateDir(XLOGDIR);
4206
4207 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4208 {
4209 if (IsBackupHistoryFileName(xlde->d_name))
4210 {
4211 if (XLogArchiveCheckDone(xlde->d_name))
4212 {
4213 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4214 xlde->d_name);
4215 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4216 unlink(path);
4217 XLogArchiveCleanup(xlde->d_name);
4218 }
4219 }
4220 }
4221
4222 FreeDir(xldir);
4223}
#define DEBUG2
Definition elog.h:29
int FreeDir(DIR *dir)
Definition fd.c:3005
DIR * AllocateDir(const char *dirname)
Definition fd.c:2887
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2953
Definition dirent.c:26
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData *  rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1232 of file xlog.c.

1234{
1235 char *currpos;
1236 int freespace;
1237 int written;
1238 XLogRecPtr CurrPos;
1239 XLogPageHeader pagehdr;
1240
1241 /*
1242 * Get a pointer to the right place in the right WAL buffer to start
1243 * inserting to.
1244 */
1245 CurrPos = StartPos;
1246 currpos = GetXLogBuffer(CurrPos, tli);
1247 freespace = INSERT_FREESPACE(CurrPos);
1248
1249 /*
1250 * there should be enough space for at least the first field (xl_tot_len)
1251 * on this page.
1252 */
1253 Assert(freespace >= sizeof(uint32));
1254
1255 /* Copy record data */
1256 written = 0;
1257 while (rdata != NULL)
1258 {
1259 const char *rdata_data = rdata->data;
1260 int rdata_len = rdata->len;
1261
1262 while (rdata_len > freespace)
1263 {
1264 /*
1265 * Write what fits on this page, and continue on the next page.
1266 */
1267 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1268 memcpy(currpos, rdata_data, freespace);
1269 rdata_data += freespace;
1270 rdata_len -= freespace;
1271 written += freespace;
1272 CurrPos += freespace;
1273
1274 /*
1275 * Get pointer to beginning of next page, and set the xlp_rem_len
1276 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1277 *
1278 * It's safe to set the contrecord flag and xlp_rem_len without a
1279 * lock on the page. All the other flags were already set when the
1280 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1281 * only backend that needs to set the contrecord flag.
1282 */
1283 currpos = GetXLogBuffer(CurrPos, tli);
1284 pagehdr = (XLogPageHeader) currpos;
1285 pagehdr->xlp_rem_len = write_len - written;
1286 pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1287
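1288 /* skip over the page header */
1289 if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1290 {
1291 CurrPos += SizeOfXLogLongPHD;
1292 currpos += SizeOfXLogLongPHD;
1293 }
1294 else
1295 {
1296 CurrPos += SizeOfXLogShortPHD;
1297 currpos += SizeOfXLogShortPHD;
1298 }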
1299 freespace = INSERT_FREESPACE(CurrPos);
1300 }
1301
1302 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1303 memcpy(currpos, rdata_data, rdata_len);
1304 currpos += rdata_len;
1305 CurrPos += rdata_len;
1306 freespace -= rdata_len;
1307 written += rdata_len;
1308
1309 rdata = rdata->next;
1310 }
1311 Assert(written == write_len);
1312
1313 /*
1314 * If this was an xlog-switch, it's not enough to write the switch record,
1315 * we also have to consume all the remaining space in the WAL segment. We
1316 * have already reserved that space, but we need to actually fill it.
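1317 */
1318 if (isLogSwitch)
1319 {
1320 /* An xlog-switch record doesn't contain any data besides the header */
1321 Assert(write_len == SizeOfXLogRecord);
1322
1323 /* Assert that we did reserve the right amount of space */
1324 Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1325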
1326 /* Use up all the remaining space on the current page */
1327 CurrPos += freespace;
1328
1329 /*
1330 * Cause all remaining pages in the segment to be flushed, leaving the
1331 * XLog position where it should be, at the start of the next segment.
1332 * We do this one page at a time, to make sure we don't deadlock
1333 * against ourselves if wal_buffers < wal_segment_size.
1334 */
1335 while (CurrPos < EndPos)
1336 {
1337 /*
1338 * The minimal action to flush the page would be to call
1339 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1340 * AdvanceXLInsertBuffer(...). The page would be left initialized
1341 * mostly to zeros, except for the page header (always the short
1342 * variant, as this is never a segment's first page).
1343 *
1344 * The large vistas of zeros are good for compressibility, but the
1345 * headers interrupting them every XLOG_BLCKSZ (with values that
1346 * differ from page to page) are not. The effect varies with
1347 * compression tool, but bzip2 for instance compresses about an
1348 * order of magnitude worse if those headers are left in place.
1349 *
1350 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1351 * called in heavily-loaded circumstances as well as this lightly-
1352 * loaded one) with variant behavior, we just use GetXLogBuffer
1353 * (which itself calls the two methods we need) to get the pointer
1354 * and zero most of the page. Then we just zero the page header.
1355 */
1356 currpos = GetXLogBuffer(CurrPos, tli);
1357 MemSet(currpos, 0, SizeOfXLogShortPHD);
1358
1359 CurrPos += XLOG_BLCKSZ;
1360 }
1361 }
1362 else
1363 {
1364 /* Align the end position, so that the next record starts aligned */
1365 CurrPos = MAXALIGN64(CurrPos);
1366 }
1367
1368 if (CurrPos != EndPos)
1369 ereport(PANIC,
1370 errcode(ERRCODE_DATA_CORRUPTED),
1371 errmsg_internal("space reserved for WAL record does not match what was written"));
1372}
uint32_t uint32
Definition c.h:556
#define MAXALIGN64(LEN)
Definition c.h:861
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
#define ERRCODE_DATA_CORRUPTED
#define INSERT_FREESPACE(endptr)
Definition xlog.c:582
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition xlog.c:1639
#define XLP_FIRST_IS_CONTRECORD
#define SizeOfXLogShortPHD

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), fb(), GetXLogBuffer(), INSERT_FREESPACE, MAXALIGN64, MemSet, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, and XLP_FIRST_IS_CONTRECORD.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 7016 of file xlog.c.

7017{
7018 bool shutdown;
7019 CheckPoint checkPoint;
7023 uint32 freespace;
7027 int nvxids;
7028 int oldXLogAllowed = 0;
7029
7030 /*
7031 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
7032 * issued at a different time.
7033 */
7035 shutdown = true;
7036 else
7037 shutdown = false;
7038
7039 /* sanity check */
7040 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
7041 elog(ERROR, "can't create a checkpoint during recovery");
7042
7043 /*
7044 * Prepare to accumulate statistics.
7045 *
7046 * Note: because it is possible for log_checkpoints to change while a
7047 * checkpoint proceeds, we always accumulate stats, even if
7048 * log_checkpoints is currently off.
7049 */
7052
7053 /*
7054 * Let smgr prepare for checkpoint; this has to happen outside the
7055 * critical section and before we determine the REDO pointer. Note that
7056 * smgr must not do anything that'd have to be undone if we decide no
7057 * checkpoint is needed.
7058 */
7060
7061 /* Run these points outside the critical section. */
7062 INJECTION_POINT("create-checkpoint-initial", NULL);
7063 INJECTION_POINT_LOAD("create-checkpoint-run");
7064
7065 /*
7066 * Use a critical section to force system panic if we have trouble.
7067 */
7069
7070 if (shutdown)
7071 {
7076 }
7077
7078 /* Begin filling in the checkpoint WAL record */
7079 MemSet(&checkPoint, 0, sizeof(checkPoint));
7080 checkPoint.time = (pg_time_t) time(NULL);
7081
7082 /*
7083 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7084 * pointer. This allows us to begin accumulating changes to assemble our
7085 * starting snapshot of locks and transactions.
7086 */
7088 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7089 else
7091
7092 /*
7093 * Get location of last important record before acquiring insert locks (as
7094 * GetLastImportantRecPtr() also locks WAL locks).
7095 */
7097
7098 /*
7099 * If this isn't a shutdown or forced checkpoint, and if there has been no
7100 * WAL activity requiring a checkpoint, skip it. The idea here is to
7101 * avoid inserting duplicate checkpoints when the system is idle.
7102 */
7104 CHECKPOINT_FORCE)) == 0)
7105 {
7107 {
7110 (errmsg_internal("checkpoint skipped because system is idle")));
7111 return false;
7112 }
7113 }
7114
7115 /*
7116 * An end-of-recovery checkpoint is created before anyone is allowed to
7117 * write WAL. To allow us to write the checkpoint record, temporarily
7118 * enable XLogInsertAllowed.
7119 */
7120 if (flags & CHECKPOINT_END_OF_RECOVERY)
7122
7124 if (flags & CHECKPOINT_END_OF_RECOVERY)
7126 else
7127 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7128
7129 /*
7130 * We must block concurrent insertions while examining insert state.
7131 */
7133
7134 checkPoint.fullPageWrites = Insert->fullPageWrites;
7135 checkPoint.wal_level = wal_level;
7136
7137 if (shutdown)
7138 {
7140
7141 /*
7142 * Compute new REDO record ptr = location of next XLOG record.
7143 *
7144 * Since this is a shutdown checkpoint, there can't be any concurrent
7145 * WAL insertion.
7146 */
7147 freespace = INSERT_FREESPACE(curInsert);
7148 if (freespace == 0)
7149 {
7152 else
7154 }
7155 checkPoint.redo = curInsert;
7156
7157 /*
7158 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7159 * this must be done while holding all the insertion locks.
7160 *
7161 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7162 * left pointing past where it really needs to point. This is okay;
7163 * the only consequence is that XLogInsert might back up whole buffers
7164 * that it didn't really need to. We can't postpone advancing
7165 * RedoRecPtr because XLogInserts that happen while we are dumping
7166 * buffers must assume that their buffer changes are not included in
7167 * the checkpoint.
7168 */
7169 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7170 }
7171
7172 /*
7173 * Now we can release the WAL insertion locks, allowing other xacts to
7174 * proceed while we are flushing disk buffers.
7175 */
7177
7178 /*
7179 * If this is an online checkpoint, we have not yet determined the redo
7180 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7181 * record; the LSN at which it starts becomes the new redo pointer. We
7182 * don't do this for a shutdown checkpoint, because in that case no WAL
7183 * can be written between the redo point and the insertion of the
7184 * checkpoint record itself, so the checkpoint record itself serves to
7185 * mark the redo point.
7186 */
7187 if (!shutdown)
7188 {
7189 /* Include WAL level in record for WAL summarizer's benefit. */
7193
7194 /*
7195 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7196 * shared memory and RedoRecPtr in backend-local memory, but we need
7197 * to copy that into the record that will be inserted when the
7198 * checkpoint is complete.
7199 */
7200 checkPoint.redo = RedoRecPtr;
7201 }
7202
7203 /* Update the info_lck-protected copy of RedoRecPtr as well */
7205 XLogCtl->RedoRecPtr = checkPoint.redo;
7207
7208 /*
7209 * If enabled, log checkpoint start. We postpone this until now so as not
7210 * to log anything if we decided to skip the checkpoint.
7211 */
7212 if (log_checkpoints)
7213 LogCheckpointStart(flags, false);
7214
7215 INJECTION_POINT_CACHED("create-checkpoint-run", NULL);
7216
7217 /* Update the process title */
7218 update_checkpoint_display(flags, false, false);
7219
7221
7222 /*
7223 * Get the other info we need for the checkpoint record.
7224 *
7225 * We don't need to save oldestClogXid in the checkpoint, it only matters
7226 * for the short period in which clog is being truncated, and if we crash
7227 * during that we'll redo the clog truncation and fix up oldestClogXid
7228 * there.
7229 */
7231 checkPoint.nextXid = TransamVariables->nextXid;
7232 checkPoint.oldestXid = TransamVariables->oldestXid;
7235
7240
7242 checkPoint.nextOid = TransamVariables->nextOid;
7243 if (!shutdown)
7244 checkPoint.nextOid += TransamVariables->oidCount;
7246
7248
7250 &checkPoint.nextMulti,
7251 &checkPoint.nextMultiOffset,
7252 &checkPoint.oldestMulti,
7253 &checkPoint.oldestMultiDB);
7254
7255 /*
7256 * Having constructed the checkpoint record, ensure all shmem disk buffers
7257 * and commit-log buffers are flushed to disk.
7258 *
7259 * This I/O could fail for various reasons. If so, we will fail to
7260 * complete the checkpoint, but there is no reason to force a system
7261 * panic. Accordingly, exit critical section while doing it.
7262 */
7264
7265 /*
7266 * In some cases there are groups of actions that must all occur on one
7267 * side or the other of a checkpoint record. Before flushing the
7268 * checkpoint record we must explicitly wait for any backend currently
7269 * performing those groups of actions.
7270 *
7271 * One example is end of transaction, so we must wait for any transactions
7272 * that are currently in commit critical sections. If an xact inserted
7273 * its commit record into XLOG just before the REDO point, then a crash
7274 * restart from the REDO point would not replay that record, which means
7275 * that our flushing had better include the xact's update of pg_xact. So
7276 * we wait until it is out of its commit critical section before proceeding.
7277 * See notes in RecordTransactionCommit().
7278 *
7279 * Because we've already released the insertion locks, this test is a bit
7280 * fuzzy: it is possible that we will wait for xacts we didn't really need
7281 * to wait for. But the delay should be short and it seems better to make
7282 * checkpoint take a bit longer than to hold off insertions longer than
7283 * necessary. (In fact, the whole reason we have this issue is that xact.c
7284 * does commit record XLOG insertion and clog update as two separate steps
7285 * protected by different locks, but again that seems best on grounds of
7286 * minimizing lock contention.)
7287 *
7288 * A transaction that has not yet set delayChkptFlags when we look cannot
7289 * be at risk, since it has not inserted its commit record yet; and one
7290 * that's already cleared it is not at risk either, since it's done fixing
7291 * clog and we will correctly flush the update below. So we cannot miss
7292 * any xacts we need to wait for.
7293 */
7295 if (nvxids > 0)
7296 {
7297 do
7298 {
7299 /*
7300 * Keep absorbing fsync requests while we wait. There could even
7301 * be a deadlock if we don't, if the process that prevents the
7302 * checkpoint is trying to add a request to the queue.
7303 */
7305
7307 pg_usleep(10000L); /* wait for 10 msec */
7311 }
7312 pfree(vxids);
7313
7314 CheckPointGuts(checkPoint.redo, flags);
7315
7317 if (nvxids > 0)
7318 {
7319 do
7320 {
7322
7324 pg_usleep(10000L); /* wait for 10 msec */
7328 }
7329 pfree(vxids);
7330
7331 /*
7332 * Take a snapshot of running transactions and write this to WAL. This
7333 * allows us to reconstruct the state of running transactions during
7334 * archive recovery, if required. Skip this if that info is disabled.
7335 *
7336 * If we are shutting down, or Startup process is completing crash
7337 * recovery we don't need to write running xact data.
7338 */
7341
7343
7344 /*
7345 * Now insert the checkpoint record into XLOG.
7346 */
7348 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7352
7354
7355 /*
7356 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7357 * overwritten at next startup. No-one should even try, this just allows
7358 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7359 * to just temporarily disable writing until the system has exited
7360 * recovery.
7361 */
7362 if (shutdown)
7363 {
7364 if (flags & CHECKPOINT_END_OF_RECOVERY)
7366 else
7367 LocalXLogInsertAllowed = 0; /* never again write WAL */
7368 }
7369
7370 /*
7371 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7372 * = end of actual checkpoint record.
7373 */
7374 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7375 ereport(PANIC,
7376 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7377
7378 /*
7379 * Remember the prior checkpoint's redo ptr for
7380 * UpdateCheckPointDistanceEstimate()
7381 */
7383
7384 /*
7385 * Update the control file.
7386 */
7388 if (shutdown)
7391 ControlFile->checkPointCopy = checkPoint;
7392 /* crash recovery should always recover to the end of WAL */
7395
7396 /*
7397 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7398 * unused on non-shutdown checkpoints, but seems useful to store it always
7399 * for debugging purposes.
7400 */
7402
7405
7406 /*
7407 * We are now done with critical updates; no need for system panic if we
7408 * have trouble while fooling with old log segments.
7409 */
7411
7412 /*
7413 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7414 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7415 * where (a) we're not inside of a critical section and (b) we can be
7416 * certain that the relevant record has been flushed to disk, which must
7417 * happen before it can be summarized.
7418 *
7419 * If this is a shutdown checkpoint, then this happens reasonably
7420 * promptly: we've only just inserted and flushed the
7421 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7422 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7423 * record was written before we began flushing data to disk, and that
7424 * could be many minutes ago at this point. However, we don't XLogFlush()
7425 * after inserting that record, so we're not guaranteed that it's on disk
7426 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7427 * record.
7428 */
7430
7431 /*
7432 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7433 */
7435
7436 /*
7437 * Update the average distance between checkpoints if the prior checkpoint
7438 * exists.
7439 */
7442
7443 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7444
7445 /*
7446 * Delete old log files, those no longer needed for last checkpoint to
7447 * prevent the disk holding the xlog from growing full.
7448 */
7454 {
7455 /*
7456 * Some slots have been invalidated; recalculate the old-segment
7457 * horizon, starting again from RedoRecPtr.
7458 */
7461 }
7462 _logSegNo--;
7464 checkPoint.ThisTimeLineID);
7465
7466 /*
7467 * Make more log segments if needed. (Do this after recycling old log
7468 * segments, since that may supply some of the needed files.)
7469 */
7470 if (!shutdown)
7472
7473 /*
7474 * Truncate pg_subtrans if possible. We can throw away all data before
7475 * the oldest XMIN of any running transaction. No future transaction will
7476 * attempt to reference any pg_subtrans entry older than that (see Asserts
7477 * in subtrans.c). During recovery, though, we mustn't do this because
7478 * StartupSUBTRANS hasn't been called yet.
7479 */
7480 if (!RecoveryInProgress())
7482
7483 /* Real work is done; log and update stats. */
7484 LogCheckpointEnd(false);
7485
7486 /* Reset the process title */
7487 update_checkpoint_display(flags, false, true);
7488
7490 NBuffers,
7494
7495 return true;
7496}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:476
void AbsorbSyncRequests(void)
int NBuffers
Definition globals.c:142
#define INJECTION_POINT(name, arg)
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
bool IsLogicalDecodingEnabled(void)
Definition logicalctl.c:204
@ LW_SHARED
Definition lwlock.h:113
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:1946
#define XLOG_CHECKPOINT_REDO
Definition pg_control.h:83
@ DB_SHUTDOWNING
Definition pg_control.h:96
@ DB_SHUTDOWNED
Definition pg_control.h:94
#define XLOG_CHECKPOINT_ONLINE
Definition pg_control.h:70
#define InvalidOid
#define DELAY_CHKPT_START
Definition proc.h:135
#define DELAY_CHKPT_COMPLETE
Definition proc.h:136
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition procarray.c:1984
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition procarray.c:3052
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition procarray.c:2835
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition procarray.c:3007
void pg_usleep(long microsec)
Definition signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition slot.c:2196
@ RS_INVAL_WAL_REMOVED
Definition slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition slot.h:68
XLogRecPtr LogStandbySnapshot(void)
Definition standby.c:1282
TimestampTz ckpt_start_t
Definition xlog.h:172
int ckpt_segs_removed
Definition xlog.h:182
int ckpt_bufs_written
Definition xlog.h:178
int ckpt_segs_recycled
Definition xlog.h:183
XLogRecPtr minRecoveryPoint
Definition pg_control.h:170
XLogRecPtr unloggedLSN
Definition pg_control.h:139
TimeLineID minRecoveryPointTLI
Definition pg_control.h:171
TransactionId oldestCommitTsXid
Definition transam.h:232
TransactionId newestCommitTsXid
Definition transam.h:233
TransactionId oldestXid
Definition transam.h:222
TimeLineID InsertTimeLineID
Definition xlog.c:511
XLogRecPtr RedoRecPtr
Definition xlog.c:459
TimeLineID PrevTimeLineID
Definition xlog.c:512
pg_atomic_uint64 unloggedLSN
Definition xlog.c:466
XLogRecPtr RedoRecPtr
Definition xlog.c:433
void TruncateSUBTRANS(TransactionId oldestXact)
Definition subtrans.c:385
void SyncPreCheckpoint(void)
Definition sync.c:177
void SyncPostCheckpoint(void)
Definition sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition xlog.c:256
bool RecoveryInProgress(void)
Definition xlog.c:6461
static void WALInsertLockRelease(void)
Definition xlog.c:1452
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition xlog.c:1865
static void WALInsertLockAcquireExclusive(void)
Definition xlog.c:1423
static void UpdateControlFile(void)
Definition xlog.c:4619
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition xlog.c:3903
static void LogCheckpointStart(int flags, bool restartpoint)
Definition xlog.c:6776
static XLogRecPtr RedoRecPtr
Definition xlog.c:276
static void LogCheckpointEnd(bool restartpoint)
Definition xlog.c:6808
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition xlog.c:3728
bool log_checkpoints
Definition xlog.c:132
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition xlog.c:8084
static int LocalSetXLogInsertAllowed(void)
Definition xlog.c:6549
XLogRecPtr GetLastImportantRecPtr(void)
Definition xlog.c:6683
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition xlog.c:6913
static int LocalXLogInsertAllowed
Definition xlog.c:239
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2784
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition xlog.c:7642
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition xlog.c:6951
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:151
#define CHECKPOINT_FORCE
Definition xlog.h:153
#define XLogStandbyInfoActive()
Definition xlog.h:125
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogBeginInsert(void)
Definition xloginsert.c:152

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, fb(), CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, INJECTION_POINT_CACHED, INJECTION_POINT_LOAD, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsLogicalDecodingEnabled(), KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), CheckPoint::logicalDecodingEnabled, LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), 
pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRecPtrIsValid, XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7507 of file xlog.c.

/*
 * CreateEndOfRecoveryRecord -- listing of the function body.
 *
 * Takes no arguments and returns nothing.  From the lines visible here the
 * function:
 *   - raises ERROR unless RecoveryInProgress() is true;
 *   - fills an end-of-recovery payload (xlrec) with the current timestamp,
 *     the current wal_level, and the insert/previous timeline ids taken
 *     from XLogCtl;
 *   - sets ControlFile->minRecoveryPointTLI to the record's timeline.
 *
 * NOTE(review): the embedded source line numbers have gaps (e.g. 7509-7510,
 * 7519, 7522-7530, 7536-7537, 7539-7542), so statements are missing from
 * this listing.  Judging by the cross-reference list that follows the
 * listing, the missing lines presumably hold the critical-section entry and
 * exit, the XLogBeginInsert()/XLogRegisterData()/XLogInsert()/XLogFlush()
 * sequence for XLOG_END_OF_RECOVERY, and the locked UpdateControlFile()
 * call -- confirm against the real xlog.c before relying on this.
 */
7508{
7511
7512 /* sanity check */
7513 if (!RecoveryInProgress())
7514 elog(ERROR, "can only be used to end recovery");
7515
/* Record payload: when recovery ended and under which wal_level. */
7516 xlrec.end_time = GetCurrentTimestamp();
7517 xlrec.wal_level = wal_level;
7518
/* Timeline we are switching to, and the one we came from. */
7520 xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7521 xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7523
7525
7529
7531
7532 /*
7533 * Update the control file so that crash recovery can follow the timeline
7534 * changes to this point.
7535 */
7538 ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7541
7543}
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:78

References ControlFile, elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, UpdateControlFile(), wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7572 of file xlog.c.

/*
 * CreateOverwriteContrecordRecord -- listing of the function body.
 *
 * Parameters (from the signature shown above the listing):
 *   aborted_lsn - stored in the record as xlrec.overwritten_lsn.
 *   pagePtr     - must be a multiple of XLOG_BLCKSZ, otherwise ERROR; used
 *                 as the starting value of startPos.
 *   newTLI      - not referenced by any line visible in this listing.
 *
 * Returns recptr.
 *
 * Visible behaviour: raises ERROR unless RecoveryInProgress() is true and
 * pagePtr is block-aligned; raises ERROR if recptr differs from the
 * expected start position; stamps the record with the current time; and
 * raises ERROR if ProcLastRecPtr afterwards differs from startPos.
 *
 * NOTE(review): the embedded source line numbers have gaps (e.g. 7575-7578,
 * 7585, 7589-7593, 7596, 7607-7610, 7617, 7620-7621, 7626-7630), so the
 * declarations, the page-header size adjustment of startPos, the elog()
 * argument lines, the page-header initialization and the actual
 * XLogInsert()/XLogFlush() calls are not shown here.  In particular, the
 * value assigned to recptr before the final return is not visible.
 */
7574{
7579
7580 /* sanity checks */
7581 if (!RecoveryInProgress())
7582 elog(ERROR, "can only be used at end of recovery");
7583 if (pagePtr % XLOG_BLCKSZ != 0)
7584 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7586
7587 /* The current WAL insert position should be right after the page header */
7588 startPos = pagePtr;
7591 else
7594 if (recptr != startPos)
7595 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
7597
7599
7600 /*
7601 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7602 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7603 *
7604 * No other backend is allowed to write WAL yet, so acquiring the WAL
7605 * insertion lock is just pro forma.
7606 */
7611
7612 /*
7613 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7614 * page. We know it becomes the first record, because no other backend is
7615 * allowed to write WAL yet.
7616 */
7618 xlrec.overwritten_lsn = aborted_lsn;
7619 xlrec.overwrite_time = GetCurrentTimestamp();
7622
7623 /* check that the record was inserted to the right place */
7624 if (ProcLastRecPtr != startPos)
7625 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
7627
7629
7631
7632 return recptr;
7633}
#define XLOG_OVERWRITE_CONTRECORD
Definition pg_control.h:82
static void WALInsertLockAcquire(void)
Definition xlog.c:1378
XLogRecPtr GetXLogInsertRecPtr(void)
Definition xlog.c:9596
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD

References elog, END_CRIT_SECTION, ERROR, fb(), GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLP_FIRST_IS_OVERWRITE_CONTRECORD.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7722 of file xlog.c.

/*
 * CreateRestartPoint -- listing of the function body.
 *
 * Parameter:
 *   flags - checkpoint flag bits; the visible code tests
 *           CHECKPOINT_IS_SHUTDOWN and passes flags on to
 *           LogCheckpointStart(), update_checkpoint_display() and
 *           CheckPointGuts().
 *
 * Returns:
 *   false - when RecoveryInProgress() is already false, or when there is no
 *           valid last checkpoint record / its redo pointer is not newer
 *           than the one already stored in ControlFile->checkPointCopy;
 *   true  - after the restartpoint work below has been carried out.
 *
 * Visible sequence on the non-skipped path: publish lastCheckPoint.redo as
 * the new RedoRecPtr (local, Insert and info_lck-protected copies), flush
 * via CheckPointGuts(), update the control file if it still shows an older
 * checkpoint, compute the oldest WAL segment to keep with KeepLogSeg(),
 * remove/recycle older segments with RemoveOldXlogFiles(), preallocate
 * with PreallocXlogFiles(), then log the end of the restartpoint.
 *
 * NOTE(review): the embedded source line numbers have gaps throughout
 * (e.g. 7727-7729, 7736, 7739, 7743, 7751, 7773, 7777, 7780-7783, 7797,
 * 7813-7814, 7866, 7905-7907, 7949, 7957-7958, 7966-7968), so lock and
 * spinlock acquire/release calls, several declarations, some "if"
 * conditions and the ereport()/ExecuteRecoveryCommand() call heads are
 * missing from this listing.  Do not treat it as compilable source.
 */
7723{
7724 XLogRecPtr lastCheckPointRecPtr;
7725 XLogRecPtr lastCheckPointEndPtr;
7726 CheckPoint lastCheckPoint;
7730 TimeLineID replayTLI;
7731 XLogRecPtr endptr;
7734
7735 /* Concurrent checkpoint/restartpoint cannot happen */
7737
7738 /* Get a local copy of the last safe checkpoint record. */
7740 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7741 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7742 lastCheckPoint = XLogCtl->lastCheckPoint;
7744
7745 /*
7746 * Check that we're still in recovery mode. It's ok if we exit recovery
7747 * mode after this check, the restart point is valid anyway.
7748 */
7749 if (!RecoveryInProgress())
7750 {
7752 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7753 return false;
7754 }
7755
7756 /*
7757 * If the last checkpoint record we've replayed is already our last
7758 * restartpoint, we can't perform a new restart point. We still update
7759 * minRecoveryPoint in that case, so that if this is a shutdown restart
7760 * point, we won't start up earlier than before. That's not strictly
7761 * necessary, but when hot standby is enabled, it would be rather weird if
7762 * the database opened up for read-only connections at a point-in-time
7763 * before the last shutdown. Such time travel is still possible in case of
7764 * immediate shutdown, though.
7765 *
7766 * We don't explicitly advance minRecoveryPoint when we do create a
7767 * restartpoint. It's assumed that flushing the buffers will do that as a
7768 * side-effect.
7769 */
7770 if (!XLogRecPtrIsValid(lastCheckPointRecPtr) ||
7771 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7772 {
7774 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
7775 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
7776
7778 if (flags & CHECKPOINT_IS_SHUTDOWN)
7779 {
7784 }
7785 return false;
7786 }
7787
7788 /*
7789 * Update the shared RedoRecPtr so that the startup process can calculate
7790 * the number of segments replayed since last restartpoint, and request a
7791 * restartpoint if it exceeds CheckPointSegments.
7792 *
7793 * Like in CreateCheckPoint(), hold off insertions to update it, although
7794 * during recovery this is just pro forma, because no WAL insertions are
7795 * happening.
7796 */
7798 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7800
7801 /* Also update the info_lck-protected copy */
7803 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7805
7806 /*
7807 * Prepare to accumulate statistics.
7808 *
7809 * Note: because it is possible for log_checkpoints to change while a
7810 * checkpoint proceeds, we always accumulate stats, even if
7811 * log_checkpoints is currently off.
7812 */
7815
7816 if (log_checkpoints)
7817 LogCheckpointStart(flags, true);
7818
7819 /* Update the process title */
7820 update_checkpoint_display(flags, true, false);
7821
7822 CheckPointGuts(lastCheckPoint.redo, flags);
7823
7824 /*
7825 * This location needs to be after CheckPointGuts() to ensure that some
7826 * work has already happened during this checkpoint.
7827 */
7828 INJECTION_POINT("create-restart-point", NULL);
7829
7830 /*
7831 * Remember the prior checkpoint's redo ptr for
7832 * UpdateCheckPointDistanceEstimate()
7833 */
7835
7836 /*
7837 * Update pg_control, using current time. Check that it still shows an
7838 * older checkpoint, else do nothing; this is a quick hack to make sure
7839 * nothing really bad happens if somehow we get here after the
7840 * end-of-recovery checkpoint.
7841 */
7843 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7844 {
7845 /*
7846 * Update the checkpoint information. We do this even if the cluster
7847 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7848 * segments recycled below.
7849 */
7850 ControlFile->checkPoint = lastCheckPointRecPtr;
7851 ControlFile->checkPointCopy = lastCheckPoint;
7852
7853 /*
7854 * Ensure minRecoveryPoint is past the checkpoint record and update it
7855 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7856 * this will have happened already while writing out dirty buffers,
7857 * but not necessarily - e.g. because no buffers were dirtied. We do
7858 * this because a backup performed in recovery uses minRecoveryPoint
7859 * to determine which WAL files must be included in the backup, and
7860 * the file (or files) containing the checkpoint record must be
7861 * included, at a minimum. Note that for an ordinary restart of
7862 * recovery there's no value in having the minimum recovery point any
7863 * earlier than this anyway, because redo will begin just after the
7864 * checkpoint record.
7865 */
7867 {
7868 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7869 {
7870 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7872
7873 /* update local copy */
7876 }
7877 if (flags & CHECKPOINT_IS_SHUTDOWN)
7879 }
7881 }
7883
7884 /*
7885 * Update the average distance between checkpoints/restartpoints if the
7886 * prior checkpoint exists.
7887 */
7890
7891 /*
7892 * Delete old log files, those no longer needed for last restartpoint to
7893 * prevent the disk holding the xlog from growing full.
7894 */
7896
7897 /*
7898 * Retreat _logSegNo using the current end of xlog replayed or received,
7899 * whichever is later.
7900 */
7902 replayPtr = GetXLogReplayRecPtr(&replayTLI);
/* endptr = the later of the received and replayed positions. */
7903 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7904 KeepLogSeg(endptr, &_logSegNo);
7908 {
7909 /*
7910 * Some slots have been invalidated; recalculate the old-segment
7911 * horizon, starting again from RedoRecPtr.
7912 */
7914 KeepLogSeg(endptr, &_logSegNo);
7915 }
7916 _logSegNo--;
7917
7918 /*
7919 * Try to recycle segments on a useful timeline. If we've been promoted
7920 * since the beginning of this restartpoint, use the new timeline chosen
7921 * at end of recovery. If we're still in recovery, use the timeline we're
7922 * currently replaying.
7923 *
7924 * There is no guarantee that the WAL segments will be useful on the
7925 * current timeline; if recovery proceeds to a new timeline right after
7926 * this, the pre-allocated WAL segments on this timeline will not be used,
7927 * and will go wasted until recycled on the next restartpoint. We'll live
7928 * with that.
7929 */
7930 if (!RecoveryInProgress())
7931 replayTLI = XLogCtl->InsertTimeLineID;
7932
7933 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7934
7935 /*
7936 * Make more log segments if needed. (Do this after recycling old log
7937 * segments, since that may supply some of the needed files.)
7938 */
7939 PreallocXlogFiles(endptr, replayTLI);
7940
7941 /*
7942 * Truncate pg_subtrans if possible. We can throw away all data before
7943 * the oldest XMIN of any running transaction. No future transaction will
7944 * attempt to reference any pg_subtrans entry older than that (see Asserts
7945 * in subtrans.c). When hot standby is disabled, though, we mustn't do
7946 * this because StartupSUBTRANS hasn't been called yet.
7947 */
7948 if (EnableHotStandby)
7950
7951 /* Real work is done; log and update stats. */
7952 LogCheckpointEnd(true);
7953
7954 /* Reset the process title */
7955 update_checkpoint_display(flags, true, true);
7956
7959 errmsg("recovery restart point at %X/%08X",
7960 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7961 xtime ? errdetail("Last completed transaction was at log time %s.",
7963
7964 /*
7965 * Finally, execute archive_cleanup_command, if any.
7966 */
7969 "archive_cleanup_command",
7970 false,
7972
7973 return true;
7974}
const char * timestamptz_to_str(TimestampTz t)
Definition timestamp.c:1862
int64 TimestampTz
Definition timestamp.h:39
bool IsUnderPostmaster
Definition globals.c:120
@ B_CHECKPOINTER
Definition miscadmin.h:363
BackendType MyBackendType
Definition miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition pg_control.h:98
@ DB_SHUTDOWNED_IN_RECOVERY
Definition pg_control.h:95
CheckPoint lastCheckPoint
Definition xlog.c:547
XLogRecPtr lastCheckPointRecPtr
Definition xlog.c:545
XLogRecPtr lastCheckPointEndPtr
Definition xlog.c:546
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition xlog.c:2704
static XLogRecPtr LocalMinRecoveryPoint
Definition xlog.c:648
static TimeLineID LocalMinRecoveryPointTLI
Definition xlog.c:649
uint32 TimeLineID
Definition xlogdefs.h:63
char * archiveCleanupCommand
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), fb(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsValid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9555 of file xlog.c.

/*
 * do_pg_abort_backup -- listing of the function body.
 *
 * Parameters (from the signature shown above the listing):
 *   code - not referenced by any line visible in this listing.
 *   arg  - a Datum; presumably decoded with DatumGetBool() into a
 *          "called during backup start" flag, since DatumGetBool appears in
 *          the cross-reference list and the first comment below talks
 *          about that case -- TODO confirm against the real xlog.c.
 *
 * Returns nothing.
 *
 * The only statement fully visible here is the message text "aborting
 * backup due to backend exiting before pg_backup_stop was called"; WARNING
 * appears in the cross-reference list, so it is presumably reported at that
 * level.
 *
 * NOTE(review): the embedded source line numbers have gaps (9557, 9560,
 * 9562, 9564-9566, 9568-9569, 9571-9572), so the declaration, the Assert,
 * the guarding "if" conditions, the WAL-insert-lock acquire/release and the
 * updates to runningBackups / sessionBackupState named in the
 * cross-reference list are missing from this listing.
 */
9556{
9558
9559 /* If called during backup start, there shouldn't be one already running */
9561
9563 {
9567
9570
9573 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9574 }
9575}
#define WARNING
Definition elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition postgres.h:100
int runningBackups
Definition xlog.c:441
static SessionBackupState sessionBackupState
Definition xlog.c:394
@ SESSION_BACKUP_NONE
Definition xlog.h:303

References arg, Assert, DatumGetBool(), ereport, errmsg(), fb(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8953 of file xlog.c.

8955{
8957
8958 Assert(state != NULL);
8960
8961 /*
8962 * During recovery, we don't need to check WAL level. Because, if WAL
8963 * level is not sufficient, it's impossible to get here during recovery.
8964 */
8966 ereport(ERROR,
8968 errmsg("WAL level not sufficient for making an online backup"),
8969 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8970
8972 ereport(ERROR,
8974 errmsg("backup label too long (max %d bytes)",
8975 MAXPGPATH)));
8976
8977 strlcpy(state->name, backupidstr, sizeof(state->name));
8978
8979 /*
8980 * Mark backup active in shared memory. We must do full-page WAL writes
8981 * during an on-line backup even if not doing so at other times, because
8982 * it's quite possible for the backup dump to obtain a "torn" (partially
8983 * written) copy of a database page if it reads the page concurrently with
8984 * our write to the same page. This can be fixed as long as the first
8985 * write to the page in the WAL sequence is a full-page write. Hence, we
8986 * increment runningBackups then force a CHECKPOINT, to ensure there are
8987 * no dirty pages in shared memory that might get dumped while the backup
8988 * is in progress without having a corresponding WAL record. (Once the
8989 * backup is complete, we need not force full-page writes anymore, since
8990 * we expect that any pages not modified during the backup interval must
8991 * have been correctly captured by the backup.)
8992 *
8993 * Note that forcing full-page writes has no effect during an online
8994 * backup from the standby.
8995 *
8996 * We must hold all the insertion locks to change the value of
8997 * runningBackups, to ensure adequate interlocking against
8998 * XLogInsertRecord().
8999 */
9003
9004 /*
9005 * Ensure we decrement runningBackups if we fail below. NB -- for this to
9006 * work correctly, it is critical that sessionBackupState is only updated
9007 * after this block is over.
9008 */
9010 {
9011 bool gotUniqueStartpoint = false;
9012 DIR *tblspcdir;
9013 struct dirent *de;
9015 int datadirpathlen;
9016
9017 /*
9018 * Force an XLOG file switch before the checkpoint, to ensure that the
9019 * WAL segment the checkpoint is written to doesn't contain pages with
9020 * old timeline IDs. That would otherwise happen if you called
9021 * pg_backup_start() right after restoring from a PITR archive: the
9022 * first WAL segment containing the startup checkpoint has pages in
9023 * the beginning with the old timeline ID. That can cause trouble at
9024 * recovery: we won't have a history file covering the old timeline if
9025 * pg_wal directory was not included in the base backup and the WAL
9026 * archive was cleared too before starting the backup.
9027 *
9028 * This also ensures that we have emitted a WAL page header that has
9029 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
9030 * Therefore, if a WAL archiver (such as pglesslog) is trying to
9031 * compress out removable backup blocks, it won't remove any that
9032 * occur after this point.
9033 *
9034 * During recovery, we skip forcing XLOG file switch, which means that
9035 * the backup taken during recovery is not available for the special
9036 * recovery case described above.
9037 */
9039 RequestXLogSwitch(false);
9040
9041 do
9042 {
9043 bool checkpointfpw;
9044
9045 /*
9046 * Force a CHECKPOINT. Aside from being necessary to prevent torn
9047 * page problems, this guarantees that two successive backup runs
9048 * will have different checkpoint positions and hence different
9049 * history file names, even if nothing happened in between.
9050 *
9051 * During recovery, establish a restartpoint if possible. We use
9052 * the last restartpoint as the backup starting checkpoint. This
9053 * means that two successive backup runs can have same checkpoint
9054 * positions.
9055 *
9056 * Since the fact that we are executing do_pg_backup_start()
9057 * during recovery means that checkpointer is running, we can use
9058 * RequestCheckpoint() to establish a restartpoint.
9059 *
9060 * We use CHECKPOINT_FAST only if requested by user (via passing
9061 * fast = true). Otherwise this can take awhile.
9062 */
9064 (fast ? CHECKPOINT_FAST : 0));
9065
9066 /*
9067 * Now we need to fetch the checkpoint record location, and also
9068 * its REDO pointer. The oldest point in WAL that would be needed
9069 * to restore starting from the checkpoint is precisely the REDO
9070 * pointer.
9071 */
9073 state->checkpointloc = ControlFile->checkPoint;
9074 state->startpoint = ControlFile->checkPointCopy.redo;
9078
9080 {
9082
9083 /*
9084 * Check to see if all WAL replayed during online backup
9085 * (i.e., since last restartpoint used as backup starting
9086 * checkpoint) contain full-page writes.
9087 */
9091
9092 if (!checkpointfpw || state->startpoint <= recptr)
9093 ereport(ERROR,
9095 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9096 "since last restartpoint"),
9097 errhint("This means that the backup being taken on the standby "
9098 "is corrupt and should not be used. "
9099 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9100 "and then try an online backup again.")));
9101
9102 /*
9103 * During recovery, since we don't use the end-of-backup WAL
9104 * record and don't write the backup history file, the
9105 * starting WAL location doesn't need to be unique. This means
9106 * that two base backups started at the same time might use
9107 * the same checkpoint as starting locations.
9108 */
9109 gotUniqueStartpoint = true;
9110 }
9111
9112 /*
9113 * If two base backups are started at the same time (in WAL sender
9114 * processes), we need to make sure that they use different
9115 * checkpoints as starting locations, because we use the starting
9116 * WAL location as a unique identifier for the base backup in the
9117 * end-of-backup WAL record and when we write the backup history
9118 * file. Perhaps it would be better to generate a separate unique ID
9119 * for each backup instead of forcing another checkpoint, but
9120 * taking a checkpoint right after another is not that expensive
9121 * either because only few buffers have been dirtied yet.
9122 */
9124 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9125 {
9126 XLogCtl->Insert.lastBackupStart = state->startpoint;
9127 gotUniqueStartpoint = true;
9128 }
9130 } while (!gotUniqueStartpoint);
9131
9132 /*
9133 * Construct tablespace_map file.
9134 */
9136
9137 /* Collect information about all tablespaces */
9139 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9140 {
9141 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9142 char linkpath[MAXPGPATH];
9143 char *relpath = NULL;
9144 char *s;
9146 char *badp;
9147 Oid tsoid;
9148
9149 /*
9150 * Try to parse the directory name as an unsigned integer.
9151 *
9152 * Tablespace directories should be positive integers that can be
9153 * represented in 32 bits, with no leading zeroes or trailing
9154 * garbage. If we come across a name that doesn't meet those
9155 * criteria, skip it.
9156 */
9157 if (de->d_name[0] < '1' || de->d_name[0] > '9')
9158 continue;
9159 errno = 0;
9160 tsoid = strtoul(de->d_name, &badp, 10);
9161 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9162 continue;
9163
9164 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9165
9166 de_type = get_dirent_type(fullpath, de, false, ERROR);
9167
9168 if (de_type == PGFILETYPE_LNK)
9169 {
9171 int rllen;
9172
9173 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9174 if (rllen < 0)
9175 {
9177 (errmsg("could not read symbolic link \"%s\": %m",
9178 fullpath)));
9179 continue;
9180 }
9181 else if (rllen >= sizeof(linkpath))
9182 {
9184 (errmsg("symbolic link \"%s\" target is too long",
9185 fullpath)));
9186 continue;
9187 }
9188 linkpath[rllen] = '\0';
9189
9190 /*
9191 * Relpath holds the relative path of the tablespace directory
9192 * when it's located within PGDATA, or NULL if it's located
9193 * elsewhere.
9194 */
9195 if (rllen > datadirpathlen &&
9199
9200 /*
9201 * Add a backslash-escaped version of the link path to the
9202 * tablespace map file.
9203 */
9205 for (s = linkpath; *s; s++)
9206 {
9207 if (*s == '\n' || *s == '\r' || *s == '\\')
9210 }
9212 de->d_name, escapedpath.data);
9213 pfree(escapedpath.data);
9214 }
9215 else if (de_type == PGFILETYPE_DIR)
9216 {
9217 /*
9218 * It's possible to use allow_in_place_tablespaces to create
9219 * directories directly under pg_tblspc, for testing purposes
9220 * only.
9221 *
9222 * In this case, we store a relative path rather than an
9223 * absolute path into the tablespaceinfo.
9224 */
9225 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9226 PG_TBLSPC_DIR, de->d_name);
9228 }
9229 else
9230 {
9231 /* Skip any other file type that appears here. */
9232 continue;
9233 }
9234
9236 ti->oid = tsoid;
9237 ti->path = pstrdup(linkpath);
9238 ti->rpath = relpath;
9239 ti->size = -1;
9240
9241 if (tablespaces)
9242 *tablespaces = lappend(*tablespaces, ti);
9243 }
9245
9246 state->starttime = (pg_time_t) time(NULL);
9247 }
9249
9250 state->started_in_recovery = backup_started_in_recovery;
9251
9252 /*
9253 * Mark that the start phase has correctly finished for the backup.
9254 */
9256}
static bool backup_started_in_recovery
Definition basebackup.c:123
void RequestCheckpoint(int flags)
#define palloc_object(type)
Definition fe_memutils.h:74
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition file_utils.c:547
PGFileType
Definition file_utils.h:19
@ PGFILETYPE_LNK
Definition file_utils.h:24
@ PGFILETYPE_DIR
Definition file_utils.h:23
char * DataDir
Definition globals.c:71
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition ipc.h:52
List * lappend(List *list, void *datum)
Definition list.c:339
#define IS_DIR_SEP(ch)
Definition port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
unsigned int Oid
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define PG_TBLSPC_DIR
Definition relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition xlog.c:553
XLogRecPtr lastBackupStart
Definition xlog.c:442
#define readlink(path, buf, size)
Definition win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition xlog.c:8193
void do_pg_abort_backup(int code, Datum arg)
Definition xlog.c:9555
@ SESSION_BACKUP_RUNNING
Definition xlog.h:304
#define CHECKPOINT_WAIT
Definition xlog.h:156
#define CHECKPOINT_FAST
Definition xlog.h:152
#define XLogIsNeeded()
Definition xlog.h:111

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, fb(), FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, palloc_object, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9281 of file xlog.c.

9282{
9283 bool backup_stopped_in_recovery = false;
9284 char histfilepath[MAXPGPATH];
9288 FILE *fp;
9290 int waits = 0;
9291 bool reported_waiting = false;
9292
9293 Assert(state != NULL);
9294
9296
9297 /*
9298 * During recovery, we don't need to check WAL level. Because, if WAL
9299 * level is not sufficient, it's impossible to get here during recovery.
9300 */
9302 ereport(ERROR,
9304 errmsg("WAL level not sufficient for making an online backup"),
9305 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9306
9307 /*
9308 * OK to update backup counter and session-level lock.
9309 *
9310 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9311 * otherwise they can be updated inconsistently, which might cause
9312 * do_pg_abort_backup() to fail.
9313 */
9315
9316 /*
9317 * It is expected that each do_pg_backup_start() call is matched by
9318 * exactly one do_pg_backup_stop() call.
9319 */
9322
9323 /*
9324 * Clean up session-level lock.
9325 *
9326 * You might think that WALInsertLockRelease() can be called before
9327 * cleaning up session-level lock because session-level lock doesn't need
9328 * to be protected with WAL insertion lock. But since
9329 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9330 * cleaned up before it.
9331 */
9333
9335
9336 /*
9337 * If we are taking an online backup from the standby, we confirm that the
9338 * standby has not been promoted during the backup.
9339 */
9340 if (state->started_in_recovery && !backup_stopped_in_recovery)
9341 ereport(ERROR,
9343 errmsg("the standby was promoted during online backup"),
9344 errhint("This means that the backup being taken is corrupt "
9345 "and should not be used. "
9346 "Try taking another online backup.")));
9347
9348 /*
9349 * During recovery, we don't write an end-of-backup record. We assume that
9350 * pg_control was backed up last and its minimum recovery point can be
9351 * available as the backup end location. Since we don't have an
9352 * end-of-backup record, we use the pg_control value to check whether
9353 * we've reached the end of backup when starting recovery from this
9354 * backup. We have no way of checking if pg_control wasn't backed up last
9355 * however.
9356 *
9357 * We don't force a switch to new WAL file but it is still possible to
9358 * wait for all the required files to be archived if waitforarchive is
9359 * true. This is okay if we use the backup to start a standby and fetch
9360 * the missing WAL using streaming replication. But in the case of an
9361 * archive recovery, a user should set waitforarchive to true and wait for
9362 * them to be archived to ensure that all the required files are
9363 * available.
9364 *
9365 * We return the current minimum recovery point as the backup end
9366 * location. Note that it can be greater than the exact backup end
9367 * location if the minimum recovery point is updated after the backup of
9368 * pg_control. This is harmless for current uses.
9369 *
9370 * XXX currently a backup history file is for informational and debug
9371 * purposes only. It's not essential for an online backup. Furthermore,
9372 * even if it's created, it will not be archived during recovery because
9373 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9374 * backup history file during recovery.
9375 */
9377 {
9379
9380 /*
9381 * Check to see if all WAL replayed during online backup contain
9382 * full-page writes.
9383 */
9387
9388 if (state->startpoint <= recptr)
9389 ereport(ERROR,
9391 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9392 "during online backup"),
9393 errhint("This means that the backup being taken on the standby "
9394 "is corrupt and should not be used. "
9395 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9396 "and then try an online backup again.")));
9397
9398
9400 state->stoppoint = ControlFile->minRecoveryPoint;
9403 }
9404 else
9405 {
9406 char *history_file;
9407
9408 /*
9409 * Write the backup-end xlog record
9410 */
9412 XLogRegisterData(&state->startpoint,
9413 sizeof(state->startpoint));
9415
9416 /*
9417 * Given that we're not in recovery, InsertTimeLineID is set and can't
9418 * change, so we can read it without a lock.
9419 */
9420 state->stoptli = XLogCtl->InsertTimeLineID;
9421
9422 /*
9423 * Force a switch to a new xlog segment file, so that the backup is
9424 * valid as soon as archiver moves out the current segment file.
9425 */
9426 RequestXLogSwitch(false);
9427
9428 state->stoptime = (pg_time_t) time(NULL);
9429
9430 /*
9431 * Write the backup history file
9432 */
9435 state->startpoint, wal_segment_size);
9436 fp = AllocateFile(histfilepath, "w");
9437 if (!fp)
9438 ereport(ERROR,
9440 errmsg("could not create file \"%s\": %m",
9441 histfilepath)));
9442
9443 /* Build and save the contents of the backup history file */
9445 fprintf(fp, "%s", history_file);
9447
9448 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9449 ereport(ERROR,
9451 errmsg("could not write file \"%s\": %m",
9452 histfilepath)));
9453
9454 /*
9455 * Clean out any no-longer-needed history files. As a side effect,
9456 * this will post a .ready file for the newly created history file,
9457 * notifying the archiver that history file may be archived
9458 * immediately.
9459 */
9461 }
9462
9463 /*
9464 * If archiving is enabled, wait for all the required WAL files to be
9465 * archived before returning. If archiving isn't enabled, the required WAL
9466 * needs to be transported via streaming replication (hopefully with
9467 * wal_keep_size set high enough), or some more exotic mechanism like
9468 * polling and copying files from pg_wal with a script. We have no knowledge
9469 * of those mechanisms, so it's up to the user to ensure that he gets all
9470 * the required WAL.
9471 *
9472 * We wait until both the last WAL file filled during backup and the
9473 * history file have been archived, and assume that the alphabetic sorting
9474 * property of the WAL files ensures any earlier WAL files are safely
9475 * archived as well.
9476 *
9477 * We wait forever, since archive_command is supposed to work and we
9478 * assume the admin wanted his backup to work completely. If you don't
9479 * wish to wait, then either waitforarchive should be passed in as false,
9480 * or you can set statement_timeout. Also, some notices are issued to
9481 * clue in anyone who might be doing this interactively.
9482 */
9483
9484 if (waitforarchive &&
9487 {
9491
9494 state->startpoint, wal_segment_size);
9495
9497 waits = 0;
9498
9501 {
9503
9504 if (!reported_waiting && waits > 5)
9505 {
9507 (errmsg("base backup done, waiting for required WAL segments to be archived")));
9508 reported_waiting = true;
9509 }
9510
9513 1000L,
9516
9518 {
9519 seconds_before_warning *= 2; /* This wraps in >10 years... */
9521 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9522 waits),
9523 errhint("Check that your \"archive_command\" is executing properly. "
9524 "You can safely cancel this backup, "
9525 "but the database backup will not be usable without all the WAL segments.")));
9526 }
9527 }
9528
9530 (errmsg("all required WAL segments have been archived")));
9531 }
9532 else if (waitforarchive)
9534 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9535}
#define fprintf(file, fmt, msg)
Definition cubescan.l:21
#define NOTICE
Definition elog.h:35
int FreeFile(FILE *file)
Definition fd.c:2823
FILE * AllocateFile(const char *name, const char *mode)
Definition fd.c:2624
struct Latch * MyLatch
Definition globals.c:63
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define XLOG_BACKUP_END
Definition pg_control.h:74
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
static void CleanupBackupHistory(void)
Definition xlog.c:4199
#define XLogArchivingAlways()
Definition xlog.h:104
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fb(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9262 of file xlog.c.

9263{
9264 return sessionBackupState;
9265}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8766 of file xlog.c.

8767{
8768 int o_direct_flag = 0;
8769
8770 /*
8771 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8772 * written by walreceiver is normally read by the startup process soon
8773 * after it's written. Also, walreceiver performs unaligned writes, which
8774 * don't work with O_DIRECT, so it is required for correctness too.
8775 */
8778
8779 /* If fsync is disabled, never open in sync mode */
8780 if (!enableFsync)
8781 return o_direct_flag;
8782
8783 switch (method)
8784 {
8785 /*
8786 * enum values for all sync options are defined even if they are
8787 * not supported on the current platform. But if not, they are
8788 * not included in the enum option array, and therefore will never
8789 * be seen here.
8790 */
8794 return o_direct_flag;
8795#ifdef O_SYNC
8797 return O_SYNC | o_direct_flag;
8798#endif
8799#ifdef O_DSYNC
8801 return O_DSYNC | o_direct_flag;
8802#endif
8803 default:
8804 /* can't happen (unless we are out of sync with option array) */
8805 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8806 return 0; /* silence warning */
8807 }
8808}
int io_direct_flags
Definition fd.c:168
#define IO_DIRECT_WAL
Definition fd.h:55
#define PG_O_DIRECT
Definition fd.h:112
bool enableFsync
Definition globals.c:129
#define AmWalReceiverProcess()
Definition miscadmin.h:391
#define O_DSYNC
Definition win32_port.h:346
@ WAL_SYNC_METHOD_OPEN
Definition xlog.h:27
@ WAL_SYNC_METHOD_FDATASYNC
Definition xlog.h:26
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition xlog.h:28
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition xlog.h:29
@ WAL_SYNC_METHOD_FSYNC
Definition xlog.h:25

References AmWalReceiverProcess, elog, enableFsync, ERROR, fb(), io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4953 of file xlog.c.

4954{
4955 return ControlFile->wal_level;
4956}

References ControlFile, and ControlFileData::wal_level.

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4662 of file xlog.c.

4663{
4665}
bool default_char_signedness
Definition pg_control.h:232

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4677 of file xlog.c.

4678{
4680}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition atomics.h:532

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6594 of file xlog.c.

6595{
6598}
static bool doPageWrites
Definition xlog.c:289

References doPageWrites, fb(), and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6683 of file xlog.c.

6684{
6686 int i;
6687
6688 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6689 {
6691
6692 /*
6693 * Need to take a lock to prevent torn reads of the LSN, which are
6694 * possible on some of the supported platforms. WAL insert locks only
6695 * support exclusive mode, so we have to use that.
6696 */
6699 LWLockRelease(&WALInsertLocks[i].l.lock);
6700
6701 if (res < last_important)
6702 res = last_important;
6703 }
6704
6705 return res;
6706}
int i
Definition isn.c:77
XLogRecPtr lastImportantAt
Definition xlog.c:374
WALInsertLock l
Definition xlog.c:386
static WALInsertLockPadded * WALInsertLocks
Definition xlog.c:571
#define NUM_XLOGINSERT_LOCKS
Definition xlog.c:153

References fb(), i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6712 of file xlog.c.

6713{
6714 pg_time_t result;
6715
6716 /* Need WALWriteLock, but shared lock is sufficient */
6718 result = XLogCtl->lastSegSwitchTime;
6721
6722 return result;
6723}
pg_time_t lastSegSwitchTime
Definition xlog.c:469
XLogRecPtr lastSegSwitchLSN
Definition xlog.c:470

References fb(), XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4638 of file xlog.c.

4639{
4642}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition pg_control.h:239

References Assert, ControlFile, fb(), and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6497 of file xlog.c.

6498{
6499 RecoveryState retval;
6500
6502 retval = XLogCtl->SharedRecoveryState;
6504
6505 return retval;
6506}
RecoveryState
Definition xlog.h:91

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6564 of file xlog.c.

6565{
6566 XLogRecPtr ptr;
6567
6568 /*
6569 * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6570 * grabbed a WAL insertion lock to read the authoritative value in
6571 * Insert->RedoRecPtr, someone might update it just after we've released
6572 * the lock.
6573 */
6575 ptr = XLogCtl->RedoRecPtr;
6577
6578 if (RedoRecPtr < ptr)
6579 RedoRecPtr = ptr;
6580
6581 return RedoRecPtr;
6582}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 8000 of file xlog.c.

8001{
8002 XLogRecPtr currpos; /* current write LSN */
8003 XLogSegNo currSeg; /* segid of currpos */
8004 XLogSegNo targetSeg; /* segid of targetLSN */
8005 XLogSegNo oldestSeg; /* actual oldest segid */
8006 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
8007 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
8009
8010 /*
8011 * slot does not reserve WAL. Either deactivated, or has never been active
8012 */
8014 return WALAVAIL_INVALID_LSN;
8015
8016 /*
8017 * Calculate the oldest segment currently reserved by all slots,
8018 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
8019 * oldestSlotSeg to the current segment.
8020 */
8021 currpos = GetXLogWriteRecPtr();
8023 KeepLogSeg(currpos, &oldestSlotSeg);
8024
8025 /*
8026 * Find the oldest extant segment file. We get 1 until checkpoint removes
8027 * the first WAL segment file since startup, which causes the status to be
8028 * wrong under certain abnormal conditions, but that does no actual harm.
8029 */
8031
8032 /* calculate oldest segment by max_wal_size */
8035
8036 if (currSeg > keepSegs)
8038 else
8040
8041 /* the segment we care about */
8043
8044 /*
8045 * No point in returning reserved or extended status values if the
8046 * targetSeg is known to be lost.
8047 */
8048 if (targetSeg >= oldestSlotSeg)
8049 {
8050 /* show "reserved" when targetSeg is within max_wal_size */
8052 return WALAVAIL_RESERVED;
8053
8054 /* being retained by slots exceeding max_wal_size */
8055 return WALAVAIL_EXTENDED;
8056 }
8057
8058 /* WAL segments are no longer retained but haven't been removed yet */
8059 if (targetSeg >= oldestSeg)
8060 return WALAVAIL_UNRESERVED;
8061
8062 /* Definitely lost */
8063 return WALAVAIL_REMOVED;
8064}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition xlog.c:3796
XLogRecPtr GetXLogWriteRecPtr(void)
Definition xlog.c:9612
@ WALAVAIL_REMOVED
Definition xlog.h:205
@ WALAVAIL_RESERVED
Definition xlog.h:201
@ WALAVAIL_UNRESERVED
Definition xlog.h:204
@ WALAVAIL_EXTENDED
Definition xlog.h:202
@ WALAVAIL_INVALID_LSN
Definition xlog.h:200

References ConvertToXSegs, fb(), GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsValid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6647 of file xlog.c.

6648{
6650
6651 /* Since the value can't be changing, no lock is required. */
6652 return XLogCtl->InsertTimeLineID;
6653}

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1639 of file xlog.c.

1640{
1641 int idx;
1642 XLogRecPtr endptr;
1643 static uint64 cachedPage = 0;
1644 static char *cachedPos = NULL;
1645 XLogRecPtr expectedEndPtr;
1646
1647 /*
1648 * Fast path for the common case that we need to access again the same
1649 * page as last time.
1650 */
1651 if (ptr / XLOG_BLCKSZ == cachedPage)
1652 {
1653 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1654 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1655 return cachedPos + ptr % XLOG_BLCKSZ;
1656 }
1657
1658 /*
1659 * The XLog buffer cache is organized so that a page is always loaded to a
1660 * particular buffer. That way we can easily calculate the buffer a given
1661 * page must be loaded into, from the XLogRecPtr alone.
1662 */
1663 idx = XLogRecPtrToBufIdx(ptr);
1664
1665 /*
1666 * See what page is loaded in the buffer at the moment. It could be the
1667 * page we're looking for, or something older. It can't be anything newer
1668 * - that would imply the page we're looking for has already been written
1669 * out to disk and evicted, and the caller is responsible for making sure
1670 * that doesn't happen.
1671 *
1672 * We don't hold a lock while we read the value. If someone is just about
1673 * to initialize or has just initialized the page, it's possible that we
1674 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1675 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1676 * we're looking for.
1677 */
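1678 expectedEndPtr = ptr;
1679 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1680
1681 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1682 if (expectedEndPtr != endptr)
1683 {
1684 XLogRecPtr initializedUpto;
1685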
1686 /*
1687 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1688 * know how far we're finished with inserting the record.
1689 *
1690 * NB: If 'ptr' points to just after the page header, advertise a
1691 * position at the beginning of the page rather than 'ptr' itself. If
1692 * there are no other insertions running, someone might try to flush
1693 * up to our advertised location. If we advertised a position after
1694 * the page header, someone might try to flush the page header, even
1695 * though page might actually not be initialized yet. As the first
1696 * inserter on the page, we are effectively responsible for making
1697 * sure that it's initialized, before we let insertingAt to move past
1698 * the page header.
1699 */
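1700 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1701 XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1702 initializedUpto = ptr - SizeOfXLogShortPHD;
1703 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1704 XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1705 initializedUpto = ptr - SizeOfXLogLongPHD;
1706 else
1707 initializedUpto = ptr;
1708
1709 WALInsertLockUpdateInsertingAt(initializedUpto);
1710
1711 AdvanceXLInsertBuffer(ptr, tli, false);
1712 endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1713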
1714 if (expectedEndPtr != endptr)
1715 elog(PANIC, "could not find WAL buffer for %X/%08X",
1716 LSN_FORMAT_ARGS(ptr));
1717 }
1718 else
1719 {
1720 /*
1721 * Make sure the initialization of the page is visible to us, and
1722 * won't arrive later to overwrite the WAL data we write on the page.
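1723 */
1724 pg_memory_barrier();
1725 }
1726
1727 /*
1728 * Found the buffer holding this page. Return a pointer to the right
1729 * offset within the page.
1730 */
1731 cachedPage = ptr / XLOG_BLCKSZ;
1732 cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1733
1734 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1735 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));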
1736
1737 return cachedPos + ptr % XLOG_BLCKSZ;
1738}
#define pg_memory_barrier()
Definition atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition xlog.c:1478
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition xlog.c:1992

References AdvanceXLInsertBuffer(), Assert, elog, fb(), idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4242 of file xlog.c.

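4243{
4244 char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4245
4246 /*
4247 * Generate a random nonce. This is used for authentication requests that
4248 * will fail because the user does not exist. The nonce is used to create
4249 * a genuine-looking password challenge for the non-existent user, in lieu
4250 * of an actual stored password.
4251 */
4252 if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4253 ereport(PANIC,
4254 (errcode(ERRCODE_INTERNAL_ERROR),
4255 errmsg("could not generate secret authorization token")));
4256
4257 memset(ControlFile, 0, sizeof(ControlFileData));
4258 /* Initialize pg_control status fields */
4259 ControlFile->system_identifier = sysidentifier;
4260 memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4261 ControlFile->state = DB_SHUTDOWNED;
4262 ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4263
4264 /* Set important parameter values for use when replaying WAL */
4265 ControlFile->MaxConnections = MaxConnections;
4266 ControlFile->max_worker_processes = max_worker_processes;
4267 ControlFile->max_wal_senders = max_wal_senders;
4268 ControlFile->max_prepared_xacts = max_prepared_xacts;
4269 ControlFile->max_locks_per_xact = max_locks_per_xact;
4270 ControlFile->wal_level = wal_level;
4271 ControlFile->wal_log_hints = wal_log_hints;
4272 ControlFile->track_commit_timestamp = track_commit_timestamp;
4273 ControlFile->data_checksum_version = data_checksum_version;
4274}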
bool track_commit_timestamp
Definition commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition pg_control.h:187
bool wal_log_hints
Definition xlog.c:126
#define FirstNormalUnloggedLSN
Definition xlogdefs.h:37

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), fb(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4859 of file xlog.c.

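4860{
4861 Assert(process_shared_preload_libraries_done);
4862
4863 if (check_wal_consistency_checking_deferred)
4864 {
4865 struct config_generic *guc;
4866
4867 guc = find_option("wal_consistency_checking", false, false, ERROR);
4868
4869 check_wal_consistency_checking_deferred = false;
4870
4871 set_config_option_ext("wal_consistency_checking",
4872 wal_consistency_checking_string,
4873 guc->scontext, guc->source, guc->srole,
4874 GUC_ACTION_SET, true, ERROR, false);
4875
4876 /* checking should not be deferred again */
4877 Assert(!check_wal_consistency_checking_deferred);
4878 }
4879}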
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition guc.c:3256
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition guc.c:1113
@ GUC_ACTION_SET
Definition guc.h:203
char * wal_consistency_checking_string
Definition xlog.c:128

References Assert, check_wal_consistency_checking_deferred, ERROR, fb(), find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, set_config_option_ext(), and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo * segno,
char * tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3601 of file xlog.c.

3603{
3604 char path[MAXPGPATH];
3605 struct stat stat_buf;
3606
3607 Assert(tli != 0);
3608
3609 XLogFilePath(path, tli, *segno, wal_segment_size);
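3610
3611 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3612 if (!XLogCtl->InstallXLogFileSegmentActive)
3613 {
3614 LWLockRelease(ControlFileLock);
3615 return false;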
3616 }
3617
3618 if (!find_free)
3619 {
3620 /* Force installation: get rid of any pre-existing segment file */
3621 durable_unlink(path, DEBUG1);
3622 }
3623 else
3624 {
3625 /* Find a free slot to put it in */
3626 while (stat(path, &stat_buf) == 0)
3627 {
3628 if ((*segno) >= max_segno)
3629 {
3630 /* Failed to find a free slot within specified range */
3631 LWLockRelease(ControlFileLock);
3632 return false;
3633 }
3634 (*segno)++;
3635 XLogFilePath(path, tli, *segno, wal_segment_size);
3636 }
3637 }
3638
3639 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3640 if (durable_rename(tmppath, path, LOG) != 0)
3641 {
3642 LWLockRelease(ControlFileLock);
3643 /* durable_rename already emitted log message */
3644 return false;
3645 }
3646
3647 LWLockRelease(ControlFileLock);
3648
3649 return true;
3650}
int durable_unlink(const char *fname, int elevel)
Definition fd.c:869
short access
bool InstallXLogFileSegmentActive
Definition xlog.c:528
#define stat
Definition win32_port.h:74

References Assert, DEBUG1, durable_rename(), durable_unlink(), fb(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9661 of file xlog.c.

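9662{
9663 bool result;
9664
9665 LWLockAcquire(ControlFileLock, LW_SHARED);
9666 result = XLogCtl->InstallXLogFileSegmentActive;
9667 LWLockRelease(ControlFileLock);
9668
9669 return result;
9670}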

References fb(), XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8856 of file xlog.c.

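8857{
8858 char *msg = NULL;
8859 instr_time start;
8860
8861 Assert(tli != 0);
8862
8863 /*
8864 * Quick exit if fsync is disabled or write() has already synced the WAL
8865 * file.
8866 */
8867 if (!enableFsync ||
8868 wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8869 wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8870 return;
8871
8872 /*
8873 * Measure I/O timing to sync the WAL file for pg_stat_io.
8874 */
8875 start = pgstat_prepare_io_time(track_wal_io_timing);
8876
8877 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8878 switch (wal_sync_method)
8879 {
8880 case WAL_SYNC_METHOD_FSYNC:
8881 if (pg_fsync_no_writethrough(fd) != 0)
8882 msg = _("could not fsync file \"%s\": %m");
8883 break;
8884#ifdef HAVE_FSYNC_WRITETHROUGH
8885 case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8886 if (pg_fsync_writethrough(fd) != 0)
8887 msg = _("could not fsync write-through file \"%s\": %m");
8888 break;
8889#endif
8890 case WAL_SYNC_METHOD_FDATASYNC:
8891 if (pg_fdatasync(fd) != 0)
8892 msg = _("could not fdatasync file \"%s\": %m");
8893 break;
8894 case WAL_SYNC_METHOD_OPEN:
8895 case WAL_SYNC_METHOD_OPEN_DSYNC:
8896 /* not reachable */
8897 Assert(false);
8898 break;
8899 default:
8900 ereport(PANIC,
8901 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8902 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8903 break;
8904 }
8905
8906 /* PANIC if failed to fsync */
8907 if (msg)
8908 {
8909 char xlogfname[MAXFNAMELEN];
8910 int save_errno = errno;
8911
8912 XLogFileName(xlogfname, tli, segno, wal_segment_size);
8913 errno = save_errno;
8914 ereport(PANIC,
8915 (errcode_for_file_access(),
8916 errmsg(msg, xlogfname)));
8917 }
8918
8919 pgstat_report_wait_end();
8920
8921 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_FSYNC,
8922 start, 1, 0);
8923}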
#define _(x)
Definition elog.c:91
int pg_fsync_no_writethrough(int fd)
Definition fd.c:438
int pg_fdatasync(int fd)
Definition fd.c:477
int pg_fsync_writethrough(int fd)
Definition fd.c:458
return str start
@ IOOBJECT_WAL
Definition pgstat.h:279
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOOP_FSYNC
Definition pgstat.h:308
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
static int fd(const char *x, int i)
bool track_wal_io_timing
Definition xlog.c:140

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fb(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo * logSegNo 
)
static

Definition at line 8084 of file xlog.c.

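8085{
8086 XLogSegNo currSegNo;
8087 XLogSegNo segno;
8088 XLogRecPtr keep;
8089
8090 XLByteToSeg(recptr, currSegNo, wal_segment_size);
8091 segno = currSegNo;
8092
8093 /* Calculate how many segments are kept by slots. */
8094 keep = XLogGetReplicationSlotMinimumLSN();
8095 if (XLogRecPtrIsValid(keep) && keep < recptr)
8096 {
8097 XLByteToSeg(keep, segno, wal_segment_size);
8098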
8099 /*
8100 * Account for max_slot_wal_keep_size to avoid keeping more than
8101 * configured. However, don't do that during a binary upgrade: if
8102 * slots were to be invalidated because of this, it would not be
8103 * possible to preserve logical ones during the upgrade.
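8104 */
8105 if (max_slot_wal_keep_size_mb >= 0 && !IsBinaryUpgrade)
8106 {
8107 uint64 slot_keep_segs;
8108
8109 slot_keep_segs =
8110 ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
8111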
8112 if (currSegNo - segno > slot_keep_segs)
8113 segno = currSegNo - slot_keep_segs;
8114 }
8115 }
8116
8117 /*
8118 * If WAL summarization is in use, don't remove WAL that has yet to be
8119 * summarized.
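8120 */
8121 keep = GetOldestUnsummarizedLSN(NULL, NULL);
8122 if (XLogRecPtrIsValid(keep))
8123 {
8124 XLogSegNo unsummarized_segno;
8125
8126 XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
8127 if (unsummarized_segno < segno)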
8128 segno = unsummarized_segno;
8129 }
8130
8131 /* but, keep at least wal_keep_size if that's set */
8132 if (wal_keep_size_mb > 0)
8133 {
8134 uint64 keep_segs;
8135
8136 keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
8137 if (currSegNo - segno < keep_segs)
8138 {
8139 /* avoid underflow, don't go below 1 */
8140 if (currSegNo <= keep_segs)
8141 segno = 1;
8142 else
8143 segno = currSegNo - keep_segs;
8144 }
8145 }
8146
8147 /* don't delete WAL segments newer than the calculated segment */
8148 if (segno < *logSegNo)
8149 *logSegNo = segno;
8150}
bool IsBinaryUpgrade
Definition globals.c:121
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition xlog.c:119
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition xlog.c:2683
int max_slot_wal_keep_size_mb
Definition xlog.c:138

References ConvertToXSegs, fb(), GetOldestUnsummarizedLSN(), IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4940 of file xlog.c.

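4941{
4942 Assert(reset || ControlFile == NULL);
4943 ControlFile = palloc_object(ControlFileData);
4944 ReadControlFile();
4945}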
void reset(void)

References Assert, ControlFile, fb(), palloc_object, ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6549 of file xlog.c.

6550{
6551 int oldXLogAllowed = LocalXLogInsertAllowed;
6552
6553 LocalXLogInsertAllowed = 1;
6554
6555 return oldXLogAllowed;
6556}

References fb(), and LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6808 of file xlog.c.

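6809{
6810 long write_msecs,
6811 sync_msecs,
6812 total_msecs,
6813 longest_msecs,
6814 average_msecs;
6815 uint64 average_sync_time;
6816
6817 CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
6818
6819 write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
6820 CheckpointStats.ckpt_sync_t);
6821
6822 sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
6823 CheckpointStats.ckpt_sync_end_t);
6824
6825 /* Accumulate checkpoint timing summary data, in milliseconds. */
6826 PendingCheckpointerStats.write_time += write_msecs;
6827 PendingCheckpointerStats.sync_time += sync_msecs;
6828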
6829 /*
6830 * All of the published timing statistics are accounted for. Only
6831 * continue if a log message is to be written.
6832 */
6833 if (!log_checkpoints)
6834 return;
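6835
6836 total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
6837 CheckpointStats.ckpt_end_t);
6838
6839 /*
6840 * Timing values returned from CheckpointStats are in microseconds.
6841 * Convert to milliseconds for consistent printing.
6842 */
6843 longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6844
6845 average_sync_time = 0;
6846 if (CheckpointStats.ckpt_sync_rels > 0)
6847 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6848 CheckpointStats.ckpt_sync_rels;
6849 average_msecs = (long) ((average_sync_time + 999) / 1000);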
6850
6851 /*
6852 * ControlFileLock is not required to see ControlFile->checkPoint and
6853 * ->checkPointCopy here as we are the only updater of those variables at
6854 * this moment.
6855 */
6856 if (restartpoint)
6857 ereport(LOG,
6858 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6859 "wrote %d SLRU buffers; %d WAL file(s) added, "
6860 "%d removed, %d recycled; write=%ld.%03d s, "
6861 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6862 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
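6863 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6864 CheckpointStats.ckpt_bufs_written,
6865 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6866 CheckpointStats.ckpt_slru_written,
6867 CheckpointStats.ckpt_segs_added,
6868 CheckpointStats.ckpt_segs_removed,
6869 CheckpointStats.ckpt_segs_recycled,
6870 write_msecs / 1000, (int) (write_msecs % 1000),
6871 sync_msecs / 1000, (int) (sync_msecs % 1000),
6872 total_msecs / 1000, (int) (total_msecs % 1000),
6873 CheckpointStats.ckpt_sync_rels,
6874 longest_msecs / 1000, (int) (longest_msecs % 1000),
6875 average_msecs / 1000, (int) (average_msecs % 1000),
6876 (int) (PrevCheckPointDistance / 1024.0),
6877 (int) (CheckPointDistanceEstimate / 1024.0),
6878 LSN_FORMAT_ARGS(ControlFile->checkPoint),
6879 LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6880 else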
6881 ereport(LOG,
6882 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6883 "wrote %d SLRU buffers; %d WAL file(s) added, "
6884 "%d removed, %d recycled; write=%ld.%03d s, "
6885 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6886 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
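6887 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6888 CheckpointStats.ckpt_bufs_written,
6889 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6890 CheckpointStats.ckpt_slru_written,
6891 CheckpointStats.ckpt_segs_added,
6892 CheckpointStats.ckpt_segs_removed,
6893 CheckpointStats.ckpt_segs_recycled,
6894 write_msecs / 1000, (int) (write_msecs % 1000),
6895 sync_msecs / 1000, (int) (sync_msecs % 1000),
6896 total_msecs / 1000, (int) (total_msecs % 1000),
6897 CheckpointStats.ckpt_sync_rels,
6898 longest_msecs / 1000, (int) (longest_msecs % 1000),
6899 average_msecs / 1000, (int) (average_msecs % 1000),
6900 (int) (PrevCheckPointDistance / 1024.0),
6901 (int) (CheckPointDistanceEstimate / 1024.0),
6902 LSN_FORMAT_ARGS(ControlFile->checkPoint),
6903 LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6904}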
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition timestamp.c:1757
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition xlog.h:187
uint64 ckpt_longest_sync
Definition xlog.h:186
TimestampTz ckpt_end_t
Definition xlog.h:176
int ckpt_slru_written
Definition xlog.h:179
PgStat_Counter sync_time
Definition pgstat.h:265
PgStat_Counter write_time
Definition pgstat.h:264
static double CheckPointDistanceEstimate
Definition xlog.c:162
static double PrevCheckPointDistance
Definition xlog.c:163

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), fb(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6776 of file xlog.c.

6777{
6778 if (restartpoint)
6779 ereport(LOG,
6780 /* translator: the placeholders show checkpoint options */
6781 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6782 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6783 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6784 (flags & CHECKPOINT_FAST) ? " fast" : "",
6785 (flags & CHECKPOINT_FORCE) ? " force" : "",
6786 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6787 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6788 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6789 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6790 else
6791 ereport(LOG,
6792 /* translator: the placeholders show checkpoint options */
6793 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6794 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6795 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6796 (flags & CHECKPOINT_FAST) ? " fast" : "",
6797 (flags & CHECKPOINT_FORCE) ? " force" : "",
6798 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6799 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6800 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6801 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6802}
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:154
#define CHECKPOINT_CAUSE_XLOG
Definition xlog.h:159
#define CHECKPOINT_CAUSE_TIME
Definition xlog.h:160

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), fb(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6411 of file xlog.c.

6412{
6413 bool promoted = false;
6414
6415 /*
6416 * Perform a checkpoint to update all our recovery activity to disk.
6417 *
6418 * Note that we write a shutdown checkpoint rather than an on-line one.
6419 * This is not particularly critical, but since we may be assigning a new
6420 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6421 * only changes in shutdown checkpoints, which allows some extra error
6422 * checking in xlog_redo.
6423 *
6424 * In promotion, only create a lightweight end-of-recovery record instead
6425 * of a full checkpoint. A checkpoint is requested later, after we're
6426 * fully out of recovery mode and already accepting queries.
6427 */
6428 if (ArchiveRecoveryRequested && IsUnderPostmaster &&
6429 PromoteIsTriggered())
6430 {
6431 promoted = true;
6432
6433 /*
6434 * Insert a special WAL record to mark the end of recovery, since we
6435 * aren't doing a checkpoint. That means that the checkpointer process
6436 * may likely be in the middle of a time-smoothed restartpoint and
6437 * could continue to be for minutes after this. That sounds strange,
6438 * but the effect is roughly the same and it would be stranger to try
6439 * to come out of the restartpoint and then checkpoint. We request a
6440 * checkpoint later anyway, just for safety.
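6441 */
6442 CreateEndOfRecoveryRecord();
6443 }
6444 else
6445 {
6446 RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
6447 CHECKPOINT_FAST |
6448 CHECKPOINT_WAIT);
6449 }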
6450
6451 return promoted;
6452}
static void CreateEndOfRecoveryRecord(void)
Definition xlog.c:7507
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), fb(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3728 of file xlog.c.

3729{
3730 XLogSegNo _logSegNo;
3731 int lf;
3732 bool added;
3733 char path[MAXPGPATH];
3734 uint64 offset;
3735
3736 if (!XLogCtl->InstallXLogFileSegmentActive)
3737 return; /* unlocked check says no */
3738
3739 XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3740 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3741 if (offset >= (uint32) (0.75 * wal_segment_size))
3742 {
3743 _logSegNo++;
3744 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3745 if (lf >= 0)
3746 close(lf);
3747 if (added)
3748 CheckpointStats.ckpt_segs_added++;
3749 }
3750}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition xlog.c:3230

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, fb(), XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6374 of file xlog.c.

6375{
6376 /*
6377 * We have reached the end of base backup, as indicated by pg_control. The
6378 * data on disk is now consistent (unless minRecoveryPoint is further
6379 * ahead, which can happen if we crashed during previous recovery). Reset
6380 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6381 * make sure we don't allow starting up at an earlier point even if
6382 * recovery is stopped and restarted soon after this.
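6383 */
6384 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6385
6386 if (ControlFile->minRecoveryPoint < EndRecPtr)
6387 {
6388 ControlFile->minRecoveryPoint = EndRecPtr;
6389 ControlFile->minRecoveryPointTLI = tli;
6390 }
6391
6392 ControlFile->backupStartPoint = InvalidXLogRecPtr;
6393 ControlFile->backupEndPoint = InvalidXLogRecPtr;
6394 ControlFile->backupEndRequired = false;
6395 UpdateControlFile();
6396
6397 LWLockRelease(ControlFileLock);
6398}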
XLogRecPtr backupStartPoint
Definition pg_control.h:172
XLogRecPtr backupEndPoint
Definition pg_control.h:173

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, fb(), InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4387 of file xlog.c.

4388{
4389 pg_crc32c crc;
4390 int fd;
4391 char wal_segsz_str[20];
4392 int r;
4393
4394 /*
4395 * Read data...
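4396 */
4397 fd = BasicOpenFile(XLOG_CONTROL_FILE,
4398 O_RDWR | PG_BINARY);
4399 if (fd < 0)
4400 ereport(PANIC,
4401 (errcode_for_file_access(),
4402 errmsg("could not open file \"%s\": %m",
4403 XLOG_CONTROL_FILE)));
4404
4405 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4406 r = read(fd, ControlFile, sizeof(ControlFileData));
4407 if (r != sizeof(ControlFileData))
4408 {
4409 if (r < 0)
4410 ereport(PANIC,
4411 (errcode_for_file_access(),
4412 errmsg("could not read file \"%s\": %m",
4413 XLOG_CONTROL_FILE)));
4414 else
4415 ereport(PANIC,
4416 (errcode(ERRCODE_DATA_CORRUPTED),
4417 errmsg("could not read file \"%s\": read %d of %zu",
4418 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4419 }
4420 pgstat_report_wait_end();
4421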
4422 close(fd);
4423
4424 /*
4425 * Check for expected pg_control format version. If this is wrong, the
4426 * CRC check will likely fail because we'll be checking the wrong number
4427 * of bytes. Complaining about wrong version will probably be more
4428 * enlightening than complaining about wrong CRC.
4429 */
4430
4432 ereport(FATAL,
4434 errmsg("database files are incompatible with server"),
4435 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4436 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4439 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4440
4442 ereport(FATAL,
4444 errmsg("database files are incompatible with server"),
4445 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4446 " but the server was compiled with PG_CONTROL_VERSION %d.",
4448 errhint("It looks like you need to initdb.")));
4449
4450 /* Now check the CRC. */
4455 FIN_CRC32C(crc);
4456
4457 if (!EQ_CRC32C(crc, ControlFile->crc))
4458 ereport(FATAL,
4460 errmsg("incorrect checksum in control file")));
4461
4462 /*
4463 * Do compatibility checking immediately. If the database isn't
4464 * compatible with the backend executable, we want to abort before we can
4465 * possibly do any damage.
4466 */
4468 ereport(FATAL,
4470 errmsg("database files are incompatible with server"),
4471 /* translator: %s is a variable name and %d is its value */
4472 errdetail("The database cluster was initialized with %s %d,"
4473 " but the server was compiled with %s %d.",
4474 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4475 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4476 errhint("It looks like you need to initdb.")));
4478 ereport(FATAL,
4480 errmsg("database files are incompatible with server"),
4481 /* translator: %s is a variable name and %d is its value */
4482 errdetail("The database cluster was initialized with %s %d,"
4483 " but the server was compiled with %s %d.",
4484 "MAXALIGN", ControlFile->maxAlign,
4485 "MAXALIGN", MAXIMUM_ALIGNOF),
4486 errhint("It looks like you need to initdb.")));
4488 ereport(FATAL,
4490 errmsg("database files are incompatible with server"),
4491 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4492 errhint("It looks like you need to initdb.")));
4493 if (ControlFile->blcksz != BLCKSZ)
4494 ereport(FATAL,
4496 errmsg("database files are incompatible with server"),
4497 /* translator: %s is a variable name and %d is its value */
4498 errdetail("The database cluster was initialized with %s %d,"
4499 " but the server was compiled with %s %d.",
4500 "BLCKSZ", ControlFile->blcksz,
4501 "BLCKSZ", BLCKSZ),
4502 errhint("It looks like you need to recompile or initdb.")));
4504 ereport(FATAL,
4506 errmsg("database files are incompatible with server"),
4507 /* translator: %s is a variable name and %d is its value */
4508 errdetail("The database cluster was initialized with %s %d,"
4509 " but the server was compiled with %s %d.",
4510 "RELSEG_SIZE", ControlFile->relseg_size,
4511 "RELSEG_SIZE", RELSEG_SIZE),
4512 errhint("It looks like you need to recompile or initdb.")));
4514 ereport(FATAL,
4516 errmsg("database files are incompatible with server"),
4517 /* translator: %s is a variable name and %d is its value */
4518 errdetail("The database cluster was initialized with %s %d,"
4519 " but the server was compiled with %s %d.",
4520 "SLRU_PAGES_PER_SEGMENT", ControlFile->slru_pages_per_segment,
4521 "SLRU_PAGES_PER_SEGMENT", SLRU_PAGES_PER_SEGMENT),
4522 errhint("It looks like you need to recompile or initdb.")));
4524 ereport(FATAL,
4526 errmsg("database files are incompatible with server"),
4527 /* translator: %s is a variable name and %d is its value */
4528 errdetail("The database cluster was initialized with %s %d,"
4529 " but the server was compiled with %s %d.",
4530 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4531 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4532 errhint("It looks like you need to recompile or initdb.")));
4534 ereport(FATAL,
4536 errmsg("database files are incompatible with server"),
4537 /* translator: %s is a variable name and %d is its value */
4538 errdetail("The database cluster was initialized with %s %d,"
4539 " but the server was compiled with %s %d.",
4540 "NAMEDATALEN", ControlFile->nameDataLen,
4541 "NAMEDATALEN", NAMEDATALEN),
4542 errhint("It looks like you need to recompile or initdb.")));
4544 ereport(FATAL,
4546 errmsg("database files are incompatible with server"),
4547 /* translator: %s is a variable name and %d is its value */
4548 errdetail("The database cluster was initialized with %s %d,"
4549 " but the server was compiled with %s %d.",
4550 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4551 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4552 errhint("It looks like you need to recompile or initdb.")));
4554 ereport(FATAL,
4556 errmsg("database files are incompatible with server"),
4557 /* translator: %s is a variable name and %d is its value */
4558 errdetail("The database cluster was initialized with %s %d,"
4559 " but the server was compiled with %s %d.",
4560 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4561 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4562 errhint("It looks like you need to recompile or initdb.")));
4564 ereport(FATAL,
4566 errmsg("database files are incompatible with server"),
4567 /* translator: %s is a variable name and %d is its value */
4568 errdetail("The database cluster was initialized with %s %d,"
4569 " but the server was compiled with %s %d.",
4570 "LOBLKSIZE", ControlFile->loblksize,
4571 "LOBLKSIZE", (int) LOBLKSIZE),
4572 errhint("It looks like you need to recompile or initdb.")));
4573
4574 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
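4575
4576 wal_segment_size = ControlFile->xlog_seg_size;
4577
4578 if (!IsValidWalSegSize(wal_segment_size))
4579 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4580 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4581 "invalid WAL segment size in control file (%d bytes)",
4582 wal_segment_size,
4583 wal_segment_size),
4584 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4585
4586 snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4587 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4588 PGC_S_DYNAMIC_DEFAULT);
4589
4590 /* check and update variables dependent on wal_segment_size */
4591 if (ConvertToXSegs(min_wal_size_mb, wal_segment_size) < 2)
4592 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4593 /* translator: both %s are GUC names */
4594 errmsg("\"%s\" must be at least twice \"%s\"",
4595 "min_wal_size", "wal_segment_size")));
4596
4597 if (ConvertToXSegs(max_wal_size_mb, wal_segment_size) < 2)
4598 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4599 /* translator: both %s are GUC names */
4600 errmsg("\"%s\" must be at least twice \"%s\"",
4601 "max_wal_size", "wal_segment_size")));
4602
4603 UsableBytesInSegment =
4604 (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4605 (SizeOfXLogLongPHD - SizeOfXLogShortPHD);
4606
4607 CalculateCheckpointSegments();
4608
4609 /* Make the initdb settings visible as GUC variables, too */
4610 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4611 PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
4612}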
#define PG_BINARY
Definition c.h:1281
#define CATALOG_VERSION_NO
Definition catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition elog.c:1193
int BasicOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1086
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition guc.c:4196
@ PGC_S_DYNAMIC_DEFAULT
Definition guc.h:114
@ PGC_INTERNAL
Definition guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition heaptoast.h:84
#define read(a, b, c)
Definition win32.h:13
#define LOBLKSIZE
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define SLRU_PAGES_PER_SEGMENT
#define FLOATFORMAT_VALUE
Definition pg_control.h:203
#define PG_CONTROL_VERSION
Definition pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
uint32 pg_control_version
Definition pg_control.h:127
uint32 xlog_seg_size
Definition pg_control.h:215
uint32 slru_pages_per_segment
Definition pg_control.h:212
uint32 indexMaxKeys
Definition pg_control.h:218
uint32 catalog_version_no
Definition pg_control.h:128
pg_crc32c crc
Definition pg_control.h:242
uint32 toast_max_chunk_size
Definition pg_control.h:220
#define UsableBytesInPage
Definition xlog.c:599
bool DataChecksumsEnabled(void)
Definition xlog.c:4648
static int UsableBytesInSegment
Definition xlog.c:608
int min_wal_size_mb
Definition xlog.c:118
#define XLOG_CONTROL_FILE

References Assert, BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6461 of file xlog.c.

6462{
6463 /*
6464 * We check shared state each time only until we leave recovery mode. We
6465 * can't re-enter recovery, so there's no need to keep checking after the
6466 * shared variable has once been seen false.
6467 */
6469 return false;
6470 else
6471 {
6472 /*
6473 * use volatile pointer to make sure we make a fresh read of the
6474 * shared variable.
6475 */
6476 volatile XLogCtlData *xlogctl = XLogCtl;
6477
6478 LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6479
6480 /*
6481 * Note: We don't need a memory barrier when we're still in recovery.
6482 * We might exit recovery immediately after return, so the caller
6483 * can't rely on 'true' meaning that we're still in recovery anyway.
6484 */
6485
6487 }
6488}
static bool LocalRecoveryInProgress
Definition xlog.c:227

References fb(), LocalRecoveryInProgress, RECOVERY_STATE_DONE, and XLogCtl.

Referenced by amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), DisableLogicalDecoding(), DisableLogicalDecodingIfNecessary(), do_pg_backup_start(), do_pg_backup_stop(), EnableLogicalDecoding(), EnsureLogicalDecodingEnabled(), error_commit_ts_disabled(), ExecCheckpoint(), ExecWaitStmt(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_clear_extended_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), pgstat_report_replslotsync(), PhysicalWakeupLogicalWalSnd(), 
PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_effective_wal_level(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), verify_heapam(), WaitForLSN(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7682 of file xlog.c.

7683{
7684 /*
7685 * Also refrain from creating a restartpoint if we have seen any
7686 * references to non-existent pages. Restarting recovery from the
7687 * restartpoint would not see the references, so we would lose the
7688 * cross-check that the pages belonged to a relation that was dropped
7689 * later.
7690 */
7692 {
7693 elog(DEBUG2,
7694 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
7695 LSN_FORMAT_ARGS(checkPoint->redo));
7696 return;
7697 }
7698
7699 /*
7700 * Copy the checkpoint record to shared memory, so that checkpointer can
7701 * work out the next time it wants to perform a restartpoint.
7702 */
7706 XLogCtl->lastCheckPoint = *checkPoint;
7708}
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
bool XLogHaveInvalidPages(void)
Definition xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9582 of file xlog.c.

9583{
9584 static bool already_done = false;
9585
9586 if (already_done)
9587 return;
9589 already_done = true;
9590}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344

References before_shmem_exit(), BoolGetDatum(), do_pg_abort_backup(), and fb().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3978 of file xlog.c.

3979{
3980 DIR *xldir;
3981 struct dirent *xlde;
3982 char switchseg[MAXFNAMELEN];
3986
3987 /*
3988 * Initialize info about where to begin the work. This will recycle,
3989 * somewhat arbitrarily, 10 future segments.
3990 */
3994
3995 /*
3996 * Construct a filename of the last segment to be kept.
3997 */
3999
4000 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
4001 switchseg);
4002
4004
4005 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4006 {
4007 /* Ignore files that are not XLOG segments */
4008 if (!IsXLogFileName(xlde->d_name))
4009 continue;
4010
4011 /*
4012 * Remove files that are on a timeline older than the new one we're
4013 * switching to, but with a segment number >= the first segment on the
4014 * new timeline.
4015 */
4016 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
4017 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
4018 {
4019 /*
4020 * If the file has already been marked as .ready, however, don't
4021 * remove it yet. It should be OK to remove it - files that are
4022 * not part of our timeline history are not required for recovery
4023 * - but it seems safer to let them be archived and removed later.
4024 */
4025 if (!XLogArchiveIsReady(xlde->d_name))
4027 }
4028 }
4029
4030 FreeDir(xldir);
4031}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition xlog.c:4047
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3903 of file xlog.c.

3905{
3906 DIR *xldir;
3907 struct dirent *xlde;
3908 char lastoff[MAXFNAMELEN];
3911
3912 /* Initialize info about where to try to recycle to */
3915
3916 /*
3917 * Construct a filename of the last segment to be kept. The timeline ID
3918 * doesn't matter, we ignore that in the comparison. (During recovery,
3919 * InsertTimeLineID isn't set, so we can't use that.)
3920 */
3922
3923 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3924 lastoff);
3925
3927
3928 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3929 {
3930 /* Ignore files that are not XLOG segments */
3931 if (!IsXLogFileName(xlde->d_name) &&
3932 !IsPartialXLogFileName(xlde->d_name))
3933 continue;
3934
3935 /*
3936 * We ignore the timeline part of the XLOG segment identifiers in
3937 * deciding whether a segment is still needed. This ensures that we
3938 * won't prematurely remove a segment from a parent timeline. We could
3939 * probably be a little more proactive about removing segments of
3940 * non-parent timelines, but that would be a whole lot more
3941 * complicated.
3942 *
3943 * We use the alphanumeric sorting property of the filenames to decide
3944 * which ones are earlier than the lastoff segment.
3945 */
3946 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3947 {
3948 if (XLogArchiveCheckDone(xlde->d_name))
3949 {
3950 /* Update the last removed location in shared memory first */
3951 UpdateLastRemovedPtr(xlde->d_name);
3952
3954 }
3955 }
3956 }
3957
3958 FreeDir(xldir);
3959}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition xlog.c:2234
static void UpdateLastRemovedPtr(char *filename)
Definition xlog.c:3850
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3870 of file xlog.c.

3871{
3872 DIR *xldir;
3873 struct dirent *xlde;
3874
3875 elog(DEBUG2, "removing all temporary WAL segments");
3876
3878 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3879 {
3880 char path[MAXPGPATH];
3881
3882 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3883 continue;
3884
3885 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3886 unlink(path);
3887 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3888 }
3889 FreeDir(xldir);
3890}

References AllocateDir(), DEBUG2, elog, fb(), FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4047 of file xlog.c.

4050{
4051 char path[MAXPGPATH];
4052#ifdef WIN32
4053 char newpath[MAXPGPATH];
4054#endif
4055 const char *segname = segment_de->d_name;
4056
4057 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4058
4059 /*
4060 * Before deleting the file, see if it can be recycled as a future log
4061 * segment. Only recycle normal files, because we don't want to recycle
4062 * symbolic links pointing to a separate archive directory.
4063 */
4064 if (wal_recycle &&
4066 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4067 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4069 true, recycleSegNo, insertTLI))
4070 {
4072 (errmsg_internal("recycled write-ahead log file \"%s\"",
4073 segname)));
4075 /* Needn't recheck that slot on future iterations */
4076 (*endlogSegNo)++;
4077 }
4078 else
4079 {
4080 /* No need for any more future segments, or recycling failed ... */
4081 int rc;
4082
4084 (errmsg_internal("removing write-ahead log file \"%s\"",
4085 segname)));
4086
4087#ifdef WIN32
4088
4089 /*
4090 * On Windows, if another process (e.g. another backend) holds the file
4091 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4092 * will still show up in directory listing until the last handle is
4093 * closed. To avoid confusing the lingering deleted file for a live
4094 * WAL file that needs to be archived, rename it before deleting it.
4095 *
4096 * If another process holds the file open without FILE_SHARE_DELETE
4097 * flag, rename will fail. We'll try again at the next checkpoint.
4098 */
4099 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4100 if (rename(path, newpath) != 0)
4101 {
4102 ereport(LOG,
4104 errmsg("could not rename file \"%s\": %m",
4105 path)));
4106 return;
4107 }
4108 rc = durable_unlink(newpath, LOG);
4109#else
4110 rc = durable_unlink(path, LOG);
4111#endif
4112 if (rc != 0)
4113 {
4114 /* Message already logged by durable_unlink() */
4115 return;
4116 }
4118 }
4119
4121}
@ PGFILETYPE_REG
Definition file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition xlog.c:3601
bool wal_recycle
Definition xlog.c:131

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), fb(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8193 of file xlog.c.

8194{
8196
8197 /* XLOG SWITCH has no data */
8199
8200 if (mark_unimportant)
8203
8204 return RecPtr;
8205}
#define XLOG_SWITCH
Definition pg_control.h:73
#define XLOG_MARK_UNIMPORTANT
Definition xlog.h:166
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460

References fb(), XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1115 of file xlog.c.

1117{
1122
1123 size = MAXALIGN(size);
1124
1125 /* All (non xlog-switch) records should contain data. */
1126 Assert(size > SizeOfXLogRecord);
1127
1128 /*
1129 * The duration the spinlock needs to be held is minimized by minimizing
1130 * the calculations that have to be done while holding the lock. The
1131 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1132 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1133 * page headers. The mapping between "usable" byte positions and physical
1134 * positions (XLogRecPtrs) can be done outside the locked region, and
1135 * because the usable byte position doesn't include any headers, reserving
1136 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1137 */
1138 SpinLockAcquire(&Insert->insertpos_lck);
1139
1140 startbytepos = Insert->CurrBytePos;
1141 endbytepos = startbytepos + size;
1142 prevbytepos = Insert->PrevBytePos;
1143 Insert->CurrBytePos = endbytepos;
1144 Insert->PrevBytePos = startbytepos;
1145
1146 SpinLockRelease(&Insert->insertpos_lck);
1147
1151
1152 /*
1153 * Check that the conversions between "usable byte positions" and
1154 * XLogRecPtrs work consistently in both directions.
1155 */
1159}
#define MAXALIGN(LEN)
Definition c.h:836
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition xlog.c:1905
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition xlog.c:1948

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1171 of file xlog.c.

1172{
1178 XLogRecPtr ptr;
1180
1181 /*
1182 * These calculations are a bit heavy-weight to be done while holding a
1183 * spinlock, but since we're holding all the WAL insertion locks, there
1184 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1185 * compete for it, but that's not called very frequently.
1186 */
1187 SpinLockAcquire(&Insert->insertpos_lck);
1188
1189 startbytepos = Insert->CurrBytePos;
1190
1192 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1193 {
1194 SpinLockRelease(&Insert->insertpos_lck);
1195 *EndPos = *StartPos = ptr;
1196 return false;
1197 }
1198
1199 endbytepos = startbytepos + size;
1200 prevbytepos = Insert->PrevBytePos;
1201
1204
1207 {
1208 /* consume the rest of the segment */
1209 *EndPos += segleft;
1211 }
1212 Insert->CurrBytePos = endbytepos;
1213 Insert->PrevBytePos = startbytepos;
1214
1215 SpinLockRelease(&Insert->insertpos_lck);
1216
1218
1223
1224 return true;
1225}

References Assert, fb(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ ResetInstallXLogFileSegmentActive()

◆ SetInstallXLogFileSegmentActive()

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4885 of file xlog.c.

4886{
4887 if (XLogArchivingActive())
4888 return XLogArchiveCommand;
4889 else
4890 return "(disabled)";
4891}
char * XLogArchiveCommand
Definition xlog.c:123

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_effective_wal_level()

const char * show_effective_wal_level ( void  )

Definition at line 4912 of file xlog.c.

4913{
4915 return "minimal";
4916
4917 /*
4918 * During recovery, effective_wal_level reflects the primary's
4919 * configuration rather than the local wal_level value.
4920 */
4921 if (RecoveryInProgress())
4922 return IsXLogLogicalInfoEnabled() ? "logical" : "replica";
4923
4924 return XLogLogicalInfoActive() ? "logical" : "replica";
4925}
bool IsXLogLogicalInfoEnabled(void)
Definition logicalctl.c:220
#define XLogLogicalInfoActive()
Definition xlog.h:136

References IsXLogLogicalInfoEnabled(), RecoveryInProgress(), wal_level, WAL_LEVEL_MINIMAL, and XLogLogicalInfoActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 4897 of file xlog.c.

4898{
4899 /*
4900 * We display the actual state based on shared memory, so that this GUC
4901 * reports up-to-date state if examined intra-query. The underlying
4902 * variable (in_hot_standby_guc) changes only when we transmit a new value
4903 * to the client.
4904 */
4905 return RecoveryInProgress() ? "on" : "off";
4906}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6729 of file xlog.c.

6730{
6731 /*
6732 * We should have an aux process resource owner to use, and we should not
6733 * be in a transaction that's installed some other resowner.
6734 */
6739
6740 /* Don't be chatty in standalone mode */
6742 (errmsg("shutting down")));
6743
6744 /*
6745 * Signal walsenders to move to stopping state.
6746 */
6748
6749 /*
6750 * Wait for WAL senders to be in stopping state. This prevents commands
6751 * from writing new WAL.
6752 */
6754
6755 if (RecoveryInProgress())
6757 else
6758 {
6759 /*
6760 * If archiving is enabled, rotate the last XLOG file so that all the
6761 * remaining records are archived (postmaster wakes up the archiver
6762 * process one more time at the end of shutdown). The checkpoint
6763 * record will go to the next XLOG file and won't be archived (yet).
6764 */
6765 if (XLogArchivingActive())
6766 RequestXLogSwitch(false);
6767
6769 }
6770}
bool IsPostmasterEnvironment
Definition globals.c:119
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition resowner.c:176
void WalSndInitStopping(void)
Definition walsender.c:3879
void WalSndWaitStopping(void)
Definition walsender.c:3905
bool CreateRestartPoint(int flags)
Definition xlog.c:7722
bool CreateCheckPoint(int flags)
Definition xlog.c:7016

References Assert, AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), fb(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5518 of file xlog.c.

5519{
5521 CheckPoint checkPoint;
5522 bool wasShutdown;
5523 bool didCrash;
5524 bool haveTblspcMap;
5525 bool haveBackupLabel;
5534 bool promoted = false;
5535 char timebuf[128];
5536
5537 /*
5538 * We should have an aux process resource owner to use, and we should not
5539 * be in a transaction that's installed some other resowner.
5540 */
5545
5546 /*
5547 * Check that contents look valid.
5548 */
5550 ereport(FATAL,
5552 errmsg("control file contains invalid checkpoint location")));
5553
5554 switch (ControlFile->state)
5555 {
5556 case DB_SHUTDOWNED:
5557
5558 /*
5559 * This is the expected case, so don't be chatty in standalone
5560 * mode
5561 */
5563 (errmsg("database system was shut down at %s",
5564 str_time(ControlFile->time,
5565 timebuf, sizeof(timebuf)))));
5566 break;
5567
5569 ereport(LOG,
5570 (errmsg("database system was shut down in recovery at %s",
5572 timebuf, sizeof(timebuf)))));
5573 break;
5574
5575 case DB_SHUTDOWNING:
5576 ereport(LOG,
5577 (errmsg("database system shutdown was interrupted; last known up at %s",
5579 timebuf, sizeof(timebuf)))));
5580 break;
5581
5583 ereport(LOG,
5584 (errmsg("database system was interrupted while in recovery at %s",
5586 timebuf, sizeof(timebuf))),
5587 errhint("This probably means that some data is corrupted and"
5588 " you will have to use the last backup for recovery.")));
5589 break;
5590
5592 ereport(LOG,
5593 (errmsg("database system was interrupted while in recovery at log time %s",
5595 timebuf, sizeof(timebuf))),
5596 errhint("If this has occurred more than once some data might be corrupted"
5597 " and you might need to choose an earlier recovery target.")));
5598 break;
5599
5600 case DB_IN_PRODUCTION:
5601 ereport(LOG,
5602 (errmsg("database system was interrupted; last known up at %s",
5604 timebuf, sizeof(timebuf)))));
5605 break;
5606
5607 default:
5608 ereport(FATAL,
5610 errmsg("control file contains invalid database cluster state")));
5611 }
5612
5613 /* This is just to allow attaching to startup process with a debugger */
5614#ifdef XLOG_REPLAY_DELAY
5616 pg_usleep(60000000L);
5617#endif
5618
5619 /*
5620 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5621 * In cases where someone has performed a copy for PITR, these directories
5622 * may have been excluded and need to be re-created.
5623 */
5625
5626 /* Set up timeout handler needed to report startup progress. */
5630
5631 /*----------
5632 * If we previously crashed, perform a couple of actions:
5633 *
5634 * - The pg_wal directory may still include some temporary WAL segments
5635 * used when creating a new segment, so perform some clean up to not
5636 * bloat this path. This is done first as there is no point to sync
5637 * this temporary data.
5638 *
5639 * - There might be data which we had written, intending to fsync it, but
5640 * which we had not actually fsync'd yet. Therefore, a power failure in
5641 * the near future might cause earlier unflushed writes to be lost, even
5642 * though more recent data written to disk from here on would be
5643 * persisted. To avoid that, fsync the entire data directory.
5644 */
5647 {
5650 didCrash = true;
5651 }
5652 else
5653 didCrash = false;
5654
5655 /*
5656 * Prepare for WAL recovery if needed.
5657 *
5658 * InitWalRecovery analyzes the control file and the backup label file, if
5659 * any. It updates the in-memory ControlFile buffer according to the
5660 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5661 * It also applies the tablespace map file, if any.
5662 */
5665 checkPoint = ControlFile->checkPointCopy;
5666
5667 /* initialize shared memory variables from the checkpoint record */
5668 TransamVariables->nextXid = checkPoint.nextXid;
5669 TransamVariables->nextOid = checkPoint.nextOid;
5671 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5672 AdvanceOldestClogXid(checkPoint.oldestXid);
5673 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5674 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
5676 checkPoint.newestCommitTsXid);
5677
5678 /*
5679 * Clear out any old relcache cache files. This is *necessary* if we do
5680 * any WAL replay, since that would probably result in the cache files
5681 * being out of sync with database reality. In theory we could leave them
5682 * in place if the database had been cleanly shut down, but it seems
5683 * safest to just remove them always and let them be rebuilt during the
5684 * first backend startup. These files need to be removed from all
5685 * directories including pg_tblspc; however, the symlinks are created only
5686 * after reading the tablespace_map file in case of archive recovery from
5687 * backup, so we need to clear old relcache files here, after creating the
5688 * symlinks.
5689 */
5691
5692 /*
5693 * Initialize replication slots, before there's a chance to remove
5694 * required resources.
5695 */
5697
5698 /*
5699 * Startup the logical decoding status with the last status stored in the
5700 * checkpoint record.
5701 */
5703
5704 /*
5705 * Start up logical state; this needs to be set up now so we have proper data
5706 * during crash recovery.
5707 */
5709
5710 /*
5711 * Startup CLOG. This must be done after TransamVariables->nextXid has
5712 * been initialized and before we accept connections or begin WAL replay.
5713 */
5714 StartupCLOG();
5715
5716 /*
5717 * Startup MultiXact. We need to do this early to be able to replay
5718 * truncations.
5719 */
5721
5722 /*
5723 * Ditto for commit timestamps. Activate the facility if the setting is
5724 * enabled in the control file, as there should be no tracking of commit
5725 * timestamps done when the setting was disabled. This facility can be
5726 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5727 */
5730
5731 /*
5732 * Recover knowledge about replay progress of known replication partners.
5733 */
5735
5736 /*
5737 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5738 * control file. On recovery, all unlogged relations are blown away, so
5739 * the unlogged LSN counter can be reset too.
5740 */
5744 else
5747
5748 /*
5749 * Copy any missing timeline history files between 'now' and the recovery
5750 * target timeline from archive to pg_wal. While we don't need those files
5751 * ourselves - the history file of the recovery target timeline covers all
5752 * the previous timelines in the history too - a cascading standby server
5753 * might be interested in them. Or, if you archive the WAL from this
5754 * server to a different archive than the primary, it'd be good for all
5755 * the history files to get archived there after failover, so that you can
5756 * use one of the old timelines as a PITR target. Timeline history files
5757 * are small, so it's better to copy them unnecessarily than not copy them
5758 * and regret later.
5759 */
5761
5762 /*
5763 * Before running in recovery, scan pg_twophase and fill in its status to
5764 * be able to work on entries generated by redo. Doing a scan before
5765 * taking any recovery action has the merit to discard any 2PC files that
5766 * are newer than the first record to replay, saving from any conflicts at
5767 * replay. This avoids as well any subsequent scans when doing recovery
5768 * of the on-disk two-phase data.
5769 */
5771
5772 /*
5773 * When starting with crash recovery, reset pgstat data - it might not be
5774 * valid. Otherwise restore pgstat data. It's safe to do this here,
5775 * because postmaster will not yet have started any other processes.
5776 *
5777 * NB: Restoring replication slot stats relies on slot state to have
5778 * already been restored from disk.
5779 *
5780 * TODO: With a bit of extra work we could just start with a pgstat file
5781 * associated with the checkpoint redo location we're starting from.
5782 */
5783 if (didCrash)
5785 else
5787
5789
5792
5793 /* REDO */
5794 if (InRecovery)
5795 {
5796 /* Initialize state for RecoveryInProgress() */
5800 else
5803
5804 /*
5805 * Update pg_control to show that we are recovering and to show the
5806 * selected checkpoint as the place we are starting from. We also mark
5807 * pg_control with any minimum recovery stop point obtained from a
5808 * backup history file.
5809 *
5810 * No need to hold ControlFileLock yet, we aren't up far enough.
5811 */
5813
5814 /*
5815 * If there was a backup label file, it's done its job and the info
5816 * has now been propagated into pg_control. We must get rid of the
5817 * label file so that if we crash during recovery, we'll pick up at
5818 * the latest recovery restartpoint instead of going all the way back
5819 * to the backup start point. It seems prudent though to just rename
5820 * the file out of the way rather than delete it completely.
5821 */
5822 if (haveBackupLabel)
5823 {
5826 }
5827
5828 /*
5829 * If there was a tablespace_map file, it's done its job and the
5830 * symlinks have been created. We must get rid of the map file so
5831 * that if we crash during recovery, we don't create symlinks again.
5832 * It seems prudent though to just rename the file out of the way
5833 * rather than delete it completely.
5834 */
5835 if (haveTblspcMap)
5836 {
5839 }
5840
5841 /*
5842 * Initialize our local copy of minRecoveryPoint. When doing crash
5843 * recovery we want to replay up to the end of WAL. Particularly, in
5844 * the case of a promoted standby minRecoveryPoint value in the
5845 * control file is only updated after the first checkpoint. However,
5846 * if the instance crashes before the first post-recovery checkpoint
5847 * is completed then recovery will use a stale location causing the
5848 * startup process to think that there are still invalid page
5849 * references when checking for data consistency.
5850 */
5852 {
5855 }
5856 else
5857 {
5860 }
5861
5862 /* Check that the GUCs used to generate the WAL allow recovery */
5864
5865 /*
5866 * We're in recovery, so unlogged relations may be trashed and must be
5867 * reset. This should be done BEFORE allowing Hot Standby
5868 * connections, so that read-only backends don't try to read whatever
5869 * garbage is left over from before.
5870 */
5872
5873 /*
5874 * Likewise, delete any saved transaction snapshot files that got left
5875 * behind by crashed backends.
5876 */
5878
5879 /*
5880 * Initialize for Hot Standby, if enabled. We won't let backends in
5881 * yet, not until we've reached the min recovery point specified in
5882 * control file and we've established a recovery snapshot from a
5883 * running-xacts WAL record.
5884 */
5886 {
5887 TransactionId *xids;
5888 int nxids;
5889
5891 (errmsg_internal("initializing for hot standby")));
5892
5894
5895 if (wasShutdown)
5897 else
5898 oldestActiveXID = checkPoint.oldestActiveXid;
5900
5901 /* Tell procarray about the range of xids it has to deal with */
5903
5904 /*
5905 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5906 * have already been started up and other SLRUs are not maintained
5907 * during recovery and need not be started yet.
5908 */
5910
5911 /*
5912 * If we're beginning at a shutdown checkpoint, we know that
5913 * nothing was running on the primary at this point. So fake-up an
5914 * empty running-xacts record and use that here and now. Recover
5915 * additional standby state for prepared transactions.
5916 */
5917 if (wasShutdown)
5918 {
5920 TransactionId latestCompletedXid;
5921
5922 /* Update pg_subtrans entries for any prepared transactions */
5924
5925 /*
5926 * Construct a RunningTransactions snapshot representing a
5927 * shut down server, with only prepared transactions still
5928 * alive. We're never overflowed at this point because all
5929 * subxids are listed with their parent prepared transactions.
5930 */
5931 running.xcnt = nxids;
5932 running.subxcnt = 0;
5934 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5936 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5937 TransactionIdRetreat(latestCompletedXid);
5938 Assert(TransactionIdIsNormal(latestCompletedXid));
5939 running.latestCompletedXid = latestCompletedXid;
5940 running.xids = xids;
5941
5943 }
5944 }
5945
5946 /*
5947 * We're all set for replaying the WAL now. Do it.
5948 */
5950 performedWalRecovery = true;
5951 }
5952 else
5953 performedWalRecovery = false;
5954
5955 /*
5956 * Finish WAL recovery.
5957 */
5959 EndOfLog = endOfRecoveryInfo->endOfLog;
5960 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5961 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5962 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5963
5964 /*
5965 * Reset ps status display, so that no information related to recovery
5966 * shows up.
5967 */
5968 set_ps_display("");
5969
5970 /*
5971 * When recovering from a backup (we are in recovery, and archive recovery
5972 * was requested), complain if we did not roll forward far enough to reach
5973 * the point where the database is consistent. For regular online
5974 * backup-from-primary, that means reaching the end-of-backup WAL record
5975 * (at which point we reset backupStartPoint to be Invalid), for
5976 * backup-from-replica (which can't inject records into the WAL stream),
5977 * that point is when we reach the minRecoveryPoint in pg_control (which
5978 * we purposefully copy last when backing up from a replica). For
5979 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5980 * or snapshot-style backups (which don't), backupEndRequired will be set
5981 * to false.
5982 *
5983 * Note: it is indeed okay to look at the local variable
5984 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5985 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5986 * been advanced beyond the WAL we processed.
5987 */
5988 if (InRecovery &&
5991 {
5992 /*
5993 * Ran off end of WAL before reaching end-of-backup WAL record, or
5994 * minRecoveryPoint. That's a bad sign, indicating that you tried to
5995 * recover from an online backup but never called pg_backup_stop(), or
5996 * you didn't archive all the WAL needed.
5997 */
5999 {
6001 ereport(FATAL,
6003 errmsg("WAL ends before end of online backup"),
6004 errhint("All WAL generated while online backup was taken must be available at recovery.")));
6005 else
6006 ereport(FATAL,
6008 errmsg("WAL ends before consistent recovery point")));
6009 }
6010 }
6011
6012 /*
6013 * Reset unlogged relations to the contents of their INIT fork. This is
6014 * done AFTER recovery is complete so as to include any unlogged relations
6015 * created during recovery, but BEFORE recovery is marked as having
6016 * completed successfully. Otherwise we'd not retry if any of the post
6017 * end-of-recovery steps fail.
6018 */
6019 if (InRecovery)
6021
6022 /*
6023 * Pre-scan prepared transactions to find out the range of XIDs present.
6024 * This information is not quite needed yet, but it is positioned here so
6025 * that potential problems are detected before any on-disk change is done.
6026 */
6028
6029 /*
6030 * Allow ordinary WAL segment creation before possibly switching to a new
6031 * timeline, which creates a new segment, and after the last ReadRecord().
6032 */
6034
6035 /*
6036 * Consider whether we need to assign a new timeline ID.
6037 *
6038 * If we did archive recovery, we always assign a new ID. This handles a
6039 * couple of issues. If we stopped short of the end of WAL during
6040 * recovery, then we are clearly generating a new timeline and must assign
6041 * it a unique new ID. Even if we ran to the end, modifying the current
6042 * last segment is problematic because it may result in trying to
6043 * overwrite an already-archived copy of that segment, and we encourage
6044 * DBAs to make their archive_commands reject that. We can dodge the
6045 * problem by making the new active segment have a new timeline ID.
6046 *
6047 * In a normal crash recovery, we can just extend the timeline we were in.
6048 */
6049 newTLI = endOfRecoveryInfo->lastRecTLI;
6051 {
6053 ereport(LOG,
6054 (errmsg("selected new timeline ID: %u", newTLI)));
6055
6056 /*
6057 * Make a writable copy of the last WAL segment. (Note that we also
6058 * have a copy of the last block of the old WAL in
6059 * endOfRecoveryInfo->lastPage; we will use that below.)
6060 */
6062
6063 /*
6064 * Remove the signal files out of the way, so that we don't
6065 * accidentally re-enter archive recovery mode in a subsequent crash.
6066 */
6067 if (endOfRecoveryInfo->standby_signal_file_found)
6069
6070 if (endOfRecoveryInfo->recovery_signal_file_found)
6072
6073 /*
6074 * Write the timeline history file, and have it archived. After this
6075 * point (or rather, as soon as the file is archived), the timeline
6076 * will appear as "taken" in the WAL archive and to any standby
6077 * servers. If we crash before actually switching to the new
6078 * timeline, standby servers will nevertheless think that we switched
6079 * to the new timeline, and will try to connect to the new timeline.
6080 * To minimize the window for that, try to do as little as possible
6081 * between here and writing the end-of-recovery record.
6082 */
6084 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6085
6086 ereport(LOG,
6087 (errmsg("archive recovery complete")));
6088 }
6089
6090 /* Save the selected TimeLineID in shared memory, too */
6095
6096 /*
6097 * Actually, if WAL ended in an incomplete record, skip the parts that
6098 * made it through and start writing after the portion that persisted.
6099 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6100 * we'll do as soon as we're open for writing new WAL.)
6101 */
6103 {
6104 /*
6105 * We should only have a missingContrecPtr if we're not switching to a
6106 * new timeline. When a timeline switch occurs, WAL is copied from the
6107 * old timeline to the new only up to the end of the last complete
6108 * record, so there can't be an incomplete WAL record that we need to
6109 * disregard.
6110 */
6111 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6114 }
6115
6116 /*
6117 * Prepare to write WAL starting at EndOfLog location, and init xlog
6118 * buffer cache using the block containing the last record from the
6119 * previous incarnation.
6120 */
6121 Insert = &XLogCtl->Insert;
6123 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6124
6125 /*
6126 * Tricky point here: lastPage contains the *last* block that the LastRec
6127 * record spans, not the one it starts in. The last block is indeed the
6128 * one we want to use.
6129 */
6130 if (EndOfLog % XLOG_BLCKSZ != 0)
6131 {
6132 char *page;
6133 int len;
6134 int firstIdx;
6135
6137 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6139
6140 /* Copy the valid part of the last block, and zero the rest */
6141 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6142 memcpy(page, endOfRecoveryInfo->lastPage, len);
6143 memset(page + len, 0, XLOG_BLCKSZ - len);
6144
6147 }
6148 else
6149 {
6150 /*
6151 * There is no partial block to copy. Just set InitializedUpTo, and
6152 * let the first attempt to insert a log record initialize the next
6153 * buffer.
6154 */
6156 }
6157
6158 /*
6159 * Update local and shared status. This is OK to do without any locks
6160 * because no other process can be reading or writing WAL yet.
6161 */
6168
6169 /*
6170 * Preallocate additional log files, if wanted.
6171 */
6173
6174 /*
6175 * Okay, we're officially UP.
6176 */
6177 InRecovery = false;
6178
6179 /* start the archive_timeout timer and LSN running */
6182
6183 /* also initialize latestCompletedXid, to nextXid - 1 */
6188
6189 /*
6190 * Start up subtrans, if not already done for hot standby. (commit
6191 * timestamps are started below, if necessary.)
6192 */
6195
6196 /*
6197 * Perform end of recovery actions for any SLRUs that need it.
6198 */
6199 TrimCLOG();
6200 TrimMultiXact();
6201
6202 /*
6203 * Reload shared-memory state for prepared transactions. This needs to
6204 * happen before renaming the last partial segment of the old timeline as
6205 * it may be possible that we have to recover some transactions from it.
6206 */
6208
6209 /* Shut down xlogreader */
6211
6212 /* Enable WAL writes for this backend only. */
6214
6215 /* If necessary, write overwrite-contrecord before doing anything else */
6217 {
6220 }
6221
6222 /*
6223 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6224 * record before resource manager writes cleanup WAL records or checkpoint
6225 * record is written.
6226 */
6227 Insert->fullPageWrites = lastFullPageWrites;
6229
6230 /*
6231 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6232 */
6235
6236 /*
6237 * If any of the critical GUCs have changed, log them before we allow
6238 * backends to write WAL.
6239 */
6241
6242 /* If this is archive recovery, perform post-recovery cleanup actions. */
6245
6246 /*
6247 * Local WAL inserts enabled, so it's time to finish initialization of
6248 * commit timestamp.
6249 */
6251
6252 /*
6253 * Update logical decoding status in shared memory and write an
6254 * XLOG_LOGICAL_DECODING_STATUS_CHANGE, if necessary.
6255 */
6257
6258 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6259 if (endOfRecoveryInfo->lastPage)
6260 pfree(endOfRecoveryInfo->lastPage);
6261 pfree(endOfRecoveryInfo->recoveryStopReason);
6263
6264 /*
6265 * All done with end-of-recovery actions.
6266 *
6267 * Now allow backends to write WAL and update the control file status in
6268 * consequence. SharedRecoveryState, that controls if backends can write
6269 * WAL, is updated while holding ControlFileLock to prevent other backends
6270 * from looking at an inconsistent state of the control file in shared memory.
6271 * There is still a small window during which backends can write WAL and
6272 * the control file is still referring to a system not in DB_IN_PRODUCTION
6273 * state while looking at the on-disk control file.
6274 *
6275 * Also, we use info_lck to update SharedRecoveryState to ensure that
6276 * there are no race conditions concerning visibility of other recent
6277 * updates to shared memory.
6278 */
6281
6285
6288
6289 /*
6290 * Wake up the checkpointer process as there might be a request to disable
6291 * logical decoding by concurrent slot drop.
6292 */
6294
6295 /*
6296 * Wake up all waiters. They need to report an error that recovery was
6297 * ended before reaching the target LSN.
6298 */
6302
6303 /*
6304 * Shutdown the recovery environment. This must occur after
6305 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6306 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6307 * any session building a snapshot will not rely on KnownAssignedXids as
6308 * RecoveryInProgress() would return false at this stage. This is
6309 * particularly critical for prepared 2PC transactions, that would still
6310 * need to be included in snapshots once recovery has ended.
6311 */
6314
6315 /*
6316 * If there were cascading standby servers connected to us, nudge any wal
6317 * sender processes to notice that we've been promoted.
6318 */
6319 WalSndWakeup(true, true);
6320
6321 /*
6322 * If this was a promotion, request an (online) checkpoint now. This isn't
6323 * required for consistency, but the last restartpoint might be far back,
6324 * and in case of a crash, recovering from it might take longer than is
6325 * appropriate now that we're not in standby mode anymore.
6326 */
6327 if (promoted)
6329}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:504
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition timeline.c:304
void startup_progress_timeout_handler(void)
Definition startup.c:303
uint32 TransactionId
Definition c.h:676
void WakeupCheckpointer(void)
void StartupCLOG(void)
Definition clog.c:843
void TrimCLOG(void)
Definition clog.c:858
void StartupCommitTs(void)
Definition commit_ts.c:608
void CompleteCommitTsInitialization(void)
Definition commit_ts.c:618
void SyncDataDirectory(void)
Definition fd.c:3590
void UpdateLogicalDecodingStatusEndOfRecovery(void)
Definition logicalctl.c:553
void StartupLogicalDecodingStatus(bool last_status)
Definition logicalctl.c:146
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
void TrimMultiXact(void)
Definition multixact.c:1834
void StartupMultiXact(void)
Definition multixact.c:1809
void StartupReplicationOrigin(void)
Definition origin.c:728
@ DB_IN_PRODUCTION
Definition pg_control.h:99
@ DB_IN_CRASH_RECOVERY
Definition pg_control.h:97
const void size_t len
void pgstat_restore_stats(void)
Definition pgstat.c:507
void pgstat_discard_stats(void)
Definition pgstat.c:519
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition procarray.c:1056
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition procarray.c:1025
static void set_ps_display(const char *activity)
Definition ps_status.h:40
void ResetUnloggedRelations(int op)
Definition reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition reinit.h:27
void RelationCacheInitFileRemove(void)
Definition relcache.c:6895
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition slot.c:2378
void DeleteAllExportedSnapshotFiles(void)
Definition snapmgr.c:1587
void InitRecoveryTransactionEnvironment(void)
Definition standby.c:95
void ShutdownRecoveryTransactionEnvironment(void)
Definition standby.c:161
@ SUBXIDS_IN_SUBTRANS
Definition standby.h:82
TransactionId oldestRunningXid
Definition standby.h:92
TransactionId nextXid
Definition standby.h:91
TransactionId latestCompletedXid
Definition standby.h:95
subxids_array_status subxid_status
Definition standby.h:90
TransactionId * xids
Definition standby.h:97
FullTransactionId latestCompletedXid
Definition transam.h:238
pg_atomic_uint64 logInsertResult
Definition xlog.c:473
uint64 PrevBytePos
Definition xlog.c:411
XLogRecPtr Flush
Definition xlog.c:325
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition subtrans.c:283
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition timeout.h:38
#define TransactionIdRetreat(dest)
Definition transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition transam.h:103
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
void RecoverPreparedTransactions(void)
Definition twophase.c:2083
void restoreTwoPhaseData(void)
Definition twophase.c:1904
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1966
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2045
void WalSndWakeup(bool physical, bool logical)
Definition walsender.c:3800
void UpdateFullPageWrites(void)
Definition xlog.c:8299
static void ValidateXLOGDirectoryStructure(void)
Definition xlog.c:4137
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition xlog.c:7572
static void XLogReportParameters(void)
Definition xlog.c:8236
static bool PerformRecoveryXLogAction(void)
Definition xlog.c:6411
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition xlog.c:5378
static bool lastFullPageWrites
Definition xlog.c:220
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition xlog.c:5303
static void CheckRequiredParameterValues(void)
Definition xlog.c:5474
static void RemoveTempXlogFiles(void)
Definition xlog.c:3870
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition xlog.c:5290
#define TABLESPACE_MAP_OLD
Definition xlog.h:322
#define TABLESPACE_MAP
Definition xlog.h:321
#define STANDBY_SIGNAL_FILE
Definition xlog.h:317
#define BACKUP_LABEL_OLD
Definition xlog.h:319
#define BACKUP_LABEL_FILE
Definition xlog.h:318
#define RECOVERY_SIGNAL_FILE
Definition xlog.h:316
@ RECOVERY_STATE_CRASH
Definition xlog.h:92
@ RECOVERY_STATE_ARCHIVE
Definition xlog.h:93
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
static XLogRecPtr abortedRecPtr
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
TimeLineID recoveryTargetTLI
HotStandbyState standbyState
Definition xlogutils.c:53
bool InRecovery
Definition xlogutils.c:50
@ STANDBY_DISABLED
Definition xlogutils.h:52
void WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
Definition xlogwait.c:317
@ WAIT_LSN_TYPE_STANDBY_REPLAY
Definition xlogwait.h:39
@ WAIT_LSN_TYPE_STANDBY_FLUSH
Definition xlogwait.h:41
@ WAIT_LSN_TYPE_STANDBY_WRITE
Definition xlogwait.h:40

References abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, fb(), findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, CheckPoint::logicalDecodingEnabled, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, 
CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupLogicalDecodingStatus(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, 
TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), UpdateLogicalDecodingStatusEndOfRecovery(), ValidateXLOGDirectoryStructure(), WAIT_LSN_TYPE_STANDBY_FLUSH, WAIT_LSN_TYPE_STANDBY_REPLAY, WAIT_LSN_TYPE_STANDBY_WRITE, WaitLSNWakeup(), WakeupCheckpointer(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsValid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char * buf,
size_t  bufsize 
)
static

Definition at line 5290 of file xlog.c.

5291{
5293 "%Y-%m-%d %H:%M:%S %Z",
5295
5296 return buf;
5297}
#define bufsize
static char buf[DEFAULT_XLOG_SEG_SIZE]
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition localtime.c:1345
PGDLLIMPORT pg_tz * log_timezone
Definition pgtz.c:31

References buf, bufsize, fb(), log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6336 of file xlog.c.

6337{
6338 /* initialize minRecoveryPoint to this record */
6341 if (ControlFile->minRecoveryPoint < EndRecPtr)
6342 {
6343 ControlFile->minRecoveryPoint = EndRecPtr;
6344 ControlFile->minRecoveryPointTLI = replayTLI;
6345 }
6346 /* update local copy */
6349
6350 /*
6351 * The startup process can update its local copy of minRecoveryPoint from
6352 * this point.
6353 */
6355
6357
6358 /*
6359 * We update SharedRecoveryState while holding the lock on ControlFileLock
6360 * so both states are consistent in shared memory.
6361 */
6365
6367}
static bool updateMinRecoveryPoint
Definition xlog.c:650

References ControlFile, DB_IN_ARCHIVE_RECOVERY, fb(), XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6951 of file xlog.c.

6952{
6953 /*
6954 * The status is reported only for end-of-recovery and shutdown
6955 * checkpoints or shutdown restartpoints. Updating the ps display is
6956 * useful in those situations as it may not be possible to rely on
6957 * pg_stat_activity to see the status of the checkpointer or the startup
6958 * process.
6959 */
6961 return;
6962
6963 if (reset)
6964 set_ps_display("");
6965 else
6966 {
6967 char activitymsg[128];
6968
6969 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6970 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6971 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6972 restartpoint ? "restartpoint" : "checkpoint");
6974 }
6975}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, fb(), reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6913 of file xlog.c.

6914{
6915 /*
6916 * To estimate the number of segments consumed between checkpoints, keep a
6917 * moving average of the amount of WAL generated in previous checkpoint
6918 * cycles. However, if the load is bursty, with quiet periods and busy
6919 * periods, we want to cater for the peak load. So instead of a plain
6920 * moving average, let the average decline slowly if the previous cycle
6921 * used less WAL than estimated, but bump it up immediately if it used
6922 * more.
6923 *
6924 * When checkpoints are triggered by max_wal_size, this should converge to
6925 * CheckpointSegments * wal_segment_size.
6926 *
6927 * Note: This doesn't pay any attention to what caused the checkpoint.
6928 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6929 * starting a base backup, are counted the same as those created
6930 * automatically. The slow-decline will largely mask them out, if they are
6931 * not frequent. If they are frequent, it seems reasonable to count them
6932 * in as any others; if you issue a manual checkpoint every 5 minutes and
6933 * never let a timed checkpoint happen, it makes sense to base the
6934 * preallocation on that 5 minute interval rather than whatever
6935 * checkpoint_timeout is set to.
6936 */
6937 PrevCheckPointDistance = nbytes;
6938 if (CheckPointDistanceEstimate < nbytes)
6940 else
6942 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6943}

References CheckPointDistanceEstimate, fb(), and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4619 of file xlog.c.

4620{
4622}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8299 of file xlog.c.

8300{
8302 bool recoveryInProgress;
8303
8304 /*
8305 * Do nothing if full_page_writes has not been changed.
8306 *
8307 * It's safe to check the shared full_page_writes without the lock,
8308 * because we assume that there is no concurrently running process which
8309 * can update it.
8310 */
8311 if (fullPageWrites == Insert->fullPageWrites)
8312 return;
8313
8314 /*
8315 * Perform this outside critical section so that the WAL insert
8316 * initialization done by RecoveryInProgress() doesn't trigger an
8317 * assertion failure.
8318 */
8320
8322
8323 /*
8324 * It's always safe to take full page images, even when not strictly
8325 * required, but not the other way round. So if we're setting full_page_writes
8326 * to true, first set it true and then write the WAL record. If we're
8327 * setting it to false, first write the WAL record and then set the global
8328 * flag.
8329 */
8330 if (fullPageWrites)
8331 {
8333 Insert->fullPageWrites = true;
8335 }
8336
8337 /*
8338 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8339 * full_page_writes during archive recovery, if required.
8340 */
8342 {
8344 XLogRegisterData(&fullPageWrites, sizeof(bool));
8345
8347 }
8348
8349 if (!fullPageWrites)
8350 {
8352 Insert->fullPageWrites = false;
8354 }
8356}
#define XLOG_FPW_CHANGE
Definition pg_control.h:77

References END_CRIT_SECTION, fb(), fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char * filename)
static

Definition at line 3850 of file xlog.c.

3851{
3852 uint32 tli;
3853 XLogSegNo segno;
3854
3856
3858 if (segno > XLogCtl->lastRemovedSegNo)
3859 XLogCtl->lastRemovedSegNo = segno;
3861}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2704 of file xlog.c.

2705{
2706 /* Quick check using our local copy of the variable */
2707 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2708 return;
2709
2710 /*
2711 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2712 * i.e., we're doing crash recovery. We never modify the control file's
2713 * value in that case, so we can short-circuit future checks here too. The
2714 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2715 * updated until crash recovery finishes. We only do this for the startup
2716 * process as it should not update its own reference of minRecoveryPoint
2717 * until it has finished crash recovery to make sure that all WAL
2718 * available is replayed in this case. This also saves from extra locks
2719 * taken on the control file from the startup process.
2720 */
2722 {
2723 updateMinRecoveryPoint = false;
2724 return;
2725 }
2726
2728
2729 /* update local copy */
2732
2734 updateMinRecoveryPoint = false;
2735 else if (force || LocalMinRecoveryPoint < lsn)
2736 {
2739
2740 /*
2741 * To avoid having to update the control file too often, we update it
2742 * all the way to the last record being replayed, even though 'lsn'
2743 * would suffice for correctness. This also allows the 'force' case
2744 * to not need a valid 'lsn' value.
2745 *
2746 * Another important reason for doing it this way is that the passed
2747 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2748 * the caller got it from a corrupted heap page. Accepting such a
2749 * value as the min recovery point would prevent us from coming up at
2750 * all. Instead, we just log a warning and continue with recovery.
2751 * (See also the comments about corrupt LSNs in XLogFlush.)
2752 */
2754 if (!force && newMinRecoveryPoint < lsn)
2755 elog(WARNING,
2756 "xlog min recovery request %X/%08X is past current point %X/%08X",
2758
2759 /* update control file */
2761 {
2767
2769 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2772 }
2773 }
2775}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), fb(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsValid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4137 of file xlog.c.

4138{
4139 char path[MAXPGPATH];
4140 struct stat stat_buf;
4141
4142 /* Check for pg_wal; if it doesn't exist, error out */
4143 if (stat(XLOGDIR, &stat_buf) != 0 ||
4144 !S_ISDIR(stat_buf.st_mode))
4145 ereport(FATAL,
4147 errmsg("required WAL directory \"%s\" does not exist",
4148 XLOGDIR)));
4149
4150 /* Check for archive_status */
4151 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4152 if (stat(path, &stat_buf) == 0)
4153 {
4154 /* Check for weird cases where it exists but isn't a directory */
4155 if (!S_ISDIR(stat_buf.st_mode))
4156 ereport(FATAL,
4158 errmsg("required WAL directory \"%s\" does not exist",
4159 path)));
4160 }
4161 else
4162 {
4163 ereport(LOG,
4164 (errmsg("creating missing WAL directory \"%s\"", path)));
4165 if (MakePGDirectory(path) < 0)
4166 ereport(FATAL,
4168 errmsg("could not create missing directory \"%s\": %m",
4169 path)));
4170 }
4171
4172 /* Check for summaries */
4173 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4174 if (stat(path, &stat_buf) == 0)
4175 {
4176 /* Check for weird cases where it exists but isn't a directory */
4177 if (!S_ISDIR(stat_buf.st_mode))
4178 ereport(FATAL,
4179 (errmsg("required WAL directory \"%s\" does not exist",
4180 path)));
4181 }
4182 else
4183 {
4184 ereport(LOG,
4185 (errmsg("creating missing WAL directory \"%s\"", path)));
4186 if (MakePGDirectory(path) < 0)
4187 ereport(FATAL,
4188 (errmsg("could not create missing directory \"%s\": %m",
4189 path)));
4190 }
4191}
int MakePGDirectory(const char *directoryName)
Definition fd.c:3959
#define S_ISDIR(m)
Definition win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, fb(), LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1511 of file xlog.c.

1512{
1518 int i;
1519
1520 if (MyProc == NULL)
1521 elog(PANIC, "cannot wait without a PGPROC structure");
1522
1523 /*
1524 * Check if there's any work to do. Use a barrier to ensure we get the
1525 * freshest value.
1526 */
1528 if (upto <= inserted)
1529 return inserted;
1530
1531 /* Read the current insert position */
1532 SpinLockAcquire(&Insert->insertpos_lck);
1533 bytepos = Insert->CurrBytePos;
1534 SpinLockRelease(&Insert->insertpos_lck);
1536
1537 /*
1538 * No-one should request to flush a piece of WAL that hasn't even been
1539 * reserved yet. However, it can happen if there is a block with a bogus
1540 * LSN on disk, for example. XLogFlush checks for that situation and
1541 * complains, but only after the flush. Here we just assume that to mean
1542 * that all WAL that has been reserved needs to be finished. In this
1543 * corner-case, the return value can be smaller than 'upto' argument.
1544 */
1545 if (upto > reservedUpto)
1546 {
1547 ereport(LOG,
1548 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1551 }
1552
1553 /*
1554 * Loop through all the locks, sleeping on any in-progress insert older
1555 * than 'upto'.
1556 *
1557 * finishedUpto is our return value, indicating the point upto which all
1558 * the WAL insertions have been finished. Initialize it to the head of
1559 * reserved WAL, and as we iterate through the insertion locks, back it
1560 * out for any insertion that's still in progress.
1561 */
1563 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1564 {
1566
1567 do
1568 {
1569 /*
1570 * See if this insertion is in progress. LWLockWaitForVar will
1571 * wait for the lock to be released, or for the 'value' to be set
1572 * by a LWLockUpdateVar call. When a lock is initially acquired,
1573 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1574 * know where it's inserting yet. We will have to wait for it. If
1575 * it's a small insertion, the record will most likely fit on the
1576 * same page and the inserter will release the lock without ever
1577 * calling LWLockUpdateVar. But if it has to sleep, it will
1578 * advertise the insertion point with LWLockUpdateVar before
1579 * sleeping.
1580 *
1581 * In this loop we are only waiting for insertions that started
1582 * before WaitXLogInsertionsToFinish was called. The lack of
1583 * memory barriers in the loop means that we might see locks as
1584 * "unused" that have since become used. This is fine because
1585 * they only can be used for later insertions that we would not
1586 * want to wait on anyway. Not taking a lock to acquire the
1587 * current insertingAt value means that we might see older
1588 * insertingAt values. This is also fine, because if we read a
1589 * value too old, we will add ourselves to the wait queue, which
1590 * contains atomic operations.
1591 */
1592 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1595 {
1596 /* the lock was free, so no insertion in progress */
1598 break;
1599 }
1600
1601 /*
1602 * This insertion is still in progress. Have to wait, unless the
1603 * inserter has proceeded past 'upto'.
1604 */
1605 } while (insertingat < upto);
1606
1609 }
1610
1611 /*
1612 * Advance the limit we know to have been inserted and return the freshest
1613 * value we know of, which might be beyond what we requested if somebody
1614 * is concurrently doing this with an 'upto' pointer ahead of us.
1615 */
1617 finishedUpto);
1618
1619 return finishedUpto;
1620}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition atomics.h:595
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition lwlock.c:1592
PGPROC * MyProc
Definition proc.c:67
pg_atomic_uint64 insertingAt
Definition xlog.c:373

References elog, ereport, errmsg(), fb(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), XLogCtl, and XLogRecPtrIsValid.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1378 of file xlog.c.

1379{
1380 bool immed;
1381
1382 /*
1383 * It doesn't matter which of the WAL insertion locks we acquire, so try
1384 * the one we used last time. If the system isn't particularly busy, it's
1385 * a good bet that it's still available, and it's good to have some
1386 * affinity to a particular lock so that you don't unnecessarily bounce
1387 * cache lines between processes when there's no contention.
1388 *
1389 * If this is the first time through in this backend, pick a lock
1390 * (semi-)randomly. This allows the locks to be used evenly if you have a
1391 * lot of very short connections.
1392 */
1393 static int lockToTry = -1;
1394
1395 if (lockToTry == -1)
1398
1399 /*
1400 * The insertingAt value is initially set to 0, as we don't know our
1401 * insert location yet.
1402 */
1404 if (!immed)
1405 {
1406 /*
1407 * If we couldn't get the lock immediately, try another lock next
1408 * time. On a system with more insertion locks than concurrent
1409 * inserters, this causes all the inserters to eventually migrate to a
1410 * lock that no-one else is using. On a system with more inserters
1411 * than locks, it still helps to distribute the inserters evenly
1412 * across the locks.
1413 */
1415 }
1416}
ProcNumber MyProcNumber
Definition globals.c:90
static int MyLockNo
Definition xlog.c:653

References fb(), LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1423 of file xlog.c.

1424{
1425 int i;
1426
1427 /*
1428 * When holding all the locks, all but the last lock's insertingAt
1429 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1430 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1431 */
1432 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1433 {
1438 }
1439 /* Variable value reset to 0 at release */
1441
1442 holdingAllLocks = true;
1443}
#define PG_UINT64_MAX
Definition c.h:617
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1728
static bool holdingAllLocks
Definition xlog.c:654

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1452 of file xlog.c.

1453{
1454 if (holdingAllLocks)
1455 {
1456 int i;
1457
1458 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1461 0);
1462
1463 holdingAllLocks = false;
1464 }
1465 else
1466 {
1469 0);
1470 }
1471}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition lwlock.c:1866

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1478 of file xlog.c.

1479{
1480 if (holdingAllLocks)
1481 {
1482 /*
1483 * We use the last lock to mark our actual position, see comments in
1484 * WALInsertLockAcquireExclusive.
1485 */
1488 insertingAt);
1489 }
1490 else
1493 insertingAt);
1494}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1755 of file xlog.c.

1757{
1758 char *pdst = dstbuf;
1759 XLogRecPtr recptr = startptr;
1761 Size nbytes = count;
1762
1764 return 0;
1765
1766 Assert(XLogRecPtrIsValid(startptr));
1767
1768 /*
1769 * Caller should ensure that the requested data has been inserted into WAL
1770 * buffers before we try to read it.
1771 */
1773 if (startptr + count > inserted)
1774 ereport(ERROR,
1775 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1776 LSN_FORMAT_ARGS(startptr + count),
1778
1779 /*
1780 * Loop through the buffers without a lock. For each buffer, atomically
1781 * read and verify the end pointer, then copy the data out, and finally
1782 * re-read and re-verify the end pointer.
1783 *
1784 * Once a page is evicted, it never returns to the WAL buffers, so if the
1785 * end pointer matches the expected end pointer before and after we copy
1786 * the data, then the right page must have been present during the data
1787 * copy. Read barriers are necessary to ensure that the data copy actually
1788 * happens between the two verification steps.
1789 *
1790 * If either verification fails, we simply terminate the loop and return
1791 * with the data that had been already copied out successfully.
1792 */
1793 while (nbytes > 0)
1794 {
1795 uint32 offset = recptr % XLOG_BLCKSZ;
1798 XLogRecPtr endptr;
1799 const char *page;
1800 const char *psrc;
1802
1803 /*
1804 * Calculate the end pointer we expect in the xlblocks array if the
1805 * correct page is present.
1806 */
1807 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1808
1809 /*
1810 * First verification step: check that the correct page is present in
1811 * the WAL buffers.
1812 */
1814 if (expectedEndPtr != endptr)
1815 break;
1816
1817 /*
1818 * The correct page is present (or was at the time the endptr was
1819 * read; must re-verify later). Calculate pointer to source data and
1820 * determine how much data to read from this page.
1821 */
1822 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1823 psrc = page + offset;
1824 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1825
1826 /*
1827 * Ensure that the data copy and the first verification step are not
1828 * reordered.
1829 */
1831
1832 /* data copy */
1834
1835 /*
1836 * Ensure that the data copy and the second verification step are not
1837 * reordered.
1838 */
1840
1841 /*
1842 * Second verification step: check that the page we read from wasn't
1843 * evicted while we were copying the data.
1844 */
1846 if (expectedEndPtr != endptr)
1847 break;
1848
1849 pdst += npagebytes;
1850 recptr += npagebytes;
1851 nbytes -= npagebytes;
1852 }
1853
1854 Assert(pdst - dstbuf <= count);
1855
1856 return pdst - dstbuf;
1857}
#define pg_read_barrier()
Definition atomics.h:154
#define Min(x, y)
Definition c.h:1007
TimeLineID GetWALInsertionTimeLine(void)
Definition xlog.c:6647

References Assert, ereport, errmsg(), ERROR, fb(), GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsValid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4277 of file xlog.c.

4278{
4279 int fd;
4280 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4281
4282 /*
4283 * Initialize version and compatibility-check fields
4284 */
4287
4290
4296
4299
4302
4303 ControlFile->float8ByVal = true; /* vestigial */
4304
4305 /*
4306 * Initialize the default 'char' signedness.
4307 *
4308 * The signedness of the char type is implementation-defined. For instance
4309 * on x86 architecture CPUs, the char data type is typically treated as
4310 * signed by default, whereas on aarch architecture CPUs, it is typically
4311 * treated as unsigned by default. In v17 or earlier, we accidentally let
4312 * C implementation signedness affect persistent data. This led to
4313 * inconsistent results when comparing char data across different
4314 * platforms.
4315 *
4316 * This flag can be used as a hint to ensure consistent behavior for
4317 * pre-v18 data files that store data sorted by the 'char' type on disk,
4318 * especially in cross-platform replication scenarios.
4319 *
4320 * Newly created database clusters unconditionally set the default char
4321 * signedness to true. pg_upgrade changes this flag for clusters that were
4322 * initialized on signedness=false platforms. As a result,
4323 * signedness=false setting will become rare over time. If we had known
4324 * about this problem during the last development cycle that forced initdb
4325 * (v8.3), we would have made all clusters signed or all clusters
4326 * unsigned. Making pg_upgrade the only source of signedness=false will
4327 * cause the population of database clusters to converge toward that
4328 * retrospective ideal.
4329 */
4331
4332 /* Contents are protected with a CRC */
4338
4339 /*
4340 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4341 * the excess over sizeof(ControlFileData). This reduces the odds of
4342 * premature-EOF errors when reading pg_control. We'll still fail when we
4343 * check the contents of the file, but hopefully with a more specific
4344 * error than "couldn't read pg_control".
4345 */
4346 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4347 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4348
4351 if (fd < 0)
4352 ereport(PANIC,
4354 errmsg("could not create file \"%s\": %m",
4356
4357 errno = 0;
4360 {
4361 /* if write didn't set errno, assume problem is no disk space */
4362 if (errno == 0)
4363 errno = ENOSPC;
4364 ereport(PANIC,
4366 errmsg("could not write to file \"%s\": %m",
4368 }
4370
4372 if (pg_fsync(fd) != 0)
4373 ereport(PANIC,
4375 errmsg("could not fsync file \"%s\": %m",
4378
4379 if (close(fd) != 0)
4380 ereport(PANIC,
4382 errmsg("could not close file \"%s\": %m",
4384}
#define PG_CONTROL_FILE_SIZE
Definition pg_control.h:260

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg(), fb(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState *record)

Definition at line 8368 of file xlog.c.

8369{
8370 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8371 XLogRecPtr lsn = record->EndRecPtr;
8372
8373 /*
8374 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8375 * XLOG_FPI_FOR_HINT records.
8376 */
8377 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8378 !XLogRecHasAnyBlockRefs(record));
8379
8380 if (info == XLOG_NEXTOID)
8381 {
8382 Oid nextOid;
8383
8384 /*
8385 * We used to try to take the maximum of TransamVariables->nextOid and
8386 * the recorded nextOid, but that fails if the OID counter wraps
8387 * around. Since no OID allocation should be happening during replay
8388 * anyway, better to just believe the record exactly. We still take
8389 * OidGenLock while setting the variable, just in case.
8390 */
8391 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8393 TransamVariables->nextOid = nextOid;
8396 }
8397 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8398 {
8399 CheckPoint checkPoint;
8400 TimeLineID replayTLI;
8401
8402 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8403 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8405 TransamVariables->nextXid = checkPoint.nextXid;
8408 TransamVariables->nextOid = checkPoint.nextOid;
8412 checkPoint.nextMultiOffset);
8413
8415 checkPoint.oldestMultiDB);
8416
8417 /*
8418 * No need to set oldestClogXid here as well; it'll be set when we
8419 * redo an xl_clog_truncate if it changed since initialization.
8420 */
8421 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8422
8423 /*
8424 * If we see a shutdown checkpoint while waiting for an end-of-backup
8425 * record, the backup was canceled and the end-of-backup record will
8426 * never arrive.
8427 */
8431 ereport(PANIC,
8432 (errmsg("online backup was canceled, recovery cannot continue")));
8433
8434 /*
8435 * If we see a shutdown checkpoint, we know that nothing was running
8436 * on the primary at this point. So fake-up an empty running-xacts
8437 * record and use that here and now. Recover additional standby state
8438 * for prepared transactions.
8439 */
8441 {
8442 TransactionId *xids;
8443 int nxids;
8445 TransactionId latestCompletedXid;
8447
8449
8450 /* Update pg_subtrans entries for any prepared transactions */
8452
8453 /*
8454 * Construct a RunningTransactions snapshot representing a shut
8455 * down server, with only prepared transactions still alive. We're
8456 * never overflowed at this point because all subxids are listed
8457 * with their parent prepared transactions.
8458 */
8459 running.xcnt = nxids;
8460 running.subxcnt = 0;
8462 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8464 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8465 TransactionIdRetreat(latestCompletedXid);
8466 Assert(TransactionIdIsNormal(latestCompletedXid));
8467 running.latestCompletedXid = latestCompletedXid;
8468 running.xids = xids;
8469
8471 }
8472
8473 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8477
8478 /*
8479 * We should've already switched to the new TLI before replaying this
8480 * record.
8481 */
8482 (void) GetCurrentReplayRecPtr(&replayTLI);
8483 if (checkPoint.ThisTimeLineID != replayTLI)
8484 ereport(PANIC,
8485 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8486 checkPoint.ThisTimeLineID, replayTLI)));
8487
8488 RecoveryRestartPoint(&checkPoint, record);
8489
8490 /*
8491 * After replaying a checkpoint record, free all smgr objects.
8492 * Otherwise we would never do so for dropped relations, as the
8493 * startup does not process shared invalidation messages or call
8494 * AtEOXact_SMgr().
8495 */
8497 }
8498 else if (info == XLOG_CHECKPOINT_ONLINE)
8499 {
8500 CheckPoint checkPoint;
8501 TimeLineID replayTLI;
8502
8503 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8504 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8507 checkPoint.nextXid))
8508 TransamVariables->nextXid = checkPoint.nextXid;
8510
8511 /*
8512 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8513 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8514 * counter is from the start of the checkpoint and might well be stale
8515 * compared to later XLOG_NEXTOID records. We could try to take the
8516 * maximum of the nextOid counter and our latest value, but since
8517 * there's no particular guarantee about the speed with which the OID
8518 * counter wraps around, that's a risky thing to do. In any case,
8519 * users of the nextOid counter are required to avoid assignment of
8520 * duplicates, so that a somewhat out-of-date value should be safe.
8521 */
8522
8523 /* Handle multixact */
8525 checkPoint.nextMultiOffset);
8526
8527 /*
8528 * NB: This may perform multixact truncation when replaying WAL
8529 * generated by an older primary.
8530 */
8532 checkPoint.oldestMultiDB);
8534 checkPoint.oldestXid))
8536 checkPoint.oldestXidDB);
8537 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8541
8542 /* TLI should not change in an on-line checkpoint */
8543 (void) GetCurrentReplayRecPtr(&replayTLI);
8544 if (checkPoint.ThisTimeLineID != replayTLI)
8545 ereport(PANIC,
8546 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8547 checkPoint.ThisTimeLineID, replayTLI)));
8548
8549 RecoveryRestartPoint(&checkPoint, record);
8550
8551 /*
8552 * After replaying a checkpoint record, free all smgr objects.
8553 * Otherwise we would never do so for dropped relations, as the
8554 * startup does not process shared invalidation messages or call
8555 * AtEOXact_SMgr().
8556 */
8558 }
8559 else if (info == XLOG_OVERWRITE_CONTRECORD)
8560 {
8561 /* nothing to do here, handled in xlogrecovery_redo() */
8562 }
8563 else if (info == XLOG_END_OF_RECOVERY)
8564 {
8566 TimeLineID replayTLI;
8567
8568 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8569
8570 /*
8571 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8572 * but this case is rarer and harder to test, so the benefit doesn't
8573 * outweigh the potential extra cost of maintenance.
8574 */
8575
8576 /*
8577 * We should've already switched to the new TLI before replaying this
8578 * record.
8579 */
8580 (void) GetCurrentReplayRecPtr(&replayTLI);
8581 if (xlrec.ThisTimeLineID != replayTLI)
8582 ereport(PANIC,
8583 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8584 xlrec.ThisTimeLineID, replayTLI)));
8585 }
8586 else if (info == XLOG_NOOP)
8587 {
8588 /* nothing to do here */
8589 }
8590 else if (info == XLOG_SWITCH)
8591 {
8592 /* nothing to do here */
8593 }
8594 else if (info == XLOG_RESTORE_POINT)
8595 {
8596 /* nothing to do here, handled in xlogrecovery.c */
8597 }
8598 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8599 {
8600 /*
8601 * XLOG_FPI records contain nothing else but one or more block
8602 * references. Every block reference must include a full-page image
8603 * even if full_page_writes was disabled when the record was generated
8604 * - otherwise there would be no point in this record.
8605 *
8606 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8607 * WAL-logged because of a hint bit update. They are only generated
8608 * when checksums and/or wal_log_hints are enabled. They may include
8609 * no full-page images if full_page_writes was disabled when they were
8610 * generated. In this case there is nothing to do here.
8611 *
8612 * No recovery conflicts are generated by these generic records - if a
8613 * resource manager needs to generate conflicts, it has to define a
8614 * separate WAL record type and redo routine.
8615 */
8616 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8617 {
8618 Buffer buffer;
8619
8620 if (!XLogRecHasBlockImage(record, block_id))
8621 {
8622 if (info == XLOG_FPI)
8623 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8624 continue;
8625 }
8626
8627 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8628 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8629 UnlockReleaseBuffer(buffer);
8630 }
8631 }
8632 else if (info == XLOG_BACKUP_END)
8633 {
8634 /* nothing to do here, handled in xlogrecovery_redo() */
8635 }
8636 else if (info == XLOG_PARAMETER_CHANGE)
8637 {
8639
8640 /* Update our copy of the parameters in pg_control */
8641 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8642
8644 ControlFile->MaxConnections = xlrec.MaxConnections;
8645 ControlFile->max_worker_processes = xlrec.max_worker_processes;
8646 ControlFile->max_wal_senders = xlrec.max_wal_senders;
8647 ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
8648 ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
8649 ControlFile->wal_level = xlrec.wal_level;
8650 ControlFile->wal_log_hints = xlrec.wal_log_hints;
8651
8652 /*
8653 * Update minRecoveryPoint to ensure that if recovery is aborted, we
8654 * recover back up to this point before allowing hot standby again.
8655 * This is important if the max_* settings are decreased, to ensure
8656 * you don't run queries against the WAL preceding the change. The
8657 * local copies cannot be updated as long as crash recovery is
8658 * happening and we expect all the WAL to be replayed.
8659 */
8661 {
8664 }
8666 {
8667 TimeLineID replayTLI;
8668
8669 (void) GetCurrentReplayRecPtr(&replayTLI);
8671 ControlFile->minRecoveryPointTLI = replayTLI;
8672 }
8673
8674 CommitTsParameterChange(xlrec.track_commit_timestamp,
8676 ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
8677
8680
8681 /* Check to see if any parameter change gives a problem on recovery */
8683 }
8684 else if (info == XLOG_FPW_CHANGE)
8685 {
8686 bool fpw;
8687
8688 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8689
8690 /*
8691 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8692 * do_pg_backup_start() and do_pg_backup_stop() can check whether
8693 * full_page_writes has been disabled during online backup.
8694 */
8695 if (!fpw)
8696 {
8701 }
8702
8703 /* Keep track of full_page_writes */
8705 }
8706 else if (info == XLOG_CHECKPOINT_REDO)
8707 {
8708 /* nothing to do here, just for informational purposes */
8709 }
8710 else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE)
8711 {
8712 bool status;
8713
8714 memcpy(&status, XLogRecGetData(record), sizeof(bool));
8715
8716 /*
8717 * We need to toggle the logical decoding status and update the
8718 * XLogLogicalInfo cache of processes synchronously because
8719 * XLogLogicalInfoActive() is used even during read-only queries
8720 * (e.g., via RelationIsAccessibleInLogicalDecoding()). In the
8721 * 'disable' case, it is safe to invalidate existing slots after
8722 * disabling logical decoding because logical decoding cannot process
8723 * subsequent WAL records, which may not contain logical information.
8724 */
8725 if (status)
8727 else
8729
8730 elog(DEBUG1, "update logical decoding status to %d during recovery",
8731 status);
8732
8733 if (InRecovery && InHotStandby)
8734 {
8735 if (!status)
8736 {
8737 /*
8738 * Invalidate logical slots if we are in hot standby and the
8739 * primary disabled logical decoding.
8740 */
8742 0, InvalidOid,
8744 }
8745 else if (sync_replication_slots)
8746 {
8747 /*
8748 * Signal the postmaster to launch the slotsync worker.
8749 *
8750 * XXX: For simplicity, we keep the slotsync worker running
8751 * even after logical decoding is disabled. A future
8752 * improvement can consider starting and stopping the worker
8753 * based on logical decoding status change.
8754 */
8756 }
8757 }
8758 }
8759}
int Buffer
Definition buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
uint8_t uint8
Definition c.h:554
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition commit_ts.c:640
pid_t PostmasterPid
Definition globals.c:106
void DisableLogicalDecoding(void)
Definition logicalctl.c:491
void EnableLogicalDecoding(void)
Definition logicalctl.c:340
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2191
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2164
#define XLOG_RESTORE_POINT
Definition pg_control.h:76
#define XLOG_FPI
Definition pg_control.h:80
#define XLOG_FPI_FOR_HINT
Definition pg_control.h:79
#define XLOG_NEXTOID
Definition pg_control.h:72
#define XLOG_NOOP
Definition pg_control.h:71
#define XLOG_PARAMETER_CHANGE
Definition pg_control.h:75
#define XLOG_LOGICAL_DECODING_STATUS_CHANGE
Definition pg_control.h:84
@ RS_INVAL_WAL_LEVEL
Definition slot.h:66
bool sync_replication_slots
Definition slotsync.c:117
void smgrdestroyall(void)
Definition smgr.c:386
#define FullTransactionIdPrecedes(a, b)
Definition transam.h:51
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
#define kill(pid, sig)
Definition win32_port.h:490
#define SIGUSR1
Definition win32_port.h:170
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition xlog.c:7682
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XLogRecMaxBlockId(decoder)
Definition xlogreader.h:417
#define XLogRecHasBlockImage(decoder, block_id)
Definition xlogreader.h:422
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:416
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition xlogutils.c:303
@ STANDBY_INITIALIZED
Definition xlogutils.h:53
#define InHotStandby
Definition xlogutils.h:60
@ BLK_RESTORED
Definition xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, DEBUG1, DisableLogicalDecoding(), elog, EnableLogicalDecoding(), XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, fb(), FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, kill, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::max_locks_per_xact, ControlFileData::max_prepared_xacts, ControlFileData::max_wal_senders, ControlFileData::max_worker_processes, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PostmasterPid, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SIGUSR1, smgrdestroyall(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, sync_replication_slots, CheckPoint::ThisTimeLineID, ControlFileData::track_commit_timestamp, 
TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), ControlFileData::wal_level, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_LOGICAL_DECODING_STATUS_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsValid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2989 of file xlog.c.

/*
 * XLogBackgroundFlush -- one background write/flush pass over WAL.
 *
 * On the visible return paths this yields false when recovery is in
 * progress or when the page-aligned write request is already flushed, and
 * true otherwise.
 *
 * NOTE: this is a rendered listing. The embedded line numbers skip, so
 * some original lines are not shown; for example WriteRqst and now are used
 * below without a visible declaration, and several statements under the
 * "read updated LogwrtRqst" and "wait for in-progress insertions" comments
 * are missing.
 */
2990{
2992 bool flexible = true;
/* static: kept across calls; 0 is treated as "first call" further down */
2993 static TimestampTz lastflush;
2995 int flushblocks;
2997
2998 /* XLOG doesn't need flushing during recovery */
2999 if (RecoveryInProgress())
3000 return false;
3001
3002 /*
3003 * Since we're not in recovery, InsertTimeLineID is set and can't change,
3004 * so we can read it without a lock.
3005 */
3007
3008 /* read updated LogwrtRqst */
3012
3013 /* back off to last completed page boundary */
3014 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3015
3016 /* if we have already flushed that far, consider async commit records */
3018 if (WriteRqst.Write <= LogwrtResult.Flush)
3019 {
3023 flexible = false; /* ensure it all gets written */
3024 }
3025
3026 /*
3027 * If already known flushed, we're done. Just need to check if we are
3028 * holding an open file handle to a logfile that's no longer in use,
3029 * preventing the file from being deleted.
3030 */
3031 if (WriteRqst.Write <= LogwrtResult.Flush)
3032 {
3033 if (openLogFile >= 0)
3034 {
/* the condition guarding this close is elided; presumably XLByteInPrevSeg (listed in References) -- TODO confirm */
3037 {
3038 XLogFileClose();
3039 }
3040 }
3041 return false;
3042 }
3043
3044 /*
3045 * Determine how far to flush WAL, based on the wal_writer_delay and
3046 * wal_writer_flush_after GUCs.
3047 *
3048 * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3049 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3050 * logic is the same in both places if you change this.
3051 */
3053 flushblocks =
3055
3056 if (WalWriterFlushAfter == 0 || lastflush == 0)
3057 {
3058 /* first call, or block based limits disabled */
3059 WriteRqst.Flush = WriteRqst.Write;
3060 lastflush = now;
3061 }
/* the "else if" time-based condition for the next branch is elided (original line 3062) */
3063 {
3064 /*
3065 * Flush the writes at least every WalWriterDelay ms. This is
3066 * important to bound the amount of time it takes for an asynchronous
3067 * commit to hit disk.
3068 */
3069 WriteRqst.Flush = WriteRqst.Write;
3070 lastflush = now;
3071 }
3072 else if (flushblocks >= WalWriterFlushAfter)
3073 {
3074 /* exceeded wal_writer_flush_after blocks, flush */
3075 WriteRqst.Flush = WriteRqst.Write;
3076 lastflush = now;
3077 }
3078 else
3079 {
3080 /* no flushing, this time round */
3081 WriteRqst.Flush = 0;
3082 }
3083
3084#ifdef WAL_DEBUG
3085 if (XLOG_DEBUG)
3086 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3091#endif
3092
3094
3095 /* now wait for any in-progress insertions to finish and get write lock */
3099 if (WriteRqst.Write > LogwrtResult.Write ||
3101 {
3103 }
3105
3107
3108 /* wake up walsenders now that we've released heavily contended locks */
3110
3111 /*
3112 * If we flushed an LSN that someone was waiting for, notify the waiters.
3113 */
3114 if (waitLSNState &&
3118
3119 /*
3120 * Great, done. To take some work off the critical path, try to initialize
3121 * as many of the no-longer-needed WAL buffers for future use as we can.
3122 */
3124
3125 /*
3126 * If we determined that we need to write data, but somebody else
3127 * wrote/flushed already, it should be considered as being active, to
3128 * avoid hibernating too early.
3129 */
3130 return true;
3131}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
pg_atomic_uint64 minWaitedLSN[WAIT_LSN_TYPE_COUNT]
Definition xlogwait.h:85
XLogRecPtr asyncXactLSN
Definition xlog.c:460
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition walsender.h:65
int WalWriterFlushAfter
Definition walwriter.c:71
int WalWriterDelay
Definition walwriter.c:70
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
struct WaitLSNState * waitLSNState
Definition xlogwait.c:68
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, fb(), XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), WaitLSNState::minWaitedLSN, now(), openLogFile, openLogSegNo, pg_atomic_read_u64(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1905 of file xlog.c.

/*
 * XLogBytePosToEndRecPtr -- map the uint64 argument bytepos to an
 * XLogRecPtr, which is returned in "result".
 *
 * The visible branches distinguish a position that fits on the first page
 * of a segment (long page header) from later pages, and treat
 * bytesleft == 0 as a special case in both.
 *
 * NOTE: rendered listing with elided lines. The declarations and
 * computations of bytesleft and seg_offset, and the statement that fills
 * "result", are not shown. Presumably "result" is built with
 * XLogSegNoOffsetToRecPtr (listed in References) -- TODO confirm.
 */
1906{
1911 XLogRecPtr result;
1912
1915
1917 {
1918 /* fits on first page of segment */
/* zero bytes left: offset 0 within the segment; the non-zero case is elided */
1919 if (bytesleft == 0)
1920 seg_offset = 0;
1921 else
1923 }
1924 else
1925 {
1926 /* account for the first page on segment with long header */
1929
1932
1933 if (bytesleft == 0)
1935 else
1937 }
1938
1940
1941 return result;
1942}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1865 of file xlog.c.

/*
 * XLogBytePosToRecPtr -- map the uint64 argument bytepos to an XLogRecPtr,
 * which is returned in "result".
 *
 * As the two visible comments indicate, the first page of a segment (long
 * header) is handled separately from the remaining pages.
 *
 * NOTE: rendered listing with elided lines. None of the arithmetic and no
 * assignment to "result" is shown. Presumably "result" is built with
 * XLogSegNoOffsetToRecPtr (listed in References) -- TODO confirm.
 */
1866{
1871 XLogRecPtr result;
1872
1875
1877 {
1878 /* fits on first page of segment */
1880 }
1881 else
1882 {
1883 /* account for the first page on segment with long header */
1886
1889
1891 }
1892
1894
1895 return result;
1896}

References fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2284 of file xlog.c.

/*
 * XLogCheckpointNeeded -- report whether a checkpoint is needed given
 * new_segno; returns true or false.
 *
 * NOTE: rendered listing with elided lines. The declaration, the segment
 * computation and the "if" that selects "return true" are not shown.
 * References lists CheckPointSegments, RedoRecPtr and XLByteToSeg, so the
 * test presumably compares new_segno with the segment of RedoRecPtr
 * against CheckPointSegments -- TODO confirm against the full source.
 */
2285{
2287
2289
2291 return true;
2292 return false;
2293}

References CheckPointSegments, fb(), RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4694 of file xlog.c.

/*
 * XLOGChooseNumBuffers -- pick a WAL buffer count derived from NBuffers.
 *
 * Starts from NBuffers / 32 and never returns less than 8.
 *
 * NOTE: original lines 4699-4700 are elided from this rendered listing.
 * References lists wal_segment_size, so they presumably apply an upper
 * bound based on the segment size -- TODO confirm.
 */
4695{
4696 int xbuffers;
4697
4698 xbuffers = NBuffers / 32;
/* lower bound */
4701 if (xbuffers < 8)
4702 xbuffers = 8;
4703 return xbuffers;
4704}

References fb(), NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3677 of file xlog.c.

/*
 * XLogFileClose -- close the currently open WAL segment file.
 *
 * Requires openLogFile >= 0 (asserted). A failing close() is reported at
 * PANIC level; on success openLogFile is reset to -1.
 *
 * NOTE: rendered listing with elided lines. The body of the fadvise "if",
 * the file-name construction before the error report, and the final
 * statement (original line 3705) are not shown. References lists
 * ReleaseExternalFD(), which is presumably that final call -- TODO confirm.
 */
3678{
3679 Assert(openLogFile >= 0);
3680
3681 /*
3682 * WAL segment files will not be re-read in normal operation, so we advise
3683 * the OS to release any cached pages. But do not do so if WAL archiving
3684 * or streaming is active, because archiver and walsender process could
3685 * use the cache to read the WAL segment.
3686 */
3687#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3688 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3690#endif
3691
3692 if (close(openLogFile) != 0)
3693 {
3694 char xlogfname[MAXFNAMELEN];
/* keep close()'s errno so that %m in the message below reports it */
3695 int save_errno = errno;
3696
3698 errno = save_errno;
3699 ereport(PANIC,
3701 errmsg("could not close file \"%s\": %m", xlogfname)));
3702 }
3703
3704 openLogFile = -1;
3706}
void ReleaseExternalFD(void)
Definition fd.c:1221

References Assert, close, ereport, errcode_for_file_access(), errmsg(), fb(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3456 of file xlog.c.

/*
 * XLogFileCopy -- build a new WAL segment from an existing one.
 *
 * Opens the source file, creates a temporary file named
 * XLOGDIR "/xlogtemp.<pid>", and writes wal_segment_size bytes to it in
 * sizeof(buffer) chunks. Only the first "upto" bytes are read from the
 * source; the remainder is meant to be zero-filled. The temp file is then
 * fsync'ed, both descriptors are closed, and the file is moved to its
 * final name. Failures are reported through ereport/elog.
 *
 * NOTE: rendered listing with elided lines. The open calls that assign
 * srcfd and fd, the wait-event reporting, and the InstallXLogFileSegment()
 * call are not shown.
 */
3459{
3460 char path[MAXPGPATH];
3461 char tmppath[MAXPGPATH];
3462 PGAlignedXLogBlock buffer;
3463 int srcfd;
3464 int fd;
3465 int nbytes;
3466
3467 /*
3468 * Open the source file
3469 */
3472 if (srcfd < 0)
3473 ereport(ERROR,
3475 errmsg("could not open file \"%s\": %m", path)));
3476
3477 /*
3478 * Copy into a temp file name.
3479 */
3480 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3481
/* remove any leftover temp file of the same name; the result is ignored */
3482 unlink(tmppath);
3483
3484 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3486 if (fd < 0)
3487 ereport(ERROR,
3489 errmsg("could not create file \"%s\": %m", tmppath)));
3490
3491 /*
3492 * Do the data copying.
3493 */
3494 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3495 {
3496 int nread;
3497
/* bytes of source data still to copy; goes negative once past "upto" */
3498 nread = upto - nbytes;
3499
3500 /*
3501 * The part that is not read from the source file is filled with
3502 * zeros.
3503 */
/*
 * NOTE(review): nread is int while sizeof yields size_t, so a negative
 * nread is converted to a large unsigned value and this test is false.
 * In those iterations the buffer is written as left by the previous
 * iteration, which may still hold the last partially read chunk --
 * confirm that this is intended.
 */
3504 if (nread < sizeof(buffer))
3505 memset(buffer.data, 0, sizeof(buffer));
3506
3507 if (nread > 0)
3508 {
3509 int r;
3510
3511 if (nread > sizeof(buffer))
3512 nread = sizeof(buffer);
3514 r = read(srcfd, buffer.data, nread);
3515 if (r != nread)
3516 {
3517 if (r < 0)
3518 ereport(ERROR,
3520 errmsg("could not read file \"%s\": %m",
3521 path)));
3522 else
3523 ereport(ERROR,
3525 errmsg("could not read file \"%s\": read %d of %zu",
3526 path, r, (Size) nread)));
3527 }
3529 }
/* cleared so that a short write which leaves errno unset can be detected */
3530 errno = 0;
3532 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3533 {
3534 int save_errno = errno;
3535
3536 /*
3537 * If we fail to make the file, delete it to release disk space
3538 */
3539 unlink(tmppath);
3540 /* if write didn't set errno, assume problem is no disk space */
3542
3543 ereport(ERROR,
3545 errmsg("could not write to file \"%s\": %m", tmppath)));
3546 }
3548 }
3549
3551 if (pg_fsync(fd) != 0)
3554 errmsg("could not fsync file \"%s\": %m", tmppath)));
3556
3557 if (CloseTransientFile(fd) != 0)
3558 ereport(ERROR,
3560 errmsg("could not close file \"%s\": %m", tmppath)));
3561
3562 if (CloseTransientFile(srcfd) != 0)
3563 ereport(ERROR,
3565 errmsg("could not close file \"%s\": %m", path)));
3566
3567 /*
3568 * Now move the segment into place with its final name.
3569 */
3571 elog(ERROR, "InstallXLogFileSegment should not have failed");
3572}
int CloseTransientFile(int fd)
Definition fd.c:2851
int data_sync_elevel(int elevel)
Definition fd.c:3982
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2674
char data[XLOG_BLCKSZ]
Definition c.h:1139

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3418 of file xlog.c.

/*
 * XLogFileInit -- return an open file descriptor for the WAL segment
 * identified by logsegno and logtli.
 *
 * logtli must be non-zero (asserted). If the first step already produced a
 * descriptor (fd >= 0) it is returned directly; otherwise the target file
 * is opened by "path", and a failure to open is reported at ERROR level.
 *
 * NOTE: rendered listing with elided lines. Both statements that assign fd
 * are missing. Per References they are presumably a call to
 * XLogFileInitInternal() followed by BasicOpenFile() -- TODO confirm.
 */
3419{
3420 bool ignore_added;
3421 char path[MAXPGPATH];
3422 int fd;
3423
3424 Assert(logtli != 0);
3425
3427 if (fd >= 0)
3428 return fd;
3429
3430 /* Now open original target segment (might not be file I just made) */
3433 if (fd < 0)
3434 ereport(ERROR,
3436 errmsg("could not open file \"%s\": %m", path)));
3437 return fd;
3438}
#define O_CLOEXEC
Definition win32_port.h:344

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool * added,
char * path
)
static

Definition at line 3230 of file xlog.c.

/*
 * XLogFileInitInternal -- open an existing WAL segment file, or create
 * and install a new one.
 *
 * logtli must be non-zero (asserted). *added is first set to false.
 *
 * If the segment file already exists, its open descriptor is returned. An
 * open failure other than ENOENT is reported at ERROR level.
 *
 * Otherwise a temporary file XLOGDIR "/xlogtemp.<pid>" is created and
 * filled: zero-filled when wal_init_zero is set, else a single byte is
 * written at offset wal_segment_size - 1. It is then fsync'ed, closed and
 * handed to the install step. *added is set to true when the install step
 * succeeds; otherwise the temp file is unlinked. This path always returns
 * -1, so no descriptor is handed back for a newly created file.
 *
 * NOTE: rendered listing with elided lines. The open calls that assign fd,
 * the zero-fill call, the I/O timing and wait-event calls, and the
 * InstallXLogFileSegment() condition are not shown.
 */
3232{
3233 char tmppath[MAXPGPATH];
3236 int fd;
3237 int save_errno;
3240
3241 Assert(logtli != 0);
3242
3244
3245 /*
3246 * Try to use existent file (checkpoint maker may have created it already)
3247 */
3248 *added = false;
3251 if (fd < 0)
3252 {
/* ENOENT just means the file must be created below; anything else is an error */
3253 if (errno != ENOENT)
3254 ereport(ERROR,
3256 errmsg("could not open file \"%s\": %m", path)));
3257 }
3258 else
3259 return fd;
3260
3261 /*
3262 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3263 * another process is doing the same thing. If so, we will end up
3264 * pre-creating an extra log segment. That seems OK, and better than
3265 * holding the lock throughout this lengthy process.
3266 */
3267 elog(DEBUG2, "creating and filling new WAL file");
3268
3269 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3270
3271 unlink(tmppath);
3272
3275
3276 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3278 if (fd < 0)
3279 ereport(ERROR,
3281 errmsg("could not create file \"%s\": %m", tmppath)));
3282
3283 /* Measure I/O timing when initializing segment */
3285
/* save_errno == 0 means "fill succeeded"; it is tested after the fill */
3287 save_errno = 0;
3288 if (wal_init_zero)
3289 {
3290 ssize_t rc;
3291
3292 /*
3293 * Zero-fill the file. With this setting, we do this the hard way to
3294 * ensure that all the file space has really been allocated. On
3295 * platforms that allow "holes" in files, just seeking to the end
3296 * doesn't allocate intermediate space. This way, we know that we
3297 * have all the space and (after the fsync below) that all the
3298 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3299 * O_DSYNC will be sufficient to sync future writes to the log file.
3300 */
3302
3303 if (rc < 0)
3304 save_errno = errno;
3305 }
3306 else
3307 {
3308 /*
3309 * Otherwise, seeking to the end and writing a solitary byte is
3310 * enough.
3311 */
3312 errno = 0;
3313 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3314 {
3315 /* if write didn't set errno, assume no disk space */
3317 }
3318 }
3320
3321 /*
3322 * A full segment worth of data is written when using wal_init_zero. One
3323 * byte is written when not using it.
3324 */
3326 io_start, 1,
3328
3329 if (save_errno)
3330 {
3331 /*
3332 * If we fail to make the file, delete it to release disk space
3333 */
3334 unlink(tmppath);
3335
3336 close(fd);
3337
/* restore the write error; unlink()/close() above may have changed errno */
3338 errno = save_errno;
3339
3340 ereport(ERROR,
3342 errmsg("could not write to file \"%s\": %m", tmppath)));
3343 }
3344
3345 /* Measure I/O timing when flushing segment */
3347
3349 if (pg_fsync(fd) != 0)
3350 {
3351 save_errno = errno;
3352 close(fd);
3353 errno = save_errno;
3354 ereport(ERROR,
3356 errmsg("could not fsync file \"%s\": %m", tmppath)));
3357 }
3359
3361 IOOP_FSYNC, io_start, 1, 0);
3362
3363 if (close(fd) != 0)
3364 ereport(ERROR,
3366 errmsg("could not close file \"%s\": %m", tmppath)));
3367
3368 /*
3369 * Now move the segment into place with its final name. Cope with
3370 * possibility that someone else has created the file while we were
3371 * filling ours: if so, use ours to pre-create a future log segment.
3372 */
3374
3375 /*
3376 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3377 * that was a constant, but that was always a bit dubious: normally, at a
3378 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3379 * here, it was the offset from the insert location. We can't do the
3380 * normal XLOGfileslop calculation here because we don't have access to
3381 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3382 * CheckPointSegments.
3383 */
3386 logtli))
3387 {
3388 *added = true;
3389 elog(DEBUG2, "done creating and filling new WAL file");
3390 }
3391 else
3392 {
3393 /*
3394 * No need for any more future segments, or InstallXLogFileSegment()
3395 * failed to rename the file into place. If the rename failed, a
3396 * caller opening the file may fail.
3397 */
3398 unlink(tmppath);
3399 elog(DEBUG2, "abandoned new WAL file");
3400 }
3401
/* the temp-file descriptor was closed above, so there is no fd to return */
3402 return -1;
3403}
#define IO_DIRECT_WAL_INIT
Definition fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
Definition file_utils.c:709
@ IOCONTEXT_INIT
Definition pgstat.h:288
@ IOOP_WRITE
Definition pgstat.h:316
#define pg_pwrite
Definition port.h:248
bool wal_init_zero
Definition xlog.c:130

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3656 of file xlog.c.

/*
 * XLogFileOpen -- open the existing WAL segment file for (tli, segno) and
 * return its file descriptor.
 *
 * The path is built with XLogFilePath(). Failure to open is reported at
 * PANIC level, so the function does not return a negative descriptor.
 *
 * NOTE: the statement that assigns fd (original lines 3663-3664) is elided
 * from this rendered listing. Per References it is presumably a
 * BasicOpenFile() call using get_sync_bit(wal_sync_method) -- TODO confirm.
 */
3657{
3658 char path[MAXPGPATH];
3659 int fd;
3660
3661 XLogFilePath(path, tli, segno, wal_segment_size);
3662
3665 if (fd < 0)
3666 ereport(PANIC,
3668 errmsg("could not open file \"%s\": %m", path)));
3669
3670 return fd;
3671}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fb(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2234 of file xlog.c.

/*
 * XLOGfileslop -- compute recycleSegNo, a segment number derived from
 * lastredoptr plus an estimated distance.
 *
 * The estimated distance is inflated by 10%, added to lastredoptr and
 * rounded up with ceil(). The result is then compared against minSegNo
 * and maxSegNo.
 *
 * NOTE: rendered listing with elided lines. The declarations and
 * computations of minSegNo, maxSegNo and recycleSegNo, the initial value
 * of "distance", the divisor of the ceil() expression, and the two
 * assignments that follow the range checks are not shown. Presumably those
 * assignments clamp recycleSegNo to [minSegNo, maxSegNo] -- TODO confirm.
 */
2235{
2238 double distance;
2240
2241 /*
2242 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2243 * correspond to. Always recycle enough segments to meet the minimum, and
2244 * remove enough segments to stay below the maximum.
2245 */
2250
2251 /*
2252 * Between those limits, recycle enough segments to get us through to the
2253 * estimated end of next checkpoint.
2254 *
2255 * To estimate where the next checkpoint will finish, assume that the
2256 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2257 * every checkpoint.
2258 */
2260 /* add 10% for good measure. */
2261 distance *= 1.10;
2262
2263 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2265
2266 if (recycleSegNo < minSegNo)
2268 if (recycleSegNo > maxSegNo)
2270
2271 return recycleSegNo;
2272}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, fb(), max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2784 of file xlog.c.

/*
 * XLogFlush -- ensure WAL is flushed to disk up to the position "record".
 *
 * When XLogInsertAllowed() is false, no flush is attempted and
 * UpdateMinRecoveryPoint(record, false) is called instead. If "record" is
 * already covered by LogwrtResult.Flush, the function returns at once.
 * Otherwise it loops until it either obtains the write lock and writes, or
 * observes that the request has been satisfied. If the flush position is
 * still short of "record" afterwards, an ERROR is raised.
 *
 * NOTE: rendered listing with elided lines. The local declarations
 * (WriteRqstPtr, WriteRqst, insertpos, insertTLI are used without a
 * visible declaration), the lock and spinlock calls, the
 * WaitXLogInsertionsToFinish() calls and the critical-section markers are
 * not shown.
 */
2785{
2789
2790 /*
2791 * During REDO, we are reading not writing WAL. Therefore, instead of
2792 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2793 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2794 * to act this way too, and because when it tries to write the
2795 * end-of-recovery checkpoint, it should indeed flush.
2796 */
2797 if (!XLogInsertAllowed())
2798 {
2799 UpdateMinRecoveryPoint(record, false);
2800 return;
2801 }
2802
2803 /* Quick exit if already known flushed */
2804 if (record <= LogwrtResult.Flush)
2805 return;
2806
2807#ifdef WAL_DEBUG
2808 if (XLOG_DEBUG)
2809 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
2810 LSN_FORMAT_ARGS(record),
2813#endif
2814
2816
2817 /*
2818 * Since fsync is usually a horribly expensive operation, we try to
2819 * piggyback as much data as we can on each fsync: if we see any more data
2820 * entered into the xlog buffer, we'll write and fsync that too, so that
2821 * the final value of LogwrtResult.Flush is as large as possible. This
2822 * gives us some chance of avoiding another fsync immediately after.
2823 */
2824
2825 /* initialize to given target; may increase below */
2826 WriteRqstPtr = record;
2827
2828 /*
2829 * Now wait until we get the write lock, or someone else does the flush
2830 * for us.
2831 */
2832 for (;;)
2833 {
2835
2836 /* done already? */
2838 if (record <= LogwrtResult.Flush)
2839 break;
2840
2841 /*
2842 * Before actually performing the write, wait for all in-flight
2843 * insertions to the pages we're about to write to finish.
2844 */
2846 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2850
2851 /*
2852 * Try to get the write lock. If we can't get it immediately, wait
2853 * until it's released, and recheck if we still need to do the flush
2854 * or if the backend that held the lock did it for us already. This
2855 * helps to maintain a good rate of group committing when the system
2856 * is bottlenecked by the speed of fsyncing.
2857 */
/* the lock-attempt condition is elided; presumably LWLockAcquireOrWait() per References -- TODO confirm */
2859 {
2860 /*
2861 * The lock is now free, but we didn't acquire it yet. Before we
2862 * do, loop back to check if someone else flushed the record for
2863 * us already.
2864 */
2865 continue;
2866 }
2867
2868 /* Got the lock; recheck whether request is satisfied */
2870 if (record <= LogwrtResult.Flush)
2871 {
2873 break;
2874 }
2875
2876 /*
2877 * Sleep before flush! By adding a delay here, we may give further
2878 * backends the opportunity to join the backlog of group commit
2879 * followers; this can significantly improve transaction throughput,
2880 * at the risk of increasing transaction latency.
2881 *
2882 * We do not sleep if enableFsync is not turned on, nor if there are
2883 * fewer than CommitSiblings other backends with active transactions.
2884 */
2885 if (CommitDelay > 0 && enableFsync &&
2887 {
2891
2892 /*
2893 * Re-check how far we can now flush the WAL. It's generally not
2894 * safe to call WaitXLogInsertionsToFinish while holding
2895 * WALWriteLock, because an in-progress insertion might need to
2896 * also grab WALWriteLock to make progress. But we know that all
2897 * the insertions up to insertpos have already finished, because
2898 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2899 * We're only calling it again to allow insertpos to be moved
2900 * further forward, not to actually wait for anyone.
2901 */
2903 }
2904
2905 /* try to write/flush later additions to XLOG as well */
2906 WriteRqst.Write = insertpos;
2907 WriteRqst.Flush = insertpos;
2908
2909 XLogWrite(WriteRqst, insertTLI, false);
2910
2912 /* done */
2913 break;
2914 }
2915
2917
2918 /* wake up walsenders now that we've released heavily contended locks */
2920
2921 /*
2922 * If we flushed an LSN that someone was waiting for, notify the waiters.
2923 */
2924 if (waitLSNState &&
2928
2929 /*
2930 * If we still haven't flushed to the request point then we have a
2931 * problem; most likely, the requested flush point is past end of XLOG.
2932 * This has been seen to occur when a disk page has a corrupted LSN.
2933 *
2934 * Formerly we treated this as a PANIC condition, but that hurts the
2935 * system's robustness rather than helping it: we do not want to take down
2936 * the whole system due to corruption on one data page. In particular, if
2937 * the bad page is encountered again during recovery then we would be
2938 * unable to restart the database at all! (This scenario actually
2939 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2940 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2941 * the only time we can reach here during recovery is while flushing the
2942 * end-of-recovery checkpoint record, and we don't expect that to have a
2943 * bad LSN.
2944 *
2945 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2946 * since xact.c calls this routine inside a critical section. However,
2947 * calls from bufmgr.c are not within critical sections and so we will not
2948 * force a restart for a bad LSN on a data page.
2949 */
2950 if (LogwrtResult.Flush < record)
2951 elog(ERROR,
2952 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2953 LSN_FORMAT_ARGS(record),
2955
2956 /*
2957 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2958 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2959 * remain consistent.
2960 */
2961 Assert(!XLogNeedsFlush(record));
2962}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1404
bool MinimumActiveBackends(int min)
Definition procarray.c:3508
int CommitDelay
Definition xlog.c:135
int CommitSiblings
Definition xlog.c:136
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3146
bool XLogInsertAllowed(void)
Definition xlog.c:6516

References Assert, CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, fb(), XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), WaitLSNState::minWaitedLSN, pg_atomic_read_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WAIT_LSN_TYPE_PRIMARY_FLUSH, waitLSNState, WaitLSNWakeup(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FinishSyncWorker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_logical_decoding_status_update_record(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3796 of file xlog.c.

/*
 * XLogGetLastRemovedSegno -- return a copy of XLogCtl->lastRemovedSegNo.
 *
 * NOTE: original lines 3800 and 3802, which surround the read, are elided
 * from this rendered listing. Per References they are presumably
 * SpinLockAcquire/SpinLockRelease on XLogCtl->info_lck -- TODO confirm.
 */
3797{
3798 XLogSegNo lastRemovedSegNo;
3799
3801 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3803
3804 return lastRemovedSegNo;
3805}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3812 of file xlog.c.

/*
 * XLogGetOldestSegno -- scan XLOGDIR and return the oldest segment number
 * found among WAL segment files belonging to timeline tli.
 *
 * Directory entries that are not WAL segment file names, or that belong to
 * another timeline, are skipped. The "oldest_segno == 0" test treats 0 as
 * "nothing found yet"; presumably 0 is also what is returned when no file
 * matches -- TODO confirm, the initialisation is not shown.
 *
 * NOTE: rendered listing with elided lines. The declaration of
 * oldest_segno, the directory open, the per-entry declarations of file_tli
 * and file_segno, the file-name parsing call and the assignment that
 * updates oldest_segno are not shown.
 */
3813{
3814 DIR *xldir;
3815 struct dirent *xlde;
3817
3819 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3820 {
3823
3824 /* Ignore files that are not XLOG segments. */
3825 if (!IsXLogFileName(xlde->d_name))
3826 continue;
3827
3828 /* Parse filename to get TLI and segno. */
3831
3832 /* Ignore anything that's not from the TLI of interest. */
3833 if (tli != file_tli)
3834 continue;
3835
3836 /* If it's the oldest so far, update oldest_segno. */
3837 if (oldest_segno == 0 || file_segno < oldest_segno)
3839 }
3840
3841 FreeDir(xldir);
3842 return oldest_segno;
3843}

References AllocateDir(), fb(), FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2683 of file xlog.c.

2684{
2685 XLogRecPtr retval;
2686
2690
2691 return retval;
2692}
XLogRecPtr replicationSlotMinLSN
Definition xlog.c:461

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5303 of file xlog.c.

5304{
5305 char xlogfname[MAXFNAMELEN];
5308
5309 /* we always switch to a new timeline after archive recovery */
5310 Assert(endTLI != newTLI);
5311
5312 /*
5313 * Update min recovery point one last time.
5314 */
5316
5317 /*
5318 * Calculate the last segment on the old timeline, and the first segment
5319 * on the new timeline. If the switch happens in the middle of a segment,
5320 * they are the same, but if the switch happens exactly at a segment
5321 * boundary, startLogSegNo will be endLogSegNo + 1.
5322 */
5325
5326 /*
5327 * Initialize the starting WAL segment for the new timeline. If the switch
5328 * happens in the middle of a segment, copy data from the last WAL segment
5329 * of the old timeline up to the switch point, to the starting WAL segment
5330 * on the new timeline.
5331 */
5333 {
5334 /*
5335 * Make a copy of the file on the new timeline.
5336 *
5337 * Writing WAL isn't allowed yet, so there are no locking
5338 * considerations. But we should be just as tense as XLogFileInit to
5339 * avoid emplacing a bogus file.
5340 */
5343 }
5344 else
5345 {
5346 /*
5347 * The switch happened at a segment boundary, so just create the next
5348 * segment on the new timeline.
5349 */
5350 int fd;
5351
5353
5354 if (close(fd) != 0)
5355 {
5356 int save_errno = errno;
5357
5359 errno = save_errno;
5360 ereport(ERROR,
5362 errmsg("could not close file \"%s\": %m", xlogfname)));
5363 }
5364 }
5365
5366 /*
5367 * Let's just make real sure there are not .ready or .done flags posted
5368 * for the new segment.
5369 */
5372}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition xlog.c:3456

References Assert, close, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6516 of file xlog.c.

6517{
6518 /*
6519 * If value is "unconditionally true" or "unconditionally false", just
6520 * return it. This provides the normal fast path once recovery is known
6521 * done.
6522 */
6523 if (LocalXLogInsertAllowed >= 0)
6524 return (bool) LocalXLogInsertAllowed;
6525
6526 /*
6527 * Else, must check to see if we're still in recovery.
6528 */
6529 if (RecoveryInProgress())
6530 return false;
6531
6532 /*
6533 * On exit from recovery, reset to "unconditionally true", since there is
6534 * no need to keep checking.
6535 */
6537 return true;
6538}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData * rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
uint64  fpi_bytes,
bool  topxid_included 
)

Definition at line 750 of file xlog.c.

756{
759 bool inserted;
760 XLogRecord *rechdr = (XLogRecord *) rdata->data;
761 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
767
768 /* Does this record type require special handling? */
769 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
770 {
771 if (info == XLOG_SWITCH)
773 else if (info == XLOG_CHECKPOINT_REDO)
775 }
776
777 /* we assume that all of the record header is in the first chunk */
779
780 /* cross-check on whether we should be here or not */
781 if (!XLogInsertAllowed())
782 elog(ERROR, "cannot make new WAL entries during recovery");
783
784 /*
785 * Given that we're not in recovery, InsertTimeLineID is set and can't
786 * change, so we can read it without a lock.
787 */
789
790 /*----------
791 *
792 * We have now done all the preparatory work we can without holding a
793 * lock or modifying shared state. From here on, inserting the new WAL
794 * record to the shared WAL buffer cache is a two-step process:
795 *
796 * 1. Reserve the right amount of space from the WAL. The current head of
797 * reserved space is kept in Insert->CurrBytePos, and is protected by
798 * insertpos_lck.
799 *
800 * 2. Copy the record to the reserved WAL space. This involves finding the
801 * correct WAL buffer containing the reserved space, and copying the
802 * record in place. This can be done concurrently in multiple processes.
803 *
804 * To keep track of which insertions are still in-progress, each concurrent
805 * inserter acquires an insertion lock. In addition to just indicating that
806 * an insertion is in progress, the lock tells others how far the inserter
807 * has progressed. There is a small fixed number of insertion locks,
808 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
809 * boundary, it updates the value stored in the lock to how far it has
810 * inserted, to allow the previous buffer to be flushed.
811 *
812 * Holding onto an insertion lock also protects RedoRecPtr and
813 * fullPageWrites from changing until the insertion is finished.
814 *
815 * Step 2 can usually be done completely in parallel. If the required WAL
816 * page is not initialized yet, you have to grab WALBufMappingLock to
817 * initialize it, but the WAL writer tries to do that ahead of insertions
818 * to keep that from happening in the critical path.
819 *
820 *----------
821 */
823
824 if (likely(class == WALINSERT_NORMAL))
825 {
827
828 /*
829 * Check to see if my copy of RedoRecPtr is out of date. If so, may
830 * have to go back and have the caller recompute everything. This can
831 * only happen just after a checkpoint, so it's better to be slow in
832 * this case and fast otherwise.
833 *
834 * Also check to see if fullPageWrites was just turned on or there's a
835 * running backup (which forces full-page writes); if we weren't
836 * already doing full-page writes then go back and recompute.
837 *
838 * If we aren't doing full-page writes then RedoRecPtr doesn't
839 * actually affect the contents of the XLOG record, so we'll update
840 * our local copy but not force a recomputation. (If doPageWrites was
841 * just turned off, we could recompute the record without full pages,
842 * but we choose not to bother.)
843 */
844 if (RedoRecPtr != Insert->RedoRecPtr)
845 {
847 RedoRecPtr = Insert->RedoRecPtr;
848 }
849 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
850
851 if (doPageWrites &&
854 {
855 /*
856 * Oops, some buffer now needs to be backed up that the caller
857 * didn't back up. Start over.
858 */
861 return InvalidXLogRecPtr;
862 }
863
864 /*
865 * Reserve space for the record in the WAL. This also sets the xl_prev
866 * pointer.
867 */
869 &rechdr->xl_prev);
870
871 /* Normal records are always inserted. */
872 inserted = true;
873 }
874 else if (class == WALINSERT_SPECIAL_SWITCH)
875 {
876 /*
877 * In order to insert an XLOG_SWITCH record, we need to hold all of
878 * the WAL insertion locks, not just one, so that no one else can
879 * begin inserting a record until we've figured out how much space
880 * remains in the current WAL segment and claimed all of it.
881 *
882 * Nonetheless, this case is simpler than the normal cases handled
883 * below, which must check for changes in doPageWrites and RedoRecPtr.
884 * Those checks are only needed for records that can contain buffer
885 * references, and an XLOG_SWITCH record never does.
886 */
890 }
891 else
892 {
894
895 /*
896 * We need to update both the local and shared copies of RedoRecPtr,
897 * which means that we need to hold all the WAL insertion locks.
898 * However, there can't be any buffer references, so as above, we need
899 * not check RedoRecPtr before inserting the record; we just need to
900 * update it afterwards.
901 */
905 &rechdr->xl_prev);
906 RedoRecPtr = Insert->RedoRecPtr = StartPos;
907 inserted = true;
908 }
909
910 if (inserted)
911 {
912 /*
913 * Now that xl_prev has been filled in, calculate CRC of the record
914 * header.
915 */
916 rdata_crc = rechdr->xl_crc;
919 rechdr->xl_crc = rdata_crc;
920
921 /*
922 * All the record data, including the header, is now ready to be
923 * inserted. Copy the record in the space reserved.
924 */
925 CopyXLogRecordToWAL(rechdr->xl_tot_len,
928
929 /*
930 * Unless record is flagged as not important, update LSN of last
931 * important record in the current slot. When holding all locks, just
932 * update the first one.
933 */
934 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
935 {
936 int lockno = holdingAllLocks ? 0 : MyLockNo;
937
939 }
940 }
941 else
942 {
943 /*
944 * This was an xlog-switch record, but the current insert location was
945 * already exactly at the beginning of a segment, so there was no need
946 * to do anything.
947 */
948 }
949
950 /*
951 * Done! Let others know that we're finished.
952 */
954
956
958
959 /*
960 * Mark the top transaction id as logged (if needed) so that we do not try
961 * to log it again with the next WAL record in the current subtransaction.
962 */
963 if (topxid_included)
965
966 /*
967 * Update shared LogwrtRqst.Write, if we crossed page boundary.
968 */
970 {
972 /* advance global request to include new block(s) */
977 }
978
979 /*
980 * If this was an XLOG_SWITCH record, flush the record and the empty
981 * padding space that fills the rest of the segment, and perform
982 * end-of-segment actions (eg, notifying archiver).
983 */
984 if (class == WALINSERT_SPECIAL_SWITCH)
985 {
988
989 /*
990 * Even though we reserved the rest of the segment for us, which is
991 * reflected in EndPos, we return a pointer to just the end of the
992 * xlog-switch record.
993 */
994 if (inserted)
995 {
998 {
1000
1001 if (offset == EndPos % XLOG_BLCKSZ)
1003 else
1005 }
1006 }
1007 }
1008
1009#ifdef WAL_DEBUG
1010 if (XLOG_DEBUG)
1011 {
1013 XLogRecord *record;
1017 char *errormsg = NULL;
1019
1021
1023 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1024
1025 /*
1026 * We have to piece together the WAL record data from the XLogRecData
1027 * entries, so that we can pass it to the rm_desc function as one
1028 * contiguous chunk.
1029 */
1031 for (; rdata != NULL; rdata = rdata->next)
1033
1034 /* We also need temporary space to decode the record. */
1035 record = (XLogRecord *) recordBuf.data;
1038
1039 if (!debug_reader)
1041 XL_ROUTINE(.page_read = NULL,
1042 .segment_open = NULL,
1043 .segment_close = NULL),
1044 NULL);
1045 if (!debug_reader)
1046 {
1047 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1048 }
1050 decoded,
1051 record,
1052 EndPos,
1053 &errormsg))
1054 {
1055 appendStringInfo(&buf, "error decoding record: %s",
1056 errormsg ? errormsg : "no error message");
1057 }
1058 else
1059 {
1060 appendStringInfoString(&buf, " - ");
1061
1062 debug_reader->record = decoded;
1064 debug_reader->record = NULL;
1065 }
1066 elog(LOG, "%s", buf.data);
1067
1068 pfree(decoded);
1069 pfree(buf.data);
1070 pfree(recordBuf.data);
1072 }
1073#endif
1074
1075 /*
1076 * Update our global variables
1077 */
1080
1081 /* Report WAL traffic to the instrumentation. */
1082 if (inserted)
1083 {
1084 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1088
1089 /* Required for the flush of pending stats WAL data */
1090 pgstat_report_fixed = true;
1091 }
1092
1093 return EndPos;
1094}
#define likely(x)
Definition c.h:421
#define unlikely(x)
Definition c.h:422
void * palloc(Size size)
Definition mcxt.c:1387
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
void MarkSubxactTopXidLogged(void)
Definition xact.c:592
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition xact.c:542
XLogRecPtr XactLastRecEnd
Definition xlog.c:257
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition xlog.c:1232
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1115
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition xlog.c:1171
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, fb(), FIN_CRC32C, holdingAllLocks, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogRecPtrIsValid, and XLogSegmentOffset.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3146 of file xlog.c.

3147{
3148 /*
3149 * During recovery, we don't flush WAL but update minRecoveryPoint
3150 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3151 * would need to be updated.
3152 *
3153 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3154 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3155 * This check should be consistent with the one in XLogFlush().
3156 */
3157 if (!XLogInsertAllowed())
3158 {
3159 /* Quick exit if already known to be updated or cannot be updated */
3161 return false;
3162
3163 /*
3164 * An invalid minRecoveryPoint means that we need to recover all the
3165 * WAL, i.e., we're doing crash recovery. We never modify the control
3166 * file's value in that case, so we can short-circuit future checks
3167 * here too. This triggers a quick exit path for the startup process,
3168 * which cannot update its local copy of minRecoveryPoint as long as
3169 * it has not replayed all WAL available when doing crash recovery.
3170 */
3172 {
3173 updateMinRecoveryPoint = false;
3174 return false;
3175 }
3176
3177 /*
3178 * Update local copy of minRecoveryPoint. But if the lock is busy,
3179 * just return a conservative guess.
3180 */
3182 return true;
3186
3187 /*
3188 * Check minRecoveryPoint for any other process than the startup
3189 * process doing crash recovery, which should not update the control
3190 * file value if crash recovery is still running.
3191 */
3193 updateMinRecoveryPoint = false;
3194
3195 /* check again */
3197 return false;
3198 else
3199 return true;
3200 }
3201
3202 /* Quick exit if already known flushed */
3203 if (record <= LogwrtResult.Flush)
3204 return false;
3205
3206 /* read LogwrtResult and update local state */
3208
3209 /* check again */
3210 if (record <= LogwrtResult.Flush)
3211 return false;
3212
3213 return true;
3214}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1347

References ControlFile, fb(), XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsValid.

Referenced by GetVictimBuffer(), SetHintBits(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8156 of file xlog.c.

8157{
8159 XLogRegisterData(&nextOid, sizeof(Oid));
8161
8162 /*
8163 * We need not flush the NEXTOID record immediately, because any of the
8164 * just-allocated OIDs could only reach disk as part of a tuple insert or
8165 * update that would have its own XLOG record that must follow the NEXTOID
8166 * record. Therefore, the standard buffer LSN interlock applied to those
8167 * records will ensure no such OID reaches disk before the NEXTOID record
8168 * does.
8169 *
8170 * Note, however, that the above statement only covers state "within" the
8171 * database. When we use a generated OID as a file or directory name, we
8172 * are in a sense violating the basic WAL rule, because that filesystem
8173 * change may reach disk before the NEXTOID WAL record does. The impact
8174 * of this is that if a database crash occurs immediately afterward, we
8175 * might after restart re-generate the same OID and find that it conflicts
8176 * with the leftover file or directory. But since for safety's sake we
8177 * always loop until finding a nonconflicting filename, this poses no real
8178 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8179 */
8180}

References fb(), XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1948 of file xlog.c.

1949{
1952 uint32 offset;
1953 uint64 result;
1954
1956
1958 offset = ptr % XLOG_BLCKSZ;
1959
1960 if (fullpages == 0)
1961 {
1962 result = fullsegs * UsableBytesInSegment;
1963 if (offset > 0)
1964 {
1965 Assert(offset >= SizeOfXLogLongPHD);
1966 result += offset - SizeOfXLogLongPHD;
1967 }
1968 }
1969 else
1970 {
1971 result = fullsegs * UsableBytesInSegment +
1972 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1973 (fullpages - 1) * UsableBytesInPage; /* full pages */
1974 if (offset > 0)
1975 {
1976 Assert(offset >= SizeOfXLogShortPHD);
1977 result += offset - SizeOfXLogShortPHD;
1978 }
1979 }
1980
1981 return result;
1982}

References Assert, fb(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8236 of file xlog.c.

8237{
8246 {
8247 /*
8248 * The change in number of backend slots doesn't need to be WAL-logged
8249 * if archiving is not enabled, as you can't start archive recovery
8250 * with wal_level=minimal anyway. We don't really care about the
8251 * values in pg_control either if wal_level=minimal, but seems better
8252 * to keep them up-to-date to avoid confusion.
8253 */
8255 {
8258
8260 xlrec.max_worker_processes = max_worker_processes;
8261 xlrec.max_wal_senders = max_wal_senders;
8262 xlrec.max_prepared_xacts = max_prepared_xacts;
8263 xlrec.max_locks_per_xact = max_locks_per_xact;
8264 xlrec.wal_level = wal_level;
8265 xlrec.wal_log_hints = wal_log_hints;
8266 xlrec.track_commit_timestamp = track_commit_timestamp;
8267
8269 XLogRegisterData(&xlrec, sizeof(xlrec));
8270
8273 }
8274
8276
8286
8288 }
8289}

References ControlFile, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char * rpName)

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2613 of file xlog.c.

2614{
2615 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2616 bool sleeping;
2617 bool wakeup = false;
2619
2623 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2624 XLogCtl->asyncXactLSN = asyncXactLSN;
2626
2627 /*
2628 * If somebody else already called this function with a more aggressive
2629 * LSN, they will have done what we needed (and perhaps more).
2630 */
2631 if (asyncXactLSN <= prevAsyncXactLSN)
2632 return;
2633
2634 /*
2635 * If the WALWriter is sleeping, kick it to make it come out of low-power
2636 * mode, so that this async commit will reach disk within the expected
2637 * amount of time. Otherwise, determine whether it has enough WAL
2638 * available to flush, the same way that XLogBackgroundFlush() does.
2639 */
2640 if (sleeping)
2641 wakeup = true;
2642 else
2643 {
2644 int flushblocks;
2645
2647
2648 flushblocks =
2650
2652 wakeup = true;
2653 }
2654
2655 if (wakeup)
2656 {
2657 volatile PROC_HDR *procglobal = ProcGlobal;
2658 ProcNumber walwriterProc = procglobal->walwriterProc;
2659
2660 if (walwriterProc != INVALID_PROC_NUMBER)
2661 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2662 }
2663}
void SetLatch(Latch *latch)
Definition latch.c:290
#define GetPGProcByNumber(n)
Definition proc.h:446
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
PROC_HDR * ProcGlobal
Definition proc.c:79
ProcNumber walwriterProc
Definition proc.h:430
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References XLogCtlData::asyncXactLSN, fb(), XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 5012 of file xlog.c.

5013{
5014 bool foundCFile,
5015 foundXLog;
5016 char *allocptr;
5017 int i;
5019
5020#ifdef WAL_DEBUG
5021
5022 /*
5023 * Create a memory context for WAL debugging that's exempt from the normal
5024 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
5025 * an allocation fails, but wal_debug is not for production use anyway.
5026 */
5027 if (walDebugCxt == NULL)
5028 {
5030 "WAL Debug",
5033 }
5034#endif
5035
5036
5037 XLogCtl = (XLogCtlData *)
5038 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
5039
5042 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
5043
5044 if (foundCFile || foundXLog)
5045 {
5046 /* both should be present or neither */
5048
5049 /* Initialize local copy of WALInsertLocks */
5051
5052 if (localControlFile)
5054 return;
5055 }
5056 memset(XLogCtl, 0, sizeof(XLogCtlData));
5057
5058 /*
5059 * Already have read control file locally, unless in bootstrap mode. Move
5060 * contents into shared memory.
5061 */
5062 if (localControlFile)
5063 {
5066 }
5067
5068 /*
5069 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5070 * multiple of the alignment for same, so no extra alignment padding is
5071 * needed here.
5072 */
5073 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5076
5077 for (i = 0; i < XLOGbuffers; i++)
5078 {
5080 }
5081
5082 /* WAL insertion locks. Ensure they're aligned to the full padded size */
5083 allocptr += sizeof(WALInsertLockPadded) -
5088
5089 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5090 {
5094 }
5095
5096 /*
5097 * Align the start of the page buffers to a full xlog block size boundary.
5098 * This simplifies some calculations in XLOG insertion. It is also
5099 * required for O_DIRECT.
5100 */
5104
5105 /*
5106 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5107 * in additional info.)
5108 */
5112 XLogCtl->WalWriterSleeping = false;
5113
5120}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:829
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:698
MemoryContext TopMemoryContext
Definition mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition mcxt.c:743
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:389
#define SpinLockInit(lock)
Definition spin.h:57
int XLogCacheBlck
Definition xlog.c:496
WALInsertLockPadded * WALInsertLocks
Definition xlog.c:447
slock_t insertpos_lck
Definition xlog.c:401
Size XLOGShmemSize(void)
Definition xlog.c:4962

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, fb(), i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4962 of file xlog.c.

4963{
4964 Size size;
4965
4966 /*
4967 * If the value of wal_buffers is -1, use the preferred auto-tune value.
4968 * This isn't an amazingly clean place to do this, but we must wait till
4969 * NBuffers has received its final value, and must do it before using the
4970 * value of XLOGbuffers to do anything important.
4971 *
4972 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4973 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4974 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4975 * the matter with PGC_S_OVERRIDE.
4976 */
4977 if (XLOGbuffers == -1)
4978 {
4979 char buf[32];
4980
4981 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4982 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4984 if (XLOGbuffers == -1) /* failed to apply it? */
4985 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4987 }
4988 Assert(XLOGbuffers > 0);
4989
4990 /* XLogCtl */
4991 size = sizeof(XLogCtlData);
4992
4993 /* WAL insertion locks, plus alignment */
4994 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4995 /* xlblocks array */
4996 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4997 /* extra alignment padding for XLOG I/O buffers */
4998 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4999 /* and the buffers themselves */
5000 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
5001
5002 /*
5003 * Note: we don't count ControlFileData, it comes out of the "slop factor"
5004 * added by CreateSharedMemoryAndSemaphores. This lets us use this
5005 * routine again below to compute the actual allocation size.
5006 */
5007
5008 return size;
5009}
#define Max(x, y)
Definition c.h:1001
@ PGC_S_OVERRIDE
Definition guc.h:123
@ PGC_POSTMASTER
Definition guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition shmem.c:495
Size mul_size(Size s1, Size s2)
Definition shmem.c:510

References add_size(), Assert, buf, fb(), Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9634 of file xlog.c.

9635{
9637
9640}
#define AmStartupProcess()
Definition miscadmin.h:390
void ShutdownWalRcv(void)
void ResetInstallXLogFileSegmentActive(void)
Definition xlog.c:9653

References AmStartupProcess, Assert, IsUnderPostmaster, ResetInstallXLogFileSegmentActive(), and ShutdownWalRcv().

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2308 of file xlog.c.

2309{
2310 bool ispartialpage;
2311 bool last_iteration;
2312 bool finishing_seg;
2313 int curridx;
2314 int npages;
2315 int startidx;
2317
2318 /* We should always be inside a critical section here */
2320
2321 /*
2322 * Update local LogwrtResult (caller probably did this already, but...)
2323 */
2325
2326 /*
2327 * Since successive pages in the xlog cache are consecutively allocated,
2328 * we can usually gather multiple pages together and issue just one
2329 * write() call. npages is the number of pages we have determined can be
2330 * written together; startidx is the cache block index of the first one,
2331 * and startoffset is the file offset at which it should go. The latter
2332 * two variables are only valid when npages > 0, but we must initialize
2333 * all of them to keep the compiler quiet.
2334 */
2335 npages = 0;
2336 startidx = 0;
2337 startoffset = 0;
2338
2339 /*
2340 * Within the loop, curridx is the cache block index of the page to
2341 * consider writing. Begin at the buffer containing the next unwritten
2342 * page, or last partially written page.
2343 */
2345
2346 while (LogwrtResult.Write < WriteRqst.Write)
2347 {
2348 /*
2349 * Make sure we're not ahead of the insert process. This could happen
2350 * if we're passed a bogus WriteRqst.Write that is past the end of the
2351 * last page that's been initialized by AdvanceXLInsertBuffer.
2352 */
2354
2355 if (LogwrtResult.Write >= EndPtr)
2356 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2359
2360 /* Advance LogwrtResult.Write to end of current buffer page */
2363
2366 {
2367 /*
2368 * Switch to new logfile segment. We cannot have any pending
2369 * pages here (since we dump what we have at segment end).
2370 */
2371 Assert(npages == 0);
2372 if (openLogFile >= 0)
2373 XLogFileClose();
2376 openLogTLI = tli;
2377
2378 /* create/use new log file */
2381 }
2382
2383 /* Make sure we have the current logfile open */
2384 if (openLogFile < 0)
2385 {
2388 openLogTLI = tli;
2391 }
2392
2393 /* Add current page to the set of pending pages-to-dump */
2394 if (npages == 0)
2395 {
2396 /* first of group */
2397 startidx = curridx;
2400 }
2401 npages++;
2402
2403 /*
2404 * Dump the set if this will be the last loop iteration, or if we are
2405 * at the last page of the cache area (since the next page won't be
2406 * contiguous in memory), or if we are at the end of the logfile
2407 * segment.
2408 */
2410
2413
2414 if (last_iteration ||
2417 {
2418 char *from;
2419 Size nbytes;
2420 Size nleft;
2423
2424 /* OK to write the page(s) */
2425 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2426 nbytes = npages * (Size) XLOG_BLCKSZ;
2427 nleft = nbytes;
2428 do
2429 {
2430 errno = 0;
2431
2432 /*
2433 * Measure I/O timing to write WAL data, for pg_stat_io.
2434 */
2436
2440
2442 IOOP_WRITE, start, 1, written);
2443
2444 if (written <= 0)
2445 {
2446 char xlogfname[MAXFNAMELEN];
2447 int save_errno;
2448
2449 if (errno == EINTR)
2450 continue;
2451
2452 save_errno = errno;
2455 errno = save_errno;
2456 ereport(PANIC,
2458 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2460 }
2461 nleft -= written;
2462 from += written;
2464 } while (nleft > 0);
2465
2466 npages = 0;
2467
2468 /*
2469 * If we just wrote the whole last page of a logfile segment,
2470 * fsync the segment immediately. This avoids having to go back
2471 * and re-open prior segments when an fsync request comes along
2472 * later. Doing it here ensures that one and only one backend will
2473 * perform this fsync.
2474 *
2475 * This is also the right place to notify the Archiver that the
2476 * segment is ready to copy to archival storage, and to update the
2477 * timer for archive_timeout, and to signal for a checkpoint if
2478 * too many logfile segments have been used since the last
2479 * checkpoint.
2480 */
2481 if (finishing_seg)
2482 {
2484
2485 /* signal that we need to wake up walsenders later */
2487
2488 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2489
2490 if (XLogArchivingActive())
2492
2495
2496 /*
2497 * Request a checkpoint if we've consumed too much xlog since
2498 * the last one. For speed, we first check using the local
2499 * copy of RedoRecPtr, which might be out of date; if it looks
2500 * like a checkpoint is needed, forcibly update RedoRecPtr and
2501 * recheck.
2502 */
2504 {
2505 (void) GetRedoRecPtr();
2508 }
2509 }
2510 }
2511
2512 if (ispartialpage)
2513 {
2514 /* Only asked to write a partial page */
2516 break;
2517 }
2519
2520 /* If flexible, break out of loop as soon as we wrote something */
2521 if (flexible && npages == 0)
2522 break;
2523 }
2524
2525 Assert(npages == 0);
2526
2527 /*
2528 * If asked to flush, do so
2529 */
2530 if (LogwrtResult.Flush < WriteRqst.Flush &&
2532 {
2533 /*
2534 * Could get here without iterating above loop, in which case we might
2535 * have no open file or the wrong one. However, we do not need to
2536 * fsync more than one file.
2537 */
2540 {
2541 if (openLogFile >= 0 &&
2544 XLogFileClose();
2545 if (openLogFile < 0)
2546 {
2549 openLogTLI = tli;
2552 }
2553
2555 }
2556
2557 /* signal that we need to wake up walsenders later */
2559
2561 }
2562
2563 /*
2564 * Update shared-memory status
2565 *
2566 * We make sure that the shared 'request' values do not fall behind the
2567 * 'result' values. This is not absolutely essential, but it saves some
2568 * code in a couple of places.
2569 */
2576
2577 /*
2578 * We write Write first, then a barrier, then Flush. When reading, the opposite must
2579 * be done (with a matching barrier in between), so that we always see a
2580 * Flush value that trails behind the Write value seen.
2581 */
2585
2586#ifdef USE_ASSERT_CHECKING
2587 {
2591
2597
2598 /* WAL written to disk is never behind WAL flushed */
2599 Assert(Write >= Flush);
2600
2601 /* WAL inserted to buffers is never behind WAL written */
2602 Assert(Insert >= Write);
2603 }
2604#endif
2605}
void ReserveExternalFD(void)
Definition fd.c:1203
volatile uint32 CritSectionCount
Definition globals.c:45
XLogRecPtr Flush
XLogRecPtr Write
#define WalSndWakeupRequest()
Definition walsender.h:58
#define EINTR
Definition win32_port.h:361
XLogRecPtr GetRedoRecPtr(void)
Definition xlog.c:6564
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition xlog.c:3656
#define NextBufIdx(idx)
Definition xlog.c:586
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition xlog.c:8856
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition xlog.c:2284
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), fb(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition xlog.h:68
@ ARCHIVE_MODE_OFF
Definition xlog.h:66
@ ARCHIVE_MODE_ON
Definition xlog.h:67

Definition at line 194 of file xlog.c.

194 {
195 {"always", ARCHIVE_MODE_ALWAYS, false},
196 {"on", ARCHIVE_MODE_ON, false},
197 {"off", ARCHIVE_MODE_OFF, false},
198 {"true", ARCHIVE_MODE_ON, true},
199 {"false", ARCHIVE_MODE_OFF, true},
200 {"yes", ARCHIVE_MODE_ON, true},
201 {"no", ARCHIVE_MODE_OFF, true},
202 {"1", ARCHIVE_MODE_ON, true},
203 {"0", ARCHIVE_MODE_OFF, true},
204 {NULL, 0, false}
205};

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 169 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 162 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 135 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 136 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 125 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 220 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 227 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 239 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 138 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 118 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 638 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 639 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 163 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 650 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 127 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 129 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 128 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 139 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 130 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 119 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 126 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 131 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

Definition at line 137 of file xlog.c.

Referenced by ApplyLauncherMain(), launch_sync_worker(), and WaitForWALToBecomeAvailable().

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 146 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 174 of file xlog.c.

174 {
175 {"fsync", WAL_SYNC_METHOD_FSYNC, false},
176#ifdef HAVE_FSYNC_WRITETHROUGH
177 {"fsync_writethrough", WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, false},
178#endif
179 {"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
180#ifdef O_SYNC
181 {"open_sync", WAL_SYNC_METHOD_OPEN, false},
182#endif
183#ifdef O_DSYNC
184 {"open_datasync", WAL_SYNC_METHOD_OPEN_DSYNC, false},
185#endif
186 {NULL, 0, false}
187};

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 121 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 120 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl