PostgreSQL Source Code  git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_iovec.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
bool check_max_slot_wal_keep_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
void CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

uint32 bootstrap_data_checksum_version
 
int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 111 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 598 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 575 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 579 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 150 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
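do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)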
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:462
pg_atomic_uint64 logWriteResult
Definition: xlog.c:472
pg_atomic_uint64 logFlushResult
Definition: xlog.c:473
static XLogCtlData * XLogCtl
Definition: xlog.c:561

Definition at line 615 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 592 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 586 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 554 of file xlog.c.

555 {
WalInsertClass
Definition: xlog.c:555
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:557
@ WALINSERT_NORMAL
Definition: xlog.c:556
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:558

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1980 of file xlog.c.

1981 {
1982  XLogCtlInsert *Insert = &XLogCtl->Insert;
1983  int nextidx;
1984  XLogRecPtr OldPageRqstPtr;
1985  XLogwrtRqst WriteRqst;
1986  XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1987  XLogRecPtr NewPageBeginPtr;
1988  XLogPageHeader NewPage;
1989  int npages pg_attribute_unused() = 0;
1990 
1991  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
1992 
1993  /*
1994  * Now that we have the lock, check if someone initialized the page
1995  * already.
1996  */
1997  while (upto >= XLogCtl->InitializedUpTo || opportunistic)
1998  {
1999  nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
2000 
2001  /*
2002  * Get ending-offset of the buffer page we need to replace (this may
2003  * be zero if the buffer hasn't been used yet). Fall through if it's
2004  * already written out.
2005  */
2006  OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
2007  if (LogwrtResult.Write < OldPageRqstPtr)
2008  {
2009  /*
2010  * Nope, got work to do. If we just want to pre-initialize as much
2011  * as we can without flushing, give up now.
2012  */
2013  if (opportunistic)
2014  break;
2015 
2016  /* Advance shared memory write request position */
2017  SpinLockAcquire(&XLogCtl->info_lck);
2018  if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2019  XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2020  SpinLockRelease(&XLogCtl->info_lck);
2021 
2022  /*
2023  * Acquire an up-to-date LogwrtResult value and see if we still
2024  * need to write it or if someone else already did.
2025  */
2026  RefreshXLogWriteResult(LogwrtResult);
2027  if (LogwrtResult.Write < OldPageRqstPtr)
2028  {
2029  /*
2030  * Must acquire write lock. Release WALBufMappingLock first,
2031  * to make sure that all insertions that we need to wait for
2032  * can finish (up to this same position). Otherwise we risk
2033  * deadlock.
2034  */
2035  LWLockRelease(WALBufMappingLock);
2036 
2037  WaitXLogInsertionsToFinish(OldPageRqstPtr);
2038 
2039  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2040 
2041  RefreshXLogWriteResult(LogwrtResult);
2042  if (LogwrtResult.Write >= OldPageRqstPtr)
2043  {
2044  /* OK, someone wrote it already */
2045  LWLockRelease(WALWriteLock);
2046  }
2047  else
2048  {
2049  /* Have to write it ourselves */
2050  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2051  WriteRqst.Write = OldPageRqstPtr;
2052  WriteRqst.Flush = 0;
2053  XLogWrite(WriteRqst, tli, false);
2054  LWLockRelease(WALWriteLock);
2055  PendingWalStats.wal_buffers_full++;
2056  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2057  }
2058  /* Re-acquire WALBufMappingLock and retry */
2059  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2060  continue;
2061  }
2062  }
2063 
2064  /*
2065  * Now the next buffer slot is free and we can set it up to be the
2066  * next output page.
2067  */
2068  NewPageBeginPtr = XLogCtl->InitializedUpTo;
2069  NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2070 
2071  Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2072 
2073  NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2074 
2075  /*
2076  * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2077  * before initializing. Otherwise, the old page may be partially
2078  * zeroed but look valid.
2079  */
2080  pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], InvalidXLogRecPtr);
2081  pg_write_barrier();
2082 
2083  /*
2084  * Be sure to re-zero the buffer so that bytes beyond what we've
2085  * written will look like zeroes and not valid XLOG records...
2086  */
2087  MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
2088 
2089  /*
2090  * Fill the new page's header
2091  */
2092  NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2093 
2094  /* NewPage->xlp_info = 0; */ /* done by memset */
2095  NewPage->xlp_tli = tli;
2096  NewPage->xlp_pageaddr = NewPageBeginPtr;
2097 
2098  /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2099 
2100  /*
2101  * If online backup is not in progress, mark the header to indicate
2102  * that WAL records beginning in this page have removable backup
2103  * blocks. This allows the WAL archiver to know whether it is safe to
2104  * compress archived WAL data by transforming full-block records into
2105  * the non-full-block format. It is sufficient to record this at the
2106  * page level because we force a page switch (in fact a segment
2107  * switch) when starting a backup, so the flag will be off before any
2108  * records can be written during the backup. At the end of a backup,
2109  * the last page will be marked as all unsafe when perhaps only part
2110  * is unsafe, but at worst the archiver would miss the opportunity to
2111  * compress a few records.
2112  */
2113  if (Insert->runningBackups == 0)
2114  NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2115 
2116  /*
2117  * If first page of an XLOG segment file, make it a long header.
2118  */
2119  if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2120  {
2121  XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2122 
2123  NewLongPage->xlp_sysid = ControlFile->system_identifier;
2124  NewLongPage->xlp_seg_size = wal_segment_size;
2125  NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2126  NewPage->xlp_info |= XLP_LONG_HEADER;
2127  }
2128 
2129  /*
2130  * Make sure the initialization of the page becomes visible to others
2131  * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2132  * holding a lock.
2133  */
2134  pg_write_barrier();
2135 
2136  pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2137  XLogCtl->InitializedUpTo = NewPageEndPtr;
2138 
2139  npages++;
2140  }
2141  LWLockRelease(WALBufMappingLock);
2142 
2143 #ifdef WAL_DEBUG
2144  if (XLOG_DEBUG && npages > 0)
2145  {
2146  elog(DEBUG1, "initialized %d pages, up to %X/%X",
2147  npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2148  }
2149 #endif
2150 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:480
#define pg_write_barrier()
Definition: atomics.h:152
#define pg_attribute_unused()
Definition: c.h:123
#define Assert(condition)
Definition: c.h:858
#define MemSet(start, val, len)
Definition: c.h:1020
size_t Size
Definition: c.h:605
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
static void Insert(File file)
Definition: fd.c:1313
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
@ LW_EXCLUSIVE
Definition: lwlock.h:114
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint64 system_identifier
Definition: pg_control.h:109
PgStat_Counter wal_buffers_full
Definition: pgstat.h:452
XLogwrtRqst LogwrtRqst
Definition: xlog.c:455
slock_t info_lck
Definition: xlog.c:548
XLogRecPtr InitializedUpTo
Definition: xlog.c:485
char * pages
Definition: xlog.c:492
pg_atomic_uint64 * xlblocks
Definition: xlog.c:493
XLogCtlInsert Insert
Definition: xlog.c:452
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:327
XLogRecPtr Flush
Definition: xlog.c:322
XLogRecPtr Write
Definition: xlog.c:321
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1499
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:615
int wal_segment_size
Definition: xlog.c:143
static XLogwrtResult LogwrtResult
Definition: xlog.c:607
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:586
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2309
static ControlFileData * ControlFile
Definition: xlog.c:569
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), PgStat_PendingWalStats::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2193 of file xlog.c.

2194 {
2195  CheckPointCompletionTarget = newval;
2196  CalculateCheckpointSegments();
2197 }
double CheckPointCompletionTarget
Definition: checkpointer.c:138
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2157

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2186 of file xlog.c.

2187 {
2188  max_wal_size_mb = newval;
2189  CalculateCheckpointSegments();
2190 }
int max_wal_size_mb
Definition: xlog.c:114

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4724 of file xlog.c.

4725 {
4726  /*
4727  * If some checks were deferred, it's possible that the checks will fail
4728  * later during InitializeWalConsistencyChecking(). But in that case, the
4729  * postmaster will exit anyway, so it's safe to proceed with the
4730  * assignment.
4731  *
4732  * Any built-in resource managers specified are assigned immediately,
4733  * which affects WAL created before shared_preload_libraries are
4734  * processed. Any custom resource managers specified won't be assigned
4735  * until after shared_preload_libraries are processed, but that's OK
4736  * because WAL for a custom resource manager can't be written before the
4737  * module is loaded anyway.
4738  */
4739  wal_consistency_checking = extra;
4740 }
bool * wal_consistency_checking
Definition: xlog.c:126

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8561 of file xlog.c.

8562 {
8563  if (wal_sync_method != new_wal_sync_method)
8564  {
8565  /*
8566  * To ensure that no blocks escape unsynced, force an fsync on the
8567  * currently open log segment (if any). Also, if the open flag is
8568  * changing, close the log file so it will be reopened (with new flag
8569  * bit) at next use.
8570  */
8571  if (openLogFile >= 0)
8572  {
8573  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8574  if (pg_fsync(openLogFile) != 0)
8575  {
8576  char xlogfname[MAXFNAMELEN];
8577  int save_errno;
8578 
8579  save_errno = errno;
8580  XLogFileName(xlogfname, openLogTLI, openLogSegNo,
8581  wal_segment_size);
8582  errno = save_errno;
8583  ereport(PANIC,
8584  (errcode_for_file_access(),
8585  errmsg("could not fsync file \"%s\": %m", xlogfname)));
8586  }
8587 
8588  pgstat_report_wait_end();
8589  if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
8590  XLogFileClose();
8591  }
8592  }
8593 }
int errcode_for_file_access(void)
Definition: elog.c:882
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
static int openLogFile
Definition: xlog.c:630
static int get_sync_bit(int method)
Definition: xlog.c:8513
int wal_sync_method
Definition: xlog.c:130
static TimeLineID openLogTLI
Definition: xlog.c:632
static void XLogFileClose(void)
Definition: xlog.c:3628
static XLogSegNo openLogSegNo
Definition: xlog.c:631
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( void  )

Definition at line 5000 of file xlog.c.

5001 {
5002  CheckPoint checkPoint;
5003  char *buffer;
5004  XLogPageHeader page;
5005  XLogLongPageHeader longpage;
5006  XLogRecord *record;
5007  char *recptr;
5008  uint64 sysidentifier;
5009  struct timeval tv;
5010  pg_crc32c crc;
5011 
5012  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5014 
5015  /*
5016  * Select a hopefully-unique system identifier code for this installation.
5017  * We use the result of gettimeofday(), including the fractional seconds
5018  * field, as being about as unique as we can easily get. (Think not to
5019  * use random(), since it hasn't been seeded and there's no portable way
5020  * to seed it other than the system clock value...) The upper half of the
5021  * uint64 value is just the tv_sec part, while the lower half contains the
5022  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5023  * PID for a little extra uniqueness. A person knowing this encoding can
5024  * determine the initialization time of the installation, which could
5025  * perhaps be useful sometimes.
5026  */
5027  gettimeofday(&tv, NULL);
5028  sysidentifier = ((uint64) tv.tv_sec) << 32;
5029  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5030  sysidentifier |= getpid() & 0xFFF;
5031 
5032  /* page buffer must be aligned suitably for O_DIRECT */
5033  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5034  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5035  memset(page, 0, XLOG_BLCKSZ);
5036 
5037  /*
5038  * Set up information for the initial checkpoint record
5039  *
5040  * The initial checkpoint record is written to the beginning of the WAL
5041  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5042  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5043  */
5044  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5045  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
5046  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
5047  checkPoint.fullPageWrites = fullPageWrites;
5048  checkPoint.nextXid =
5050  checkPoint.nextOid = FirstGenbkiObjectId;
5051  checkPoint.nextMulti = FirstMultiXactId;
5052  checkPoint.nextMultiOffset = 0;
5053  checkPoint.oldestXid = FirstNormalTransactionId;
5054  checkPoint.oldestXidDB = Template1DbOid;
5055  checkPoint.oldestMulti = FirstMultiXactId;
5056  checkPoint.oldestMultiDB = Template1DbOid;
5059  checkPoint.time = (pg_time_t) time(NULL);
5061 
5062  TransamVariables->nextXid = checkPoint.nextXid;
5063  TransamVariables->nextOid = checkPoint.nextOid;
5065  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5066  AdvanceOldestClogXid(checkPoint.oldestXid);
5067  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5068  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5070 
5071  /* Set up the XLOG page header */
5072  page->xlp_magic = XLOG_PAGE_MAGIC;
5073  page->xlp_info = XLP_LONG_HEADER;
5074  page->xlp_tli = BootstrapTimeLineID;
5076  longpage = (XLogLongPageHeader) page;
5077  longpage->xlp_sysid = sysidentifier;
5078  longpage->xlp_seg_size = wal_segment_size;
5079  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5080 
5081  /* Insert the initial checkpoint record */
5082  recptr = ((char *) page + SizeOfXLogLongPHD);
5083  record = (XLogRecord *) recptr;
5084  record->xl_prev = 0;
5085  record->xl_xid = InvalidTransactionId;
5086  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5088  record->xl_rmid = RM_XLOG_ID;
5089  recptr += SizeOfXLogRecord;
5090  /* fill the XLogRecordDataHeaderShort struct */
5091  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5092  *(recptr++) = sizeof(checkPoint);
5093  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5094  recptr += sizeof(checkPoint);
5095  Assert(recptr - (char *) record == record->xl_tot_len);
5096 
5097  INIT_CRC32C(crc);
5098  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5099  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5100  FIN_CRC32C(crc);
5101  record->xl_crc = crc;
5102 
5103  /* Create first XLOG segment file */
5106 
5107  /*
5108  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5109  * close the file again in a moment.
5110  */
5111 
5112  /* Write the first page with the initial record */
5113  errno = 0;
5114  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5115  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5116  {
5117  /* if write didn't set errno, assume problem is no disk space */
5118  if (errno == 0)
5119  errno = ENOSPC;
5120  ereport(PANIC,
5122  errmsg("could not write bootstrap write-ahead log file: %m")));
5123  }
5125 
5126  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5127  if (pg_fsync(openLogFile) != 0)
5128  ereport(PANIC,
5130  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5132 
5133  if (close(openLogFile) != 0)
5134  ereport(PANIC,
5136  errmsg("could not close bootstrap write-ahead log file: %m")));
5137 
5138  openLogFile = -1;
5139 
5140  /* Now create pg_control */
5141  InitControlFile(sysidentifier);
5142  ControlFile->time = checkPoint.time;
5143  ControlFile->checkPoint = checkPoint.redo;
5144  ControlFile->checkPointCopy = checkPoint;
5145 
5146  /* some additional ControlFile fields are set in WriteControlFile() */
5147  WriteControlFile();
5148 
5149  /* Bootstrap the commit log, too */
5150  BootStrapCLOG();
5154 
5155  pfree(buffer);
5156 
5157  /*
5158  * Force control file to be read - in contrast to normal processing we'd
5159  * otherwise never run the checks and GUC related initializations therein.
5160  */
5161  ReadControlFile();
5162 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:804
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2279
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2313
void BootStrapMultiXact(void)
Definition: multixact.c:1985
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:50
MultiXactId oldestMulti
Definition: pg_control.h:49
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
TransactionId newestCommitTsXid
Definition: pg_control.h:54
TransactionId oldestXid
Definition: pg_control.h:47
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:44
TransactionId oldestActiveXid
Definition: pg_control.h:63
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:45
FullTransactionId nextXid
Definition: pg_control.h:43
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
pg_time_t time
Definition: pg_control.h:51
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:48
CheckPoint checkPointCopy
Definition: pg_control.h:134
pg_time_t time
Definition: pg_control.h:131
XLogRecPtr checkPoint
Definition: pg_control.h:132
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3369
bool fullPageWrites
Definition: xlog.c:122
static void InitControlFile(uint64 sysidentifier)
Definition: xlog.c:4193
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9404
static void WriteControlFile(void)
Definition: xlog.c:4228
#define BootstrapTimeLineID
Definition: xlog.c:111
static void ReadControlFile(void)
Definition: xlog.c:4310
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2157 of file xlog.c.

/*
 * CalculateCheckpointSegments
 *
 * Recompute CheckPointSegments from max_wal_size_mb and wal_segment_size.
 * The intermediate value is held in a double, truncated to int, and then
 * clamped so that the result is never less than 1.
 *
 * NOTE(review): listing line 2176 (the right-hand operand of the division
 * that follows ConvertToXSegs) is missing from this extract. The reference
 * list names CheckPointCompletionTarget, so the divisor presumably involves
 * it -- confirm against xlog.c.
 */
2158 {
2159  double target;
2160 
2161  /*-------
2162  * Calculate the distance at which to trigger a checkpoint, to avoid
2163  * exceeding max_wal_size_mb. This is based on two assumptions:
2164  *
2165  * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2166  * WAL for two checkpoint cycles to allow us to recover from the
2167  * secondary checkpoint if the first checkpoint failed, though we
2168  * only did this on the primary anyway, not on standby. Keeping just
2169  * one checkpoint simplifies processing and reduces disk space in
2170  * many smaller databases.)
2171  * b) during checkpoint, we consume checkpoint_completion_target *
2172  * number of segments consumed between checkpoints.
2173  *-------
2174  */
2175  target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2177 
2178  /* round down */
2179  CheckPointSegments = (int) target;
2180 
2181  if (CheckPointSegments < 1)
2182  CheckPointSegments = 1;
2183 }
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:598
int CheckPointSegments
Definition: xlog.c:156

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_max_slot_wal_keep_size()

/*
 * check_max_slot_wal_keep_size
 *
 * Definition at line 2218 of file xlog.c.
 *
 * Decide whether the proposed value *newval is acceptable.  Outside binary
 * upgrade mode every value is accepted.  While IsBinaryUpgrade is set, only
 * -1 is accepted; any other value is rejected after recording an error
 * detail message.
 *
 * "extra" and "source" are not used.
 *
 * Returns true when the value is accepted, false otherwise.
 */
bool
check_max_slot_wal_keep_size(int *newval, void **extra, GucSource source)
{
	/* *newval is only inspected when we are in binary upgrade mode */
	bool		acceptable = !IsBinaryUpgrade || *newval == -1;

	if (!acceptable)
	{
		GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
							"max_slot_wal_keep_size");
	}

	return acceptable;
}
bool IsBinaryUpgrade
Definition: globals.c:118
#define GUC_check_errdetail
Definition: guc.h:448

References GUC_check_errdetail, IsBinaryUpgrade, and newval.

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4604 of file xlog.c.

/*
 * check_wal_buffers
 *
 * Validate and possibly adjust the proposed value *newval:
 *  - -1 is a request for auto-tuning.  If XLOGbuffers is itself still -1 the
 *    value is accepted unchanged; otherwise an auto-tuned value is
 *    substituted.
 *  - any value below 4 is silently raised to 4.
 *
 * Every path visible here returns true.
 *
 * NOTE(review): listing line 4619 (the statement that performs the
 * substitution) is missing from this extract.  The reference list names
 * XLOGChooseNumBuffers(), which is presumably what gets assigned to
 * *newval -- confirm against xlog.c.
 */
4605 {
4606  /*
4607  * -1 indicates a request for auto-tune.
4608  */
4609  if (*newval == -1)
4610  {
4611  /*
4612  * If we haven't yet changed the boot_val default of -1, just let it
4613  * be. We'll fix it when XLOGShmemSize is called.
4614  */
4615  if (XLOGbuffers == -1)
4616  return true;
4617 
4618  /* Otherwise, substitute the auto-tune value */
4620  }
4621 
4622  /*
4623  * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4624  * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4625  * the case, we just silently treat such values as a request for the
4626  * minimum. (We could throw an error instead, but that doesn't seem very
4627  * helpful.)
4628  */
4629  if (*newval < 4)
4630  *newval = 4;
4631 
4632  return true;
4633 }
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4588
int XLOGbuffers
Definition: xlog.c:117

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4639 of file xlog.c.

/*
 * check_wal_consistency_checking
 *
 * Parse *newval as a comma-separated list of identifiers and build a
 * per-resource-manager boolean array of RM_MAX_ID + 1 entries.
 *
 *  - "all" (case-insensitive) marks every existing resource manager whose
 *    rm_mask callback is non-NULL.
 *  - any other token is compared case-insensitively with rm_name of each
 *    existing resource manager that has a non-NULL rm_mask; the first match
 *    is marked.
 *
 * On success the array is copied into memory obtained from guc_malloc() and
 * handed back through *extra, and true is returned.  A list syntax error or
 * an unrecognized key word records an error detail and returns false.  The
 * working copy of the string and the element list are freed on every
 * visible exit path.
 *
 * NOTE(review): listing lines 4696 and 4698 are missing from this extract:
 * the condition guarding the "defer" branch and the body of that branch.
 * The reference list names process_shared_preload_libraries_done and
 * check_wal_consistency_checking_deferred, which are presumably the
 * condition and the flag being set -- confirm against xlog.c.
 */
4640 {
4641  char *rawstring;
4642  List *elemlist;
4643  ListCell *l;
4644  bool newwalconsistency[RM_MAX_ID + 1];
4645 
4646  /* Initialize the array */
4647  MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4648 
4649  /* Need a modifiable copy of string */
4650  rawstring = pstrdup(*newval);
4651 
4652  /* Parse string into list of identifiers */
4653  if (!SplitIdentifierString(rawstring, ',', &elemlist))
4654  {
4655  /* syntax error in list */
4656  GUC_check_errdetail("List syntax is invalid.");
4657  pfree(rawstring);
4658  list_free(elemlist);
4659  return false;
4660  }
4661 
4662  foreach(l, elemlist)
4663  {
4664  char *tok = (char *) lfirst(l);
4665  int rmid;
4666 
4667  /* Check for 'all'. */
4668  if (pg_strcasecmp(tok, "all") == 0)
4669  {
4670  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4671  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4672  newwalconsistency[rmid] = true;
4673  }
4674  else
4675  {
4676  /* Check if the token matches any known resource manager. */
4677  bool found = false;
4678 
4679  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4680  {
4681  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4682  pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4683  {
4684  newwalconsistency[rmid] = true;
4685  found = true;
4686  break;
4687  }
4688  }
4689  if (!found)
4690  {
4691  /*
4692  * During startup, it might be a not-yet-loaded custom
4693  * resource manager. Defer checking until
4694  * InitializeWalConsistencyChecking().
4695  */
4697  {
4699  }
4700  else
4701  {
4702  GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4703  pfree(rawstring);
4704  list_free(elemlist);
4705  return false;
4706  }
4707  }
4708  }
4709  }
4710 
4711  pfree(rawstring);
4712  list_free(elemlist);
4713 
4714  /* assign new value */
4715  *extra = guc_malloc(ERROR, (RM_MAX_ID + 1) * sizeof(bool));
4716  memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4717  return true;
4718 }
#define ERROR
Definition: elog.h:39
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:640
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1695
bool process_shared_preload_libraries_done
Definition: miscinit.c:1779
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3457
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:166
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, ERROR, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

/*
 * check_wal_segment_size
 *
 * Definition at line 2200 of file xlog.c.
 *
 * Accept the proposed value *newval only if IsValidWalSegSize() approves
 * of it; otherwise record an error detail message and reject it.
 *
 * "extra" and "source" are not used.
 *
 * Returns true when the value is accepted, false otherwise.
 */
bool
check_wal_segment_size(int *newval, void **extra, GucSource source)
{
	if (IsValidWalSegSize(*newval))
		return true;

	GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
	return false;
}
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7431 of file xlog.c.

/*
 * CheckPointGuts
 *
 * Flush data to disk as part of a checkpoint.  In the lines visible here
 * the function fires the BUFFER_CHECKPOINT trace probes, calls
 * CheckPointCLOG() and CheckPointBuffers(flags), and finally calls
 * CheckPointTwoPhase(checkPointRedo), which is deliberately left until
 * last.
 *
 * NOTE(review): many listing lines are missing from this extract
 * (7433-7437, 7441, 7443-7446 and 7451-7453).  The reference list names
 * further callees such as CheckPointReplicationSlots(),
 * CheckPointSnapBuild(), CheckPointSUBTRANS(), CheckPointMultiXact(),
 * ProcessSyncRequests() and the CheckpointStats timestamp fields; they are
 * presumably invoked or set on the missing lines -- confirm the exact
 * order against xlog.c before relying on it.
 */
7432 {
7438 
7439  /* Write out all dirty data in SLRUs and the main buffer pool */
7440  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7442  CheckPointCLOG();
7447  CheckPointBuffers(flags);
7448 
7449  /* Perform all queued up fsyncs */
7450  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7454  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
7455 
7456  /* We deliberately delay 2PC checkpointing as long as possible */
7457  CheckPointTwoPhase(checkPointRedo);
7458 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
void CheckPointBuffers(int flags)
Definition: bufmgr.c:3653
void CheckPointCLOG(void)
Definition: clog.c:937
void CheckPointCommitTs(void)
Definition: commit_ts.c:820
void CheckPointMultiXact(void)
Definition: multixact.c:2255
void CheckPointReplicationOrigin(void)
Definition: origin.c:573
void CheckPointPredicate(void)
Definition: predicate.c:1036
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1835
void CheckPointSnapBuild(void)
Definition: snapbuild.c:2054
TimestampTz ckpt_write_t
Definition: xlog.h:160
TimestampTz ckpt_sync_end_t
Definition: xlog.h:162
TimestampTz ckpt_sync_t
Definition: xlog.h:161
void CheckPointSUBTRANS(void)
Definition: subtrans.c:355
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1816
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:137

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5344 of file xlog.c.

/*
 * CheckRequiredParameterValues
 *
 * Verify that recovery can proceed with the current settings.  The first
 * block reports FATAL when the WAL was generated with wal_level=minimal;
 * the second block calls RecoveryRequiresIntParameter() for
 * max_connections, max_worker_processes, max_wal_senders,
 * max_prepared_transactions and max_locks_per_transaction.
 *
 * NOTE(review): the conditions guarding both blocks (listing lines 5350
 * and 5363) and the value arguments of every RecoveryRequiresIntParameter()
 * call are missing from this extract.  Going by the reference list, the
 * conditions presumably involve ArchiveRecoveryRequested, EnableHotStandby
 * and ControlFile->wal_level compared with WAL_LEVEL_MINIMAL, and each call
 * presumably passes the local setting together with the matching
 * ControlFile field -- confirm against xlog.c.
 */
5345 {
5346  /*
5347  * For archive recovery, the WAL must be generated with at least 'replica'
5348  * wal_level.
5349  */
5351  {
5352  ereport(FATAL,
5353  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5354  errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
5355  errdetail("This happens if you temporarily set wal_level=minimal on the server."),
5356  errhint("Use a backup taken after setting wal_level to higher than minimal.")));
5357  }
5358 
5359  /*
5360  * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5361  * must have at least as many backend slots as the primary.
5362  */
5364  {
5365  /* We ignore autovacuum_max_workers when we make this test. */
5366  RecoveryRequiresIntParameter("max_connections",
5369  RecoveryRequiresIntParameter("max_worker_processes",
5372  RecoveryRequiresIntParameter("max_wal_senders",
5375  RecoveryRequiresIntParameter("max_prepared_transactions",
5378  RecoveryRequiresIntParameter("max_locks_per_transaction",
5381  }
5382 }
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:140
int max_worker_processes
Definition: globals.c:141
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:180
int max_locks_per_xact
Definition: pg_control.h:183
int max_prepared_xacts
Definition: pg_control.h:182
int max_prepared_xacts
Definition: twophase.c:115
int max_wal_senders
Definition: walsender.c:121
bool EnableHotStandby
Definition: xlog.c:121
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:72
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:137
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3716 of file xlog.c.

/*
 * CheckXLogRemoved
 *
 * Raise an ERROR if WAL segment "segno" is at or below
 * XLogCtl->lastRemovedSegNo, i.e. it has already been removed.  The error
 * message names the segment file as built by XLogFileName() for timeline
 * "tli".
 *
 * errno is saved on entry and restored both before the error is reported
 * and before a normal return, so the caller's errno survives this call.
 *
 * NOTE(review): listing lines 3721, 3723 and 3732 are missing from this
 * extract.  Going by the reference list they are presumably
 * SpinLockAcquire()/SpinLockRelease() on XLogCtl->info_lck around the read
 * of lastRemovedSegNo, and an errcode_for_file_access() argument to
 * ereport() -- confirm against xlog.c.
 */
3717 {
3718  int save_errno = errno;
3719  XLogSegNo lastRemovedSegNo;
3720 
3722  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3724 
3725  if (segno <= lastRemovedSegNo)
3726  {
3727  char filename[MAXFNAMELEN];
3728 
3729  XLogFileName(filename, tli, segno, wal_segment_size);
3730  errno = save_errno;
3731  ereport(ERROR,
3733  errmsg("requested WAL segment %s has already been removed",
3734  filename)));
3735  }
3736  errno = save_errno;
3737 }
static char * filename
Definition: pg_dumpall.c:119
XLogSegNo lastRemovedSegNo
Definition: xlog.c:461
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5255 of file xlog.c.

/*
 * CleanupAfterArchiveRecovery
 *
 * Cleanup performed after archive recovery has switched to timeline newTLI:
 *
 *  1. If recoveryEndCommand is set and non-empty, the recovery_end_command
 *     is executed.
 *  2. RemoveNonParentXlogFiles(EndOfLog, newTLI) removes segments that are
 *     not part of the new timeline's history.
 *  3. If EndOfLog falls in the middle of a segment, and the last segment
 *     of the old timeline (EndOfLogTLI) is not already marked .ready or
 *     .done, that segment is renamed with a ".partial" suffix via
 *     durable_rename() and XLogArchiveNotify() is called for the new name.
 *     Any stale archive status for the .partial name is cleared first.
 *
 * NOTE(review): listing lines 5262 and 5305 are missing from this extract.
 * Going by the reference list, 5262 is presumably the
 * ExecuteRecoveryCommand(recoveryEndCommand, ...) call head and 5305 the
 * XLogArchivingActive() half of the condition -- confirm against xlog.c.
 */
5257 {
5258  /*
5259  * Execute the recovery_end_command, if any.
5260  */
5261  if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5263  "recovery_end_command",
5264  true,
5265  WAIT_EVENT_RECOVERY_END_COMMAND);
5266 
5267  /*
5268  * We switched to a new timeline. Clean up segments on the old timeline.
5269  *
5270  * If there are any higher-numbered segments on the old timeline, remove
5271  * them. They might contain valid WAL, but they might also be
5272  * pre-allocated files containing garbage. In any case, they are not part
5273  * of the new timeline's history so we don't need them.
5274  */
5275  RemoveNonParentXlogFiles(EndOfLog, newTLI);
5276 
5277  /*
5278  * If the switch happened in the middle of a segment, what to do with the
5279  * last, partial segment on the old timeline? If we don't archive it, and
5280  * the server that created the WAL never archives it either (e.g. because
5281  * it was hit by a meteor), it will never make it to the archive. That's
5282  * OK from our point of view, because the new segment that we created with
5283  * the new TLI contains all the WAL from the old timeline up to the switch
5284  * point. But if you later try to do PITR to the "missing" WAL on the old
5285  * timeline, recovery won't find it in the archive. It's physically
5286  * present in the new file with new TLI, but recovery won't look there
5287  * when it's recovering to the older timeline. On the other hand, if we
5288  * archive the partial segment, and the original server on that timeline
5289  * is still running and archives the completed version of the same segment
5290  * later, it will fail. (We used to do that in 9.4 and below, and it
5291  * caused such problems).
5292  *
5293  * As a compromise, we rename the last segment with the .partial suffix,
5294  * and archive it. Archive recovery will never try to read .partial
5295  * segments, so they will normally go unused. But in the odd PITR case,
5296  * the administrator can copy them manually to the pg_wal directory
5297  * (removing the suffix). They can be useful in debugging, too.
5298  *
5299  * If a .done or .ready file already exists for the old timeline, however,
5300  * we had already determined that the segment is complete, so we can let
5301  * it be archived normally. (In particular, if it was restored from the
5302  * archive to begin with, it's expected to have a .done file).
5303  */
5304  if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5306  {
5307  char origfname[MAXFNAMELEN];
5308  XLogSegNo endLogSegNo;
5309 
5310  XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5311  XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5312 
5313  if (!XLogArchiveIsReadyOrDone(origfname))
5314  {
5315  char origpath[MAXPGPATH];
5316  char partialfname[MAXFNAMELEN];
5317  char partialpath[MAXPGPATH];
5318 
5319  XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5320  snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5321  snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5322 
5323  /*
5324  * Make sure there's no .done or .ready file for the .partial
5325  * file.
5326  */
5327  XLogArchiveCleanup(partialfname);
5328 
5329  durable_rename(origpath, partialpath, ERROR);
5330  XLogArchiveNotify(partialfname);
5331  }
5332  }
5333 }
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
#define MAXPGPATH
#define snprintf
Definition: port.h:238
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3929
#define XLogArchivingActive()
Definition: xlog.h:97
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:83

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4150 of file xlog.c.

4151 {
4152  DIR *xldir;
4153  struct dirent *xlde;
4154  char path[MAXPGPATH + sizeof(XLOGDIR)];
4155 
4156  xldir = AllocateDir(XLOGDIR);
4157 
4158  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4159  {
4160  if (IsBackupHistoryFileName(xlde->d_name))
4161  {
4162  if (XLogArchiveCheckDone(xlde->d_name))
4163  {
4164  elog(DEBUG2, "removing WAL backup history file \"%s\"",
4165  xlde->d_name);
4166  snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4167  unlink(path);
4168  XLogArchiveCleanup(xlde->d_name);
4169  }
4170  }
4171  }
4172 
4173  FreeDir(xldir);
4174 }
#define DEBUG2
Definition: elog.h:29
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1220 of file xlog.c.

/*
 * CopyXLogRecordToWAL
 *
 * Copy the rdata chain (write_len bytes in total) into the WAL buffers,
 * starting at StartPos on timeline tli.  Whenever a chunk does not fit in
 * the free space of the current page, the part that fits is copied, the
 * next page is fetched with GetXLogBuffer(), and that page's header is
 * marked with XLP_FIRST_IS_CONTRECORD and the remaining length before
 * copying continues past the (long or short) page header.
 *
 * If isLogSwitch is set and the record does not end on a segment boundary,
 * the rest of the segment up to EndPos is consumed one page at a time,
 * zeroing each page's short header.  Otherwise the end position is
 * rounded up with MAXALIGN64.
 *
 * PANICs if the resulting position is not exactly EndPos.
 *
 * NOTE(review): listing line 1358 (between "ereport(PANIC," and the
 * errmsg_internal() argument) is missing from this extract.  The reference
 * list names errcode() and ERRCODE_DATA_CORRUPTED, which is presumably the
 * missing argument -- confirm against xlog.c.
 */
1222 {
1223  char *currpos;
1224  int freespace;
1225  int written;
1226  XLogRecPtr CurrPos;
1227  XLogPageHeader pagehdr;
1228 
1229  /*
1230  * Get a pointer to the right place in the right WAL buffer to start
1231  * inserting to.
1232  */
1233  CurrPos = StartPos;
1234  currpos = GetXLogBuffer(CurrPos, tli);
1235  freespace = INSERT_FREESPACE(CurrPos);
1236 
1237  /*
1238  * there should be enough space for at least the first field (xl_tot_len)
1239  * on this page.
1240  */
1241  Assert(freespace >= sizeof(uint32));
1242 
1243  /* Copy record data */
1244  written = 0;
1245  while (rdata != NULL)
1246  {
1247  char *rdata_data = rdata->data;
1248  int rdata_len = rdata->len;
1249 
1250  while (rdata_len > freespace)
1251  {
1252  /*
1253  * Write what fits on this page, and continue on the next page.
1254  */
1255  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1256  memcpy(currpos, rdata_data, freespace);
1257  rdata_data += freespace;
1258  rdata_len -= freespace;
1259  written += freespace;
1260  CurrPos += freespace;
1261 
1262  /*
1263  * Get pointer to beginning of next page, and set the xlp_rem_len
1264  * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1265  *
1266  * It's safe to set the contrecord flag and xlp_rem_len without a
1267  * lock on the page. All the other flags were already set when the
1268  * page was initialized, in AdvanceXLInsertBuffer, and we're the
1269  * only backend that needs to set the contrecord flag.
1270  */
1271  currpos = GetXLogBuffer(CurrPos, tli);
1272  pagehdr = (XLogPageHeader) currpos;
1273  pagehdr->xlp_rem_len = write_len - written;
1274  pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1275 
1276  /* skip over the page header */
1277  if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1278  {
1279  CurrPos += SizeOfXLogLongPHD;
1280  currpos += SizeOfXLogLongPHD;
1281  }
1282  else
1283  {
1284  CurrPos += SizeOfXLogShortPHD;
1285  currpos += SizeOfXLogShortPHD;
1286  }
1287  freespace = INSERT_FREESPACE(CurrPos);
1288  }
1289 
1290  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1291  memcpy(currpos, rdata_data, rdata_len);
1292  currpos += rdata_len;
1293  CurrPos += rdata_len;
1294  freespace -= rdata_len;
1295  written += rdata_len;
1296 
1297  rdata = rdata->next;
1298  }
1299  Assert(written == write_len);
1300 
1301  /*
1302  * If this was an xlog-switch, it's not enough to write the switch record,
1303  * we also have to consume all the remaining space in the WAL segment. We
1304  * have already reserved that space, but we need to actually fill it.
1305  */
1306  if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1307  {
1308  /* An xlog-switch record doesn't contain any data besides the header */
1309  Assert(write_len == SizeOfXLogRecord);
1310 
1311  /* Assert that we did reserve the right amount of space */
1312  Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1313 
1314  /* Use up all the remaining space on the current page */
1315  CurrPos += freespace;
1316 
1317  /*
1318  * Cause all remaining pages in the segment to be flushed, leaving the
1319  * XLog position where it should be, at the start of the next segment.
1320  * We do this one page at a time, to make sure we don't deadlock
1321  * against ourselves if wal_buffers < wal_segment_size.
1322  */
1323  while (CurrPos < EndPos)
1324  {
1325  /*
1326  * The minimal action to flush the page would be to call
1327  * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1328  * AdvanceXLInsertBuffer(...). The page would be left initialized
1329  * mostly to zeros, except for the page header (always the short
1330  * variant, as this is never a segment's first page).
1331  *
1332  * The large vistas of zeros are good for compressibility, but the
1333  * headers interrupting them every XLOG_BLCKSZ (with values that
1334  * differ from page to page) are not. The effect varies with
1335  * compression tool, but bzip2 for instance compresses about an
1336  * order of magnitude worse if those headers are left in place.
1337  *
1338  * Rather than complicating AdvanceXLInsertBuffer itself (which is
1339  * called in heavily-loaded circumstances as well as this lightly-
1340  * loaded one) with variant behavior, we just use GetXLogBuffer
1341  * (which itself calls the two methods we need) to get the pointer
1342  * and zero most of the page. Then we just zero the page header.
1343  */
1344  currpos = GetXLogBuffer(CurrPos, tli);
1345  MemSet(currpos, 0, SizeOfXLogShortPHD);
1346 
1347  CurrPos += XLOG_BLCKSZ;
1348  }
1349  }
1350  else
1351  {
1352  /* Align the end position, so that the next record starts aligned */
1353  CurrPos = MAXALIGN64(CurrPos);
1354  }
1355 
1356  if (CurrPos != EndPos)
1357  ereport(PANIC,
1359  errmsg_internal("space reserved for WAL record does not match what was written"));
1360 }
unsigned int uint32
Definition: c.h:506
#define MAXALIGN64(LEN)
Definition: c.h:836
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:575
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1627
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert, XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 6821 of file xlog.c.

6822 {
6823  bool shutdown;
6824  CheckPoint checkPoint;
6825  XLogRecPtr recptr;
6826  XLogSegNo _logSegNo;
6827  XLogCtlInsert *Insert = &XLogCtl->Insert;
6828  uint32 freespace;
6829  XLogRecPtr PriorRedoPtr;
6830  XLogRecPtr last_important_lsn;
6831  VirtualTransactionId *vxids;
6832  int nvxids;
6833  int oldXLogAllowed = 0;
6834 
6835  /*
6836  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6837  * issued at a different time.
6838  */
6839  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
6840  shutdown = true;
6841  else
6842  shutdown = false;
6843 
6844  /* sanity check */
6845  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6846  elog(ERROR, "can't create a checkpoint during recovery");
6847 
6848  /*
6849  * Prepare to accumulate statistics.
6850  *
6851  * Note: because it is possible for log_checkpoints to change while a
6852  * checkpoint proceeds, we always accumulate stats, even if
6853  * log_checkpoints is currently off.
6854  */
6855  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6856  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
6857 
6858  /*
6859  * Let smgr prepare for checkpoint; this has to happen outside the
6860  * critical section and before we determine the REDO pointer. Note that
6861  * smgr must not do anything that'd have to be undone if we decide no
6862  * checkpoint is needed.
6863  */
6864  SyncPreCheckpoint();
6865 
6866  /*
6867  * Use a critical section to force system panic if we have trouble.
6868  */
6869  START_CRIT_SECTION();
6870 
6871  if (shutdown)
6872  {
6873  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6874  ControlFile->state = DB_SHUTDOWNING;
6875  UpdateControlFile();
6876  LWLockRelease(ControlFileLock);
6877  }
6878 
6879  /* Begin filling in the checkpoint WAL record */
6880  MemSet(&checkPoint, 0, sizeof(checkPoint));
6881  checkPoint.time = (pg_time_t) time(NULL);
6882 
6883  /*
6884  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6885  * pointer. This allows us to begin accumulating changes to assemble our
6886  * starting snapshot of locks and transactions.
6887  */
6888  if (!shutdown && XLogStandbyInfoActive())
6889  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
6890  else
6891  checkPoint.oldestActiveXid = InvalidTransactionId;
6892 
6893  /*
6894  * Get location of last important record before acquiring insert locks (as
6895  * GetLastImportantRecPtr() also locks WAL locks).
6896  */
6897  last_important_lsn = GetLastImportantRecPtr();
6898 
6899  /*
6900  * If this isn't a shutdown or forced checkpoint, and if there has been no
6901  * WAL activity requiring a checkpoint, skip it. The idea here is to
6902  * avoid inserting duplicate checkpoints when the system is idle.
6903  */
6904  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6905  CHECKPOINT_FORCE)) == 0)
6906  {
6907  if (last_important_lsn == ControlFile->checkPoint)
6908  {
6909  END_CRIT_SECTION();
6910  ereport(DEBUG1,
6911  (errmsg_internal("checkpoint skipped because system is idle")));
6912  return;
6913  }
6914  }
6915 
6916  /*
6917  * An end-of-recovery checkpoint is created before anyone is allowed to
6918  * write WAL. To allow us to write the checkpoint record, temporarily
6919  * enable XLogInsertAllowed.
6920  */
6921  if (flags & CHECKPOINT_END_OF_RECOVERY)
6922  oldXLogAllowed = LocalSetXLogInsertAllowed();
6923 
6924  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6925  if (flags & CHECKPOINT_END_OF_RECOVERY)
6926  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6927  else
6928  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6929 
6930  /*
6931  * We must block concurrent insertions while examining insert state.
6932  */
6933  WALInsertLockAcquireExclusive();
6934 
6935  checkPoint.fullPageWrites = Insert->fullPageWrites;
6936 
6937  if (shutdown)
6938  {
6939  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
6940 
6941  /*
6942  * Compute new REDO record ptr = location of next XLOG record.
6943  *
6944  * Since this is a shutdown checkpoint, there can't be any concurrent
6945  * WAL insertion.
6946  */
6947  freespace = INSERT_FREESPACE(curInsert);
6948  if (freespace == 0)
6949  {
6950  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
6951  curInsert += SizeOfXLogLongPHD;
6952  else
6953  curInsert += SizeOfXLogShortPHD;
6954  }
6955  checkPoint.redo = curInsert;
6956 
6957  /*
6958  * Here we update the shared RedoRecPtr for future XLogInsert calls;
6959  * this must be done while holding all the insertion locks.
6960  *
6961  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
6962  * left pointing past where it really needs to point. This is okay;
6963  * the only consequence is that XLogInsert might back up whole buffers
6964  * that it didn't really need to. We can't postpone advancing
6965  * RedoRecPtr because XLogInserts that happen while we are dumping
6966  * buffers must assume that their buffer changes are not included in
6967  * the checkpoint.
6968  */
6969  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
6970  }
6971 
6972  /*
6973  * Now we can release the WAL insertion locks, allowing other xacts to
6974  * proceed while we are flushing disk buffers.
6975  */
6976  WALInsertLockRelease();
6977 
6978  /*
6979  * If this is an online checkpoint, we have not yet determined the redo
6980  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
6981  * record; the LSN at which it starts becomes the new redo pointer. We
6982  * don't do this for a shutdown checkpoint, because in that case no WAL
6983  * can be written between the redo point and the insertion of the
6984  * checkpoint record itself, so the checkpoint record itself serves to
6985  * mark the redo point.
6986  */
6987  if (!shutdown)
6988  {
6989  int dummy = 0;
6990 
6991  /* Record must have payload to avoid assertion failure. */
6992  XLogBeginInsert();
6993  XLogRegisterData((char *) &dummy, sizeof(dummy));
6994  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
6995 
6996  /*
6997  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
6998  * shared memory and RedoRecPtr in backend-local memory, but we need
6999  * to copy that into the record that will be inserted when the
7000  * checkpoint is complete.
7001  */
7002  checkPoint.redo = RedoRecPtr;
7003  }
7004 
7005  /* Update the info_lck-protected copy of RedoRecPtr as well */
7006  SpinLockAcquire(&XLogCtl->info_lck);
7007  XLogCtl->RedoRecPtr = checkPoint.redo;
7008  SpinLockRelease(&XLogCtl->info_lck);
7009 
7010  /*
7011  * If enabled, log checkpoint start. We postpone this until now so as not
7012  * to log anything if we decided to skip the checkpoint.
7013  */
7014  if (log_checkpoints)
7015  LogCheckpointStart(flags, false);
7016 
7017  /* Update the process title */
7018  update_checkpoint_display(flags, false, false);
7019 
7020  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7021 
7022  /*
7023  * Get the other info we need for the checkpoint record.
7024  *
7025  * We don't need to save oldestClogXid in the checkpoint, it only matters
7026  * for the short period in which clog is being truncated, and if we crash
7027  * during that we'll redo the clog truncation and fix up oldestClogXid
7028  * there.
7029  */
7030  LWLockAcquire(XidGenLock, LW_SHARED);
7031  checkPoint.nextXid = TransamVariables->nextXid;
7032  checkPoint.oldestXid = TransamVariables->oldestXid;
7033  checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7034  LWLockRelease(XidGenLock);
7035 
7036  LWLockAcquire(CommitTsLock, LW_SHARED);
7037  checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7038  checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;
7039  LWLockRelease(CommitTsLock);
7040 
7041  LWLockAcquire(OidGenLock, LW_SHARED);
7042  checkPoint.nextOid = TransamVariables->nextOid;
7043  if (!shutdown)
7044  checkPoint.nextOid += TransamVariables->oidCount;
7045  LWLockRelease(OidGenLock);
7046 
7047  MultiXactGetCheckptMulti(shutdown,
7048  &checkPoint.nextMulti,
7049  &checkPoint.nextMultiOffset,
7050  &checkPoint.oldestMulti,
7051  &checkPoint.oldestMultiDB);
7052 
7053  /*
7054  * Having constructed the checkpoint record, ensure all shmem disk buffers
7055  * and commit-log buffers are flushed to disk.
7056  *
7057  * This I/O could fail for various reasons. If so, we will fail to
7058  * complete the checkpoint, but there is no reason to force a system
7059  * panic. Accordingly, exit critical section while doing it.
7060  */
7061  END_CRIT_SECTION();
7062 
7063  /*
7064  * In some cases there are groups of actions that must all occur on one
7065  * side or the other of a checkpoint record. Before flushing the
7066  * checkpoint record we must explicitly wait for any backend currently
7067  * performing those groups of actions.
7068  *
7069  * One example is end of transaction, so we must wait for any transactions
7070  * that are currently in commit critical sections. If an xact inserted
7071  * its commit record into XLOG just before the REDO point, then a crash
7072  * restart from the REDO point would not replay that record, which means
7073  * that our flushing had better include the xact's update of pg_xact. So
7074  * we wait till he's out of his commit critical section before proceeding.
7075  * See notes in RecordTransactionCommit().
7076  *
7077  * Because we've already released the insertion locks, this test is a bit
7078  * fuzzy: it is possible that we will wait for xacts we didn't really need
7079  * to wait for. But the delay should be short and it seems better to make
7080  * checkpoint take a bit longer than to hold off insertions longer than
7081  * necessary. (In fact, the whole reason we have this issue is that xact.c
7082  * does commit record XLOG insertion and clog update as two separate steps
7083  * protected by different locks, but again that seems best on grounds of
7084  * minimizing lock contention.)
7085  *
7086  * A transaction that has not yet set delayChkptFlags when we look cannot
7087  * be at risk, since it has not inserted its commit record yet; and one
7088  * that's already cleared it is not at risk either, since it's done fixing
7089  * clog and we will correctly flush the update below. So we cannot miss
7090  * any xacts we need to wait for.
7091  */
7092  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7093  if (nvxids > 0)
7094  {
7095  do
7096  {
7097  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7098  pg_usleep(10000L); /* wait for 10 msec */
7099  pgstat_report_wait_end();
7100  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7101  DELAY_CHKPT_START));
7102  }
7103  pfree(vxids);
7104 
7105  CheckPointGuts(checkPoint.redo, flags);
7106 
7107  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7108  if (nvxids > 0)
7109  {
7110  do
7111  {
7112  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7113  pg_usleep(10000L); /* wait for 10 msec */
7114  pgstat_report_wait_end();
7115  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7116  DELAY_CHKPT_COMPLETE));
7117  }
7118  pfree(vxids);
7119 
7120  /*
7121  * Take a snapshot of running transactions and write this to WAL. This
7122  * allows us to reconstruct the state of running transactions during
7123  * archive recovery, if required. Skip, if this info disabled.
7124  *
7125  * If we are shutting down, or Startup process is completing crash
7126  * recovery we don't need to write running xact data.
7127  */
7128  if (!shutdown && XLogStandbyInfoActive())
7129  LogStandbySnapshot();
7130 
7131  START_CRIT_SECTION();
7132 
7133  /*
7134  * Now insert the checkpoint record into XLOG.
7135  */
7136  XLogBeginInsert();
7137  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7138  recptr = XLogInsert(RM_XLOG_ID,
7139  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7140  XLOG_CHECKPOINT_ONLINE);
7141 
7142  XLogFlush(recptr);
7143 
7144  /*
7145  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7146  * overwritten at next startup. No-one should even try, this just allows
7147  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7148  * to just temporarily disable writing until the system has exited
7149  * recovery.
7150  */
7151  if (shutdown)
7152  {
7153  if (flags & CHECKPOINT_END_OF_RECOVERY)
7154  LocalXLogInsertAllowed = oldXLogAllowed;
7155  else
7156  LocalXLogInsertAllowed = 0; /* never again write WAL */
7157  }
7158 
7159  /*
7160  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7161  * = end of actual checkpoint record.
7162  */
7163  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7164  ereport(PANIC,
7165  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7166 
7167  /*
7168  * Remember the prior checkpoint's redo ptr for
7169  * UpdateCheckPointDistanceEstimate()
7170  */
7171  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7172 
7173  /*
7174  * Update the control file.
7175  */
7176  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7177  if (shutdown)
7178  ControlFile->state = DB_SHUTDOWNED;
7179  ControlFile->checkPoint = ProcLastRecPtr;
7180  ControlFile->checkPointCopy = checkPoint;
7181  /* crash recovery should always recover to the end of WAL */
7182  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7183  ControlFile->minRecoveryPointTLI = 0;
7184 
7185  /*
7186  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7187  * unused on non-shutdown checkpoints, but seems useful to store it always
7188  * for debugging purposes.
7189  */
7190  ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7191 
7192  UpdateControlFile();
7193  LWLockRelease(ControlFileLock);
7194 
7195  /* Update shared-memory copy of checkpoint XID/epoch */
7196  SpinLockAcquire(&XLogCtl->info_lck);
7197  XLogCtl->ckptFullXid = checkPoint.nextXid;
7198  SpinLockRelease(&XLogCtl->info_lck);
7199 
7200  /*
7201  * We are now done with critical updates; no need for system panic if we
7202  * have trouble while fooling with old log segments.
7203  */
7204  END_CRIT_SECTION();
7205 
7206  /*
7207  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7208  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7209  * where (a) we're not inside of a critical section and (b) we can be
7210  * certain that the relevant record has been flushed to disk, which must
7211  * happen before it can be summarized.
7212  *
7213  * If this is a shutdown checkpoint, then this happens reasonably
7214  * promptly: we've only just inserted and flushed the
7215  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7216  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7217  * record was written before we began flushing data to disk, and that
7218  * could be many minutes ago at this point. However, we don't XLogFlush()
7219  * after inserting that record, so we're not guaranteed that it's on disk
7220  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7221  * record.
7222  */
7223  SetWalSummarizerLatch();
7224 
7225  /*
7226  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7227  */
7228  SyncPostCheckpoint();
7229 
7230  /*
7231  * Update the average distance between checkpoints if the prior checkpoint
7232  * exists.
7233  */
7234  if (PriorRedoPtr != InvalidXLogRecPtr)
7235  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7236 
7237  /*
7238  * Delete old log files, those no longer needed for last checkpoint to
7239  * prevent the disk holding the xlog from growing full.
7240  */
7241  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7242  KeepLogSeg(recptr, &_logSegNo);
7243  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7244  _logSegNo, InvalidOid,
7245  InvalidTransactionId))
7246  {
7247  /*
7248  * Some slots have been invalidated; recalculate the old-segment
7249  * horizon, starting again from RedoRecPtr.
7250  */
7251  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7252  KeepLogSeg(recptr, &_logSegNo);
7253  }
7254  _logSegNo--;
7255  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7256  checkPoint.ThisTimeLineID);
7257 
7258  /*
7259  * Make more log segments if needed. (Do this after recycling old log
7260  * segments, since that may supply some of the needed files.)
7261  */
7262  if (!shutdown)
7263  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7264 
7265  /*
7266  * Truncate pg_subtrans if possible. We can throw away all data before
7267  * the oldest XMIN of any running transaction. No future transaction will
7268  * attempt to reference any pg_subtrans entry older than that (see Asserts
7269  * in subtrans.c). During recovery, though, we mustn't do this because
7270  * StartupSUBTRANS hasn't been called yet.
7271  */
7272  if (!RecoveryInProgress())
7273  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7274 
7275  /* Real work is done; log and update stats. */
7276  LogCheckpointEnd(false);
7277 
7278  /* Reset the process title */
7279  update_checkpoint_display(flags, false, true);
7280 
7281  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7282  NBuffers,
7283  CheckpointStats.ckpt_segs_added,
7284  CheckpointStats.ckpt_segs_removed,
7285  CheckpointStats.ckpt_segs_recycled);
7286 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:471
int NBuffers
Definition: globals.c:139
@ LW_SHARED
Definition: lwlock.h:115
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2233
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:81
@ DB_SHUTDOWNING
Definition: pg_control.h:93
@ DB_SHUTDOWNED
Definition: pg_control.h:91
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:68
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:114
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:115
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3030
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2867
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2022
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3076
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1779
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
TimestampTz ckpt_start_t
Definition: xlog.h:159
int ckpt_segs_removed
Definition: xlog.h:168
int ckpt_segs_added
Definition: xlog.h:167
int ckpt_bufs_written
Definition: xlog.h:165
int ckpt_segs_recycled
Definition: xlog.h:169
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:167
XLogRecPtr unloggedLSN
Definition: pg_control.h:136
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:168
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:457
TimeLineID InsertTimeLineID
Definition: xlog.c:504
XLogRecPtr RedoRecPtr
Definition: xlog.c:456
TimeLineID PrevTimeLineID
Definition: xlog.c:505
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:464
XLogRecPtr RedoRecPtr
Definition: xlog.c:430
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void SetWalSummarizerLatch(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:253
bool RecoveryInProgress(void)
Definition: xlog.c:6290
static void WALInsertLockRelease(void)
Definition: xlog.c:1440
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1853
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1411
static void UpdateControlFile(void)
Definition: xlog.c:4526
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3854
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6586
static XLogRecPtr RedoRecPtr
Definition: xlog.c:273
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6618
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3679
bool log_checkpoints
Definition: xlog.c:129
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7874
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6378
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6493
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6721
static int LocalXLogInsertAllowed
Definition: xlog.c:236
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2791
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7431
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6759
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:138
#define CHECKPOINT_FORCE
Definition: xlog.h:140
#define XLogStandbyInfoActive()
Definition: xlog.h:121
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, 
CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SetWalSummarizerLatch(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7297 of file xlog.c.

7298 {
7299  xl_end_of_recovery xlrec;
7300  XLogRecPtr recptr;
7301 
7302  /* sanity check */
7303  if (!RecoveryInProgress())
7304  elog(ERROR, "can only be used to end recovery");
7305 
7306  xlrec.end_time = GetCurrentTimestamp();
7307 
7308  WALInsertLockAcquireExclusive();
7309  xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7310  xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7311  WALInsertLockRelease();
7312 
7313  START_CRIT_SECTION();
7314 
7315  XLogBeginInsert();
7316  XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
7317  recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7318 
7319  XLogFlush(recptr);
7320 
7321  /*
7322  * Update the control file so that crash recovery can follow the timeline
7323  * changes to this point.
7324  */
7325  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7326  ControlFile->minRecoveryPoint = recptr;
7327  ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7328  UpdateControlFile();
7329  LWLockRelease(ControlFileLock);
7330 
7331  END_CRIT_SECTION();
7332 }
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:76
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7361 of file xlog.c.

7363 {
7364  xl_overwrite_contrecord xlrec;
7365  XLogRecPtr recptr;
7366  XLogPageHeader pagehdr;
7367  XLogRecPtr startPos;
7368 
7369  /* sanity checks */
7370  if (!RecoveryInProgress())
7371  elog(ERROR, "can only be used at end of recovery");
7372  if (pagePtr % XLOG_BLCKSZ != 0)
7373  elog(ERROR, "invalid position for missing continuation record %X/%X",
7374  LSN_FORMAT_ARGS(pagePtr));
7375 
7376  /* The current WAL insert position should be right after the page header */
7377  startPos = pagePtr;
7378  if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7379  startPos += SizeOfXLogLongPHD;
7380  else
7381  startPos += SizeOfXLogShortPHD;
7382  recptr = GetXLogInsertRecPtr();
7383  if (recptr != startPos)
7384  elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
7385  LSN_FORMAT_ARGS(recptr));
7386 
7387  START_CRIT_SECTION();
7388 
7389  /*
7390  * Initialize the XLOG page header (by GetXLogBuffer), and set the
7391  * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7392  *
7393  * No other backend is allowed to write WAL yet, so acquiring the WAL
7394  * insertion lock is just pro forma.
7395  */
7396  WALInsertLockAcquire();
7397  pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7398  pagehdr->xlp_info |= XLP_FIRST_IS_OVERWRITE_CONTRECORD;
7399  WALInsertLockRelease();
7400 
7401  /*
7402  * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7403  * page. We know it becomes the first record, because no other backend is
7404  * allowed to write WAL yet.
7405  */
7406  XLogBeginInsert();
7407  xlrec.overwritten_lsn = aborted_lsn;
7408  xlrec.overwrite_time = GetCurrentTimestamp();
7409  XLogRegisterData((char *) &xlrec, sizeof(xl_overwrite_contrecord));
7410  recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7411 
7412  /* check that the record was inserted to the right place */
7413  if (ProcLastRecPtr != startPos)
7414  elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
7415  LSN_FORMAT_ARGS(ProcLastRecPtr));
7416 
7417  XLogFlush(recptr);
7418 
7419  END_CRIT_SECTION();
7420 
7421  return recptr;
7422 }
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:80
static void WALInsertLockAcquire(void)
Definition: xlog.c:1366
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9355
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7512 of file xlog.c.

7513 {
7514  XLogRecPtr lastCheckPointRecPtr;
7515  XLogRecPtr lastCheckPointEndPtr;
7516  CheckPoint lastCheckPoint;
7517  XLogRecPtr PriorRedoPtr;
7518  XLogRecPtr receivePtr;
7519  XLogRecPtr replayPtr;
7520  TimeLineID replayTLI;
7521  XLogRecPtr endptr;
7522  XLogSegNo _logSegNo;
7523  TimestampTz xtime;
7524 
7525  /* Concurrent checkpoint/restartpoint cannot happen */
7527 
7528  /* Get a local copy of the last safe checkpoint record. */
7530  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7531  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7532  lastCheckPoint = XLogCtl->lastCheckPoint;
7534 
7535  /*
7536  * Check that we're still in recovery mode. It's ok if we exit recovery
7537  * mode after this check, the restart point is valid anyway.
7538  */
7539  if (!RecoveryInProgress())
7540  {
7541  ereport(DEBUG2,
7542  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7543  return false;
7544  }
7545 
7546  /*
7547  * If the last checkpoint record we've replayed is already our last
7548  * restartpoint, we can't perform a new restart point. We still update
7549  * minRecoveryPoint in that case, so that if this is a shutdown restart
7550  * point, we won't start up earlier than before. That's not strictly
7551  * necessary, but when hot standby is enabled, it would be rather weird if
7552  * the database opened up for read-only connections at a point-in-time
7553  * before the last shutdown. Such time travel is still possible in case of
7554  * immediate shutdown, though.
7555  *
7556  * We don't explicitly advance minRecoveryPoint when we do create a
7557  * restartpoint. It's assumed that flushing the buffers will do that as a
7558  * side-effect.
7559  */
7560  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7561  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7562  {
7563  ereport(DEBUG2,
7564  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7565  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7566 
7568  if (flags & CHECKPOINT_IS_SHUTDOWN)
7569  {
7570  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7573  LWLockRelease(ControlFileLock);
7574  }
7575  return false;
7576  }
7577 
7578  /*
7579  * Update the shared RedoRecPtr so that the startup process can calculate
7580  * the number of segments replayed since last restartpoint, and request a
7581  * restartpoint if it exceeds CheckPointSegments.
7582  *
7583  * Like in CreateCheckPoint(), hold off insertions to update it, although
7584  * during recovery this is just pro forma, because no WAL insertions are
7585  * happening.
7586  */
7588  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7590 
7591  /* Also update the info_lck-protected copy */
7593  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7595 
7596  /*
7597  * Prepare to accumulate statistics.
7598  *
7599  * Note: because it is possible for log_checkpoints to change while a
7600  * checkpoint proceeds, we always accumulate stats, even if
7601  * log_checkpoints is currently off.
7602  */
7603  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7605 
7606  if (log_checkpoints)
7607  LogCheckpointStart(flags, true);
7608 
7609  /* Update the process title */
7610  update_checkpoint_display(flags, true, false);
7611 
7612  CheckPointGuts(lastCheckPoint.redo, flags);
7613 
7614  /*
7615  * This location needs to be after CheckPointGuts() to ensure that some
7616  * work has already happened during this checkpoint.
7617  */
7618  INJECTION_POINT("create-restart-point");
7619 
7620  /*
7621  * Remember the prior checkpoint's redo ptr for
7622  * UpdateCheckPointDistanceEstimate()
7623  */
7624  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7625 
7626  /*
7627  * Update pg_control, using current time. Check that it still shows an
7628  * older checkpoint, else do nothing; this is a quick hack to make sure
7629  * nothing really bad happens if somehow we get here after the
7630  * end-of-recovery checkpoint.
7631  */
7632  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7633  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7634  {
7635  /*
7636  * Update the checkpoint information. We do this even if the cluster
7637  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7638  * segments recycled below.
7639  */
7640  ControlFile->checkPoint = lastCheckPointRecPtr;
7641  ControlFile->checkPointCopy = lastCheckPoint;
7642 
7643  /*
7644  * Ensure minRecoveryPoint is past the checkpoint record and update it
7645  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7646  * this will have happened already while writing out dirty buffers,
7647  * but not necessarily - e.g. because no buffers were dirtied. We do
7648  * this because a backup performed in recovery uses minRecoveryPoint
7649  * to determine which WAL files must be included in the backup, and
7650  * the file (or files) containing the checkpoint record must be
7651  * included, at a minimum. Note that for an ordinary restart of
7652  * recovery there's no value in having the minimum recovery point any
7653  * earlier than this anyway, because redo will begin just after the
7654  * checkpoint record.
7655  */
7657  {
7658  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7659  {
7660  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7662 
7663  /* update local copy */
7666  }
7667  if (flags & CHECKPOINT_IS_SHUTDOWN)
7669  }
7671  }
7672  LWLockRelease(ControlFileLock);
7673 
7674  /*
7675  * Update the average distance between checkpoints/restartpoints if the
7676  * prior checkpoint exists.
7677  */
7678  if (PriorRedoPtr != InvalidXLogRecPtr)
7680 
7681  /*
7682  * Delete old log files that are no longer needed for the last
7683  * restartpoint, to prevent the disk holding the xlog from growing full.
7684  */
7686 
7687  /*
7688  * Retreat _logSegNo using the current end of xlog replayed or received,
7689  * whichever is later.
7690  */
7691  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7692  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7693  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7694  KeepLogSeg(endptr, &_logSegNo);
7696  _logSegNo, InvalidOid,
7698  {
7699  /*
7700  * Some slots have been invalidated; recalculate the old-segment
7701  * horizon, starting again from RedoRecPtr.
7702  */
7704  KeepLogSeg(endptr, &_logSegNo);
7705  }
7706  _logSegNo--;
7707 
7708  /*
7709  * Try to recycle segments on a useful timeline. If we've been promoted
7710  * since the beginning of this restartpoint, use the new timeline chosen
7711  * at end of recovery. If we're still in recovery, use the timeline we're
7712  * currently replaying.
7713  *
7714  * There is no guarantee that the WAL segments will be useful on the
7715  * current timeline; if recovery proceeds to a new timeline right after
7716  * this, the pre-allocated WAL segments on this timeline will not be used,
7717  * and will go wasted until recycled on the next restartpoint. We'll live
7718  * with that.
7719  */
7720  if (!RecoveryInProgress())
7721  replayTLI = XLogCtl->InsertTimeLineID;
7722 
7723  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7724 
7725  /*
7726  * Make more log segments if needed. (Do this after recycling old log
7727  * segments, since that may supply some of the needed files.)
7728  */
7729  PreallocXlogFiles(endptr, replayTLI);
7730 
7731  /*
7732  * Truncate pg_subtrans if possible. We can throw away all data before
7733  * the oldest XMIN of any running transaction. No future transaction will
7734  * attempt to reference any pg_subtrans entry older than that (see Asserts
7735  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7736  * this because StartupSUBTRANS hasn't been called yet.
7737  */
7738  if (EnableHotStandby)
7740 
7741  /* Real work is done; log and update stats. */
7742  LogCheckpointEnd(true);
7743 
7744  /* Reset the process title */
7745  update_checkpoint_display(flags, true, true);
7746 
7747  xtime = GetLatestXTime();
7749  (errmsg("recovery restart point at %X/%X",
7750  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7751  xtime ? errdetail("Last completed transaction was at log time %s.",
7752  timestamptz_to_str(xtime)) : 0));
7753 
7754  /*
7755  * Finally, execute archive_cleanup_command, if any.
7756  */
7757  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7759  "archive_cleanup_command",
7760  false,
7761  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7762 
7763  return true;
7764 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1853
int64 TimestampTz
Definition: timestamp.h:39
#define LOG
Definition: elog.h:31
bool IsUnderPostmaster
Definition: globals.c:117
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:357
BackendType MyBackendType
Definition: miscinit.c:63
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:95
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:92
CheckPoint lastCheckPoint
Definition: xlog.c:540
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:538
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:539
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2711
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:641
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:642
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:84
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9314 of file xlog.c.

9315 {
9316  bool during_backup_start = DatumGetBool(arg);
9317 
9318  /* If called during backup start, there shouldn't be one already running */
9319  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9320 
9321  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9322  {
9326 
9329 
9330  if (!during_backup_start)
9331  ereport(WARNING,
9332  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9333  }
9334 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:438
static SessionBackupState sessionBackupState
Definition: xlog.c:391
@ SESSION_BACKUP_NONE
Definition: xlog.h:283

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8712 of file xlog.c.

8714 {
8716 
8717  Assert(state != NULL);
8719 
8720  /*
8721  * During recovery, we don't need to check WAL level, because if WAL
8722  * level is not sufficient, it's impossible to get here during recovery.
8723  */
8725  ereport(ERROR,
8726  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8727  errmsg("WAL level not sufficient for making an online backup"),
8728  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
8729 
8730  if (strlen(backupidstr) > MAXPGPATH)
8731  ereport(ERROR,
8732  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8733  errmsg("backup label too long (max %d bytes)",
8734  MAXPGPATH)));
8735 
8736  memcpy(state->name, backupidstr, strlen(backupidstr));
8737 
8738  /*
8739  * Mark backup active in shared memory. We must do full-page WAL writes
8740  * during an on-line backup even if not doing so at other times, because
8741  * it's quite possible for the backup dump to obtain a "torn" (partially
8742  * written) copy of a database page if it reads the page concurrently with
8743  * our write to the same page. This can be fixed as long as the first
8744  * write to the page in the WAL sequence is a full-page write. Hence, we
8745  * increment runningBackups then force a CHECKPOINT, to ensure there are
8746  * no dirty pages in shared memory that might get dumped while the backup
8747  * is in progress without having a corresponding WAL record. (Once the
8748  * backup is complete, we need not force full-page writes anymore, since
8749  * we expect that any pages not modified during the backup interval must
8750  * have been correctly captured by the backup.)
8751  *
8752  * Note that forcing full-page writes has no effect during an online
8753  * backup from the standby.
8754  *
8755  * We must hold all the insertion locks to change the value of
8756  * runningBackups, to ensure adequate interlocking against
8757  * XLogInsertRecord().
8758  */
8762 
8763  /*
8764  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8765  * work correctly, it is critical that sessionBackupState is only updated
8766  * after this block is over.
8767  */
8769  {
8770  bool gotUniqueStartpoint = false;
8771  DIR *tblspcdir;
8772  struct dirent *de;
8773  tablespaceinfo *ti;
8774  int datadirpathlen;
8775 
8776  /*
8777  * Force an XLOG file switch before the checkpoint, to ensure that the
8778  * WAL segment the checkpoint is written to doesn't contain pages with
8779  * old timeline IDs. That would otherwise happen if you called
8780  * pg_backup_start() right after restoring from a PITR archive: the
8781  * first WAL segment containing the startup checkpoint has pages in
8782  * the beginning with the old timeline ID. That can cause trouble at
8783  * recovery: we won't have a history file covering the old timeline if
8784  * pg_wal directory was not included in the base backup and the WAL
8785  * archive was cleared too before starting the backup.
8786  *
8787  * This also ensures that we have emitted a WAL page header that has
8788  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8789  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8790  * compress out removable backup blocks, it won't remove any that
8791  * occur after this point.
8792  *
8793  * During recovery, we skip forcing XLOG file switch, which means that
8794  * the backup taken during recovery is not available for the special
8795  * recovery case described above.
8796  */
8798  RequestXLogSwitch(false);
8799 
8800  do
8801  {
8802  bool checkpointfpw;
8803 
8804  /*
8805  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8806  * page problems, this guarantees that two successive backup runs
8807  * will have different checkpoint positions and hence different
8808  * history file names, even if nothing happened in between.
8809  *
8810  * During recovery, establish a restartpoint if possible. We use
8811  * the last restartpoint as the backup starting checkpoint. This
8812  * means that two successive backup runs can have the same
8813  * checkpoint positions.
8814  *
8815  * Since the fact that we are executing do_pg_backup_start()
8816  * during recovery means that checkpointer is running, we can use
8817  * RequestCheckpoint() to establish a restartpoint.
8818  *
8819  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8820  * passing fast = true). Otherwise this can take a while.
8821  */
8823  (fast ? CHECKPOINT_IMMEDIATE : 0));
8824 
8825  /*
8826  * Now we need to fetch the checkpoint record location, and also
8827  * its REDO pointer. The oldest point in WAL that would be needed
8828  * to restore starting from the checkpoint is precisely the REDO
8829  * pointer.
8830  */
8831  LWLockAcquire(ControlFileLock, LW_SHARED);
8832  state->checkpointloc = ControlFile->checkPoint;
8833  state->startpoint = ControlFile->checkPointCopy.redo;
8835  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8836  LWLockRelease(ControlFileLock);
8837 
8839  {
8840  XLogRecPtr recptr;
8841 
8842  /*
8843  * Check to see if all WAL replayed during online backup
8844  * (i.e., since last restartpoint used as backup starting
8845  * checkpoint) contain full-page writes.
8846  */
8848  recptr = XLogCtl->lastFpwDisableRecPtr;
8850 
8851  if (!checkpointfpw || state->startpoint <= recptr)
8852  ereport(ERROR,
8853  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8854  errmsg("WAL generated with full_page_writes=off was replayed "
8855  "since last restartpoint"),
8856  errhint("This means that the backup being taken on the standby "
8857  "is corrupt and should not be used. "
8858  "Enable full_page_writes and run CHECKPOINT on the primary, "
8859  "and then try an online backup again.")));
8860 
8861  /*
8862  * During recovery, since we don't use the end-of-backup WAL
8863  * record and don't write the backup history file, the
8864  * starting WAL location doesn't need to be unique. This means
8865  * that two base backups started at the same time might use
8866  * the same checkpoint as starting locations.
8867  */
8868  gotUniqueStartpoint = true;
8869  }
8870 
8871  /*
8872  * If two base backups are started at the same time (in WAL sender
8873  * processes), we need to make sure that they use different
8874  * checkpoints as starting locations, because we use the starting
8875  * WAL location as a unique identifier for the base backup in the
8876  * end-of-backup WAL record and when we write the backup history
8877  * file. Perhaps it would be better to generate a separate unique ID
8878  * for each backup instead of forcing another checkpoint, but
8879  * taking a checkpoint right after another is not that expensive
8880  * either because only a few buffers have been dirtied yet.
8881  */
8883  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8884  {
8885  XLogCtl->Insert.lastBackupStart = state->startpoint;
8886  gotUniqueStartpoint = true;
8887  }
8889  } while (!gotUniqueStartpoint);
8890 
8891  /*
8892  * Construct tablespace_map file.
8893  */
8894  datadirpathlen = strlen(DataDir);
8895 
8896  /* Collect information about all tablespaces */
8897  tblspcdir = AllocateDir("pg_tblspc");
8898  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
8899  {
8900  char fullpath[MAXPGPATH + 10];
8901  char linkpath[MAXPGPATH];
8902  char *relpath = NULL;
8903  char *s;
8904  PGFileType de_type;
8905  char *badp;
8906  Oid tsoid;
8907 
8908  /*
8909  * Try to parse the directory name as an unsigned integer.
8910  *
8911  * Tablespace directories should be positive integers that can be
8912  * represented in 32 bits, with no leading zeroes or trailing
8913  * garbage. If we come across a name that doesn't meet those
8914  * criteria, skip it.
8915  */
8916  if (de->d_name[0] < '1' || de->d_name[1] > '9')
8917  continue;
8918  errno = 0;
8919  tsoid = strtoul(de->d_name, &badp, 10);
8920  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
8921  continue;
8922 
8923  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
8924 
8925  de_type = get_dirent_type(fullpath, de, false, ERROR);
8926 
8927  if (de_type == PGFILETYPE_LNK)
8928  {
8929  StringInfoData escapedpath;
8930  int rllen;
8931 
8932  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
8933  if (rllen < 0)
8934  {
8935  ereport(WARNING,
8936  (errmsg("could not read symbolic link \"%s\": %m",
8937  fullpath)));
8938  continue;
8939  }
8940  else if (rllen >= sizeof(linkpath))
8941  {
8942  ereport(WARNING,
8943  (errmsg("symbolic link \"%s\" target is too long",
8944  fullpath)));
8945  continue;
8946  }
8947  linkpath[rllen] = '\0';
8948 
8949  /*
8950  * Relpath holds the relative path of the tablespace directory
8951  * when it's located within PGDATA, or NULL if it's located
8952  * elsewhere.
8953  */
8954  if (rllen > datadirpathlen &&
8955  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
8956  IS_DIR_SEP(linkpath[datadirpathlen]))
8957  relpath = pstrdup(linkpath + datadirpathlen + 1);
8958 
8959  /*
8960  * Add a backslash-escaped version of the link path to the
8961  * tablespace map file.
8962  */
8963  initStringInfo(&escapedpath);
8964  for (s = linkpath; *s; s++)
8965  {
8966  if (*s == '\n' || *s == '\r' || *s == '\\')
8967  appendStringInfoChar(&escapedpath, '\\');
8968  appendStringInfoChar(&escapedpath, *s);
8969  }
8970  appendStringInfo(tblspcmapfile, "%s %s\n",
8971  de->d_name, escapedpath.data);
8972  pfree(escapedpath.data);
8973  }
8974  else if (de_type == PGFILETYPE_DIR)
8975  {
8976  /*
8977  * It's possible to use allow_in_place_tablespaces to create
8978  * directories directly under pg_tblspc, for testing purposes
8979  * only.
8980  *
8981  * In this case, we store a relative path rather than an
8982  * absolute path into the tablespaceinfo.
8983  */
8984  snprintf(linkpath, sizeof(linkpath), "pg_tblspc/%s",
8985  de->d_name);
8986  relpath = pstrdup(linkpath);
8987  }
8988  else
8989  {
8990  /* Skip any other file type that appears here. */
8991  continue;
8992  }
8993 
8994  ti = palloc(sizeof(tablespaceinfo));
8995  ti->oid = tsoid;
8996  ti->path = pstrdup(linkpath);
8997  ti->rpath = relpath;
8998  ti->size = -1;
8999 
9000  if (tablespaces)
9001  *tablespaces = lappend(*tablespaces, ti);
9002  }
9003  FreeDir(tblspcdir);
9004 
9005  state->starttime = (pg_time_t) time(NULL);
9006  }
9008 
9009  state->started_in_recovery = backup_started_in_recovery;
9010 
9011  /*
9012  * Mark that the start phase has correctly finished for the backup.
9013  */
9015 }
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:941
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:68
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:102
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:94
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:546
XLogRecPtr lastBackupStart
Definition: xlog.c:439
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:7981
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9314
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:284
#define CHECKPOINT_WAIT
Definition: xlog.h:143
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:139
#define XLogIsNeeded()
Definition: xlog.h:107

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9040 of file xlog.c.

9041 {
9042  bool backup_stopped_in_recovery = false;
9043  char histfilepath[MAXPGPATH];
9044  char lastxlogfilename[MAXFNAMELEN];
9045  char histfilename[MAXFNAMELEN];
9046  XLogSegNo _logSegNo;
9047  FILE *fp;
9048  int seconds_before_warning;
9049  int waits = 0;
9050  bool reported_waiting = false;
9051 
9052  Assert(state != NULL);
9053 
9054  backup_stopped_in_recovery = RecoveryInProgress();
9055 
9056  /*
9057  * During recovery, we don't need to check WAL level, because if WAL
9058  * level is not sufficient, it's impossible to get here during recovery.
9059  */
9060  if (!backup_stopped_in_recovery && !XLogIsNeeded())
9061  ereport(ERROR,
9062  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9063  errmsg("WAL level not sufficient for making an online backup"),
9064  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
9065 
9066  /*
9067  * OK to update backup counter and session-level lock.
9068  *
9069  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9070  * otherwise they can be updated inconsistently, which might cause
9071  * do_pg_abort_backup() to fail.
9072  */
9074 
9075  /*
9076  * It is expected that each do_pg_backup_start() call is matched by
9077  * exactly one do_pg_backup_stop() call.
9078  */
9081 
9082  /*
9083  * Clean up session-level lock.
9084  *
9085  * You might think that WALInsertLockRelease() can be called before
9086  * cleaning up session-level lock because session-level lock doesn't need
9087  * to be protected with WAL insertion lock. But since
9088  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9089  * cleaned up before it.
9090  */
9092 
9094 
9095  /*
9096  * If we are taking an online backup from the standby, we confirm that the
9097  * standby has not been promoted during the backup.
9098  */
9099  if (state->started_in_recovery && !backup_stopped_in_recovery)
9100  ereport(ERROR,
9101  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9102  errmsg("the standby was promoted during online backup"),
9103  errhint("This means that the backup being taken is corrupt "
9104  "and should not be used. "
9105  "Try taking another online backup.")));
9106 
9107  /*
9108  * During recovery, we don't write an end-of-backup record. We assume that
9109  * pg_control was backed up last and its minimum recovery point can be
9110  * available as the backup end location. Since we don't have an
9111  * end-of-backup record, we use the pg_control value to check whether
9112  * we've reached the end of backup when starting recovery from this
9113  * backup. We have no way of checking if pg_control wasn't backed up last
9114  * however.
9115  *
9116  * We don't force a switch to a new WAL file, but it is still possible to
9117  * wait for all the required files to be archived if waitforarchive is
9118  * true. This is okay if we use the backup to start a standby and fetch
9119  * the missing WAL using streaming replication. But in the case of an
9120  * archive recovery, a user should set waitforarchive to true and wait for
9121  * them to be archived to ensure that all the required files are
9122  * available.
9123  *
9124  * We return the current minimum recovery point as the backup end
9125  * location. Note that it can be greater than the exact backup end
9126  * location if the minimum recovery point is updated after the backup of
9127  * pg_control. This is harmless for current uses.
9128  *
9129  * XXX currently a backup history file is for informational and debug
9130  * purposes only. It's not essential for an online backup. Furthermore,
9131  * even if it's created, it will not be archived during recovery because
9132  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9133  * backup history file during recovery.
9134  */
9135  if (backup_stopped_in_recovery)
9136  {
9137  XLogRecPtr recptr;
9138 
9139  /*
9140  * Check to see if all WAL replayed during online backup contain
9141  * full-page writes.
9142  */
9144  recptr = XLogCtl->lastFpwDisableRecPtr;
9146 
9147  if (state->startpoint <= recptr)
9148  ereport(ERROR,
9149  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9150  errmsg("WAL generated with full_page_writes=off was replayed "
9151  "during online backup"),
9152  errhint("This means that the backup being taken on the standby "
9153  "is corrupt and should not be used. "
9154  "Enable full_page_writes and run CHECKPOINT on the primary, "
9155  "and then try an online backup again.")));
9156 
9157 
9158  LWLockAcquire(ControlFileLock, LW_SHARED);
9159  state->stoppoint = ControlFile->minRecoveryPoint;
9161  LWLockRelease(ControlFileLock);
9162  }
9163  else
9164  {
9165  char *history_file;
9166 
9167  /*
9168  * Write the backup-end xlog record
9169  */
9170  XLogBeginInsert();
9171  XLogRegisterData((char *) (&state->startpoint),
9172  sizeof(state->startpoint));
9173  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9174 
9175  /*
9176  * Given that we're not in recovery, InsertTimeLineID is set and can't
9177  * change, so we can read it without a lock.
9178  */
9179  state->stoptli = XLogCtl->InsertTimeLineID;
9180 
9181  /*
9182  * Force a switch to a new xlog segment file, so that the backup is
9183  * valid as soon as archiver moves out the current segment file.
9184  */
9185  RequestXLogSwitch(false);
9186 
9187  state->stoptime = (pg_time_t) time(NULL);
9188 
9189  /*
9190  * Write the backup history file
9191  */
9192  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9193  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9194  state->startpoint, wal_segment_size);
9195  fp = AllocateFile(histfilepath, "w");
9196  if (!fp)
9197  ereport(ERROR,
9199  errmsg("could not create file \"%s\": %m",
9200  histfilepath)));
9201 
9202  /* Build and save the contents of the backup history file */
9203  history_file = build_backup_content(state, true);
9204  fprintf(fp, "%s", history_file);
9205  pfree(history_file);
9206 
9207  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9208  ereport(ERROR,
9210  errmsg("could not write file \"%s\": %m",
9211  histfilepath)));
9212 
9213  /*
9214  * Clean out any no-longer-needed history files. As a side effect,
9215  * this will post a .ready file for the newly created history file,
9216  * notifying the archiver that history file may be archived
9217  * immediately.
9218  */
9220  }
9221 
9222  /*
9223  * If archiving is enabled, wait for all the required WAL files to be
9224  * archived before returning. If archiving isn't enabled, the required WAL
9225  * needs to be transported via streaming replication (hopefully with
9226  * wal_keep_size set high enough), or some more exotic mechanism like
9227  * polling and copying files from pg_wal with a script. We have no knowledge
9228  * of those mechanisms, so it's up to the user to ensure that he gets all
9229  * the required WAL.
9230  *
9231  * We wait until both the last WAL file filled during backup and the
9232  * history file have been archived, and assume that the alphabetic sorting
9233  * property of the WAL files ensures any earlier WAL files are safely
9234  * archived as well.
9235  *
9236  * We wait forever, since archive_command is supposed to work and we
9237  * assume the admin wanted his backup to work completely. If you don't
9238  * wish to wait, then either waitforarchive should be passed in as false,
9239  * or you can set statement_timeout. Also, some notices are issued to
9240  * clue in anyone who might be doing this interactively.
9241  */
9242 
9243  if (waitforarchive &&
9244  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9245  (backup_stopped_in_recovery && XLogArchivingAlways())))
9246  {
9247  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9248  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9250 
9251  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9252  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9253  state->startpoint, wal_segment_size);
9254 
9255  seconds_before_warning = 60;
9256  waits = 0;
9257 
9258  while (XLogArchiveIsBusy(lastxlogfilename) ||
9259  XLogArchiveIsBusy(histfilename))
9260  {
9262 
9263  if (!reported_waiting && waits > 5)
9264  {
9265  ereport(NOTICE,
9266  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9267  reported_waiting = true;
9268  }
9269 
9270  (void) WaitLatch(MyLatch,
9272  1000L,
9273  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9275 
9276  if (++waits >= seconds_before_warning)
9277  {
9278  seconds_before_warning *= 2; /* This wraps in >10 years... */
9279  ereport(WARNING,
9280  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9281  waits),
9282  errhint("Check that your archive_command is executing properly. "
9283  "You can safely cancel this backup, "
9284  "but the database backup will not be usable without all the WAL segments.")));
9285  }
9286  }
9287 
9288  ereport(NOTICE,
9289  (errmsg("all required WAL segments have been archived")));
9290  }
9291  else if (waitforarchive)
9292  ereport(NOTICE,
9293  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9294 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2583
int FreeFile(FILE *file)
Definition: fd.c:2781
struct Latch * MyLatch
Definition: globals.c:60
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:72
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4150
#define XLogArchivingAlways()
Definition: xlog.h:100
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9021 of file xlog.c.

9022 {
9023  return sessionBackupState;
9024 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8513 of file xlog.c.

8514 {
8515  int o_direct_flag = 0;
8516 
8517  /*
8518  * Use O_DIRECT if requested, except in walreceiver process. The WAL
8519  * written by walreceiver is normally read by the startup process soon
8520  * after it's written. Also, walreceiver performs unaligned writes, which
8521  * don't work with O_DIRECT, so it is required for correctness too.
8522  */
8523  if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
8524  o_direct_flag = PG_O_DIRECT;
8525 
8526  /* If fsync is disabled, never open in sync mode */
8527  if (!enableFsync)
8528  return o_direct_flag;
8529 
8530  switch (method)
8531  {
8532  /*
8533  * enum values for all sync options are defined even if they are
8534  * not supported on the current platform. But if not, they are
8535  * not included in the enum option array, and therefore will never
8536  * be seen here.
8537  */
8538  case WAL_SYNC_METHOD_FSYNC:
8539  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8540  case WAL_SYNC_METHOD_FDATASYNC:
8541  return o_direct_flag;
8542 #ifdef O_SYNC
8543  case WAL_SYNC_METHOD_OPEN:
8544  return O_SYNC | o_direct_flag;
8545 #endif
8546 #ifdef O_DSYNC
8547  case WAL_SYNC_METHOD_OPEN_DSYNC:
8548  return O_DSYNC | o_direct_flag;
8549 #endif
8550  default:
8551  /* can't happen (unless we are out of sync with option array) */
8552  elog(ERROR, "unrecognized wal_sync_method: %d", method);
8553  return 0; /* silence warning */
8554  }
8555 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:126
#define AmWalReceiverProcess()
Definition: miscadmin.h:383
#define O_DSYNC
Definition: win32_port.h:352
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4826 of file xlog.c.

4827 {
4828  return ControlFile->wal_level;
4829 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4571 of file xlog.c.

4572 {
4573  return pg_atomic_fetch_add_u64(&XLogCtl->unloggedLSN, 1);
4574 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:518

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

Definition at line 6455 of file xlog.c.

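6456 {
6457  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6458 
6459  RefreshXLogWriteResult(LogwrtResult);
6460 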
6461  /*
6462  * If we're writing and flushing WAL, the time line can't be changing, so
6463  * no lock is required.
6464  */
6465  if (insertTLI)
6466  *insertTLI = XLogCtl->InsertTimeLineID;
6467 
6468  return LogwrtResult.Flush;
6469 }
RecoveryState SharedRecoveryState
Definition: xlog.c:511
XLogRecPtr Flush
Definition: xlog.c:328
@ RECOVERY_STATE_DONE
Definition: xlog.h:91

References Assert, XLogwrtResult::Flush, XLogCtlData::InsertTimeLineID, LogwrtResult, RECOVERY_STATE_DONE, RefreshXLogWriteResult, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by binary_upgrade_logical_slot_has_caught_up(), get_flush_position(), GetCurrentLSN(), GetLatestLSN(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), pg_replication_slot_advance(), read_local_xlog_page_guts(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6423 of file xlog.c.

6424 {
6425  *RedoRecPtr_p = RedoRecPtr;
6426  *doPageWrites_p = doPageWrites;
6427 }
static bool doPageWrites
Definition: xlog.c:286

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6438 of file xlog.c.

6439 {
6440  XLogRecPtr recptr;
6441 
6442  SpinLockAcquire(&XLogCtl->info_lck);
6443  recptr = XLogCtl->LogwrtRqst.Write;
6444  SpinLockRelease(&XLogCtl->info_lck);
6445 
6446  return recptr;
6447 }

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6493 of file xlog.c.

6494 {
6495  XLogRecPtr res = InvalidXLogRecPtr;
6496  int i;
6497 
6498  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6499  {
6500  XLogRecPtr last_important;
6501 
6502  /*
6503  * Need to take a lock to prevent torn reads of the LSN, which are
6504  * possible on some of the supported platforms. WAL insert locks only
6505  * support exclusive mode, so we have to use that.
6506  */
6507  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6508  last_important = WALInsertLocks[i].l.lastImportantAt;
6509  LWLockRelease(&WALInsertLocks[i].l.lock);
6510 
6511  if (res < last_important)
6512  res = last_important;
6513  }
6514 
6515  return res;
6516 }
int i
Definition: isn.c:73
XLogRecPtr lastImportantAt
Definition: xlog.c:371
WALInsertLock l
Definition: xlog.c:383
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:564
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:150

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6522 of file xlog.c.

6523 {
6524  pg_time_t result;
6525 
6526  /* Need WALWriteLock, but shared lock is sufficient */
6527  LWLockAcquire(WALWriteLock, LW_SHARED);
6528  result = XLogCtl->lastSegSwitchTime;
6529  *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6530  LWLockRelease(WALWriteLock);
6531 
6532  return result;
6533 }
pg_time_t lastSegSwitchTime
Definition: xlog.c:467
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:468

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4545 of file xlog.c.

4546 {
4547  Assert(ControlFile != NULL);
4548  return ControlFile->mock_authentication_nonce;
4549 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:228

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

Definition at line 9383 of file xlog.c.

9384 {
9385  LWLockAcquire(ControlFileLock, LW_SHARED);
9386  *oldrecptr = ControlFile->checkPointCopy.redo;
9387  *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
9388  LWLockRelease(ControlFileLock);
9389 }

References ControlFileData::checkPointCopy, ControlFile, LW_SHARED, LWLockAcquire(), LWLockRelease(), CheckPoint::redo, and CheckPoint::ThisTimeLineID.

Referenced by ExecuteRecoveryCommand(), and RestoreArchivedFile().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6326 of file xlog.c.

6327 {
6328  RecoveryState retval;
6329 
6330  SpinLockAcquire(&XLogCtl->info_lck);
6331  retval = XLogCtl->SharedRecoveryState;
6332  SpinLockRelease(&XLogCtl->info_lck);
6333 
6334  return retval;
6335 }
RecoveryState
Definition: xlog.h:88

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6393 of file xlog.c.

6394 {
6395  XLogRecPtr ptr;
6396 
6397  /*
6398  * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6399  * grabbed a WAL insertion lock to read the authoritative value in
6400  * Insert->RedoRecPtr, someone might update it just after we've released
6401  * the lock.
6402  */
6403  SpinLockAcquire(&XLogCtl->info_lck);
6404  ptr = XLogCtl->RedoRecPtr;
6405  SpinLockRelease(&XLogCtl->info_lck);
6406 
6407  if (RedoRecPtr < ptr)
6408  RedoRecPtr = ptr;
6409 
6410  return RedoRecPtr;
6411 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7790 of file xlog.c.

7791 {
7792  XLogRecPtr currpos; /* current write LSN */
7793  XLogSegNo currSeg; /* segid of currpos */
7794  XLogSegNo targetSeg; /* segid of targetLSN */
7795  XLogSegNo oldestSeg; /* actual oldest segid */
7796  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7797  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7798  uint64 keepSegs;
7799 
7800  /*
7801  * slot does not reserve WAL. Either deactivated, or has never been active
7802  */
7803  if (XLogRecPtrIsInvalid(targetLSN))
7804  return WALAVAIL_INVALID_LSN;
7805 
7806  /*
7807  * Calculate the oldest segment currently reserved by all slots,
7808  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7809  * oldestSlotSeg to the current segment.
7810  */
7811  currpos = GetXLogWriteRecPtr();
7812  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7813  KeepLogSeg(currpos, &oldestSlotSeg);
7814 
7815  /*
7816  * Find the oldest extant segment file. We get 1 until checkpoint removes
7817  * the first WAL segment file since startup, which causes the status being
7818  * wrong under certain abnormal conditions but that doesn't actually harm.
7819  */
7820  oldestSeg = XLogGetLastRemovedSegno() + 1;
7821 
7822  /* calculate oldest segment by max_wal_size */
7823  XLByteToSeg(currpos, currSeg, wal_segment_size);
7824  keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7825 
7826  if (currSeg > keepSegs)
7827  oldestSegMaxWalSize = currSeg - keepSegs;
7828  else
7829  oldestSegMaxWalSize = 1;
7830 
7831  /* the segment we care about */
7832  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7833 
7834  /*
7835  * No point in returning reserved or extended status values if the
7836  * targetSeg is known to be lost.
7837  */
7838  if (targetSeg >= oldestSlotSeg)
7839  {
7840  /* show "reserved" when targetSeg is within max_wal_size */
7841  if (targetSeg >= oldestSegMaxWalSize)
7842  return WALAVAIL_RESERVED;
7843 
7844  /* being retained by slots exceeding max_wal_size */
7845  return WALAVAIL_EXTENDED;
7846  }
7847 
7848  /* WAL segments are no longer retained but haven't been removed yet */
7849  if (targetSeg >= oldestSeg)
7850  return WALAVAIL_UNRESERVED;
7851 
7852  /* Definitely lost */
7853  return WALAVAIL_REMOVED;
7854 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3747
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9371
@ WALAVAIL_REMOVED
Definition: xlog.h:191
@ WALAVAIL_RESERVED
Definition: xlog.h:187
@ WALAVAIL_UNRESERVED
Definition: xlog.h:190
@ WALAVAIL_EXTENDED
Definition: xlog.h:188
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:186

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6476 of file xlog.c.

6477 {
6478  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6479 
6480  /* Since the value can't be changing, no lock is required. */
6481  return XLogCtl->InsertTimeLineID;
6482 }

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1627 of file xlog.c.

1628 {
1629  int idx;
1630  XLogRecPtr endptr;
1631  static uint64 cachedPage = 0;
1632  static char *cachedPos = NULL;
1633  XLogRecPtr expectedEndPtr;
1634 
1635  /*
1636  * Fast path for the common case that we need to access again the same
1637  * page as last time.
1638  */
1639  if (ptr / XLOG_BLCKSZ == cachedPage)
1640  {
1641  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1642  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1643  return cachedPos + ptr % XLOG_BLCKSZ;
1644  }
1645 
1646  /*
1647  * The XLog buffer cache is organized so that a page is always loaded to a
1648  * particular buffer. That way we can easily calculate the buffer a given
1649  * page must be loaded into, from the XLogRecPtr alone.
1650  */
1651  idx = XLogRecPtrToBufIdx(ptr);
1652 
1653  /*
1654  * See what page is loaded in the buffer at the moment. It could be the
1655  * page we're looking for, or something older. It can't be anything newer
1656  * - that would imply the page we're looking for has already been written
1657  * out to disk and evicted, and the caller is responsible for making sure
1658  * that doesn't happen.
1659  *
1660  * We don't hold a lock while we read the value. If someone is just about
1661  * to initialize or has just initialized the page, it's possible that we
1662  * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1663  * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1664  * we're looking for.
1665  */
1666  expectedEndPtr = ptr;
1667  expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1668 
1669  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1670  if (expectedEndPtr != endptr)
1671  {
1672  XLogRecPtr initializedUpto;
1673 
1674  /*
1675  * Before calling AdvanceXLInsertBuffer(), which can block, let others
1676  * know how far we're finished with inserting the record.
1677  *
1678  * NB: If 'ptr' points to just after the page header, advertise a
1679  * position at the beginning of the page rather than 'ptr' itself. If
1680  * there are no other insertions running, someone might try to flush
1681  * up to our advertised location. If we advertised a position after
1682  * the page header, someone might try to flush the page header, even
1683  * though page might actually not be initialized yet. As the first
1684  * inserter on the page, we are effectively responsible for making
1685  * sure that it's initialized, before we let insertingAt to move past
1686  * the page header.
1687  */
1688  if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1689  XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1690  initializedUpto = ptr - SizeOfXLogShortPHD;
1691  else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1692  XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1693  initializedUpto = ptr - SizeOfXLogLongPHD;
1694  else
1695  initializedUpto = ptr;
1696 
1697  WALInsertLockUpdateInsertingAt(initializedUpto);
1698 
1699  AdvanceXLInsertBuffer(ptr, tli, false);
1700  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1701 
1702  if (expectedEndPtr != endptr)
1703  elog(PANIC, "could not find WAL buffer for %X/%X",
1704  LSN_FORMAT_ARGS(ptr));
1705  }
1706  else
1707  {
1708  /*
1709  * Make sure the initialization of the page is visible to us, and
1710  * won't arrive later to overwrite the WAL data we write on the page.
1711  */
1712  pg_memory_barrier();
1713  }
1714 
1715  /*
1716  * Found the buffer holding this page. Return a pointer to the right
1717  * offset within the page.
1718  */
1719  cachedPage = ptr / XLOG_BLCKSZ;
1720  cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1721 
1722  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1723  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1724 
1725  return cachedPos + ptr % XLOG_BLCKSZ;
1726 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_memory_barrier()
Definition: atomics.h:138
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1466
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1980

References AdvanceXLInsertBuffer(), Assert, elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9355 of file xlog.c.

9356 {
9357  XLogCtlInsert *Insert = &XLogCtl->Insert;
9358  uint64 current_bytepos;
9359 
9360  SpinLockAcquire(&Insert->insertpos_lck);
9361  current_bytepos = Insert->CurrBytePos;
9362  SpinLockRelease(&Insert->insertpos_lck);
9363 
9364  return XLogBytePosToRecPtr(current_bytepos);
9365 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier)
static

Definition at line 4193 of file xlog.c.

4194 {
4195  char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4196 
4197  /*
4198  * Generate a random nonce. This is used for authentication requests that
4199  * will fail because the user does not exist. The nonce is used to create
4200  * a genuine-looking password challenge for the non-existent user, in lieu
4201  * of an actual stored password.
4202  */
4203  if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4204  ereport(PANIC,
4205  (errcode(ERRCODE_INTERNAL_ERROR),
4206  errmsg("could not generate secret authorization token")));
4207 
4208  memset(ControlFile, 0, sizeof(ControlFileData));
4209  /* Initialize pg_control status fields */
4210  ControlFile->system_identifier = sysidentifier;
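4211  memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4212  ControlFile->state = DB_SHUTDOWNED;
4213  ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4214 
4215  /* Set important parameter values for use when replaying WAL */
4216  ControlFile->MaxConnections = MaxConnections;
4217  ControlFile->max_worker_processes = max_worker_processes;
4218  ControlFile->max_wal_senders = max_wal_senders;
4219  ControlFile->max_prepared_xacts = max_prepared_xacts;
4220  ControlFile->max_locks_per_xact = max_locks_per_xact;
4221  ControlFile->wal_level = wal_level;
4222  ControlFile->wal_log_hints = wal_log_hints;
4223  ControlFile->track_commit_timestamp = track_commit_timestamp;
4224  ControlFile->data_checksum_version = bootstrap_data_checksum_version;
4225 }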
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:184
int wal_level
Definition: xlog.c:131
bool wal_log_hints
Definition: xlog.c:123
uint32 bootstrap_data_checksum_version
Definition: bootstrap.c:44
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References bootstrap_data_checksum_version, ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4751 of file xlog.c.

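4752 {
4753  Assert(process_shared_preload_libraries_done);
4754 
4755  if (check_wal_consistency_checking_deferred)
4756  {
4757  struct config_generic *guc;
4758 
4759  guc = find_option("wal_consistency_checking", false, false, ERROR);
4760 
4761  check_wal_consistency_checking_deferred = false;
4762 
4763  set_config_option_ext("wal_consistency_checking",
4764  wal_consistency_checking_string,
4765  guc->scontext, guc->source, guc->srole,
4766  GUC_ACTION_SET, true, ERROR, false);
4767 
4768  /* checking should not be deferred again */
4769  Assert(!check_wal_consistency_checking_deferred);
4770  }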
4771 }
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1237
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3373
@ GUC_ACTION_SET
Definition: guc.h:199
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:125

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3552 of file xlog.c.

3554 {
3555  char path[MAXPGPATH];
3556  struct stat stat_buf;
3557 
3558  Assert(tli != 0);
3559 
3560  XLogFilePath(path, tli, *segno, wal_segment_size);
3561 
3562  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3563  if (!XLogCtl->InstallXLogFileSegmentActive)
3564  {
3565  LWLockRelease(ControlFileLock);
3566  return false;
3567  }
3568 
3569  if (!find_free)
3570  {
3571  /* Force installation: get rid of any pre-existing segment file */
3572  durable_unlink(path, DEBUG1);
3573  }
3574  else
3575  {
3576  /* Find a free slot to put it in */
3577  while (stat(path, &stat_buf) == 0)
3578  {
3579  if ((*segno) >= max_segno)
3580  {
3581  /* Failed to find a free slot within specified range */
3582  LWLockRelease(ControlFileLock);
3583  return false;
3584  }
3585  (*segno)++;
3586  XLogFilePath(path, tli, *segno, wal_segment_size);
3587  }
3588  }
3589 
3590  Assert(access(path, F_OK) != 0 && errno == ENOENT);
3591  if (durable_rename(tmppath, path, LOG) != 0)
3592  {
3593  LWLockRelease(ControlFileLock);
3594  /* durable_rename already emitted log message */
3595  return false;
3596  }
3597 
3598  LWLockRelease(ControlFileLock);
3599 
3600  return true;
3601 }
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:521
#define stat
Definition: win32_port.h:284

References Assert, DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9412 of file xlog.c.

9413 {
9414  bool result;
9415 
9416  LWLockAcquire(ControlFileLock, LW_SHARED);
9417  result = XLogCtl->InstallXLogFileSegmentActive;
9418  LWLockRelease(ControlFileLock);
9419 
9420  return result;
9421 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8603 of file xlog.c.

8604 {
8605  char *msg = NULL;
8606  instr_time start;
8607 
8608  Assert(tli != 0);
8609 
8610  /*
8611  * Quick exit if fsync is disabled or write() has already synced the WAL
8612  * file.
8613  */
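8614  if (!enableFsync ||
8615  wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8616  wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8617  return;
8618 
8619  /* Measure I/O timing to sync the WAL file */
8620  if (track_wal_io_timing)
8621  INSTR_TIME_SET_CURRENT(start);
8622  else
8623  INSTR_TIME_SET_ZERO(start);
8624 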
8625  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8626  switch (wal_sync_method)
8627  {
8628  case WAL_SYNC_METHOD_FSYNC:
8629  if (pg_fsync_no_writethrough(fd) != 0)
8630  msg = _("could not fsync file \"%s\": %m");
8631  break;
8632 #ifdef HAVE_FSYNC_WRITETHROUGH
8633  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8634  if (pg_fsync_writethrough(fd) != 0)
8635  msg = _("could not fsync write-through file \"%s\": %m");
8636  break;
8637 #endif
8638  case WAL_SYNC_METHOD_FDATASYNC:
8639  if (pg_fdatasync(fd) != 0)
8640  msg = _("could not fdatasync file \"%s\": %m");
8641  break;
8642  case WAL_SYNC_METHOD_OPEN:
8643  case WAL_SYNC_METHOD_OPEN_DSYNC:
8644  /* not reachable */
8645  Assert(false);
8646  break;
8647  default:
8648  ereport(PANIC,
8649  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8650  errmsg_internal("unrecognized wal_sync_method: %d", wal_sync_method));
8651  break;
8652  }
8653 
8654  /* PANIC if failed to fsync */
8655  if (msg)
8656  {
8657  char xlogfname[MAXFNAMELEN];
8658  int save_errno = errno;
8659 
8660  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8661  errno = save_errno;
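8662  ereport(PANIC,
8663  (errcode_for_file_access(),
8664  errmsg(msg, xlogfname)));
8665  }
8666 
8667  pgstat_report_wait_end();
8668 
8669  /*
8670  * Increment the I/O timing and the number of times WAL files were synced.
8671  */
8672  if (track_wal_io_timing)
8673  {
8674  instr_time end;
8675 
8676  INSTR_TIME_SET_CURRENT(end);
8677  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
8678  }
8679 
8680  PendingWalStats.wal_sync++;
8681 }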
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:456
PgStat_Counter wal_sync
Definition: pgstat.h:454
bool track_wal_io_timing
Definition: xlog.c:137

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 7874 of file xlog.c.

7875 {
7876  XLogSegNo currSegNo;
7877  XLogSegNo segno;
7878  XLogRecPtr keep;
7879 
7880  XLByteToSeg(recptr, currSegNo, wal_segment_size);
7881  segno = currSegNo;
7882 
7883  /*
7884  * Calculate how many segments are kept by slots first, adjusting for
7885  * max_slot_wal_keep_size.
7886  */
7887  keep = XLogGetReplicationSlotMinimumLSN();
7888  if (keep != InvalidXLogRecPtr && keep < recptr)
7889  {
7890  XLByteToSeg(keep, segno, wal_segment_size);
7891 
7892  /* Cap by max_slot_wal_keep_size ... */
7893  if (max_slot_wal_keep_size_mb >= 0)
7894  {
7895  uint64 slot_keep_segs;
7896 
7897  slot_keep_segs =
7898  ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
7899 
7900  if (currSegNo - segno > slot_keep_segs)
7901  segno = currSegNo - slot_keep_segs;
7902  }
7903  }
7904 
7905  /*
7906  * If WAL summarization is in use, don't remove WAL that has yet to be
7907  * summarized.
7908  */
7909  keep = GetOldestUnsummarizedLSN(NULL, NULL, false);
7910  if (keep != InvalidXLogRecPtr)
7911  {
7912  XLogSegNo unsummarized_segno;
7913 
7914  XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
7915  if (unsummarized_segno < segno)
7916  segno = unsummarized_segno;
7917  }
7918 
7919  /* but, keep at least wal_keep_size if that's set */
7920  if (wal_keep_size_mb > 0)
7921  {
7922  uint64 keep_segs;
7923 
7924  keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
7925  if (currSegNo - segno < keep_segs)
7926  {
7927  /* avoid underflow, don't go below 1 */
7928  if (currSegNo <= keep_segs)
7929  segno = 1;
7930  else
7931  segno = currSegNo - keep_segs;
7932  }
7933  }
7934 
7935  /* don't delete WAL segments newer than the calculated segment */
7936  if (segno < *logSegNo)
7937  *logSegNo = segno;
7938 }
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact, bool reset_pending_lsn)
int wal_keep_size_mb
Definition: xlog.c:116
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2690
int max_slot_wal_keep_size_mb
Definition: xlog.c:135

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4813 of file xlog.c.

4814 {
4815  Assert(reset || ControlFile == NULL);
4816  ControlFile = palloc(sizeof(ControlFileData));
4817  ReadControlFile();
4818 }
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6378 of file xlog.c.

6379 {
6380  int oldXLogAllowed = LocalXLogInsertAllowed;
6381 
6382  LocalXLogInsertAllowed = 1;
6383 
6384  return oldXLogAllowed;
6385 }

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6618 of file xlog.c.

6619 {
6620  long write_msecs,
6621  sync_msecs,
6622  total_msecs,
6623  longest_msecs,
6624  average_msecs;
6625  uint64 average_sync_time;
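6626 
6627  CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
6628 
6629  write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
6630  CheckpointStats.ckpt_sync_t);
6631 
6632  sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
6633  CheckpointStats.ckpt_sync_end_t);
6634 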
6635  /* Accumulate checkpoint timing summary data, in milliseconds. */
6636  PendingCheckpointerStats.write_time += write_msecs;
6637  PendingCheckpointerStats.sync_time += sync_msecs;
6638 
6639  /*
6640  * All of the published timing statistics are accounted for. Only
6641  * continue if a log message is to be written.
6642  */
6643  if (!log_checkpoints)
6644  return;
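6645 
6646  total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
6647  CheckpointStats.ckpt_end_t);
6648 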
6649  /*
6650  * Timing values returned from CheckpointStats are in microseconds.
6651  * Convert to milliseconds for consistent printing.
6652  */
6653  longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6654 
6655  average_sync_time = 0;
6656  if (CheckpointStats.ckpt_sync_rels > 0)
6657  average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6658  CheckpointStats.ckpt_sync_rels;
6659  average_msecs = (long) ((average_sync_time + 999) / 1000);
6660 
6661  /*
6662  * ControlFileLock is not required to see ControlFile->checkPoint and
6663  * ->checkPointCopy here as we are the only updator of those variables at
6664  * this moment.
6665  */
6666  if (restartpoint)
6667  ereport(LOG,
6668  (errmsg("restartpoint complete: wrote %d buffers (%.1f%%); "
6669  "%d WAL file(s) added, %d removed, %d recycled; "
6670  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6671  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6672  "distance=%d kB, estimate=%d kB; "
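6673  "lsn=%X/%X, redo lsn=%X/%X",
6674  CheckpointStats.ckpt_bufs_written,
6675  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6676  CheckpointStats.ckpt_segs_added,
6677  CheckpointStats.ckpt_segs_removed,
6678  CheckpointStats.ckpt_segs_recycled,
6679  write_msecs / 1000, (int) (write_msecs % 1000),
6680  sync_msecs / 1000, (int) (sync_msecs % 1000),
6681  total_msecs / 1000, (int) (total_msecs % 1000),
6682  CheckpointStats.ckpt_sync_rels,
6683  longest_msecs / 1000, (int) (longest_msecs % 1000),
6684  average_msecs / 1000, (int) (average_msecs % 1000),
6685  (int) (PrevCheckPointDistance / 1024.0),
6686  (int) (CheckPointDistanceEstimate / 1024.0),
6687  LSN_FORMAT_ARGS(ControlFile->checkPoint),
6688  LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6689  else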
6690  ereport(LOG,
6691  (errmsg("checkpoint complete: wrote %d buffers (%.1f%%); "
6692  "%d WAL file(s) added, %d removed, %d recycled; "
6693  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6694  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6695  "distance=%d kB, estimate=%d kB; "
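6696  "lsn=%X/%X, redo lsn=%X/%X",
6697  CheckpointStats.ckpt_bufs_written,
6698  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6699  CheckpointStats.ckpt_segs_added,
6700  CheckpointStats.ckpt_segs_removed,
6701  CheckpointStats.ckpt_segs_recycled,
6702  write_msecs / 1000, (int) (write_msecs % 1000),
6703  sync_msecs / 1000, (int) (sync_msecs % 1000),
6704  total_msecs / 1000, (int) (total_msecs % 1000),
6705  CheckpointStats.ckpt_sync_rels,
6706  longest_msecs / 1000, (int) (longest_msecs % 1000),
6707  average_msecs / 1000, (int) (average_msecs % 1000),
6708  (int) (PrevCheckPointDistance / 1024.0),
6709  (int) (CheckPointDistanceEstimate / 1024.0),
6710  LSN_FORMAT_ARGS(ControlFile->checkPoint),
6711  LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
6712 }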
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1766
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:173
uint64 ckpt_longest_sync
Definition: xlog.h:172
TimestampTz ckpt_end_t
Definition: xlog.h:163
int ckpt_sync_rels
Definition: xlog.h:171
PgStat_Counter sync_time
Definition: pgstat.h:269
PgStat_Counter write_time
Definition: pgstat.h:268
static double CheckPointDistanceEstimate
Definition: xlog.c:159
static double PrevCheckPointDistance
Definition: xlog.c:160

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6586 of file xlog.c.

6587 {
6588  if (restartpoint)
6589  ereport(LOG,
6590  /* translator: the placeholders show checkpoint options */
6591  (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6592  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6593  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6594  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6595  (flags & CHECKPOINT_FORCE) ? " force" : "",
6596  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6597  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6598  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6599  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6600  else
6601  ereport(LOG,
6602  /* translator: the placeholders show checkpoint options */
6603  (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6604  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6605  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6606  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6607  (flags & CHECKPOINT_FORCE) ? " force" : "",
6608  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6609  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6610  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6611  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6612 }
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:146
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:141
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:147

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FLUSH_ALL, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6240 of file xlog.c.

6241 {
6242  bool promoted = false;
6243 
6244  /*
6245  * Perform a checkpoint to update all our recovery activity to disk.
6246  *
6247  * Note that we write a shutdown checkpoint rather than an on-line one.
6248  * This is not particularly critical, but since we may be assigning a new
6249  * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6250  * only changes in shutdown checkpoints, which allows some extra error
6251  * checking in xlog_redo.
6252  *
6253  * In promotion, only create a lightweight end-of-recovery record instead
6254  * of a full checkpoint. A checkpoint is requested later, after we're
6255  * fully out of recovery mode and already accepting queries.
6256  */
6257  if (ArchiveRecoveryRequested && IsUnderPostmaster &&
6258  PromoteIsTriggered())
6259  {
6260  promoted = true;
6261 
6262  /*
6263  * Insert a special WAL record to mark the end of recovery, since we
6264  * aren't doing a checkpoint. That means that the checkpointer process
6265  * may likely be in the middle of a time-smoothed restartpoint and
6266  * could continue to be for minutes after this. That sounds strange,
6267  * but the effect is roughly the same and it would be stranger to try
6268  * to come out of the restartpoint and then checkpoint. We request a
6269  * checkpoint later anyway, just for safety.
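6270  */
6271  CreateEndOfRecoveryRecord();
6272  }
6273  else
6274  {
6275  RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
6276  CHECKPOINT_IMMEDIATE |
6277  CHECKPOINT_WAIT);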
6278  }
6279 
6280  return promoted;
6281 }
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7297
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3679 of file xlog.c.

3680 {
3681  XLogSegNo _logSegNo;
3682  int lf;
3683  bool added;
3684  char path[MAXPGPATH];
3685  uint64 offset;
3686 
3687  if (!XLogCtl->InstallXLogFileSegmentActive)
3688  return; /* unlocked check says no */
3689 
3690  XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3691  offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3692  if (offset >= (uint32) (0.75 * wal_segment_size))
3693  {
3694  _logSegNo++;
3695  lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3696  if (lf >= 0)
3697  close(lf);
3698  if (added)
3699  CheckpointStats.ckpt_segs_added++;
3700  }
3701 }
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3199

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6203 of file xlog.c.

6204 {
6205  /*
6206  * We have reached the end of base backup, as indicated by pg_control. The
6207  * data on disk is now consistent (unless minRecoveryPoint is further
6208  * ahead, which can happen if we crashed during previous recovery). Reset
6209  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6210  * make sure we don't allow starting up at an earlier point even if
6211  * recovery is stopped and restarted soon after this.
6212  */
6213  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6214 
6215  if (ControlFile->minRecoveryPoint < EndRecPtr)
6216  {
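6217  ControlFile->minRecoveryPoint = EndRecPtr;
6218  ControlFile->minRecoveryPointTLI = tli;
6219  }
6220 
6221  ControlFile->backupStartPoint = InvalidXLogRecPtr;
6222  ControlFile->backupEndPoint = InvalidXLogRecPtr;
6223  ControlFile->backupEndRequired = false;
6224  UpdateControlFile();
6225 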
6226  LWLockRelease(ControlFileLock);
6227 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:169
bool backupEndRequired
Definition: pg_control.h:171
XLogRecPtr backupEndPoint
Definition: pg_control.h:170

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4310 of file xlog.c.

4311 {
4312  pg_crc32c crc;
4313  int fd;
4314  static char wal_segsz_str[20];
4315  int r;
4316 
4317  /*
4318  * Read data...
4319  */
4320  fd = BasicOpenFile(XLOG_CONTROL_FILE,
4321  O_RDWR | PG_BINARY);
4322  if (fd < 0)
4323  ereport(PANIC,
4324  (errcode_for_file_access(),
4325  errmsg("could not open file \"%s\": %m",
4326  XLOG_CONTROL_FILE)));
4327 
4328  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4329  r = read(fd, ControlFile, sizeof(ControlFileData));
4330  if (r != sizeof(ControlFileData))
4331  {
4332  if (r < 0)
4333  ereport(PANIC,
4334  (errcode_for_file_access(),
4335  errmsg("could not read file \"%s\": %m",
4336  XLOG_CONTROL_FILE)));
4337  else
4338  ereport(PANIC,
4339  (errcode(ERRCODE_DATA_CORRUPTED),
4340  errmsg("could not read file \"%s\": read %d of %zu",
4341  XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4342  }
4343  pgstat_report_wait_end();
4344 
4345  close(fd);
4346 
4347  /*
4348  * Check for expected pg_control format version. If this is wrong, the
4349  * CRC check will likely fail because we'll be checking the wrong number
4350  * of bytes. Complaining about wrong version will probably be more
4351  * enlightening than complaining about wrong CRC.
4352  */
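4353 
4354  if (ControlFile->pg_control_version != PG_CONTROL_VERSION && ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0)
4355  ereport(FATAL,
4356  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4357  errmsg("database files are incompatible with server"),
4358  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4359  " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4360  ControlFile->pg_control_version, ControlFile->pg_control_version,
4361  PG_CONTROL_VERSION, PG_CONTROL_VERSION),
4362  errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4363 
4364  if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
4365  ereport(FATAL,
4366  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4367  errmsg("database files are incompatible with server"),
4368  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4369  " but the server was compiled with PG_CONTROL_VERSION %d.",
4370  ControlFile->pg_control_version, PG_CONTROL_VERSION),
4371  errhint("It looks like you need to initdb.")));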
4372 
4373  /* Now check the CRC. */
4374  INIT_CRC32C(crc);
4375  COMP_CRC32C(crc,
4376  (char *) ControlFile,
4377  offsetof(ControlFileData, crc));
4378  FIN_CRC32C(crc);
4379 
4380  if (!EQ_CRC32C(crc, ControlFile->crc))
4381  ereport(FATAL,
4382  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4383  errmsg("incorrect checksum in control file")));
4384 
4385  /*
4386  * Do compatibility checking immediately. If the database isn't
4387  * compatible with the backend executable, we want to abort before we can
4388  * possibly do any damage.
4389  */
4390  if (ControlFile->catalog_version_no != CATALOG_VERSION_NO)
4391  ereport(FATAL,
4392  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4393  errmsg("database files are incompatible with server"),
4394  errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d,"
4395  " but the server was compiled with CATALOG_VERSION_NO %d.",
4396  ControlFile->catalog_version_no, CATALOG_VERSION_NO),
4397  errhint("It looks like you need to initdb.")));
4398  if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4399  ereport(FATAL,
4400  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4401  errmsg("database files are incompatible with server"),
4402  errdetail("The database cluster was initialized with MAXALIGN %d,"
4403  " but the server was compiled with MAXALIGN %d.",
4404  ControlFile->maxAlign, MAXIMUM_ALIGNOF),
4405  errhint("It looks like you need to initdb.")));
4406  if (ControlFile->floatFormat != FLOATFORMAT_VALUE)
4407  ereport(FATAL,
4408  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4409  errmsg("database files are incompatible with server"),
4410  errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4411  errhint("It looks like you need to initdb.")));
4412  if (ControlFile->blcksz != BLCKSZ)
4413  ereport(FATAL,
4414  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4415  errmsg("database files are incompatible with server"),
4416  errdetail("The database cluster was initialized with BLCKSZ %d,"
4417  " but the server was compiled with BLCKSZ %d.",
4418  ControlFile->blcksz, BLCKSZ),
4419  errhint("It looks like you need to recompile or initdb.")));
4420  if (ControlFile->relseg_size != RELSEG_SIZE)
4421  ereport(FATAL,
4422  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4423  errmsg("database files are incompatible with server"),
4424  errdetail("The database cluster was initialized with RELSEG_SIZE %d,"
4425  " but the server was compiled with RELSEG_SIZE %d.",
4426  ControlFile->relseg_size, RELSEG_SIZE),
4427  errhint("It looks like you need to recompile or initdb.")));
4428  if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4429  ereport(FATAL,
4430  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4431  errmsg("database files are incompatible with server"),
4432  errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
4433  " but the server was compiled with XLOG_BLCKSZ %d.",
4434  ControlFile->xlog_blcksz, XLOG_BLCKSZ),
4435  errhint("It looks like you need to recompile or initdb.")));
4436  if (ControlFile->nameDataLen != NAMEDATALEN)
4437  ereport(FATAL,
4438  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4439  errmsg("database files are incompatible with server"),
4440  errdetail("The database cluster was initialized with NAMEDATALEN %d,"
4441  " but the server was compiled with NAMEDATALEN %d.",
4442  ControlFile->nameDataLen, NAMEDATALEN),
4443  errhint("It looks like you need to recompile or initdb.")));
4444  if (ControlFile->indexMaxKeys != INDEX_MAX_KEYS)
4445  ereport(FATAL,
4446  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4447  errmsg("database files are incompatible with server"),
4448  errdetail("The database cluster was initialized with INDEX_MAX_KEYS %d,"
4449  " but the server was compiled with INDEX_MAX_KEYS %d.",
4450  ControlFile->indexMaxKeys, INDEX_MAX_KEYS),
4451  errhint("It looks like you need to recompile or initdb.")));
4452  if (ControlFile->toast_max_chunk_size != TOAST_MAX_CHUNK_SIZE)
4453  ereport(FATAL,
4454  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4455  errmsg("database files are incompatible with server"),
4456  errdetail("The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
4457  " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
4458  ControlFile->toast_max_chunk_size, (int) TOAST_MAX_CHUNK_SIZE),
4459  errhint("It looks like you need to recompile or initdb.")));
4460  if (ControlFile->loblksize != LOBLKSIZE)
4461  ereport(FATAL,
4462  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4463  errmsg("database files are incompatible with server"),
4464  errdetail("The database cluster was initialized with LOBLKSIZE %d,"
4465  " but the server was compiled with LOBLKSIZE %d.",
4466  ControlFile->loblksize, (int) LOBLKSIZE),
4467  errhint("It looks like you need to recompile or initdb.")));
4468 
4469 #ifdef USE_FLOAT8_BYVAL
4470  if (ControlFile->float8ByVal != true)
4471  ereport(FATAL,
4472  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4473  errmsg("database files are incompatible with server"),
4474  errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
4475  " but the server was compiled with USE_FLOAT8_BYVAL."),
4476  errhint("It looks like you need to recompile or initdb.")));
4477 #else
4478  if (ControlFile->float8ByVal != false)
4479  ereport(FATAL,
4480  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4481  errmsg("database files are incompatible with server"),
4482  errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
4483  " but the server was compiled without USE_FLOAT8_BYVAL."),
4484  errhint("It looks like you need to recompile or initdb.")));
4485 #endif
4486 
4487  wal_segment_size = ControlFile->xlog_seg_size;
4488 
4489  if (!IsValidWalSegSize(wal_segment_size))
4490  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4491  errmsg_plural("invalid WAL segment size in control file (%d byte)",
4492  "invalid WAL segment size in control file (%d bytes)",
4493  wal_segment_size,
4494  wal_segment_size),
4495  errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4496 
4497  snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4498  SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4499  PGC_S_DYNAMIC_DEFAULT);
4500 
4501  /* check and update variables dependent on wal_segment_size */
4502  if (ConvertToXSegs(min_wal_size_mb, wal_segment_size) < 2)
4503  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4504  errmsg("min_wal_size must be at least twice wal_segment_size")));
4505 
4506  if (ConvertToXSegs(max_wal_size_mb, wal_segment_size) < 2)
4507  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4508  errmsg("max_wal_size must be at least twice wal_segment_size")));
4509 
4510  UsableBytesInSegment =
4511  (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4512  (SizeOfXLogLongPHD - SizeOfXLogShortPHD);
4513 
4514  CalculateCheckpointSegments();
4515 
4516  /* Make the initdb settings visible as GUC variables, too */
4517  SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4518  PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
4519 }
#define PG_BINARY
Definition: c.h:1273
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1182
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1087
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4275
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_INTERNAL
Definition: guc.h:69
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:200
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:124
uint32 xlog_seg_size
Definition: pg_control.h:210
uint32 nameDataLen
Definition: pg_control.h:212
uint32 indexMaxKeys
Definition: pg_control.h:213
uint32 relseg_size
Definition: pg_control.h:207
uint32 catalog_version_no
Definition: pg_control.h:125
double floatFormat
Definition: pg_control.h:199
uint32 xlog_blcksz
Definition: pg_control.h:209
uint32 loblksize
Definition: pg_control.h:216
pg_crc32c crc
Definition: pg_control.h:231
uint32 toast_max_chunk_size
Definition: pg_control.h:215
#define UsableBytesInPage
Definition: xlog.c:592
bool DataChecksumsEnabled(void)
Definition: xlog.c:4555
static int UsableBytesInSegment
Definition: xlog.c:601
int min_wal_size_mb
Definition: xlog.c:115
#define XLOG_CONTROL_FILE

References BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6290 of file xlog.c.

6291 {
6292  /*
6293  * We check shared state each time only until we leave recovery mode. We
6294  * can't re-enter recovery, so there's no need to keep checking after the
6295  * shared variable has once been seen false.
6296  */
6297  if (!LocalRecoveryInProgress)
6298  return false;
6299  else
6300  {
6301  /*
6302  * use volatile pointer to make sure we make a fresh read of the
6303  * shared variable.
6304  */
6305  volatile XLogCtlData *xlogctl = XLogCtl;
6306 
6307  LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6308 
6309  /*
6310  * Note: We don't need a memory barrier when we're still in recovery.
6311  * We might exit recovery immediately after return, so the caller
6312  * can't rely on 'true' meaning that we're still in recovery anyway.
6313  */
6314 
6315  return LocalRecoveryInProgress;
6316  }
6317 }
static bool LocalRecoveryInProgress
Definition: xlog.c:224

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), 
show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7471 of file xlog.c.

7472 {
7473  /*
7474  * Also refrain from creating a restartpoint if we have seen any
7475  * references to non-existent pages. Restarting recovery from the
7476  * restartpoint would not see the references, so we would lose the
7477  * cross-check that the pages belonged to a relation that was dropped
7478  * later.
7479  */
7480  if (XLogHaveInvalidPages())
7481  {
7482  elog(DEBUG2,
7483  "could not record restart point at %X/%X because there "
7484  "are unresolved references to invalid pages",
7485  LSN_FORMAT_ARGS(checkPoint->redo));
7486  return;
7487  }
7488 
7489  /*
7490  * Copy the checkpoint record to shared memory, so that checkpointer can
7491  * work out the next time it wants to perform a restartpoint.
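7492  */
7493  SpinLockAcquire(&XLogCtl->info_lck);
7494  XLogCtl->lastCheckPointRecPtr = record->ReadRecPtr;
7495  XLogCtl->lastCheckPointEndPtr = record->EndRecPtr;
7496  XLogCtl->lastCheckPoint = *checkPoint;
7497  SpinLockRelease(&XLogCtl->info_lck);
7498 }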
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:235

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9341 of file xlog.c.

9342 {
9343  static bool already_done = false;
9344 
9345  if (already_done)
9346  return;
9347  before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
9348  already_done = true;
9349 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3929 of file xlog.c.

3930 {
3931  DIR *xldir;
3932  struct dirent *xlde;
3933  char switchseg[MAXFNAMELEN];
3934  XLogSegNo endLogSegNo;
3935  XLogSegNo switchLogSegNo;
3936  XLogSegNo recycleSegNo;
3937 
3938  /*
3939  * Initialize info about where to begin the work. This will recycle,
3940  * somewhat arbitrarily, 10 future segments.
3941  */
3942  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3943  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3944  recycleSegNo = endLogSegNo + 10;
3945 
3946  /*
3947  * Construct a filename of the last segment to be kept.
3948  */
3949  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3950 
3951  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3952  switchseg);
3953 
3954  xldir = AllocateDir(XLOGDIR);
3955 
3956  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3957  {
3958  /* Ignore files that are not XLOG segments */
3959  if (!IsXLogFileName(xlde->d_name))
3960  continue;
3961 
3962  /*
3963  * Remove files that are on a timeline older than the new one we're
3964  * switching to, but with a segment number >= the first segment on the
3965  * new timeline.
3966  */
3967  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3968  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3969  {
3970  /*
3971  * If the file has already been marked as .ready, however, don't
3972  * remove it yet. It should be OK to remove it - files that are
3973  * not part of our timeline history are not required for recovery
3974  * - but seems safer to let them be archived and removed later.
3975  */
3976  if (!XLogArchiveIsReady(xlde->d_name))
3977  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3978  }
3979  }
3980 
3981  FreeDir(xldir);
3982 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:3998
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3854 of file xlog.c.

3856 {
3857  DIR *xldir;
3858  struct dirent *xlde;
3859  char lastoff[MAXFNAMELEN];
3860  XLogSegNo endlogSegNo;
3861  XLogSegNo recycleSegNo;
3862 
3863  /* Initialize info about where to try to recycle to */
3864  XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3865  recycleSegNo = XLOGfileslop(lastredoptr);
3866 
3867  /*
3868  * Construct a filename of the last segment to be kept. The timeline ID
3869  * doesn't matter, we ignore that in the comparison. (During recovery,
3870  * InsertTimeLineID isn't set, so we can't use that.)
3871  */
3872  XLogFileName(lastoff, 0, segno, wal_segment_size);
3873 
3874  elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3875  lastoff);
3876 
3877  xldir = AllocateDir(XLOGDIR);
3878 
3879  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3880  {
3881  /* Ignore files that are not XLOG segments */
3882  if (!IsXLogFileName(xlde->d_name) &&
3883  !IsPartialXLogFileName(xlde->d_name))
3884  continue;
3885 
3886  /*
3887  * We ignore the timeline part of the XLOG segment identifiers in
3888  * deciding whether a segment is still needed. This ensures that we
3889  * won't prematurely remove a segment from a parent timeline. We could
3890  * probably be a little more proactive about removing segments of
3891  * non-parent timelines, but that would be a whole lot more
3892  * complicated.
3893  *
3894  * We use the alphanumeric sorting property of the filenames to decide
3895  * which ones are earlier than the lastoff segment.
3896  */
3897  if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3898  {
3899  if (XLogArchiveCheckDone(xlde->d_name))
3900  {
3901  /* Update the last removed location in shared memory first */
3902  UpdateLastRemovedPtr(xlde->d_name);
3903 
3904  RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3905  }
3906  }
3907  }
3908 
3909  FreeDir(xldir);
3910 }
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2235
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3801
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3821 of file xlog.c.

3822 {
3823  DIR *xldir;
3824  struct dirent *xlde;
3825 
3826  elog(DEBUG2, "removing all temporary WAL segments");
3827 
3828  xldir = AllocateDir(XLOGDIR);
3829  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3830  {
3831  char path[MAXPGPATH];
3832 
3833  if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3834  continue;
3835 
3836  snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3837  unlink(path);
3838  elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3839  }
3840  FreeDir(xldir);
3841 }

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent * segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo * endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 3998 of file xlog.c.

4001 {
4002  char path[MAXPGPATH];
4003 #ifdef WIN32
4004  char newpath[MAXPGPATH];
4005 #endif
4006  const char *segname = segment_de->d_name;
4007 
4008  snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4009 
4010  /*
4011  * Before deleting the file, see if it can be recycled as a future log
4012  * segment. Only recycle normal files, because we don't want to recycle
4013  * symbolic links pointing to a separate archive directory.
4014  */
4015  if (wal_recycle &&
4016  *endlogSegNo <= recycleSegNo &&
4017  XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4018  get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4019  InstallXLogFileSegment(endlogSegNo, path,
4020  true, recycleSegNo, insertTLI))
4021  {
4022  ereport(DEBUG2,
4023  (errmsg_internal("recycled write-ahead log file \"%s\"",
4024  segname)));
4025  CheckpointStats.ckpt_segs_recycled++;
4026  /* Needn't recheck that slot on future iterations */
4027  (*endlogSegNo)++;
4028  }
4029  else
4030  {
4031  /* No need for any more future segments, or recycling failed ... */
4032  int rc;
4033 
4034  ereport(DEBUG2,
4035  (errmsg_internal("removing write-ahead log file \"%s\"",
4036  segname)));
4037 
4038 #ifdef WIN32
4039 
4040  /*
4041  * On Windows, if another process (e.g another backend) holds the file
4042  * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4043  * will still show up in directory listing until the last handle is
4044  * closed. To avoid confusing the lingering deleted file for a live
4045  * WAL file that needs to be archived, rename it before deleting it.
4046  *
4047  * If another process holds the file open without FILE_SHARE_DELETE
4048  * flag, rename will fail. We'll try again at the next checkpoint.
4049  */
4050  snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4051  if (rename(path, newpath) != 0)
4052  {
4053  ereport(LOG,
4054  (errcode_for_file_access(),
4055  errmsg("could not rename file \"%s\": %m",
4056  path)));
4057  return;
4058  }
4059  rc = durable_unlink(newpath, LOG);
4060 #else
4061  rc = durable_unlink(path, LOG);
4062 #endif
4063  if (rc != 0)
4064  {
4065  /* Message already logged by durable_unlink() */
4066  return;
4067  }
4068  CheckpointStats.ckpt_segs_removed++;
4069  }
4070 
4071  XLogArchiveCleanup(segname);
4072 }
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3552
bool wal_recycle
Definition: xlog.c:128

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 7981 of file xlog.c.

7982 {
7983  XLogRecPtr RecPtr;
7984 
7985  /* XLOG SWITCH has no data */
7986  XLogBeginInsert();
7987 
7988  if (mark_unimportant)
7989  XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
7990  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
7991 
7992  return RecPtr;
7993 }
#define XLOG_SWITCH
Definition: pg_control.h:71
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:153
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1103 of file xlog.c.

1105 {
1106  XLogCtlInsert *Insert = &XLogCtl->Insert;
1107  uint64 startbytepos;
1108  uint64 endbytepos;
1109  uint64 prevbytepos;
1110 
1111  size = MAXALIGN(size);
1112 
1113  /* All (non xlog-switch) records should contain data. */
1114  Assert(size > SizeOfXLogRecord);
1115 
1116  /*
1117  * The duration the spinlock needs to be held is minimized by minimizing
1118  * the calculations that have to be done while holding the lock. The
1119  * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1120  * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1121  * page headers. The mapping between "usable" byte positions and physical
1122  * positions (XLogRecPtrs) can be done outside the locked region, and
1123  * because the usable byte position doesn't include any headers, reserving
1124  * X bytes from WAL is almost as simple as "CurrBytePos += X".
1125  */
1126  SpinLockAcquire(&Insert->insertpos_lck);
1127 
1128  startbytepos = Insert->CurrBytePos;
1129  endbytepos = startbytepos + size;
1130  prevbytepos = Insert->PrevBytePos;
1131  Insert->CurrBytePos = endbytepos;
1132  Insert->PrevBytePos = startbytepos;
1133 
1134  SpinLockRelease(&Insert->insertpos_lck);
1135 
1136  *StartPos = XLogBytePosToRecPtr(startbytepos);
1137  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1138  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1139 
1140  /*
1141  * Check that the conversions between "usable byte positions" and
1142  * XLogRecPtrs work consistently in both directions.
1143  */
1144  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1145  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1146  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1147 }
#define MAXALIGN(LEN)
Definition: c.h:811
static pg_noinline void Size size
Definition: slab.c:607
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1893
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1936

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1159 of file xlog.c.

1160 {
1161  XLogCtlInsert *Insert = &XLogCtl->Insert;
1162  uint64 startbytepos;
1163  uint64 endbytepos;
1164  uint64 prevbytepos;
1165  uint32 size = MAXALIGN(SizeOfXLogRecord);
1166  XLogRecPtr ptr;
1167  uint32 segleft;
1168 
1169  /*
1170  * These calculations are a bit heavy-weight to be done while holding a
1171  * spinlock, but since we're holding all the WAL insertion locks, there
1172  * are no other inserters competing for it. GetXLogInsertRecPtr() does
1173  * compete for it, but that's not called very frequently.
1174  */
1175  SpinLockAcquire(&Insert->insertpos_lck);
1176 
1177  startbytepos = Insert->CurrBytePos;
1178 
1179  ptr = XLogBytePosToEndRecPtr(startbytepos);
1180  if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1181  {
1182  SpinLockRelease(&Insert->insertpos_lck);
1183  *EndPos = *StartPos = ptr;
1184  return false;
1185  }
1186 
1187  endbytepos = startbytepos + size;
1188  prevbytepos = Insert->PrevBytePos;
1189 
1190  *StartPos = XLogBytePosToRecPtr(startbytepos);
1191  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1192 
1193  segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1194  if (segleft != wal_segment_size)
1195  {
1196  /* consume the rest of the segment */
1197  *EndPos += segleft;
1198  endbytepos = XLogRecPtrToBytePos(*EndPos);
1199  }
1200  Insert->CurrBytePos = endbytepos;
1201  Insert->PrevBytePos = startbytepos;
1202 
1203  SpinLockRelease(&Insert->insertpos_lck);
1204 
1205  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1206 
1207  Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1208  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1209  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1210  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1211 
1212  return true;
1213 }

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9404 of file xlog.c.

9405 {
9406  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9407  XLogCtl->InstallXLogFileSegmentActive = true;
9408  LWLockRelease(ControlFileLock);
9409 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9427 of file xlog.c.

9428 {
9429  SpinLockAcquire(&XLogCtl->info_lck);
9430  XLogCtl->WalWriterSleeping = sleeping;
9431  SpinLockRelease(&XLogCtl->info_lck);
9432 }
bool WalWriterSleeping
Definition: xlog.c:528

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char* show_archive_command ( void  )

Definition at line 4777 of file xlog.c.

4778 {
4779  if (XLogArchivingActive())
4780  return XLogArchiveCommand;
4781  else
4782  return "(disabled)";
4783 }
char * XLogArchiveCommand
Definition: xlog.c:120

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char* show_in_hot_standby ( void  )

Definition at line 4789 of file xlog.c.

4790 {
4791  /*
4792  * We display the actual state based on shared memory, so that this GUC
4793  * reports up-to-date state if examined intra-query. The underlying
4794  * variable (in_hot_standby_guc) changes only when we transmit a new value
4795  * to the client.
4796  */
4797  return RecoveryInProgress() ? "on" : "off";
4798 }

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6539 of file xlog.c.

6540 {
6541  /*
6542  * We should have an aux process resource owner to use, and we should not
6543  * be in a transaction that's installed some other resowner.
6544  */
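6545  Assert(AuxProcessResourceOwner != NULL);
6546  Assert(CurrentResourceOwner == NULL ||
6547  CurrentResourceOwner == AuxProcessResourceOwner);
6548  CurrentResourceOwner = AuxProcessResourceOwner;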
6549 
6550  /* Don't be chatty in standalone mode */
6551  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6552  (errmsg("shutting down")));
6553 
6554  /*
6555  * Signal walsenders to move to stopping state.
6556  */
6557  WalSndInitStopping();
6558 
6559  /*
6560  * Wait for WAL senders to be in stopping state. This prevents commands
6561  * from writing new WAL.
6562  */
6563  WalSndWaitStopping();
6564 
6565  if (RecoveryInProgress())
6566  CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6567  else
6568  {
6569  /*
6570  * If archiving is enabled, rotate the last XLOG file so that all the
6571  * remaining records are archived (postmaster wakes up the archiver
6572  * process one more time at the end of shutdown). The checkpoint
6573  * record will go to the next XLOG file and won't be archived (yet).
6574  */
6575  if (XLogArchivingActive())
6576  RequestXLogSwitch(false);
6577 
6578  CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6579  }
6580 }
bool IsPostmasterEnvironment
Definition: globals.c:116
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3745
void WalSndWaitStopping(void)
Definition: walsender.c:3771
bool CreateRestartPoint(int flags)
Definition: xlog.c:7512
void CreateCheckPoint(int flags)
Definition: xlog.c:6821

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5388 of file xlog.c.

5389 {
5391  CheckPoint checkPoint;
5392  bool wasShutdown;
5393  bool didCrash;
5394  bool haveTblspcMap;
5395  bool haveBackupLabel;
5396  XLogRecPtr EndOfLog;
5397  TimeLineID EndOfLogTLI;
5398  TimeLineID newTLI;
5399  bool performedWalRecovery;
5400  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5403  TransactionId oldestActiveXID;
5404  bool promoted = false;
5405 
5406  /*
5407  * We should have an aux process resource owner to use, and we should not
5408  * be in a transaction that's installed some other resowner.
5409  */
5411  Assert(CurrentResourceOwner == NULL ||
5414 
5415  /*
5416  * Check that contents look valid.
5417  */
5419  ereport(FATAL,
5421  errmsg("control file contains invalid checkpoint location")));
5422 
5423  switch (ControlFile->state)
5424  {
5425  case DB_SHUTDOWNED:
5426 
5427  /*
5428  * This is the expected case, so don't be chatty in standalone
5429  * mode
5430  */
5432  (errmsg("database system was shut down at %s",
5433  str_time(ControlFile->time))));
5434  break;
5435 
5437  ereport(LOG,
5438  (errmsg("database system was shut down in recovery at %s",
5439  str_time(ControlFile->time))));
5440  break;
5441 
5442  case DB_SHUTDOWNING:
5443  ereport(LOG,
5444  (errmsg("database system shutdown was interrupted; last known up at %s",
5445  str_time(ControlFile->time))));
5446  break;
5447 
5448  case DB_IN_CRASH_RECOVERY:
5449  ereport(LOG,
5450  (errmsg("database system was interrupted while in recovery at %s",
5452  errhint("This probably means that some data is corrupted and"
5453  " you will have to use the last backup for recovery.")));
5454  break;
5455 
5457  ereport(LOG,
5458  (errmsg("database system was interrupted while in recovery at log time %s",
5460  errhint("If this has occurred more than once some data might be corrupted"
5461  " and you might need to choose an earlier recovery target.")));
5462  break;
5463 
5464  case DB_IN_PRODUCTION:
5465  ereport(LOG,
5466  (errmsg("database system was interrupted; last known up at %s",
5467  str_time(ControlFile->time))));
5468  break;
5469 
5470  default:
5471  ereport(FATAL,
5473  errmsg("control file contains invalid database cluster state")));
5474  }
5475 
5476  /* This is just to allow attaching to startup process with a debugger */
5477 #ifdef XLOG_REPLAY_DELAY
5479  pg_usleep(60000000L);
5480 #endif
5481 
5482  /*
5483  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5484  * In cases where someone has performed a copy for PITR, these directories
5485  * may have been excluded and need to be re-created.
5486  */
5488 
5489  /* Set up timeout handler needed to report startup progress. */
5493 
5494  /*----------
5495  * If we previously crashed, perform a couple of actions:
5496  *
5497  * - The pg_wal directory may still include some temporary WAL segments
5498  * used when creating a new segment, so perform some clean up to not
5499  * bloat this path. This is done first as there is no point to sync
5500  * this temporary data.
5501  *
5502  * - There might be data which we had written, intending to fsync it, but
5503  * which we had not actually fsync'd yet. Therefore, a power failure in
5504  * the near future might cause earlier unflushed writes to be lost, even
5505  * though more recent data written to disk from here on would be
5506  * persisted. To avoid that, fsync the entire data directory.
5507  */
5508  if (ControlFile->state != DB_SHUTDOWNED &&
5510  {
5513  didCrash = true;
5514  }
5515  else
5516  didCrash = false;
5517 
5518  /*
5519  * Prepare for WAL recovery if needed.
5520  *
5521  * InitWalRecovery analyzes the control file and the backup label file, if
5522  * any. It updates the in-memory ControlFile buffer according to the
5523  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5524  * It also applies the tablespace map file, if any.
5525  */
5526  InitWalRecovery(ControlFile, &wasShutdown,
5527  &haveBackupLabel, &haveTblspcMap);
5528  checkPoint = ControlFile->checkPointCopy;
5529 
5530  /* initialize shared memory variables from the checkpoint record */
5531  TransamVariables->nextXid = checkPoint.nextXid;
5532  TransamVariables->nextOid = checkPoint.nextOid;
5534  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5535  AdvanceOldestClogXid(checkPoint.oldestXid);
5536  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5537  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5539  checkPoint.newestCommitTsXid);
5540  XLogCtl->ckptFullXid = checkPoint.nextXid;
5541 
5542  /*
5543  * Clear out any old relcache cache files. This is *necessary* if we do
5544  * any WAL replay, since that would probably result in the cache files
5545  * being out of sync with database reality. In theory we could leave them
5546  * in place if the database had been cleanly shut down, but it seems
5547  * safest to just remove them always and let them be rebuilt during the
5548  * first backend startup. These files needs to be removed from all
5549  * directories including pg_tblspc, however the symlinks are created only
5550  * after reading tablespace_map file in case of archive recovery from
5551  * backup, so needs to clear old relcache files here after creating
5552  * symlinks.
5553  */
5555 
5556  /*
5557  * Initialize replication slots, before there's a chance to remove
5558  * required resources.
5559  */
5561 
5562  /*
5563  * Startup logical state, needs to be setup now so we have proper data
5564  * during crash recovery.
5565  */
5567 
5568  /*
5569  * Startup CLOG. This must be done after TransamVariables->nextXid has
5570  * been initialized and before we accept connections or begin WAL replay.
5571  */
5572  StartupCLOG();
5573 
5574  /*
5575  * Startup MultiXact. We need to do this early to be able to replay
5576  * truncations.
5577  */
5578  StartupMultiXact();
5579 
5580  /*
5581  * Ditto for commit timestamps. Activate the facility if the setting is
5582  * enabled in the control file, as there should be no tracking of commit
5583  * timestamps done when the setting was disabled. This facility can be
5584  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5585  */
5587  StartupCommitTs();
5588 
5589  /*
5590  * Recover knowledge about replay progress of known replication partners.
5591  */
5593 
5594  /*
5595  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5596  * control file. On recovery, all unlogged relations are blown away, so
5597  * the unlogged LSN counter can be reset too.
5598  */
5602  else
5605 
5606  /*
5607  * Copy any missing timeline history files between 'now' and the recovery
5608  * target timeline from archive to pg_wal. While we don't need those files
5609  * ourselves - the history file of the recovery target timeline covers all
5610  * the previous timelines in the history too - a cascading standby server
5611  * might be interested in them. Or, if you archive the WAL from this
5612  * server to a different archive than the primary, it'd be good for all
5613  * the history files to get archived there after failover, so that you can
5614  * use one of the old timelines as a PITR target. Timeline history files
5615  * are small, so it's better to copy them unnecessarily than not copy them
5616  * and regret later.
5617  */
5619 
5620  /*
5621  * Before running in recovery, scan pg_twophase and fill in its status to
5622  * be able to work on entries generated by redo. Doing a scan before
5623  * taking any recovery action has the merit to discard any 2PC files that
5624  * are newer than the first record to replay, saving from any conflicts at
5625  * replay. This avoids as well any subsequent scans when doing recovery
5626  * of the on-disk two-phase data.
5627  */
5629 
5630  /*
5631  * When starting with crash recovery, reset pgstat data - it might not be
5632  * valid. Otherwise restore pgstat data. It's safe to do this here,
5633  * because postmaster will not yet have started any other processes.
5634  *
5635  * NB: Restoring replication slot stats relies on slot state to have
5636  * already been restored from disk.
5637  *
5638  * TODO: With a bit of extra work we could just start with a pgstat file
5639  * associated with the checkpoint redo location we're starting from.
5640  */
5641  if (didCrash)
5643  else
5645 
5646  lastFullPageWrites = checkPoint.fullPageWrites;
5647 
5650 
5651  /* REDO */
5652  if (InRecovery)
5653  {
5654  /* Initialize state for RecoveryInProgress() */
5656  if (InArchiveRecovery)
5658  else
5661 
5662  /*
5663  * Update pg_control to show that we are recovering and to show the
5664  * selected checkpoint as the place we are starting from. We also mark
5665  * pg_control with any minimum recovery stop point obtained from a
5666  * backup history file.
5667  *
5668  * No need to hold ControlFileLock yet, we aren't up far enough.
5669  */
5671 
5672  /*
5673  * If there was a backup label file, it's done its job and the info
5674  * has now been propagated into pg_control. We must get rid of the
5675  * label file so that if we crash during recovery, we'll pick up at
5676  * the latest recovery restartpoint instead of going all the way back
5677  * to the backup start point. It seems prudent though to just rename
5678  * the file out of the way rather than delete it completely.
5679  */
5680  if (haveBackupLabel)
5681  {
5682  unlink(BACKUP_LABEL_OLD);
5684  }
5685 
5686  /*
5687  * If there was a tablespace_map file, it's done its job and the
5688  * symlinks have been created. We must get rid of the map file so
5689  * that if we crash during recovery, we don't create symlinks again.
5690  * It seems prudent though to just rename the file out of the way
5691  * rather than delete it completely.
5692  */
5693  if (haveTblspcMap)
5694  {
5695  unlink(TABLESPACE_MAP_OLD);
5697  }
5698 
5699  /*
5700  * Initialize our local copy of minRecoveryPoint. When doing crash
5701  * recovery we want to replay up to the end of WAL. Particularly, in
5702  * the case of a promoted standby minRecoveryPoint value in the
5703  * control file is only updated after the first checkpoint. However,
5704  * if the instance crashes before the first post-recovery checkpoint
5705  * is completed then recovery will use a stale location causing the
5706  * startup process to think that there are still invalid page
5707  * references when checking for data consistency.
5708  */
5709  if (InArchiveRecovery)
5710  {
5713  }
5714  else
5715  {
5718  }
5719 
5720  /* Check that the GUCs used to generate the WAL allow recovery */
5722 
5723  /*
5724  * We're in recovery, so unlogged relations may be trashed and must be
5725  * reset. This should be done BEFORE allowing Hot Standby
5726  * connections, so that read-only backends don't try to read whatever
5727  * garbage is left over from before.
5728  */
5730 
5731  /*
5732  * Likewise, delete any saved transaction snapshot files that got left
5733  * behind by crashed backends.
5734  */
5736 
5737  /*
5738  * Initialize for Hot Standby, if enabled. We won't let backends in
5739  * yet, not until we've reached the min recovery point specified in
5740  * control file and we've established a recovery snapshot from a
5741  * running-xacts WAL record.
5742  */
5744  {
5745  TransactionId *xids;
5746  int nxids;
5747 
5748  ereport(DEBUG1,
5749  (errmsg_internal("initializing for hot standby")));
5750 
5752 
5753  if (wasShutdown)
5754  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5755  else
5756  oldestActiveXID = checkPoint.oldestActiveXid;
5757  Assert(TransactionIdIsValid(oldestActiveXID));
5758 
5759  /* Tell procarray about the range of xids it has to deal with */
5761 
5762  /*
5763  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5764  * have already been started up and other SLRUs are not maintained
5765  * during recovery and need not be started yet.
5766  */
5767  StartupSUBTRANS(oldestActiveXID);
5768 
5769  /*
5770  * If we're beginning at a shutdown checkpoint, we know that
5771  * nothing was running on the primary at this point. So fake-up an
5772  * empty running-xacts record and use that here and now. Recover
5773  * additional standby state for prepared transactions.
5774  */
5775  if (wasShutdown)
5776  {
5777  RunningTransactionsData running;
5778  TransactionId latestCompletedXid;
5779 
5780  /*
5781  * Construct a RunningTransactions snapshot representing a
5782  * shut down server, with only prepared transactions still
5783  * alive. We're never overflowed at this point because all
5784  * subxids are listed with their parent prepared transactions.
5785  */
5786  running.xcnt = nxids;
5787  running.subxcnt = 0;
5788  running.subxid_overflow = false;
5789  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5790  running.oldestRunningXid = oldestActiveXID;
5791  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5792  TransactionIdRetreat(latestCompletedXid);
5793  Assert(TransactionIdIsNormal(latestCompletedXid));
5794  running.latestCompletedXid = latestCompletedXid;
5795  running.xids = xids;
5796 
5797  ProcArrayApplyRecoveryInfo(&running);
5798 
5800  }
5801  }
5802 
5803  /*
5804  * We're all set for replaying the WAL now. Do it.
5805  */
5807  performedWalRecovery = true;
5808  }
5809  else
5810  performedWalRecovery = false;
5811 
5812  /*
5813  * Finish WAL recovery.
5814  */
5815  endOfRecoveryInfo = FinishWalRecovery();
5816  EndOfLog = endOfRecoveryInfo->endOfLog;
5817  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5818  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5819  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5820 
5821  /*
5822  * Reset ps status display, so as no information related to recovery shows
5823  * up.
5824  */
5825  set_ps_display("");
5826 
5827  /*
5828  * When recovering from a backup (we are in recovery, and archive recovery
5829  * was requested), complain if we did not roll forward far enough to reach
5830  * the point where the database is consistent. For regular online
5831  * backup-from-primary, that means reaching the end-of-backup WAL record
5832  * (at which point we reset backupStartPoint to be Invalid), for
5833  * backup-from-replica (which can't inject records into the WAL stream),
5834  * that point is when we reach the minRecoveryPoint in pg_control (which
5835  * we purposefully copy last when backing up from a replica). For
5836  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5837  * or snapshot-style backups (which don't), backupEndRequired will be set
5838  * to false.
5839  *
5840  * Note: it is indeed okay to look at the local variable
5841  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5842  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5843  * been advanced beyond the WAL we processed.
5844  */
5845  if (InRecovery &&
5846  (EndOfLog < LocalMinRecoveryPoint ||
5848  {
5849  /*
5850  * Ran off end of WAL before reaching end-of-backup WAL record, or
5851  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5852  * recover from an online backup but never called pg_backup_stop(), or
5853  * you didn't archive all the WAL needed.
5854  */
5856  {
5858  ereport(FATAL,
5859  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5860  errmsg("WAL ends before end of online backup"),
5861  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5862  else
5863  ereport(FATAL,
5864  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5865  errmsg("WAL ends before consistent recovery point")));
5866  }
5867  }
5868 
5869  /*
5870  * Reset unlogged relations to the contents of their INIT fork. This is
5871  * done AFTER recovery is complete so as to include any unlogged relations
5872  * created during recovery, but BEFORE recovery is marked as having
5873  * completed successfully. Otherwise we'd not retry if any of the post
5874  * end-of-recovery steps fail.
5875  */
5876  if (InRecovery)
5878 
5879  /*
5880  * Pre-scan prepared transactions to find out the range of XIDs present.
5881  * This information is not quite needed yet, but it is positioned here so
5882  * as potential problems are detected before any on-disk change is done.
5883  */
5884  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5885 
5886  /*
5887  * Allow ordinary WAL segment creation before possibly switching to a new
5888  * timeline, which creates a new segment, and after the last ReadRecord().
5889  */
5891 
5892  /*
5893  * Consider whether we need to assign a new timeline ID.
5894  *
5895  * If we did archive recovery, we always assign a new ID. This handles a
5896  * couple of issues. If we stopped short of the end of WAL during
5897  * recovery, then we are clearly generating a new timeline and must assign
5898  * it a unique new ID. Even if we ran to the end, modifying the current
5899  * last segment is problematic because it may result in trying to
5900  * overwrite an already-archived copy of that segment, and we encourage
5901  * DBAs to make their archive_commands reject that. We can dodge the
5902  * problem by making the new active segment have a new timeline ID.
5903  *
5904  * In a normal crash recovery, we can just extend the timeline we were in.
5905  */
5906  newTLI = endOfRecoveryInfo->lastRecTLI;
5908  {
5909  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5910  ereport(LOG,
5911  (errmsg("selected new timeline ID: %u", newTLI)));
5912 
5913  /*
5914  * Make a writable copy of the last WAL segment. (Note that we also
5915  * have a copy of the last block of the old WAL in
5916  * endOfRecovery->lastPage; we will use that below.)
5917  */
5918  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5919 
5920  /*
5921  * Remove the signal files out of the way, so that we don't
5922  * accidentally re-enter archive recovery mode in a subsequent crash.
5923  */
5924  if (endOfRecoveryInfo->standby_signal_file_found)
5926 
5927  if (endOfRecoveryInfo->recovery_signal_file_found)
5929 
5930  /*
5931  * Write the timeline history file, and have it archived. After this
5932  * point (or rather, as soon as the file is archived), the timeline
5933  * will appear as "taken" in the WAL archive and to any standby
5934  * servers. If we crash before actually switching to the new
5935  * timeline, standby servers will nevertheless think that we switched
5936  * to the new timeline, and will try to connect to the new timeline.
5937  * To minimize the window for that, try to do as little as possible
5938  * between here and writing the end-of-recovery record.
5939  */
5941  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5942 
5943  ereport(LOG,
5944  (errmsg("archive recovery complete")));
5945  }
5946 
5947  /* Save the selected TimeLineID in shared memory, too */
5948  XLogCtl->InsertTimeLineID = newTLI;
5949  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5950 
5951  /*
5952  * Actually, if WAL ended in an incomplete record, skip the parts that
5953  * made it through and start writing after the portion that persisted.
5954  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5955  * we'll do as soon as we're open for writing new WAL.)
5956  */
5958  {
5959  /*
5960  * We should only have a missingContrecPtr if we're not switching to a
5961  * new timeline. When a timeline switch occurs, WAL is copied from the
5962  * old timeline to the new only up to the end of the last complete
5963  * record, so there can't be an incomplete WAL record that we need to
5964  * disregard.
5965  */
5966  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
5968  EndOfLog = missingContrecPtr;
5969  }
5970 
5971  /*
5972  * Prepare to write WAL starting at EndOfLog location, and init xlog
5973  * buffer cache using the block containing the last record from the
5974  * previous incarnation.
5975  */
5976  Insert = &XLogCtl->Insert;
5977  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
5978  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
5979 
5980  /*
5981  * Tricky point here: lastPage contains the *last* block that the LastRec
5982  * record spans, not the one it starts in. The last block is indeed the
5983  * one we want to use.
5984  */
5985  if (EndOfLog % XLOG_BLCKSZ != 0)
5986  {
5987  char *page;
5988  int len;
5989  int firstIdx;
5990 
5991  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
5992  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
5993  Assert(len < XLOG_BLCKSZ);
5994 
5995  /* Copy the valid part of the last block, and zero the rest */
5996  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
5997  memcpy(page, endOfRecoveryInfo->lastPage, len);
5998  memset(page + len, 0, XLOG_BLCKSZ - len);
5999 
6000  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6001  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6002  }
6003  else
6004  {
6005  /*
6006  * There is no partial block to copy. Just set InitializedUpTo, and
6007  * let the first attempt to insert a log record to initialize the next
6008  * buffer.
6009  */
6010  XLogCtl->InitializedUpTo = EndOfLog;
6011  }
6012 
6013  /*
6014  * Update local and shared status. This is OK to do without any locks
6015  * because no other process can be reading or writing WAL yet.
6016  */
6017  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6021  XLogCtl->LogwrtRqst.Write = EndOfLog;
6022  XLogCtl->LogwrtRqst.Flush = EndOfLog;
6023 
6024  /*
6025  * Preallocate additional log files, if wanted.
6026  */
6027  PreallocXlogFiles(EndOfLog, newTLI);
6028 
6029  /*
6030  * Okay, we're officially UP.
6031  */
6032  InRecovery = false;
6033 
6034  /* start the archive_timeout timer and LSN running */
6035  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6036  XLogCtl->lastSegSwitchLSN = EndOfLog;
6037 
6038  /* also initialize latestCompletedXid, to nextXid - 1 */
6039  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6042  LWLockRelease(ProcArrayLock);
6043 
6044  /*
6045  * Start up subtrans, if not already done for hot standby. (commit
6046  * timestamps are started below, if necessary.)
6047  */
6049  StartupSUBTRANS(oldestActiveXID);
6050 
6051  /*
6052  * Perform end of recovery actions for any SLRUs that need it.
6053  */
6054  TrimCLOG();
6055  TrimMultiXact();
6056 
6057  /*
6058  * Reload shared-memory state for prepared transactions. This needs to
6059  * happen before renaming the last partial segment of the old timeline as
6060  * it may be possible that we have to recover some transactions from it.
6061  */
6063 
6064  /* Shut down xlogreader */
6066 
6067  /* Enable WAL writes for this backend only. */
6069 
6070  /* If necessary, write overwrite-contrecord before doing anything else */
6072  {
6075  }
6076 
6077  /*
6078  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6079  * record before resource manager writes cleanup WAL records or checkpoint
6080  * record is written.
6081  */
6082  Insert->fullPageWrites = lastFullPageWrites;
6084 
6085  /*
6086  * Emit checkpoint or end-of-recovery record in XLOG, if required.
6087  */
6088  if (performedWalRecovery)
6089  promoted = PerformRecoveryXLogAction();
6090 
6091  /*
6092  * If any of the critical GUCs have changed, log them before we allow
6093  * backends to write WAL.
6094  */
6096 
6097  /* If this is archive recovery, perform post-recovery cleanup actions. */
6099  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6100 
6101  /*
6102  * Local WAL inserts enabled, so it's time to finish initialization of
6103  * commit timestamp.
6104  */
6106 
6107  /*
6108  * All done with end-of-recovery actions.
6109  *
6110  * Now allow backends to write WAL and update the control file status in
6111  * consequence. SharedRecoveryState, that controls if backends can write
6112  * WAL, is updated while holding ControlFileLock to prevent other backends
6113  * from looking at an inconsistent state of the control file in shared memory.
6114  * There is still a small window during which backends can write WAL and
6115  * the control file is still referring to a system not in DB_IN_PRODUCTION
6116  * state while looking at the on-disk control file.
6117  *
6118  * Also, we use info_lck to update SharedRecoveryState to ensure that
6119  * there are no race conditions concerning visibility of other recent
6120  * updates to shared memory.
6121  */
6122  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6124 
6128 
6130  LWLockRelease(ControlFileLock);
6131 
6132  /*
6133  * Shutdown the recovery environment. This must occur after
6134  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6135  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6136  * any session building a snapshot will not rely on KnownAssignedXids as
6137  * RecoveryInProgress() would return false at this stage. This is
6138  * particularly critical for prepared 2PC transactions, that would still
6139  * need to be included in snapshots once recovery has ended.
6140  */
6143 
6144  /*
6145  * If there were cascading standby servers connected to us, nudge any wal
6146  * sender processes to notice that we've been promoted.
6147  */
6148  WalSndWakeup(true, true);
6149 
6150  /*
6151  * If this was a promotion, request an (online) checkpoint now. This isn't
6152  * required for consistency, but the last restartpoint might be far back,
6153  * and in case of a crash, recovering from it might take longer than is
6154  * appropriate now that we're not in standby mode anymore.
6155  */
6156  if (promoted)
6158 }
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:489
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:652
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
void SyncDataDirectory(void)
Definition: fd.c:3544
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
void TrimMultiXact(void)
Definition: multixact.c:2129
void StartupMultiXact(void)
Definition: multixact.c:2104
void StartupReplicationOrigin(void)
Definition: origin.c:699
@ DB_IN_PRODUCTION
Definition: pg_control.h:96
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:94
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:407
void pgstat_discard_stats(void)
Definition: pgstat.c:419
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6794
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1894
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:84
TransactionId nextXid
Definition: standby.h:83
TransactionId latestCompletedXid
Definition: standby.h:87
TransactionId * xids
Definition: standby.h:89
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:471
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2084
void restoreTwoPhaseData(void)
Definition: twophase.c:1898
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1962
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2043
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3666
void UpdateFullPageWrites(void)
Definition: xlog.c:8087
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5165
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4088
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7361
static void XLogReportParameters(void)
Definition: xlog.c:8024
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6240
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5255
static bool lastFullPageWrites
Definition: xlog.c:217
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5180
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5344
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3821
#define TABLESPACE_MAP_OLD
Definition: xlog.h:302
#define TABLESPACE_MAP
Definition: xlog.h:301
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:297
#define BACKUP_LABEL_OLD
Definition: xlog.h:299
#define BACKUP_LABEL_FILE
Definition: xlog.h:298
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:296
@ RECOVERY_STATE_CRASH
Definition: xlog.h:89
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:90
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:138
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:373
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:372
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:512
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:122
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:49

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_overflow, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow)
static

Definition at line 5165 of file xlog.c.

5166 {
5167  static char buf[128];
5168 
5169  pg_strftime(buf, sizeof(buf),
5170  "%Y-%m-%d %H:%M:%S %Z",
5171  pg_localtime(&tnow, log_timezone));
5172 
5173  return buf;
5174 }
static char * buf
Definition: pg_test_fsync.c:73
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6165 of file xlog.c.

6166 {
6167  /* initialize minRecoveryPoint to this record */
6168  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6170  if (ControlFile->minRecoveryPoint < EndRecPtr)
6171  {
6172  ControlFile->minRecoveryPoint = EndRecPtr;
6173  ControlFile->minRecoveryPointTLI = replayTLI;
6174  }
6175  /* update local copy */
6178 
6179  /*
6180  * The startup process can update its local copy of minRecoveryPoint from
6181  * this point.
6182  */
6183  updateMinRecoveryPoint = true;
6184 
6186 
6187  /*
6188  * We update SharedRecoveryState while holding the lock on ControlFileLock
6189  * so both states are consistent in shared memory.
6190  */
6194 
6195  LWLockRelease(ControlFileLock);
6196 }
static bool updateMinRecoveryPoint
Definition: xlog.c:643

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6759 of file xlog.c.

6760 {
6761  /*
6762  * The status is reported only for end-of-recovery and shutdown
6763  * checkpoints or shutdown restartpoints. Updating the ps display is
6764  * useful in those situations as it may not be possible to rely on
6765  * pg_stat_activity to see the status of the checkpointer or the startup
6766  * process.
6767  */
6768  if ((flags & (CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IS_SHUTDOWN)) == 0)
6769  return;
6770 
6771  if (reset)
6772  set_ps_display("");
6773  else
6774  {
6775  char activitymsg[128];
6776 
6777  snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6778  (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6779  (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6780  restartpoint ? "restartpoint" : "checkpoint");
6781  set_ps_display(activitymsg);
6782  }
6783 }

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6721 of file xlog.c.

6722 {
6723  /*
6724  * To estimate the number of segments consumed between checkpoints, keep a
6725  * moving average of the amount of WAL generated in previous checkpoint
6726  * cycles. However, if the load is bursty, with quiet periods and busy
6727  * periods, we want to cater for the peak load. So instead of a plain
6728  * moving average, let the average decline slowly if the previous cycle
6729  * used less WAL than estimated, but bump it up immediately if it used
6730  * more.
6731  *
6732  * When checkpoints are triggered by max_wal_size, this should converge to
6733  * CheckpointSegments * wal_segment_size,
6734  *
6735  * Note: This doesn't pay any attention to what caused the checkpoint.
6736  * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6737  * starting a base backup, are counted the same as those created
6738  * automatically. The slow-decline will largely mask them out, if they are
6739  * not frequent. If they are frequent, it seems reasonable to count them
6740  * in as any others; if you issue a manual checkpoint every 5 minutes and
6741  * never let a timed checkpoint happen, it makes sense to base the
6742  * preallocation on that 5 minute interval rather than whatever
6743  * checkpoint_timeout is set to.
6744  */
6745  PrevCheckPointDistance = nbytes;
6746  if (CheckPointDistanceEstimate < nbytes)
6747  CheckPointDistanceEstimate = nbytes;
6748  else
6750  (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6751 }

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4526 of file xlog.c.

4527 {
4529 }
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8087 of file xlog.c.

8088 {
8090  bool recoveryInProgress;
8091 
8092  /*
8093  * Do nothing if full_page_writes has not been changed.
8094  *
8095  * It's safe to check the shared full_page_writes without the lock,
8096  * because we assume that there is no concurrently running process which
8097  * can update it.
8098  */
8099  if (fullPageWrites == Insert->fullPageWrites)
8100  return;
8101 
8102  /*
8103  * Perform this outside critical section so that the WAL insert
8104  * initialization done by RecoveryInProgress() doesn't trigger an
8105  * assertion failure.
8106  */
8107  recoveryInProgress = RecoveryInProgress();
8108 
8110 
8111  /*
8112  * It's always safe to take full page images, even when not strictly
8113  * required, but not the other round. So if we're setting full_page_writes
8114  * to true, first set it true and then write the WAL record. If we're
8115  * setting it to false, first write the WAL record and then set the global
8116  * flag.
8117  */
8118  if (fullPageWrites)
8119  {
8121  Insert->fullPageWrites = true;
8123  }
8124 
8125  /*
8126  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8127  * full_page_writes during archive recovery, if required.
8128  */
8129  if (XLogStandbyInfoActive() && !recoveryInProgress)
8130  {
8131  XLogBeginInsert();
8132  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8133 
8134  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8135  }
8136 
8137  if (!fullPageWrites)
8138  {
8140  Insert->fullPageWrites = false;
8142  }
8143  END_CRIT_SECTION();
8144 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:75

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3801 of file xlog.c.

3802 {
3803  uint32 tli;
3804  XLogSegNo segno;
3805 
3806  XLogFromFileName(filename, &tli, &segno, wal_segment_size);
3807 
3809  if (segno > XLogCtl->lastRemovedSegNo)
3810  XLogCtl->lastRemovedSegNo = segno;
3812 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2711 of file xlog.c.

2712 {
2713  /* Quick check using our local copy of the variable */
2714  if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2715  return;
2716 
2717  /*
2718  * An invalid minRecoveryPoint means that we need to recover all the WAL,
2719  * i.e., we're doing crash recovery. We never modify the control file's
2720  * value in that case, so we can short-circuit future checks here too. The
2721  * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2722  * updated until crash recovery finishes. We only do this for the startup
2723  * process as it should not update its own reference of minRecoveryPoint
2724  * until it has finished crash recovery to make sure that all WAL
2725  * available is replayed in this case. This also saves from extra locks
2726  * taken on the control file from the startup process.
2727  */
2729  {
2730  updateMinRecoveryPoint = false;
2731  return;
2732  }
2733 
2734  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2735 
2736  /* update local copy */
2739 
2741  updateMinRecoveryPoint = false;
2742  else if (force || LocalMinRecoveryPoint < lsn)
2743  {
2744  XLogRecPtr newMinRecoveryPoint;
2745  TimeLineID newMinRecoveryPointTLI;
2746 
2747  /*
2748  * To avoid having to update the control file too often, we update it
2749  * all the way to the last record being replayed, even though 'lsn'
2750  * would suffice for correctness. This also allows the 'force' case
2751  * to not need a valid 'lsn' value.
2752  *
2753  * Another important reason for doing it this way is that the passed
2754  * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2755  * the caller got it from a corrupted heap page. Accepting such a
2756  * value as the min recovery point would prevent us from coming up at
2757  * all. Instead, we just log a warning and continue with recovery.
2758  * (See also the comments about corrupt LSNs in XLogFlush.)
2759  */
2760  newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2761  if (!force && newMinRecoveryPoint < lsn)
2762  elog(WARNING,
2763  "xlog min recovery request %X/%X is past current point %X/%X",
2764  LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2765 
2766  /* update control file */
2767  if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2768  {
2769  ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2770  ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2772  LocalMinRecoveryPoint = newMinRecoveryPoint;
2773  LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2774 
2775  ereport(DEBUG2,
2776  (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
2777  LSN_FORMAT_ARGS(newMinRecoveryPoint),
2778  newMinRecoveryPointTLI)));
2779  }
2780  }
2781  LWLockRelease(ControlFileLock);
2782 }
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4088 of file xlog.c.

4089 {
4090  char path[MAXPGPATH];
4091  struct stat stat_buf;
4092 
4093  /* Check for pg_wal; if it doesn't exist, error out */
4094  if (stat(XLOGDIR, &stat_buf) != 0 ||
4095  !S_ISDIR(stat_buf.st_mode))
4096  ereport(FATAL,
4098  errmsg("required WAL directory \"%s\" does not exist",
4099  XLOGDIR)));
4100 
4101  /* Check for archive_status */
4102  snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4103  if (stat(path, &stat_buf) == 0)
4104  {
4105  /* Check for weird cases where it exists but isn't a directory */
4106  if (!S_ISDIR(stat_buf.st_mode))
4107  ereport(FATAL,
4109  errmsg("required WAL directory \"%s\" does not exist",
4110  path)));
4111  }
4112  else
4113  {
4114  ereport(LOG,
4115  (errmsg("creating missing WAL directory \"%s\"", path)));
4116  if (MakePGDirectory(path) < 0)
4117  ereport(FATAL,
4119  errmsg("could not create missing directory \"%s\": %m",
4120  path)));
4121  }
4122 
4123  /* Check for summaries */
4124  snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4125  if (stat(path, &stat_buf) == 0)
4126  {
4127  /* Check for weird cases where it exists but isn't a directory */
4128  if (!S_ISDIR(stat_buf.st_mode))
4129  ereport(FATAL,
4130  (errmsg("required WAL directory \"%s\" does not exist",
4131  path)));
4132  }
4133  else
4134  {
4135  ereport(LOG,
4136  (errmsg("creating missing WAL directory \"%s\"", path)));
4137  if (MakePGDirectory(path) < 0)
4138  ereport(FATAL,
4139  (errmsg("could not create missing directory \"%s\": %m",
4140  path)));
4141  }
4142 }
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
#define S_ISDIR(m)
Definition: win32_port.h:325

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1499 of file xlog.c.

/*
 * WaitXLogInsertionsToFinish -- wait for in-progress WAL insertions that
 * precede 'upto' to finish.
 *
 * Returns the position up to which all insertions are known to have
 * finished.  That can be beyond 'upto' (if someone else advanced the shared
 * logInsertResult further), or smaller than 'upto' in the corner case where
 * the caller asks for WAL that has not even been reserved yet.
 *
 * PANICs when called without a PGPROC (MyProc == NULL).
 */
1500 {
1501  uint64 bytepos;
1502  XLogRecPtr inserted;
1503  XLogRecPtr reservedUpto;
1504  XLogRecPtr finishedUpto;
1505  XLogCtlInsert *Insert = &XLogCtl->Insert;
1506  int i;
1507 
1508  if (MyProc == NULL)
1509  elog(PANIC, "cannot wait without a PGPROC structure");
1510 
1511  /*
1512  * Check if there's any work to do. Use a barrier to ensure we get the
1513  * freshest value.
1514  */
1515  inserted = pg_atomic_read_membarrier_u64(&XLogCtl->logInsertResult);
1516  if (upto <= inserted)
1517  return inserted;
1518 
1519  /* Read the current insert position */
1520  SpinLockAcquire(&Insert->insertpos_lck);
1521  bytepos = Insert->CurrBytePos;
1522  SpinLockRelease(&Insert->insertpos_lck);
1523  reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1524 
1525  /*
1526  * No-one should request to flush a piece of WAL that hasn't even been
1527  * reserved yet. However, it can happen if there is a block with a bogus
1528  * LSN on disk, for example. XLogFlush checks for that situation and
1529  * complains, but only after the flush. Here we just assume that to mean
1530  * that all WAL that has been reserved needs to be finished. In this
1531  * corner-case, the return value can be smaller than 'upto' argument.
1532  */
1533  if (upto > reservedUpto)
1534  {
1535  ereport(LOG,
1536  (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1537  LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
1538  upto = reservedUpto;
1539  }
1540 
1541  /*
1542  * Loop through all the locks, sleeping on any in-progress insert older
1543  * than 'upto'.
1544  *
1545  * finishedUpto is our return value, indicating the point upto which all
1546  * the WAL insertions have been finished. Initialize it to the head of
1547  * reserved WAL, and as we iterate through the insertion locks, back it
1548  * out for any insertion that's still in progress.
1549  */
1550  finishedUpto = reservedUpto;
1551  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1552  {
1553  XLogRecPtr insertingat = InvalidXLogRecPtr;
1554 
1555  do
1556  {
1557  /*
1558  * See if this insertion is in progress. LWLockWaitForVar will
1559  * wait for the lock to be released, or for the 'value' to be set
1560  * by a LWLockUpdateVar call. When a lock is initially acquired,
1561  * its value is 0 (InvalidXLogRecPtr), which means that we don't
1562  * know where it's inserting yet. We will have to wait for it. If
1563  * it's a small insertion, the record will most likely fit on the
1564  * same page and the inserter will release the lock without ever
1565  * calling LWLockUpdateVar. But if it has to sleep, it will
1566  * advertise the insertion point with LWLockUpdateVar before
1567  * sleeping.
1568  *
1569  * In this loop we are only waiting for insertions that started
1570  * before WaitXLogInsertionsToFinish was called. The lack of
1571  * memory barriers in the loop means that we might see locks as
1572  * "unused" that have since become used. This is fine because
1573  * they only can be used for later insertions that we would not
1574  * want to wait on anyway. Not taking a lock to acquire the
1575  * current insertingAt value means that we might see older
1576  * insertingAt values. This is also fine, because if we read a
1577  * value too old, we will add ourselves to the wait queue, which
1578  * contains atomic operations.
1579  */
1580  if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1581  &WALInsertLocks[i].l.insertingAt,
1582  insertingat, &insertingat))
1583  {
1584  /* the lock was free, so no insertion in progress */
1585  insertingat = InvalidXLogRecPtr;
1586  break;
1587  }
1588 
1589  /*
1590  * This insertion is still in progress. Have to wait, unless the
1591  * inserter has proceeded past 'upto'.
1592  */
1593  } while (insertingat < upto);
1594 
1595  if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1596  finishedUpto = insertingat;
1597  }
1598 
1599  /*
1600  * Advance the limit we know to have been inserted and return the freshest
1601  * value we know of, which might be beyond what we requested if somebody
1602  * is concurrently doing this with an 'upto' pointer ahead of us.
1603  */
1604  finishedUpto = pg_atomic_monotonic_advance_u64(&XLogCtl->logInsertResult,
1605  finishedUpto);
1606 
1607  return finishedUpto;
1608 }
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target_)
Definition: atomics.h:581
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1586
PGPROC * MyProc
Definition: proc.c:66
pg_atomic_uint64 insertingAt
Definition: xlog.c:370

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1366 of file xlog.c.

/*
 * WALInsertLockAcquire -- acquire one WAL insertion lock in exclusive mode.
 *
 * The chosen lock index is recorded in MyLockNo.  The function-static
 * lockToTry remembers which lock to use on the next call; it advances to
 * the next lock whenever the acquisition had to wait.
 */
1367 {
1368  bool immed;
1369 
1370  /*
1371  * It doesn't matter which of the WAL insertion locks we acquire, so try
1372  * the one we used last time. If the system isn't particularly busy, it's
1373  * a good bet that it's still available, and it's good to have some
1374  * affinity to a particular lock so that you don't unnecessarily bounce
1375  * cache lines between processes when there's no contention.
1376  *
1377  * If this is the first time through in this backend, pick a lock
1378  * (semi-)randomly. This allows the locks to be used evenly if you have a
1379  * lot of very short connections.
1380  */
1381  static int lockToTry = -1;
1382 
1383  if (lockToTry == -1)
1384  lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1385  MyLockNo = lockToTry;
1386 
1387  /*
1388  * The insertingAt value is initially set to 0, as we don't know our
1389  * insert location yet.
1390  */
1391  immed = LWLockAcquire(&WALInsertLocks[MyLockNo].l.lock, LW_EXCLUSIVE);
1392  if (!immed)
1393  {
1394  /*
1395  * If we couldn't get the lock immediately, try another lock next
1396  * time. On a system with more insertion locks than concurrent
1397  * inserters, this causes all the inserters to eventually migrate to a
1398  * lock that no-one else is using. On a system with more inserters
1399  * than locks, it still helps to distribute the inserters evenly
1400  * across the locks.
1401  */
1402  lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1403  }
1404 }
ProcNumber MyProcNumber
Definition: globals.c:87
static int MyLockNo
Definition: xlog.c:646

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1411 of file xlog.c.

/*
 * WALInsertLockAcquireExclusive -- acquire every WAL insertion lock.
 *
 * Sets holdingAllLocks so that the release and update routines know all
 * NUM_XLOGINSERT_LOCKS locks are held by this backend.
 */
1412 {
1413  int i;
1414 
1415  /*
1416  * When holding all the locks, all but the last lock's insertingAt
1417  * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1418  * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1419  */
1420  for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1421  {
1422  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
1423  LWLockUpdateVar(&WALInsertLocks[i].l.lock,
1424  &WALInsertLocks[i].l.insertingAt,
1425  PG_UINT64_MAX);
1426  }
1427  /* Variable value reset to 0 at release */
1428  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
1429 
1430  holdingAllLocks = true;
1431 }
#define PG_UINT64_MAX
Definition: c.h:593
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1722
static bool holdingAllLocks
Definition: xlog.c:647

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1440 of file xlog.c.

/*
 * WALInsertLockRelease -- release the WAL insertion lock(s) held.
 *
 * Releases all NUM_XLOGINSERT_LOCKS locks when holdingAllLocks is set,
 * otherwise only the lock at MyLockNo.  Each lock's insertingAt variable
 * is reset to 0 as part of the release.
 */
1441 {
1442  if (holdingAllLocks)
1443  {
1444  int i;
1445 
1446  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1447  LWLockReleaseClearVar(&WALInsertLocks[i].l.lock,
1448  &WALInsertLocks[i].l.insertingAt,
1449  0);
1450 
1451  holdingAllLocks = false;
1452  }
1453  else
1454  {
1455  LWLockReleaseClearVar(&WALInsertLocks[MyLockNo].l.lock,
1456  &WALInsertLocks[MyLockNo].l.insertingAt,
1457  0);
1458  }
1459 }
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1856

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1466 of file xlog.c.

/*
 * WALInsertLockUpdateInsertingAt -- advertise our current insert position.
 *
 * Publishes 'insertingAt' through LWLockUpdateVar on the lock this backend
 * holds: the last lock when all locks are held, otherwise the lock at
 * MyLockNo.
 */
1467 {
1468  if (holdingAllLocks)
1469  {
1470  /*
1471  * We use the last lock to mark our actual position, see comments in
1472  * WALInsertLockAcquireExclusive.
1473  */
1474  LWLockUpdateVar(&WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.lock,
1475  &WALInsertLocks[NUM_XLOGINSERT_LOCKS - 1].l.insertingAt,
1476  insertingAt);
1477  }
1478  else
1479  LWLockUpdateVar(&WALInsertLocks[MyLockNo].l.lock,
1480  &WALInsertLocks[MyLockNo].l.insertingAt,
1481  insertingAt);
1482 }

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1743 of file xlog.c.

/*
 * WALReadFromBuffers -- copy WAL out of the in-memory WAL buffers.
 *
 * Copies up to 'count' bytes starting at 'startptr' into 'dstbuf' and
 * returns the number of bytes actually copied.  The result is 0 during
 * recovery or when 'tli' is not the current insertion timeline, and may be
 * less than 'count' if a needed page is not (or is no longer) present in
 * the buffers.  Raises ERROR if the request extends past the WAL inserted
 * so far.
 */
1745 {
1746  char *pdst = dstbuf;
1747  XLogRecPtr recptr = startptr;
1748  XLogRecPtr inserted;
1749  Size nbytes = count;
1750 
1751  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1752  return 0;
1753 
1754  Assert(!XLogRecPtrIsInvalid(startptr));
1755 
1756  /*
1757  * Caller should ensure that the requested data has been inserted into WAL
1758  * buffers before we try to read it.
1759  */
1760  inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
1761  if (startptr + count > inserted)
1762  ereport(ERROR,
1763  errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1764  LSN_FORMAT_ARGS(startptr + count),
1765  LSN_FORMAT_ARGS(inserted)));
1766 
1767  /*
1768  * Loop through the buffers without a lock. For each buffer, atomically
1769  * read and verify the end pointer, then copy the data out, and finally
1770  * re-read and re-verify the end pointer.
1771  *
1772  * Once a page is evicted, it never returns to the WAL buffers, so if the
1773  * end pointer matches the expected end pointer before and after we copy
1774  * the data, then the right page must have been present during the data
1775  * copy. Read barriers are necessary to ensure that the data copy actually
1776  * happens between the two verification steps.
1777  *
1778  * If either verification fails, we simply terminate the loop and return
1779  * with the data that had been already copied out successfully.
1780  */
1781  while (nbytes > 0)
1782  {
1783  uint32 offset = recptr % XLOG_BLCKSZ;
1784  int idx = XLogRecPtrToBufIdx(recptr);
1785  XLogRecPtr expectedEndPtr;
1786  XLogRecPtr endptr;
1787  const char *page;
1788  const char *psrc;
1789  Size npagebytes;
1790 
1791  /*
1792  * Calculate the end pointer we expect in the xlblocks array if the
1793  * correct page is present.
1794  */
1795  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1796 
1797  /*
1798  * First verification step: check that the correct page is present in
1799  * the WAL buffers.
1800  */
1801  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1802  if (expectedEndPtr != endptr)
1803  break;
1804 
1805  /*
1806  * The correct page is present (or was at the time the endptr was
1807  * read; must re-verify later). Calculate pointer to source data and
1808  * determine how much data to read from this page.
1809  */
1810  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1811  psrc = page + offset;
1812  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1813 
1814  /*
1815  * Ensure that the data copy and the first verification step are not
1816  * reordered.
1817  */
1818  pg_read_barrier();
1819 
1820  /* data copy */
1821  memcpy(pdst, psrc, npagebytes);
1822 
1823  /*
1824  * Ensure that the data copy and the second verification step are not
1825  * reordered.
1826  */
1827  pg_read_barrier();
1828 
1829  /*
1830  * Second verification step: check that the page we read from wasn't
1831  * evicted while we were copying the data.
1832  */
1833  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1834  if (expectedEndPtr != endptr)
1835  break;
1836 
1837  pdst += npagebytes;
1838  recptr += npagebytes;
1839  nbytes -= npagebytes;
1840  }
1841 
1842  Assert(pdst - dstbuf <= count);
1843 
1844  return pdst - dstbuf;
1845 }
#define pg_read_barrier()
Definition: atomics.h:151
#define Min(x, y)
Definition: c.h:1004
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6476

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4228 of file xlog.c.

/*
 * WriteControlFile -- create pg_control from the in-memory ControlFile.
 *
 * Fills in the version and compatibility-check fields, computes the CRC
 * over everything before the crc field, and writes a zero-padded image of
 * PG_CONTROL_FILE_SIZE bytes to XLOG_CONTROL_FILE.  The file is opened
 * with O_CREAT | O_EXCL, so it must not already exist.  The file is
 * fsync'ed and closed before returning; any failure is reported at PANIC.
 */
4229 {
4230  int fd;
4231  char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4232 
4233  /*
4234  * Initialize version and compatibility-check fields
4235  */
4236  ControlFile->pg_control_version = PG_CONTROL_VERSION;
4237  ControlFile->catalog_version_no = CATALOG_VERSION_NO;
4238 
4239  ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4240  ControlFile->floatFormat = FLOATFORMAT_VALUE;
4241 
4242  ControlFile->blcksz = BLCKSZ;
4243  ControlFile->relseg_size = RELSEG_SIZE;
4244  ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4245  ControlFile->xlog_seg_size = wal_segment_size;
4246 
4247  ControlFile->nameDataLen = NAMEDATALEN;
4248  ControlFile->indexMaxKeys = INDEX_MAX_KEYS;
4249 
4250  ControlFile->toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
4251  ControlFile->loblksize = LOBLKSIZE;
4252 
4253  ControlFile->float8ByVal = FLOAT8PASSBYVAL;
4254 
4255  /* Contents are protected with a CRC */
4256  INIT_CRC32C(ControlFile->crc);
4257  COMP_CRC32C(ControlFile->crc,
4258  (char *) ControlFile,
4259  offsetof(ControlFileData, crc));
4260  FIN_CRC32C(ControlFile->crc);
4261 
4262  /*
4263  * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4264  * the excess over sizeof(ControlFileData). This reduces the odds of
4265  * premature-EOF errors when reading pg_control. We'll still fail when we
4266  * check the contents of the file, but hopefully with a more specific
4267  * error than "couldn't read pg_control".
4268  */
4269  memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4270  memcpy(buffer, ControlFile, sizeof(ControlFileData));
4271 
4272  fd = BasicOpenFile(XLOG_CONTROL_FILE,
4273  O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4274  if (fd < 0)
4275  ereport(PANIC,
4276  (errcode_for_file_access(),
4277  errmsg("could not create file \"%s\": %m",
4278  XLOG_CONTROL_FILE)));
4279 
4280  errno = 0;
4281  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4282  if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
4283  {
4284  /* if write didn't set errno, assume problem is no disk space */
4285  if (errno == 0)
4286  errno = ENOSPC;
4287  ereport(PANIC,
4288  (errcode_for_file_access(),
4289  errmsg("could not write to file \"%s\": %m",
4290  XLOG_CONTROL_FILE)));
4291  }
4292  pgstat_report_wait_end();
4293 
4294  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4295  if (pg_fsync(fd) != 0)
4296  ereport(PANIC,
4297  (errcode_for_file_access(),
4298  errmsg("could not fsync file \"%s\": %m",
4299  XLOG_CONTROL_FILE)));
4300  pgstat_report_wait_end();
4301 
4302  if (close(fd) != 0)
4303  ereport(PANIC,
4304  (errcode_for_file_access(),
4305  errmsg("could not close file \"%s\": %m",
4306  XLOG_CONTROL_FILE)));
4307 }
#define FLOAT8PASSBYVAL
Definition: c.h:635
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:249

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, FLOAT8PASSBYVAL, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState *  record)

Definition at line 8156 of file xlog.c.

/*
 * xlog_redo -- replay one WAL record belonging to the XLOG resource manager.
 *
 * Dispatches on the record's info bits (with XLR_INFO_MASK cleared).
 * Checkpoint records update the XID/OID/multixact counters and the
 * checkpoint copy in ControlFile, then call RecoveryRestartPoint().
 * XLOG_PARAMETER_CHANGE and XLOG_FPW_CHANGE update the corresponding
 * control-file and shared-memory state.  XLOG_FPI / XLOG_FPI_FOR_HINT
 * restore full-page images.  Several record types need no action here.
 */
8157 {
8158  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8159  XLogRecPtr lsn = record->EndRecPtr;
8160 
8161  /*
8162  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8163  * XLOG_FPI_FOR_HINT records.
8164  */
8165  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8166  !XLogRecHasAnyBlockRefs(record));
8167 
8168  if (info == XLOG_NEXTOID)
8169  {
8170  Oid nextOid;
8171 
8172  /*
8173  * We used to try to take the maximum of TransamVariables->nextOid and
8174  * the recorded nextOid, but that fails if the OID counter wraps
8175  * around. Since no OID allocation should be happening during replay
8176  * anyway, better to just believe the record exactly. We still take
8177  * OidGenLock while setting the variable, just in case.
8178  */
8179  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8180  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8181  TransamVariables->nextOid = nextOid;
8182  TransamVariables->oidCount = 0;
8183  LWLockRelease(OidGenLock);
8184  }
8185  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8186  {
8187  CheckPoint checkPoint;
8188  TimeLineID replayTLI;
8189 
8190  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8191  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8192  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8193  TransamVariables->nextXid = checkPoint.nextXid;
8194  LWLockRelease(XidGenLock);
8195  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8196  TransamVariables->nextOid = checkPoint.nextOid;
8197  TransamVariables->oidCount = 0;
8198  LWLockRelease(OidGenLock);
8199  MultiXactSetNextMXact(checkPoint.nextMulti,
8200  checkPoint.nextMultiOffset);
8201 
8202  MultiXactAdvanceOldest(checkPoint.oldestMulti,
8203  checkPoint.oldestMultiDB);
8204 
8205  /*
8206  * No need to set oldestClogXid here as well; it'll be set when we
8207  * redo an xl_clog_truncate if it changed since initialization.
8208  */
8209  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8210 
8211  /*
8212  * If we see a shutdown checkpoint while waiting for an end-of-backup
8213  * record, the backup was canceled and the end-of-backup record will
8214  * never arrive.
8215  */
8216  if (ArchiveRecoveryRequested &&
8217  !XLogRecPtrIsInvalid(ControlFile->backupStartPoint) &&
8218  XLogRecPtrIsInvalid(ControlFile->backupEndPoint))
8219  ereport(PANIC,
8220  (errmsg("online backup was canceled, recovery cannot continue")));
8221 
8222  /*
8223  * If we see a shutdown checkpoint, we know that nothing was running
8224  * on the primary at this point. So fake-up an empty running-xacts
8225  * record and use that here and now. Recover additional standby state
8226  * for prepared transactions.
8227  */
8228  if (standbyState >= STANDBY_INITIALIZED)
8229  {
8230  TransactionId *xids;
8231  int nxids;
8232  TransactionId oldestActiveXID;
8233  TransactionId latestCompletedXid;
8234  RunningTransactionsData running;
8235 
8236  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8237 
8238  /*
8239  * Construct a RunningTransactions snapshot representing a shut
8240  * down server, with only prepared transactions still alive. We're
8241  * never overflowed at this point because all subxids are listed
8242  * with their parent prepared transactions.
8243  */
8244  running.xcnt = nxids;
8245  running.subxcnt = 0;
8246  running.subxid_overflow = false;
8247  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8248  running.oldestRunningXid = oldestActiveXID;
8249  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8250  TransactionIdRetreat(latestCompletedXid);
8251  Assert(TransactionIdIsNormal(latestCompletedXid));
8252  running.latestCompletedXid = latestCompletedXid;
8253  running.xids = xids;
8254 
8255  ProcArrayApplyRecoveryInfo(&running);
8256 
8257  StandbyRecoverPreparedTransactions();
8258  }
8259 
8260  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8261  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8262  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8263  LWLockRelease(ControlFileLock);
8264 
8265  /* Update shared-memory copy of checkpoint XID/epoch */
8266  SpinLockAcquire(&XLogCtl->info_lck);
8267  XLogCtl->ckptFullXid = checkPoint.nextXid;
8268  SpinLockRelease(&XLogCtl->info_lck);
8269 
8270  /*
8271  * We should've already switched to the new TLI before replaying this
8272  * record.
8273  */
8274  (void) GetCurrentReplayRecPtr(&replayTLI);
8275  if (checkPoint.ThisTimeLineID != replayTLI)
8276  ereport(PANIC,
8277  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8278  checkPoint.ThisTimeLineID, replayTLI)));
8279 
8280  RecoveryRestartPoint(&checkPoint, record);
8281  }
8282  else if (info == XLOG_CHECKPOINT_ONLINE)
8283  {
8284  CheckPoint checkPoint;
8285  TimeLineID replayTLI;
8286 
8287  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8288  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8289  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8290  if (FullTransactionIdPrecedes(TransamVariables->nextXid,
8291  checkPoint.nextXid))
8292  TransamVariables->nextXid = checkPoint.nextXid;
8293  LWLockRelease(XidGenLock);
8294 
8295  /*
8296  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8297  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8298  * counter is from the start of the checkpoint and might well be stale
8299  * compared to later XLOG_NEXTOID records. We could try to take the
8300  * maximum of the nextOid counter and our latest value, but since
8301  * there's no particular guarantee about the speed with which the OID
8302  * counter wraps around, that's a risky thing to do. In any case,
8303  * users of the nextOid counter are required to avoid assignment of
8304  * duplicates, so that a somewhat out-of-date value should be safe.
8305  */
8306 
8307  /* Handle multixact */
8308  MultiXactAdvanceNextMXact(checkPoint.nextMulti,
8309  checkPoint.nextMultiOffset);
8310 
8311  /*
8312  * NB: This may perform multixact truncation when replaying WAL
8313  * generated by an older primary.
8314  */
8315  MultiXactAdvanceOldest(checkPoint.oldestMulti,
8316  checkPoint.oldestMultiDB);
8317  if (TransactionIdPrecedes(TransamVariables->oldestXid,
8318  checkPoint.oldestXid))
8319  SetTransactionIdLimit(checkPoint.oldestXid,
8320  checkPoint.oldestXidDB);
8321  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8322  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8323  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8324  LWLockRelease(ControlFileLock);
8325 
8326  /* Update shared-memory copy of checkpoint XID/epoch */
8327  SpinLockAcquire(&XLogCtl->info_lck);
8328  XLogCtl->ckptFullXid = checkPoint.nextXid;
8329  SpinLockRelease(&XLogCtl->info_lck);
8330 
8331  /* TLI should not change in an on-line checkpoint */
8332  (void) GetCurrentReplayRecPtr(&replayTLI);
8333  if (checkPoint.ThisTimeLineID != replayTLI)
8334  ereport(PANIC,
8335  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8336  checkPoint.ThisTimeLineID, replayTLI)));
8337 
8338  RecoveryRestartPoint(&checkPoint, record);
8339  }
8340  else if (info == XLOG_OVERWRITE_CONTRECORD)
8341  {
8342  /* nothing to do here, handled in xlogrecovery_redo() */
8343  }
8344  else if (info == XLOG_END_OF_RECOVERY)
8345  {
8346  xl_end_of_recovery xlrec;
8347  TimeLineID replayTLI;
8348 
8349  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8350 
8351  /*
8352  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8353  * but this case is rarer and harder to test, so the benefit doesn't
8354  * outweigh the potential extra cost of maintenance.
8355  */
8356 
8357  /*
8358  * We should've already switched to the new TLI before replaying this
8359  * record.
8360  */
8361  (void) GetCurrentReplayRecPtr(&replayTLI);
8362  if (xlrec.ThisTimeLineID != replayTLI)
8363  ereport(PANIC,
8364  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8365  xlrec.ThisTimeLineID, replayTLI)));
8366  }
8367  else if (info == XLOG_NOOP)
8368  {
8369  /* nothing to do here */
8370  }
8371  else if (info == XLOG_SWITCH)
8372  {
8373  /* nothing to do here */
8374  }
8375  else if (info == XLOG_RESTORE_POINT)
8376  {
8377  /* nothing to do here, handled in xlogrecovery.c */
8378  }
8379  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8380  {
8381  /*
8382  * XLOG_FPI records contain nothing else but one or more block
8383  * references. Every block reference must include a full-page image
8384  * even if full_page_writes was disabled when the record was generated
8385  * - otherwise there would be no point in this record.
8386  *
8387  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8388  * WAL-logged because of a hint bit update. They are only generated
8389  * when checksums and/or wal_log_hints are enabled. They may include
8390  * no full-page images if full_page_writes was disabled when they were
8391  * generated. In this case there is nothing to do here.
8392  *
8393  * No recovery conflicts are generated by these generic records - if a
8394  * resource manager needs to generate conflicts, it has to define a
8395  * separate WAL record type and redo routine.
8396  */
8397  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8398  {
8399  Buffer buffer;
8400 
8401  if (!XLogRecHasBlockImage(record, block_id))
8402  {
8403  if (info == XLOG_FPI)
8404  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8405  continue;
8406  }
8407 
8408  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8409  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8410  UnlockReleaseBuffer(buffer);
8411  }
8412  }
8413  else if (info == XLOG_BACKUP_END)
8414  {
8415  /* nothing to do here, handled in xlogrecovery_redo() */
8416  }
8417  else if (info == XLOG_PARAMETER_CHANGE)
8418  {
8419  xl_parameter_change xlrec;
8420 
8421  /* Update our copy of the parameters in pg_control */
8422  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8423 
8424  /*
8425  * Invalidate logical slots if we are in hot standby and the primary
8426  * does not have a WAL level sufficient for logical decoding. No need
8427  * to search for potentially conflicting logically slots if standby is
8428  * running with wal_level lower than logical, because in that case, we
8429  * would have either disallowed creation of logical slots or
8430  * invalidated existing ones.
8431  */
8432  if (InRecovery && InHotStandby &&
8433  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8434  wal_level >= WAL_LEVEL_LOGICAL)
8435  InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_LEVEL,
8436  0, InvalidOid,
8437  InvalidTransactionId);
8438 
8439  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8440  ControlFile->MaxConnections = xlrec.MaxConnections;
8441  ControlFile->max_worker_processes = xlrec.max_worker_processes;
8442  ControlFile->max_wal_senders = xlrec.max_wal_senders;
8443  ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
8444  ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
8445  ControlFile->wal_level = xlrec.wal_level;
8446  ControlFile->wal_log_hints = xlrec.wal_log_hints;
8447 
8448  /*
8449  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8450  * recover back up to this point before allowing hot standby again.
8451  * This is important if the max_* settings are decreased, to ensure
8452  * you don't run queries against the WAL preceding the change. The
8453  * local copies cannot be updated as long as crash recovery is
8454  * happening and we expect all the WAL to be replayed.
8455  */
8456  if (InArchiveRecovery)
8457  {
8458  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
8459  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
8460  }
8461  if (LocalMinRecoveryPoint != InvalidXLogRecPtr && LocalMinRecoveryPoint < lsn)
8462  {
8463  TimeLineID replayTLI;
8464 
8465  (void) GetCurrentReplayRecPtr(&replayTLI);
8466  ControlFile->minRecoveryPoint = lsn;
8467  ControlFile->minRecoveryPointTLI = replayTLI;
8468  }
8469 
8470  CommitTsParameterChange(xlrec.track_commit_timestamp,
8471  ControlFile->track_commit_timestamp);
8472  ControlFile->track_commit_timestamp = xlrec.track_commit_timestamp;
8473 
8474  UpdateControlFile();
8475  LWLockRelease(ControlFileLock);
8476 
8477  /* Check to see if any parameter change gives a problem on recovery */
8478  CheckRequiredParameterValues();
8479  }
8480  else if (info == XLOG_FPW_CHANGE)
8481  {
8482  bool fpw;
8483 
8484  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8485 
8486  /*
8487  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8488  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8489  * full_page_writes has been disabled during online backup.
8490  */
8491  if (!fpw)
8492  {
8493  SpinLockAcquire(&XLogCtl->info_lck);
8494  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8495  XLogCtl->lastFpwDisableRecPtr = record->ReadRecPtr;
8496  SpinLockRelease(&XLogCtl->info_lck);
8497  }
8498 
8499  /* Keep track of full_page_writes */
8500  lastFullPageWrites = fpw;
8501  }
8502  else if (info == XLOG_CHECKPOINT_REDO)
8503  {
8504  /* nothing to do here, just for informational purposes */
8505  }
8506 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4867
unsigned char uint8
Definition: c.h:504
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2487
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2462
#define XLOG_RESTORE_POINT
Definition: pg_control.h:74
#define XLOG_FPI
Definition: pg_control.h:78
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:77
#define XLOG_NEXTOID
Definition: pg_control.h:70
#define XLOG_NOOP
Definition: pg_control.h:69
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:73
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7471
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:74
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:50
#define InHotStandby
Definition: xlogutils.h:57
@ BLK_RESTORED
Definition: xlogutils.h:73

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_overflow, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2979 of file xlog.c.

2980 {
2981  XLogwrtRqst WriteRqst;
2982  bool flexible = true;
2983  static TimestampTz lastflush;
2984  TimestampTz now;
2985  int flushblocks;
2986  TimeLineID insertTLI;
2987 
2988  /* XLOG doesn't need flushing during recovery */
2989  if (RecoveryInProgress())
2990  return false;
2991 
2992  /*
2993  * Since we're not in recovery, InsertTimeLineID is set and can't change,
2994  * so we can read it without a lock.
2995  */
2996  insertTLI = XLogCtl->InsertTimeLineID;
2997 
2998  /* read updated LogwrtRqst */
2999  SpinLockAcquire(&XLogCtl->info_lck);
3000  WriteRqst = XLogCtl->LogwrtRqst;
3001  SpinLockRelease(&XLogCtl->info_lck);
3002 
3003  /* back off to last completed page boundary */
3004  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3005 
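3006  /* if we have already flushed that far, consider async commit records */
3007  RefreshXLogWriteResult(LogwrtResult);
3008  if (WriteRqst.Write <= LogwrtResult.Flush)
3009  {
3010  SpinLockAcquire(&XLogCtl->info_lck);
3011  WriteRqst.Write = XLogCtl->asyncXactLSN;
3012  SpinLockRelease(&XLogCtl->info_lck);
3013  flexible = false; /* ensure it all gets written */
3014  }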
3015 
3016  /*
3017  * If already known flushed, we're done. Just need to check if we are
3018  * holding an open file handle to a logfile that's no longer in use,
3019  * preventing the file from being deleted.
3020  */
3021  if (WriteRqst.Write <= LogwrtResult.Flush)
3022  {
3023  if (openLogFile >= 0)
3024  {
3025  if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
3026  wal_segment_size))
3027  {
3028  XLogFileClose();
3029  }
3030  }
3031  return false;
3032  }
3033 
3034  /*
3035  * Determine how far to flush WAL, based on the wal_writer_delay and
3036  * wal_writer_flush_after GUCs.
3037  *
3038  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3039  * wal_writer_flush_after, to decide when to wake us up. Make sure the
3040  * logic is the same in both places if you change this.
3041  */
3042  now = GetCurrentTimestamp();
3043  flushblocks =
3044  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3045 
3046  if (WalWriterFlushAfter == 0 || lastflush == 0)
3047  {
3048  /* first call, or block based limits disabled */
3049  WriteRqst.Flush = WriteRqst.Write;
3050  lastflush = now;
3051  }
3052  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3053  {
3054  /*
3055  * Flush the writes at least every WalWriterDelay ms. This is
3056  * important to bound the amount of time it takes for an asynchronous
3057  * commit to hit disk.
3058  */
3059  WriteRqst.Flush = WriteRqst.Write;
3060  lastflush = now;
3061  }
3062  else if (flushblocks >= WalWriterFlushAfter)
3063  {
3064  /* exceeded wal_writer_flush_after blocks, flush */
3065  WriteRqst.Flush = WriteRqst.Write;
3066  lastflush = now;
3067  }
3068  else
3069  {
3070  /* no flushing, this time round */
3071  WriteRqst.Flush = 0;
3072  }
3073 
3074 #ifdef WAL_DEBUG
3075  if (XLOG_DEBUG)
3076  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
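3077  LSN_FORMAT_ARGS(WriteRqst.Write),
3078  LSN_FORMAT_ARGS(WriteRqst.Flush),
3079  LSN_FORMAT_ARGS(LogwrtResult.Write),
3080  LSN_FORMAT_ARGS(LogwrtResult.Flush));
3081 #endif
3082 
3083  START_CRIT_SECTION();
3084 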
3085  /* now wait for any in-progress insertions to finish and get write lock */
3086  WaitXLogInsertionsToFinish(WriteRqst.Write);
3087  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3088  RefreshXLogWriteResult(LogwrtResult);
3089  if (WriteRqst.Write > LogwrtResult.Write ||
3090  WriteRqst.Flush > LogwrtResult.Flush)
3091  {
3092  XLogWrite(WriteRqst, insertTLI, flexible);
3093  }
3094  LWLockRelease(WALWriteLock);
3095 
3096  END_CRIT_SECTION();
3097 
3098  /* wake up walsenders now that we've released heavily contended locks */
3099  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
3100 
3101  /*
3102  * Great, done. To take some work off the critical path, try to initialize
3103  * as many of the no-longer-needed WAL buffers for future use as we can.
3104  */
3105  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3106 
3107  /*
3108  * If we determined that we need to write data, but somebody else
3109  * wrote/flushed already, it should be considered as being active, to
3110  * avoid hibernating too early.
3111  */
3112  return true;
3113 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
XLogRecPtr asyncXactLSN
Definition: xlog.c:458
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:66
int WalWriterFlushAfter
Definition: walwriter.c:72
int WalWriterDelay
Definition: walwriter.c:71
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1893 of file xlog.c.

1894 {
1895  uint64 fullsegs;
1896  uint64 fullpages;
1897  uint64 bytesleft;
1898  uint32 seg_offset;
1899  XLogRecPtr result;
1900 
1901  fullsegs = bytepos / UsableBytesInSegment;
1902  bytesleft = bytepos % UsableBytesInSegment;
1903 
1904  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1905  {
1906  /* fits on first page of segment */
1907  if (bytesleft == 0)
1908  seg_offset = 0;
1909  else
1910  seg_offset = bytesleft + SizeOfXLogLongPHD;
1911  }
1912  else
1913  {
1914  /* account for the first page on segment with long header */
1915  seg_offset = XLOG_BLCKSZ;
1916  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1917 
1918  fullpages = bytesleft / UsableBytesInPage;
1919  bytesleft = bytesleft % UsableBytesInPage;
1920 
1921  if (bytesleft == 0)
1922  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1923  else
1924  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1925  }
1926 
1927  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1928 
1929  return result;
1930 }
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1853 of file xlog.c.

1854 {
1855  uint64 fullsegs;
1856  uint64 fullpages;
1857  uint64 bytesleft;
1858  uint32 seg_offset;
1859  XLogRecPtr result;
1860 
1861  fullsegs = bytepos / UsableBytesInSegment;
1862  bytesleft = bytepos % UsableBytesInSegment;
1863 
1864  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1865  {
1866  /* fits on first page of segment */
1867  seg_offset = bytesleft + SizeOfXLogLongPHD;
1868  }
1869  else
1870  {
1871  /* account for the first page on segment with long header */
1872  seg_offset = XLOG_BLCKSZ;
1873  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1874 
1875  fullpages = bytesleft / UsableBytesInPage;
1876  bytesleft = bytesleft % UsableBytesInPage;
1877 
1878  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1879  }
1880 
1881  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1882 
1883  return result;
1884 }

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2285 of file xlog.c.

2286 {
2287  XLogSegNo old_segno;
2288 
2289  XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2290 
2291  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2292  return true;
2293  return false;
2294 }

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4588 of file xlog.c.

4589 {
4590  int xbuffers;
4591 
4592  xbuffers = NBuffers / 32;
4593  if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4594  xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4595  if (xbuffers < 8)
4596  xbuffers = 8;
4597  return xbuffers;
4598 }

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3628 of file xlog.c.

3629 {
3630  Assert(openLogFile >= 0);
3631 
3632  /*
3633  * WAL segment files will not be re-read in normal operation, so we advise
3634  * the OS to release any cached pages. But do not do so if WAL archiving
3635  * or streaming is active, because archiver and walsender process could
3636  * use the cache to read the WAL segment.
3637  */
3638 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3639  if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3640  (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3641 #endif
3642 
3643  if (close(openLogFile) != 0)
3644  {
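3645  char xlogfname[MAXFNAMELEN];
3646  int save_errno = errno;
3647 
3648  XLogFileName(xlogfname, openLogTLI, openLogSegNo, wal_segment_size);
3649  errno = save_errno;
3650  ereport(PANIC,
3651  (errcode_for_file_access(),
3652  errmsg("could not close file \"%s\": %m", xlogfname)));
3653  }
3654 
3655  openLogFile = -1;
3656  ReleaseExternalFD();
3657 }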
void ReleaseExternalFD(void)
Definition: fd.c:1239

References Assert, close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3407 of file xlog.c.

3410 {
3411  char path[MAXPGPATH];
3412  char tmppath[MAXPGPATH];
3413  PGAlignedXLogBlock buffer;
3414  int srcfd;
3415  int fd;
3416  int nbytes;
3417 
3418  /*
3419  * Open the source file
3420  */
3421  XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3422  srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3423  if (srcfd < 0)
3424  ereport(ERROR,
3425  (errcode_for_file_access(),
3426  errmsg("could not open file \"%s\": %m", path)));
3427 
3428  /*
3429  * Copy into a temp file name.
3430  */
3431  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3432 
3433  unlink(tmppath);
3434 
3435  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3436  fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3437  if (fd < 0)
3438  ereport(ERROR,
3439  (errcode_for_file_access(),
3440  errmsg("could not create file \"%s\": %m", tmppath)));
3441 
3442  /*
3443  * Do the data copying.
3444  */
3445  for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3446  {
3447  int nread;
3448 
3449  nread = upto - nbytes;
3450 
3451  /*
3452  * The part that is not read from the source file is filled with
3453  * zeros.
3454  */
3455  if (nread < sizeof(buffer))
3456  memset(buffer.data, 0, sizeof(buffer));
3457 
3458  if (nread > 0)
3459  {
3460  int r;
3461 
3462  if (nread > sizeof(buffer))
3463  nread = sizeof(buffer);
3464  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3465  r = read(srcfd, buffer.data, nread);
3466  if (r != nread)
3467  {
3468  if (r < 0)
3469  ereport(ERROR,
3470  (errcode_for_file_access(),
3471  errmsg("could not read file \"%s\": %m",
3472  path)));
3473  else
3474  ereport(ERROR,
3475  (errcode(ERRCODE_DATA_CORRUPTED),
3476  errmsg("could not read file \"%s\": read %d of %zu",
3477  path, r, (Size) nread)));
3478  }
3479  pgstat_report_wait_end();
3480  }
3481  errno = 0;
3482  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3483  if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3484  {
3485  int save_errno = errno;
3486 
3487  /*
3488  * If we fail to make the file, delete it to release disk space
3489  */
3490  unlink(tmppath);
3491  /* if write didn't set errno, assume problem is no disk space */
3492  errno = save_errno ? save_errno : ENOSPC;
3493 
3494  ereport(ERROR,
3495  (errcode_for_file_access(),
3496  errmsg("could not write to file \"%s\": %m", tmppath)));
3497  }
3498  pgstat_report_wait_end();
3499  }
3500 
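3501  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3502  if (pg_fsync(fd) != 0)
3503  ereport(data_sync_elevel(ERROR),
3504  (errcode_for_file_access(),
3505  errmsg("could not fsync file \"%s\": %m", tmppath)));
3506  pgstat_report_wait_end();
3507 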
3508  if (CloseTransientFile(fd) != 0)
3509  ereport(ERROR,
3510  (errcode_for_file_access(),
3511  errmsg("could not close file \"%s\": %m", tmppath)));
3512 
3513  if (CloseTransientFile(srcfd) != 0)
3514  ereport(ERROR,
3515  (errcode_for_file_access(),
3516  errmsg("could not close file \"%s\": %m", path)));
3517 
3518  /*
3519  * Now move the segment into place with its final name.
3520  */
3521  if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3522  elog(ERROR, "InstallXLogFileSegment should not have failed");
3523 }
int CloseTransientFile(int fd)
Definition: fd.c:2809
int data_sync_elevel(int elevel)
Definition: fd.c:3936
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
char data[XLOG_BLCKSZ]
Definition: c.h:1148

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3369 of file xlog.c.

3370 {
3371  bool ignore_added;
3372  char path[MAXPGPATH];
3373  int fd;
3374 
3375  Assert(logtli != 0);
3376 
3377  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3378  if (fd >= 0)
3379  return fd;
3380 
3381  /* Now open original target segment (might not be file I just made) */
3382  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3383  get_sync_bit(wal_sync_method));
3384  if (fd < 0)
3385  ereport(ERROR,
3386  (errcode_for_file_access(),
3387  errmsg("could not open file \"%s\": %m", path)));
3388  return fd;
3389 }
#define O_CLOEXEC
Definition: win32_port.h:359

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3199 of file xlog.c.

3201 {
3202  char tmppath[MAXPGPATH];
3203  XLogSegNo installed_segno;
3204  XLogSegNo max_segno;
3205  int fd;
3206  int save_errno;
3207  int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3208 
3209  Assert(logtli != 0);
3210 
3211  XLogFilePath(path, logtli, logsegno, wal_segment_size);
3212 
3213  /*
3214  * Try to use existent file (checkpoint maker may have created it already)
3215  */
3216  *added = false;
3217  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3218  get_sync_bit(wal_sync_method));
3219  if (fd < 0)
3220  {
3221  if (errno != ENOENT)
3222  ereport(ERROR,
3223  (errcode_for_file_access(),
3224  errmsg("could not open file \"%s\": %m", path)));
3225  }
3226  else
3227  return fd;
3228 
3229  /*
3230  * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3231  * another process is doing the same thing. If so, we will end up
3232  * pre-creating an extra log segment. That seems OK, and better than
3233  * holding the lock throughout this lengthy process.
3234  */
3235  elog(DEBUG2, "creating and filling new WAL file");
3236 
3237  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3238 
3239  unlink(tmppath);
3240 
3241  if (io_direct_flags & IO_DIRECT_WAL_INIT)
3242  open_flags |= PG_O_DIRECT;
3243 
3244  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3245  fd = BasicOpenFile(tmppath, open_flags);
3246  if (fd < 0)
3247  ereport(ERROR,
3248  (errcode_for_file_access(),
3249  errmsg("could not create file \"%s\": %m", tmppath)));
3250 
3251  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3252  save_errno = 0;
3253  if (wal_init_zero)
3254  {
3255  ssize_t rc;
3256 
3257  /*
3258  * Zero-fill the file. With this setting, we do this the hard way to
3259  * ensure that all the file space has really been allocated. On
3260  * platforms that allow "holes" in files, just seeking to the end
3261  * doesn't allocate intermediate space. This way, we know that we
3262  * have all the space and (after the fsync below) that all the
3263  * indirect blocks are down on disk. Therefore, fdatasync(2) or
3264  * O_DSYNC will be sufficient to sync future writes to the log file.
3265  */
3266  rc = pg_pwrite_zeros(fd, wal_segment_size, 0);
3267 
3268  if (rc < 0)
3269  save_errno = errno;
3270  }
3271  else
3272  {
3273  /*
3274  * Otherwise, seeking to the end and writing a solitary byte is
3275  * enough.
3276  */
3277  errno = 0;
3278  if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3279  {
3280  /* if write didn't set errno, assume no disk space */
3281  save_errno = errno ? errno : ENOSPC;
3282  }
3283  }
3284  pgstat_report_wait_end();
3285 
3286  if (save_errno)
3287  {
3288  /*
3289  * If we fail to make the file, delete it to release disk space
3290  */
3291  unlink(tmppath);
3292 
3293  close(fd);
3294 
3295  errno = save_errno;
3296 
3297  ereport(ERROR,
3298  (errcode_for_file_access(),
3299  errmsg("could not write to file \"%s\": %m", tmppath)));
3300  }
3301 
3302  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3303  if (pg_fsync(fd) != 0)
3304  {
3305  save_errno = errno;
3306  close(fd);
3307  errno = save_errno;
3308  ereport(ERROR,
3309  (errcode_for_file_access(),
3310  errmsg("could not fsync file \"%s\": %m", tmppath)));
3311  }
3312  pgstat_report_wait_end();
3313 
3314  if (close(fd) != 0)
3315  ereport(ERROR,
3316  (errcode_for_file_access(),
3317  errmsg("could not close file \"%s\": %m", tmppath)));
3318 
3319  /*
3320  * Now move the segment into place with its final name. Cope with
3321  * possibility that someone else has created the file while we were
3322  * filling ours: if so, use ours to pre-create a future log segment.
3323  */
3324  installed_segno = logsegno;
3325 
3326  /*
3327  * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3328  * that was a constant, but that was always a bit dubious: normally, at a
3329  * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3330  * here, it was the offset from the insert location. We can't do the
3331  * normal XLOGfileslop calculation here because we don't have access to
3332  * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3333  * CheckPointSegments.
3334  */
3335  max_segno = logsegno + CheckPointSegments;
3336  if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3337  logtli))
3338  {
3339  *added = true;
3340  elog(DEBUG2, "done creating and filling new WAL file");
3341  }
3342  else
3343  {
3344  /*
3345  * No need for any more future segments, or InstallXLogFileSegment()
3346  * failed to rename the file into place. If the rename failed, a
3347  * caller opening the file may fail.
3348  */
3349  unlink(tmppath);
3350  elog(DEBUG2, "abandoned new WAL file");
3351  }
3352 
3353  return -1;
3354 }
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:687
#define pg_pwrite
Definition: port.h:226
bool wal_init_zero
Definition: xlog.c:127

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3607 of file xlog.c.

3608 {
3609  char path[MAXPGPATH];
3610  int fd;
3611 
3612  XLogFilePath(path, tli, segno, wal_segment_size);
3613 
3614  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3615  get_sync_bit(wal_sync_method));
3616  if (fd < 0)
3617  ereport(PANIC,
3618  (errcode_for_file_access(),
3619  errmsg("could not open file \"%s\": %m", path)));
3620 
3621  return fd;
3622 }

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2235 of file xlog.c.

2236 {
2237  XLogSegNo minSegNo;
2238  XLogSegNo maxSegNo;
2239  double distance;
2240  XLogSegNo recycleSegNo;
2241 
2242  /*
2243  * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2244  * correspond to. Always recycle enough segments to meet the minimum, and
2245  * remove enough segments to stay below the maximum.
2246  */
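2247  minSegNo = lastredoptr / wal_segment_size +
2248  ConvertToXSegs(min_wal_size_mb, wal_segment_size) - 1;
2249  maxSegNo = lastredoptr / wal_segment_size +
2250  ConvertToXSegs(max_wal_size_mb, wal_segment_size) - 1;
2251 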
2252  /*
2253  * Between those limits, recycle enough segments to get us through to the
2254  * estimated end of next checkpoint.
2255  *
2256  * To estimate where the next checkpoint will finish, assume that the
2257  * system runs steadily consuming CheckPointDistanceEstimate bytes between
2258  * every checkpoint.
2259  */
2260  distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
2261  /* add 10% for good measure. */
2262  distance *= 1.10;
2263 
2264  recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2265  wal_segment_size);
2266 
2267  if (recycleSegNo < minSegNo)
2268  recycleSegNo = minSegNo;
2269  if (recycleSegNo > maxSegNo)
2270  recycleSegNo = maxSegNo;
2271 
2272  return recycleSegNo;
2273 }

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2791 of file xlog.c.

2792 {
2793  XLogRecPtr WriteRqstPtr;
2794  XLogwrtRqst WriteRqst;
2795  TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2796 
2797  /*
2798  * During REDO, we are reading not writing WAL. Therefore, instead of
2799  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2800  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2801  * to act this way too, and because when it tries to write the
2802  * end-of-recovery checkpoint, it should indeed flush.
2803  */
2804  if (!XLogInsertAllowed())
2805  {
2806  UpdateMinRecoveryPoint(record, false);
2807  return;
2808  }
2809 
2810  /* Quick exit if already known flushed */
2811  if (record <= LogwrtResult.Flush)
2812  return;
2813 
2814 #ifdef WAL_DEBUG
2815  if (XLOG_DEBUG)
2816  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
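2817  LSN_FORMAT_ARGS(record),
2818  LSN_FORMAT_ARGS(LogwrtResult.Write),
2819  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2820 #endif
2821 
2822  START_CRIT_SECTION();
2823 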
2824  /*
2825  * Since fsync is usually a horribly expensive operation, we try to
2826  * piggyback as much data as we can on each fsync: if we see any more data
2827  * entered into the xlog buffer, we'll write and fsync that too, so that
2828  * the final value of LogwrtResult.Flush is as large as possible. This
2829  * gives us some chance of avoiding another fsync immediately after.
2830  */
2831 
2832  /* initialize to given target; may increase below */
2833  WriteRqstPtr = record;
2834 
2835  /*
2836  * Now wait until we get the write lock, or someone else does the flush
2837  * for us.
2838  */
2839  for (;;)
2840  {
2841  XLogRecPtr insertpos;
2842 
2843  /* done already? */
2844  RefreshXLogWriteResult(LogwrtResult);
2845  if (record <= LogwrtResult.Flush)
2846  break;
2847 
2848  /*
2849  * Before actually performing the write, wait for all in-flight
2850  * insertions to the pages we're about to write to finish.
2851  */
2852  SpinLockAcquire(&XLogCtl->info_lck);
2853  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2854  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2855  SpinLockRelease(&XLogCtl->info_lck);
2856  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2857 
2858  /*
2859  * Try to get the write lock. If we can't get it immediately, wait
2860  * until it's released, and recheck if we still need to do the flush
2861  * or if the backend that held the lock did it for us already. This
2862  * helps to maintain a good rate of group committing when the system
2863  * is bottlenecked by the speed of fsyncing.
2864  */
2865  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2866  {
2867  /*
2868  * The lock is now free, but we didn't acquire it yet. Before we
2869  * do, loop back to check if someone else flushed the record for
2870  * us already.
2871  */
2872  continue;
2873  }
2874 
2875  /* Got the lock; recheck whether request is satisfied */
2876  RefreshXLogWriteResult(LogwrtResult);
2877  if (record <= LogwrtResult.Flush)
2878  {
2879  LWLockRelease(WALWriteLock);
2880  break;
2881  }
2882 
2883  /*
2884  * Sleep before flush! By adding a delay here, we may give further
2885  * backends the opportunity to join the backlog of group commit
2886  * followers; this can significantly improve transaction throughput,
2887  * at the risk of increasing transaction latency.
2888  *
2889  * We do not sleep if enableFsync is not turned on, nor if there are
2890  * fewer than CommitSiblings other backends with active transactions.
2891  */
2892  if (CommitDelay > 0 && enableFsync &&
2893  MinimumActiveBackends(CommitSiblings))
2894  {
2895  pg_usleep(CommitDelay);
2896 
2897  /*
2898  * Re-check how far we can now flush the WAL. It's generally not
2899  * safe to call WaitXLogInsertionsToFinish while holding
2900  * WALWriteLock, because an in-progress insertion might need to
2901  * also grab WALWriteLock to make progress. But we know that all
2902  * the insertions up to insertpos have already finished, because
2903  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2904  * We're only calling it again to allow insertpos to be moved
2905  * further forward, not to actually wait for anyone.
2906  */
2907  insertpos = WaitXLogInsertionsToFinish(insertpos);
2908  }
2909 
2910  /* try to write/flush later additions to XLOG as well */
2911  WriteRqst.Write = insertpos;
2912  WriteRqst.Flush = insertpos;
2913 
2914  XLogWrite(WriteRqst, insertTLI, false);
2915 
2916  LWLockRelease(WALWriteLock);
2917  /* done */
2918  break;
2919  }
2920 
2921  END_CRIT_SECTION();
2922 
2923  /* wake up walsenders now that we've released heavily contended locks */
2924  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2925 
2926  /*
2927  * If we still haven't flushed to the request point then we have a
2928  * problem; most likely, the requested flush point is past end of XLOG.
2929  * This has been seen to occur when a disk page has a corrupted LSN.
2930  *
2931  * Formerly we treated this as a PANIC condition, but that hurts the
2932  * system's robustness rather than helping it: we do not want to take down
2933  * the whole system due to corruption on one data page. In particular, if
2934  * the bad page is encountered again during recovery then we would be
2935  * unable to restart the database at all! (This scenario actually
2936  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2937  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2938  * the only time we can reach here during recovery is while flushing the
2939  * end-of-recovery checkpoint record, and we don't expect that to have a
2940  * bad LSN.
2941  *
2942  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2943  * since xact.c calls this routine inside a critical section. However,
2944  * calls from bufmgr.c are not within critical sections and so we will not
2945  * force a restart for a bad LSN on a data page.
2946  */
2947  if (LogwrtResult.Flush < record)
2948  elog(ERROR,
2949  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2950  LSN_FORMAT_ARGS(record),
2951  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2952 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1398
bool MinimumActiveBackends(int min)
Definition: procarray.c:3533
int CommitDelay
Definition: xlog.c:132
int CommitSiblings
Definition: xlog.c:133
bool XLogInsertAllowed(void)
Definition: xlog.c:6345

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3747 of file xlog.c.

3748 {
3749  XLogSegNo lastRemovedSegNo;
3750 
3751  SpinLockAcquire(&XLogCtl->info_lck);
3752  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3753  SpinLockRelease(&XLogCtl->info_lck);
3754 
3755  return lastRemovedSegNo;
3756 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3763 of file xlog.c.

3764 {
3765  DIR *xldir;
3766  struct dirent *xlde;
3767  XLogSegNo oldest_segno = 0;
3768 
3769  xldir = AllocateDir(XLOGDIR);
3770  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3771  {
3772  TimeLineID file_tli;
3773  XLogSegNo file_segno;
3774 
3775  /* Ignore files that are not XLOG segments. */
3776  if (!IsXLogFileName(xlde->d_name))
3777  continue;
3778 
3779  /* Parse filename to get TLI and segno. */
3780  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3781  wal_segment_size);
3782 
3783  /* Ignore anything that's not from the TLI of interest. */
3784  if (tli != file_tli)
3785  continue;
3786 
3787  /* If it's the oldest so far, update oldest_segno. */
3788  if (oldest_segno == 0 || file_segno < oldest_segno)
3789  oldest_segno = file_segno;
3790  }
3791 
3792  FreeDir(xldir);
3793  return oldest_segno;
3794 }

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2690 of file xlog.c.

2691 {
2692  XLogRecPtr retval;
2693 
2694  SpinLockAcquire(&XLogCtl->info_lck);
2695  retval = XLogCtl->replicationSlotMinLSN;
2696  SpinLockRelease(&XLogCtl->info_lck);
2697 
2698  return retval;
2699 }
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:459

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5180 of file xlog.c.

/*
 * XLogInitNewTimeline: prepare the first WAL segment of timeline 'newTLI',
 * which begins at 'endOfLog' on the old timeline 'endTLI'.
 *
 * When endOfLog falls inside a segment, that segment is copied from the old
 * timeline up to the switch offset; when it falls on a segment boundary, a
 * fresh segment is initialized instead.  Either way, any archive status
 * files for the new segment are cleaned up afterwards.  A failed close() of
 * the freshly initialized segment is reported with ERROR.
 */
5181 {
5182  char xlogfname[MAXFNAMELEN];
5183  XLogSegNo endLogSegNo;
5184  XLogSegNo startLogSegNo;
5185 
5186  /* we always switch to a new timeline after archive recovery */
5187  Assert(endTLI != newTLI);
5188 
5189  /*
5190  * Update min recovery point one last time.
5191  */
5192  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
5193 
5194  /*
5195  * Calculate the last segment on the old timeline, and the first segment
5196  * on the new timeline. If the switch happens in the middle of a segment,
5197  * they are the same, but if the switch happens exactly at a segment
5198  * boundary, startLogSegNo will be endLogSegNo + 1.
5199  */
5200  XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5201  XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5202 
5203  /*
5204  * Initialize the starting WAL segment for the new timeline. If the switch
5205  * happens in the middle of a segment, copy data from the last WAL segment
5206  * of the old timeline up to the switch point, to the starting WAL segment
5207  * on the new timeline.
5208  */
5209  if (endLogSegNo == startLogSegNo)
5210  {
5211  /*
5212  * Make a copy of the file on the new timeline.
5213  *
5214  * Writing WAL isn't allowed yet, so there are no locking
5215  * considerations. But we should be just as tense as XLogFileInit to
5216  * avoid emplacing a bogus file.
5217  */
5218  XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5219  XLogSegmentOffset(endOfLog, wal_segment_size));
5220  }
5221  else
5222  {
5223  /*
5224  * The switch happened at a segment boundary, so just create the next
5225  * segment on the new timeline.
5226  */
5227  int fd;
5228 
5229  fd = XLogFileInit(startLogSegNo, newTLI);
5230 
5231  if (close(fd) != 0)
5232  {
5233  int save_errno = errno;
5234 
5235  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5236  errno = save_errno;
5237  ereport(ERROR,
5238  (errcode_for_file_access(),
5239  errmsg("could not close file \"%s\": %m", xlogfname)));
5240  }
5241  }
5242 
5243  /*
5244  * Let's just make real sure there are not .ready or .done flags posted
5245  * for the new segment.
5246  */
5247  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5248  XLogArchiveCleanup(xlogfname);
5249 }
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3407

References Assert, close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6345 of file xlog.c.

/*
 * XLogInsertAllowed: report whether this process may insert new WAL.
 *
 * A non-negative LocalXLogInsertAllowed is returned as-is.  A negative value
 * means the answer depends on RecoveryInProgress(); once recovery is seen to
 * have ended, the local flag is set to 1 so later calls take the fast path.
 */
6346 {
6347  /*
6348  * If value is "unconditionally true" or "unconditionally false", just
6349  * return it. This provides the normal fast path once recovery is known
6350  * done.
6351  */
6352  if (LocalXLogInsertAllowed >= 0)
6353  return (bool) LocalXLogInsertAllowed;
6354 
6355  /*
6356  * Else, must check to see if we're still in recovery.
6357  */
6358  if (RecoveryInProgress())
6359  return false;
6360 
6361  /*
6362  * On exit from recovery, reset to "unconditionally true", since there is
6363  * no need to keep checking.
6364  */
6365  LocalXLogInsertAllowed = 1;
6366  return true;
6367 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 743 of file xlog.c.

/*
 * XLogInsertRecord: insert an assembled WAL record, given as the chain
 * 'rdata' whose first chunk starts with the XLogRecord header.
 *
 * fpw_lsn is compared against RedoRecPtr: when full-page writes are in
 * effect and either they were not before, or fpw_lsn is valid and
 * <= RedoRecPtr, InvalidXLogRecPtr is returned so the caller can start over.
 * flags: XLOG_MARK_UNIMPORTANT leaves lastImportantAt untouched.
 * num_fpi is added to pgWalUsage.wal_fpi.
 * topxid_included: see the "Mark top transaction id" comment in the body.
 *
 * Otherwise returns the end position of the record (for XLOG_SWITCH, the end
 * of the switch record itself rather than of the padded segment), and sets
 * ProcLastRecPtr and XactLastRecEnd.  Raises ERROR when XLogInsertAllowed()
 * is false.
 *
 * NOTE(review): the gutter numbers skip in places (749, 754, 766, 814, ...),
 * so some statements are absent from this rendering; check xlog.c itself
 * before relying on the exact sequence shown here.
 */
748 {
750  pg_crc32c rdata_crc;
751  bool inserted;
752  XLogRecord *rechdr = (XLogRecord *) rdata->data;
753  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
755  XLogRecPtr StartPos;
756  XLogRecPtr EndPos;
757  bool prevDoPageWrites = doPageWrites;
758  TimeLineID insertTLI;
759 
760  /* Does this record type require special handling? */
761  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
762  {
763  if (info == XLOG_SWITCH)
764  class = WALINSERT_SPECIAL_SWITCH;
765  else if (info == XLOG_CHECKPOINT_REDO)
767  }
768 
769  /* we assume that all of the record header is in the first chunk */
770  Assert(rdata->len >= SizeOfXLogRecord);
771 
772  /* cross-check on whether we should be here or not */
773  if (!XLogInsertAllowed())
774  elog(ERROR, "cannot make new WAL entries during recovery");
775 
776  /*
777  * Given that we're not in recovery, InsertTimeLineID is set and can't
778  * change, so we can read it without a lock.
779  */
780  insertTLI = XLogCtl->InsertTimeLineID;
781 
782  /*----------
783  *
784  * We have now done all the preparatory work we can without holding a
785  * lock or modifying shared state. From here on, inserting the new WAL
786  * record to the shared WAL buffer cache is a two-step process:
787  *
788  * 1. Reserve the right amount of space from the WAL. The current head of
789  * reserved space is kept in Insert->CurrBytePos, and is protected by
790  * insertpos_lck.
791  *
792  * 2. Copy the record to the reserved WAL space. This involves finding the
793  * correct WAL buffer containing the reserved space, and copying the
794  * record in place. This can be done concurrently in multiple processes.
795  *
796  * To keep track of which insertions are still in-progress, each concurrent
797  * inserter acquires an insertion lock. In addition to just indicating that
798  * an insertion is in progress, the lock tells others how far the inserter
799  * has progressed. There is a small fixed number of insertion locks,
800  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
801  * boundary, it updates the value stored in the lock to how far it has
802  * inserted, to allow the previous buffer to be flushed.
803  *
804  * Holding onto an insertion lock also protects RedoRecPtr and
805  * fullPageWrites from changing until the insertion is finished.
806  *
807  * Step 2 can usually be done completely in parallel. If the required WAL
808  * page is not initialized yet, you have to grab WALBufMappingLock to
809  * initialize it, but the WAL writer tries to do that ahead of insertions
810  * to avoid that from happening in the critical path.
811  *
812  *----------
813  */
815 
816  if (likely(class == WALINSERT_NORMAL))
817  {
819 
820  /*
821  * Check to see if my copy of RedoRecPtr is out of date. If so, may
822  * have to go back and have the caller recompute everything. This can
823  * only happen just after a checkpoint, so it's better to be slow in
824  * this case and fast otherwise.
825  *
826  * Also check to see if fullPageWrites was just turned on or there's a
827  * running backup (which forces full-page writes); if we weren't
828  * already doing full-page writes then go back and recompute.
829  *
830  * If we aren't doing full-page writes then RedoRecPtr doesn't
831  * actually affect the contents of the XLOG record, so we'll update
832  * our local copy but not force a recomputation. (If doPageWrites was
833  * just turned off, we could recompute the record without full pages,
834  * but we choose not to bother.)
835  */
836  if (RedoRecPtr != Insert->RedoRecPtr)
837  {
838  Assert(RedoRecPtr < Insert->RedoRecPtr);
839  RedoRecPtr = Insert->RedoRecPtr;
840  }
841  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
842 
843  if (doPageWrites &&
844  (!prevDoPageWrites ||
845  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
846  {
847  /*
848  * Oops, some buffer now needs to be backed up that the caller
849  * didn't back up. Start over.
850  */
853  return InvalidXLogRecPtr;
854  }
855 
856  /*
857  * Reserve space for the record in the WAL. This also sets the xl_prev
858  * pointer.
859  */
860  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
861  &rechdr->xl_prev);
862 
863  /* Normal records are always inserted. */
864  inserted = true;
865  }
866  else if (class == WALINSERT_SPECIAL_SWITCH)
867  {
868  /*
869  * In order to insert an XLOG_SWITCH record, we need to hold all of
870  * the WAL insertion locks, not just one, so that no one else can
871  * begin inserting a record until we've figured out how much space
872  * remains in the current WAL segment and claimed all of it.
873  *
874  * Nonetheless, this case is simpler than the normal cases handled
875  * below, which must check for changes in doPageWrites and RedoRecPtr.
876  * Those checks are only needed for records that can contain buffer
877  * references, and an XLOG_SWITCH record never does.
878  */
879  Assert(fpw_lsn == InvalidXLogRecPtr);
881  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
882  }
883  else
884  {
886 
887  /*
888  * We need to update both the local and shared copies of RedoRecPtr,
889  * which means that we need to hold all the WAL insertion locks.
890  * However, there can't be any buffer references, so as above, we need
891  * not check RedoRecPtr before inserting the record; we just need to
892  * update it afterwards.
893  */
894  Assert(fpw_lsn == InvalidXLogRecPtr);
896  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
897  &rechdr->xl_prev);
898  RedoRecPtr = Insert->RedoRecPtr = StartPos;
899  inserted = true;
900  }
901 
902  if (inserted)
903  {
904  /*
905  * Now that xl_prev has been filled in, calculate CRC of the record
906  * header.
907  */
908  rdata_crc = rechdr->xl_crc;
909  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
910  FIN_CRC32C(rdata_crc);
911  rechdr->xl_crc = rdata_crc;
912 
913  /*
914  * All the record data, including the header, is now ready to be
915  * inserted. Copy the record in the space reserved.
916  */
918  class == WALINSERT_SPECIAL_SWITCH, rdata,
919  StartPos, EndPos, insertTLI);
920 
921  /*
922  * Unless record is flagged as not important, update LSN of last
923  * important record in the current slot. When holding all locks, just
924  * update the first one.
925  */
926  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
927  {
928  int lockno = holdingAllLocks ? 0 : MyLockNo;
929 
930  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
931  }
932  }
933  else
934  {
935  /*
936  * This was an xlog-switch record, but the current insert location was
937  * already exactly at the beginning of a segment, so there was no need
938  * to do anything.
939  */
940  }
941 
942  /*
943  * Done! Let others know that we're finished.
944  */
946 
948 
950 
951  /*
952  * Mark top transaction id is logged (if needed) so that we should not try
953  * to log it again with the next WAL record in the current subtransaction.
954  */
955  if (topxid_included)
957 
958  /*
959  * Update shared LogwrtRqst.Write, if we crossed page boundary.
960  */
961  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
962  {
964  /* advance global request to include new block(s) */
965  if (XLogCtl->LogwrtRqst.Write < EndPos)
966  XLogCtl->LogwrtRqst.Write = EndPos;
969  }
970 
971  /*
972  * If this was an XLOG_SWITCH record, flush the record and the empty
973  * padding space that fills the rest of the segment, and perform
974  * end-of-segment actions (eg, notifying archiver).
975  */
976  if (class == WALINSERT_SPECIAL_SWITCH)
977  {
978  TRACE_POSTGRESQL_WAL_SWITCH();
979  XLogFlush(EndPos);
980 
981  /*
982  * Even though we reserved the rest of the segment for us, which is
983  * reflected in EndPos, we return a pointer to just the end of the
984  * xlog-switch record.
985  */
986  if (inserted)
987  {
988  EndPos = StartPos + SizeOfXLogRecord;
989  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
990  {
991  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
992 
993  if (offset == EndPos % XLOG_BLCKSZ)
994  EndPos += SizeOfXLogLongPHD;
995  else
996  EndPos += SizeOfXLogShortPHD;
997  }
998  }
999  }
1000 
1001 #ifdef WAL_DEBUG
1002  if (XLOG_DEBUG)
1003  {
1004  static XLogReaderState *debug_reader = NULL;
1005  XLogRecord *record;
1006  DecodedXLogRecord *decoded;
1008  StringInfoData recordBuf;
1009  char *errormsg = NULL;
1010  MemoryContext oldCxt;
1011 
1012  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1013 
1014  initStringInfo(&buf);
1015  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1016 
1017  /*
1018  * We have to piece together the WAL record data from the XLogRecData
1019  * entries, so that we can pass it to the rm_desc function as one
1020  * contiguous chunk.
1021  */
1022  initStringInfo(&recordBuf);
1023  for (; rdata != NULL; rdata = rdata->next)
1024  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1025 
1026  /* We also need temporary space to decode the record. */
1027  record = (XLogRecord *) recordBuf.data;
1028  decoded = (DecodedXLogRecord *)
1030 
1031  if (!debug_reader)
1032  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1033  XL_ROUTINE(.page_read = NULL,
1034  .segment_open = NULL,
1035  .segment_close = NULL),
1036  NULL);
1037  if (!debug_reader)
1038  {
1039  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1040  }
1041  else if (!DecodeXLogRecord(debug_reader,
1042  decoded,
1043  record,
1044  EndPos,
1045  &errormsg))
1046  {
1047  appendStringInfo(&buf, "error decoding record: %s",
1048  errormsg ? errormsg : "no error message");
1049  }
1050  else
1051  {
1052  appendStringInfoString(&buf, " - ");
1053 
1054  debug_reader->record = decoded;
1055  xlog_outdesc(&buf, debug_reader);
1056  debug_reader->record = NULL;
1057  }
1058  elog(LOG, "%s", buf.data);
1059 
1060  pfree(decoded);
1061  pfree(buf.data);
1062  pfree(recordBuf.data);
1063  MemoryContextSwitchTo(oldCxt);
1064  }
1065 #endif
1066 
1067  /*
1068  * Update our global variables
1069  */
1070  ProcLastRecPtr = StartPos;
1071  XactLastRecEnd = EndPos;
1072 
1073  /* Report WAL traffic to the instrumentation. */
1074  if (inserted)
1075  {
1076  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1078  pgWalUsage.wal_fpi += num_fpi;
1079  }
1080 
1081  return EndPos;
1082 }
#define likely(x)
Definition: c.h:310
#define likely(x)
Definition: c.h:310
#define unlikely(x)
Definition: c.h:311
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
MemoryContextSwitchTo(old_ctx)
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:588
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:538
XLogRecPtr XactLastRecEnd
Definition: xlog.c:254
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1220
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1103
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1159
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3122 of file xlog.c.

/*
 * XLogNeedsFlush: report whether WAL up to 'record' still needs flushing.
 *
 * Outside recovery this compares 'record' with LogwrtResult.Flush, refreshing
 * the local LogwrtResult once before giving up.  During recovery the answer
 * is based on the local copy of minRecoveryPoint instead, refreshed from
 * ControlFile when ControlFileLock can be taken without waiting; if the lock
 * is busy, true is returned as a conservative guess.
 */
3123 {
3124  /*
3125  * During recovery, we don't flush WAL but update minRecoveryPoint
3126  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3127  * would need to be updated.
3128  */
3129  if (RecoveryInProgress())
3130  {
3131  /*
3132  * An invalid minRecoveryPoint means that we need to recover all the
3133  * WAL, i.e., we're doing crash recovery. We never modify the control
3134  * file's value in that case, so we can short-circuit future checks
3135  * here too. This triggers a quick exit path for the startup process,
3136  * which cannot update its local copy of minRecoveryPoint as long as
3137  * it has not replayed all WAL available when doing crash recovery.
3138  */
3139  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3140  updateMinRecoveryPoint = false;
3141 
3142  /* Quick exit if already known to be updated or cannot be updated */
3143  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3144  return false;
3145 
3146  /*
3147  * Update local copy of minRecoveryPoint. But if the lock is busy,
3148  * just return a conservative guess.
3149  */
3150  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3151  return true;
3152  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3153  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
3154  LWLockRelease(ControlFileLock);
3155 
3156  /*
3157  * Check minRecoveryPoint for any other process than the startup
3158  * process doing crash recovery, which should not update the control
3159  * file value if crash recovery is still running.
3160  */
3161  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3162  updateMinRecoveryPoint = false;
3163 
3164  /* check again */
3165  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3166  return false;
3167  else
3168  return true;
3169  }
3170 
3171  /* Quick exit if already known flushed */
3172  if (record <= LogwrtResult.Flush)
3173  return false;
3174 
3175  /* read LogwrtResult and update local state */
3176  RefreshXLogWriteResult(LogwrtResult);
3177 
3178  /* check again */
3179  if (record <= LogwrtResult.Flush)
3180  return false;
3181 
3182  return true;
3183 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1341

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 7944 of file xlog.c.

/*
 * XLogPutNextOid: write an XLOG_NEXTOID record (resource manager RM_XLOG_ID)
 * whose payload is the sizeof(Oid) bytes of 'nextOid'.  The LSN returned by
 * XLogInsert() is discarded and no flush is requested here; the comment in
 * the body explains why that is acceptable.
 */
7945 {
7946  XLogBeginInsert();
7947  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
7948  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
7949 
7950  /*
7951  * We need not flush the NEXTOID record immediately, because any of the
7952  * just-allocated OIDs could only reach disk as part of a tuple insert or
7953  * update that would have its own XLOG record that must follow the NEXTOID
7954  * record. Therefore, the standard buffer LSN interlock applied to those
7955  * records will ensure no such OID reaches disk before the NEXTOID record
7956  * does.
7957  *
7958  * Note, however, that the above statement only covers state "within" the
7959  * database. When we use a generated OID as a file or directory name, we
7960  * are in a sense violating the basic WAL rule, because that filesystem
7961  * change may reach disk before the NEXTOID WAL record does. The impact
7962  * of this is that if a database crash occurs immediately afterward, we
7963  * might after restart re-generate the same OID and find that it conflicts
7964  * with the leftover file or directory. But since for safety's sake we
7965  * always loop until finding a nonconflicting filename, this poses no real
7966  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
7967  */
7968 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1936 of file xlog.c.

/*
 * XLogRecPtrToBytePos: map 'ptr' to a byte count that leaves out page-header
 * space.
 *
 * Every whole segment before ptr contributes UsableBytesInSegment.  Within
 * ptr's segment, the first page is charged a long header (SizeOfXLogLongPHD)
 * and each later page a short one (SizeOfXLogShortPHD); whole pages after
 * the first contribute UsableBytesInPage each.  A page offset of 0 adds
 * nothing for the current page; a non-zero offset is asserted to lie at or
 * beyond that page's header.
 *
 * NOTE(review): UsableBytesInPage / UsableBytesInSegment are defined outside
 * this listing; presumably they are the page and segment sizes minus header
 * space -- confirm against their definitions.
 */
1937 {
1938  uint64 fullsegs;
1939  uint32 fullpages;
1940  uint32 offset;
1941  uint64 result;
1942 
1943  XLByteToSeg(ptr, fullsegs, wal_segment_size);
1944 
1945  fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1946  offset = ptr % XLOG_BLCKSZ;
1947 
1948  if (fullpages == 0)
1949  {
1950  result = fullsegs * UsableBytesInSegment;
1951  if (offset > 0)
1952  {
1953  Assert(offset >= SizeOfXLogLongPHD);
1954  result += offset - SizeOfXLogLongPHD;
1955  }
1956  }
1957  else
1958  {
1959  result = fullsegs * UsableBytesInSegment +
1960  (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1961  (fullpages - 1) * UsableBytesInPage; /* full pages */
1962  if (offset > 0)
1963  {
1964  Assert(offset >= SizeOfXLogShortPHD);
1965  result += offset - SizeOfXLogShortPHD;
1966  }
1967  }
1968 
1969  return result;
1970 }

References Assert, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8024 of file xlog.c.

/*
 * XLogReportParameters: when current settings differ from what ControlFile
 * records, write and flush an XLOG_PARAMETER_CHANGE record and then update
 * state under ControlFileLock (held exclusively).
 *
 * NOTE(review): the gutter numbers skip 8027-8033, 8042, 8047-8051, 8054 and
 * 8065-8073, so the remaining comparisons, the guard around the WAL record,
 * most xlrec field assignments and the statements executed under the lock
 * are not visible in this rendering; consult xlog.c for the full body.
 */
8025 {
8026  if (wal_level != ControlFile->wal_level ||
8034  {
8035  /*
8036  * The change in number of backend slots doesn't need to be WAL-logged
8037  * if archiving is not enabled, as you can't start archive recovery
8038  * with wal_level=minimal anyway. We don't really care about the
8039  * values in pg_control either if wal_level=minimal, but seems better
8040  * to keep them up-to-date to avoid confusion.
8041  */
8043  {
8044  xl_parameter_change xlrec;
8045  XLogRecPtr recptr;
8046 
8052  xlrec.wal_level = wal_level;
8053  xlrec.wal_log_hints = wal_log_hints;
8055 
8056  XLogBeginInsert();
8057  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
8058 
8059  recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
8060  XLogFlush(recptr);
8061  }
8062 
8063  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8064 
8074 
8075  LWLockRelease(ControlFileLock);
8076  }
8077 }

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 7999 of file xlog.c.

/*
 * XLogRestorePoint: write an XLOG_RESTORE_POINT record named 'rpName'.
 *
 * The record carries the current timestamp and the name copied with
 * strlcpy() into a MAXFNAMELEN-byte field, so longer names are truncated.
 * A LOG message reports the name and position, and the LSN returned by
 * XLogInsert() is handed back to the caller.  No flush is requested here.
 */
8000 {
8001  XLogRecPtr RecPtr;
8002  xl_restore_point xlrec;
8003 
8004  xlrec.rp_time = GetCurrentTimestamp();
8005  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8006 
8007  XLogBeginInsert();
8008  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
8009 
8010  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8011 
8012  ereport(LOG,
8013  (errmsg("restore point \"%s\" created at %X/%X",
8014  rpName, LSN_FORMAT_ARGS(RecPtr))));
8015 
8016  return RecPtr;
8017 }
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2626 of file xlog.c.

/*
 * XLogSetAsyncXactLSN: record 'asyncXactLSN' as the newest asynchronous
 * commit position and wake the WAL writer if warranted.
 *
 * XLogCtl->asyncXactLSN is only ever advanced, under info_lck.  If the
 * stored value was already at or past the given LSN, nothing more is done.
 * Otherwise the WAL writer's latch is set when it was sleeping, or when the
 * number of unflushed blocks reaches WalWriterFlushAfter (or that setting is
 * 0).
 */
2627 {
2628  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2629  bool sleeping;
2630  bool wakeup = false;
2631  XLogRecPtr prevAsyncXactLSN;
2632 
2633  SpinLockAcquire(&XLogCtl->info_lck);
2634  sleeping = XLogCtl->WalWriterSleeping;
2635  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2636  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2637  XLogCtl->asyncXactLSN = asyncXactLSN;
2638  SpinLockRelease(&XLogCtl->info_lck);
2639 
2640  /*
2641  * If somebody else already called this function with a more aggressive
2642  * LSN, they will have done what we needed (and perhaps more).
2643  */
2644  if (asyncXactLSN <= prevAsyncXactLSN)
2645  return;
2646 
2647  /*
2648  * If the WALWriter is sleeping, kick it to make it come out of low-power
2649  * mode, so that this async commit will reach disk within the expected
2650  * amount of time. Otherwise, determine whether it has enough WAL
2651  * available to flush, the same way that XLogBackgroundFlush() does.
2652  */
2653  if (sleeping)
2654  wakeup = true;
2655  else
2656  {
2657  int flushblocks;
2658 
2659  RefreshXLogWriteResult(LogwrtResult);
2660 
2661  flushblocks =
2662  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2663 
2664  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2665  wakeup = true;
2666  }
2667 
2668  if (wakeup && ProcGlobal->walwriterLatch)
2669  SetLatch(ProcGlobal->walwriterLatch);
2670 }
void SetLatch(Latch *latch)
Definition: latch.c:632
PROC_HDR * ProcGlobal
Definition: proc.c:78
Latch * walwriterLatch
Definition: proc.h:412
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, XLogCtlData::info_lck, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterLatch, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4885 of file xlog.c.

/*
 * XLOGShmemInit: create or attach to the "XLOG Ctl" and "Control File"
 * shared-memory structures.
 *
 * When both already exist the function returns early after freeing any
 * locally read control file copy.  Otherwise XLogCtl is zeroed, the local
 * control file contents (if any) are copied into shared memory, and the area
 * following XLogCtlData is carved up in order: the xlblocks array
 * (XLOGbuffers entries), the WAL insertion locks (aligned to the padded lock
 * size, NUM_XLOGINSERT_LOCKS entries), and the page buffers (aligned to
 * XLOG_BLCKSZ and zero-filled).
 *
 * NOTE(review): the gutter numbers skip in several places (e.g. 4902, 4914,
 * 4923, 4952, 4958, 4964-4966, 4982-4984, 4987-4992), so the per-element
 * initialisation inside both loops and most of the final field setup are
 * not visible in this rendering; consult xlog.c for those statements.
 */
4886 {
4887  bool foundCFile,
4888  foundXLog;
4889  char *allocptr;
4890  int i;
4891  ControlFileData *localControlFile;
4892 
4893 #ifdef WAL_DEBUG
4894 
4895  /*
4896  * Create a memory context for WAL debugging that's exempt from the normal
4897  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4898  * an allocation fails, but wal_debug is not for production use anyway.
4899  */
4900  if (walDebugCxt == NULL)
4901  {
4903  "WAL Debug",
4905  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4906  }
4907 #endif
4908 
4909 
4910  XLogCtl = (XLogCtlData *)
4911  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4912 
4913  localControlFile = ControlFile;
4915  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4916 
4917  if (foundCFile || foundXLog)
4918  {
4919  /* both should be present or neither */
4920  Assert(foundCFile && foundXLog);
4921 
4922  /* Initialize local copy of WALInsertLocks */
4924 
4925  if (localControlFile)
4926  pfree(localControlFile);
4927  return;
4928  }
4929  memset(XLogCtl, 0, sizeof(XLogCtlData));
4930 
4931  /*
4932  * Already have read control file locally, unless in bootstrap mode. Move
4933  * contents into shared memory.
4934  */
4935  if (localControlFile)
4936  {
4937  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4938  pfree(localControlFile);
4939  }
4940 
4941  /*
4942  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4943  * multiple of the alignment for same, so no extra alignment padding is
4944  * needed here.
4945  */
4946  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4947  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4948  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4949 
4950  for (i = 0; i < XLOGbuffers; i++)
4951  {
4953  }
4954 
4955  /* WAL insertion locks. Ensure they're aligned to the full padded size */
4956  allocptr += sizeof(WALInsertLockPadded) -
4957  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4959  (WALInsertLockPadded *) allocptr;
4960  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4961 
4962  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
4963  {
4967  }
4968 
4969  /*
4970  * Align the start of the page buffers to a full xlog block size boundary.
4971  * This simplifies some calculations in XLOG insertion. It is also
4972  * required for O_DIRECT.
4973  */
4974  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
4975  XLogCtl->pages = allocptr;
4976  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
4977 
4978  /*
4979  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
4980  * in additional info.)
4981  */
4985  XLogCtl->WalWriterSleeping = false;
4986 
4993 }
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:448
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:709
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:60
int XLogCacheBlck
Definition: xlog.c:494
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:444
slock_t insertpos_lck
Definition: xlog.c:398
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4835
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4835 of file xlog.c.

4836 {
4837  Size size;
4838 
4839  /*
4840  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4841  * This isn't an amazingly clean place to do this, but we must wait till
4842  * NBuffers has received its final value, and must do it before using the
4843  * value of XLOGbuffers to do anything important.
4844  *
4845  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4846  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4847  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4848  * the matter with PGC_S_OVERRIDE.
4849  */
4850  if (XLOGbuffers == -1)
4851  {
4852  char buf[32];
4853 
4854  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4855  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4856  PGC_S_DYNAMIC_DEFAULT);
4857  if (XLOGbuffers == -1) /* failed to apply it? */
4858  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4859  PGC_S_OVERRIDE);
4860  }
4861  Assert(XLOGbuffers > 0);
4862 
4863  /* XLogCtl */
4864  size = sizeof(XLogCtlData);
4865 
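4866  /* WAL insertion locks, plus alignment */
4867  size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4868  /* xlblocks array */
4869  size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));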
4870  /* extra alignment padding for XLOG I/O buffers */
4871  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4872  /* and the buffers themselves */
4873  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4874 
4875  /*
4876  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4877  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4878  * routine again below to compute the actual allocation size.
4879  */
4880 
4881  return size;
4882 }
#define Max(x, y)
Definition: c.h:998
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9393 of file xlog.c.

9394 {
9395  ShutdownWalRcv();
9396 
9397  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9398  XLogCtl->InstallXLogFileSegmentActive = false;
9399  LWLockRelease(ControlFileLock);
9400 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2309 of file xlog.c.

2310 {
2311  bool ispartialpage;
2312  bool last_iteration;
2313  bool finishing_seg;
2314  int curridx;
2315  int npages;
2316  int startidx;
2317  uint32 startoffset;
2318 
2319  /* We should always be inside a critical section here */
2320  Assert(CritSectionCount > 0);
2321 
2322  /*
2323  * Update local LogwrtResult (caller probably did this already, but...)
2324  */
2325  RefreshXLogWriteResult(LogwrtResult);
2326 
2327  /*
2328  * Since successive pages in the xlog cache are consecutively allocated,
2329  * we can usually gather multiple pages together and issue just one
2330  * write() call. npages is the number of pages we have determined can be
2331  * written together; startidx is the cache block index of the first one,
2332  * and startoffset is the file offset at which it should go. The latter
2333  * two variables are only valid when npages > 0, but we must initialize
2334  * all of them to keep the compiler quiet.
2335  */
2336  npages = 0;
2337  startidx = 0;
2338  startoffset = 0;
2339 
2340  /*
2341  * Within the loop, curridx is the cache block index of the page to
2342  * consider writing. Begin at the buffer containing the next unwritten
2343  * page, or last partially written page.
2344  */
2345  curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
2346 
2347  while (LogwrtResult.Write < WriteRqst.Write)
2348  {
2349  /*
2350  * Make sure we're not ahead of the insert process. This could happen
2351  * if we're passed a bogus WriteRqst.Write that is past the end of the
2352  * last page that's been initialized by AdvanceXLInsertBuffer.
2353  */
2354  XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2355 
2356  if (LogwrtResult.Write >= EndPtr)
2357  elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
2358  LSN_FORMAT_ARGS(LogwrtResult.Write),
2359  LSN_FORMAT_ARGS(EndPtr));
2360 
2361  /* Advance LogwrtResult.Write to end of current buffer page */
2362  LogwrtResult.Write = EndPtr;
2363  ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2364 
2365  if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2366  wal_segment_size))
2367  {
2368  /*
2369  * Switch to new logfile segment. We cannot have any pending
2370  * pages here (since we dump what we have at segment end).
2371  */
2372  Assert(npages == 0);
2373  if (openLogFile >= 0)
2374  XLogFileClose();
2375  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2376  wal_segment_size);
2377  openLogTLI = tli;
2378 
2379  /* create/use new log file */
2380  openLogFile = XLogFileInit(openLogSegNo, tli);
2381  ReserveExternalFD();
2382  }
2383 
2384  /* Make sure we have the current logfile open */
2385  if (openLogFile < 0)
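2386  {
2387  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2388  wal_segment_size);
2389  openLogTLI = tli;
2390  openLogFile = XLogFileOpen(openLogSegNo, tli);
2391  ReserveExternalFD();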
2392  }
2393 
2394  /* Add current page to the set of pending pages-to-dump */
2395  if (npages == 0)
2396  {
2397  /* first of group */
2398  startidx = curridx;
2399  startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2400  wal_segment_size);
2401  }
2402  npages++;
2403 
2404  /*
2405  * Dump the set if this will be the last loop iteration, or if we are
2406  * at the last page of the cache area (since the next page won't be
2407  * contiguous in memory), or if we are at the end of the logfile
2408  * segment.
2409  */
2410  last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2411 
2412  finishing_seg = !ispartialpage &&
2413  (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2414 
2415  if (last_iteration ||
2416  curridx == XLogCtl->XLogCacheBlck ||
2417  finishing_seg)
2418  {
2419  char *from;
2420  Size nbytes;
2421  Size nleft;
2422  ssize_t written;
2423  instr_time start;
2424 
2425  /* OK to write the page(s) */
2426  from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2427  nbytes = npages * (Size) XLOG_BLCKSZ;
2428  nleft = nbytes;
2429  do
2430  {
2431  errno = 0;
2432 
2433  /* Measure I/O timing to write WAL data */
2434  if (track_wal_io_timing)
2435  INSTR_TIME_SET_CURRENT(start);
2436  else
2437  INSTR_TIME_SET_ZERO(start);
2438 
2439  pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2440  written = pg_pwrite(openLogFile, from, nleft, startoffset);
2441  pgstat_report_wait_end();
2442 
2443  /*
2444  * Increment the I/O timing and the number of times WAL data
2445  * were written out to disk.
2446  */
2447  if (track_wal_io_timing)
2448  {
2449  instr_time end;
2450 
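2451  INSTR_TIME_SET_CURRENT(end);
2452  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start);
2453  }
2454 
2455  PendingWalStats.wal_write++;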
2456 
2457  if (written <= 0)
2458  {
2459  char xlogfname[MAXFNAMELEN];
2460  int save_errno;
2461 
2462  if (errno == EINTR)
2463  continue;
2464 
2465  save_errno = errno;
2466  XLogFileName(xlogfname, tli, openLogSegNo,
2467  wal_segment_size);
2468  errno = save_errno;
2469  ereport(PANIC,
2470  (errcode_for_file_access(),
2471  errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2472  xlogfname, startoffset, nleft)));
2473  }
2474  nleft -= written;
2475  from += written;
2476  startoffset += written;
2477  } while (nleft > 0);
2478 
2479  npages = 0;
2480 
2481  /*
2482  * If we just wrote the whole last page of a logfile segment,
2483  * fsync the segment immediately. This avoids having to go back
2484  * and re-open prior segments when an fsync request comes along
2485  * later. Doing it here ensures that one and only one backend will
2486  * perform this fsync.
2487  *
2488  * This is also the right place to notify the Archiver that the
2489  * segment is ready to copy to archival storage, and to update the
2490  * timer for archive_timeout, and to signal for a checkpoint if
2491  * too many logfile segments have been used since the last
2492  * checkpoint.
2493  */
2494  if (finishing_seg)
2495  {
2496  issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2497 
2498  /* signal that we need to wakeup walsenders later */
2499  WalSndWakeupRequest();
2500 
2501  LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2502 
2503  if (XLogArchivingActive())
2504  XLogArchiveNotifySeg(openLogSegNo, tli);
2505 
2506  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2507  XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
2508 
2509  /*
2510  * Request a checkpoint if we've consumed too much xlog since
2511  * the last one. For speed, we first check using the local
2512  * copy of RedoRecPtr, which might be out of date; if it looks
2513  * like a checkpoint is needed, forcibly update RedoRecPtr and
2514  * recheck.
2515  */
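2516  if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
2517  {
2518  (void) GetRedoRecPtr();
2519  if (XLogCheckpointNeeded(openLogSegNo))
2520  RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);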
2521  }
2522  }
2523  }
2524 
2525  if (ispartialpage)
2526  {
2527  /* Only asked to write a partial page */
2528  LogwrtResult.Write = WriteRqst.Write;
2529  break;
2530  }
2531  curridx = NextBufIdx(curridx);
2532 
2533  /* If flexible, break out of loop as soon as we wrote something */
2534  if (flexible && npages == 0)
2535  break;
2536  }
2537 
2538  Assert(npages == 0);
2539 
2540  /*
2541  * If asked to flush, do so
2542  */
2543  if (LogwrtResult.Flush < WriteRqst.Flush &&
2544  LogwrtResult.Flush < LogwrtResult.Write)
2545  {
2546  /*
2547  * Could get here without iterating above loop, in which case we might
2548  * have no open file or the wrong one. However, we do not need to
2549  * fsync more than one file.
2550  */
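2551  if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
2552  wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
2553  {
2554  if (openLogFile >= 0 &&
2555  !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2556  wal_segment_size))
2557  XLogFileClose();
2558  if (openLogFile < 0)
2559  {
2560  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2561  wal_segment_size);
2562  openLogTLI = tli;
2563  openLogFile = XLogFileOpen(openLogSegNo, tli);
2564  ReserveExternalFD();
2565  }
2566 
2567  issue_xlog_fsync(openLogFile, openLogSegNo, tli);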
2568  }
2569 
2570  /* signal that we need to wakeup walsenders later */
2571  WalSndWakeupRequest();
2572 
2573  LogwrtResult.Flush = LogwrtResult.Write;
2574  }
2575 
2576  /*
2577  * Update shared-memory status
2578  *
2579  * We make sure that the shared 'request' values do not fall behind the
2580  * 'result' values. This is not absolutely essential, but it saves some
2581  * code in a couple of places.
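2582  */
2583  SpinLockAcquire(&XLogCtl->info_lck);
2584  if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
2585  XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
2586  if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
2587  XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
2588  SpinLockRelease(&XLogCtl->info_lck);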
2589 
2590  /*
2591  * We write Write first, bar, then Flush. When reading, the opposite must
2592  * be done (with a matching barrier in between), so that we always see a
2593  * Flush value that trails behind the Write value seen.
2594  */
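2595  pg_atomic_write_u64(&XLogCtl->logWriteResult, LogwrtResult.Write);
2596  pg_write_barrier();
2597  pg_atomic_write_u64(&XLogCtl->logFlushResult, LogwrtResult.Flush);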
2598 
2599 #ifdef USE_ASSERT_CHECKING
2600  {
2601  XLogRecPtr Flush;
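2602  XLogRecPtr Write;
2603  XLogRecPtr Insert;
2604 
2605  Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult);
2606  pg_read_barrier();
2607  Write = pg_atomic_read_u64(&XLogCtl->logWriteResult);
2608  pg_read_barrier();
2609  Insert = pg_atomic_read_u64(&XLogCtl->logInsertResult);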
2610 
2611  /* WAL written to disk is always ahead of WAL flushed */
2612  Assert(Write >= Flush);
2613 
2614  /* WAL inserted to buffers is always ahead of WAL written */
2615  Assert(Insert >= Write);
2616  }
2617 #endif
2618 }
void ReserveExternalFD(void)
Definition: fd.c:1221
volatile uint32 CritSectionCount
Definition: globals.c:43
PgStat_Counter wal_write
Definition: pgstat.h:453
instr_time wal_write_time
Definition: pgstat.h:455
XLogRecPtr Flush
Definition: walreceiver.c:111
XLogRecPtr Write
Definition: walreceiver.c:110
#define WalSndWakeupRequest()
Definition: walsender.h:59
#define EINTR
Definition: win32_port.h:374
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6393
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3607
#define NextBufIdx(idx)
Definition: xlog.c:579
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8603
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2285
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_write, PgStat_PendingWalStats::wal_write_time, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:65
@ ARCHIVE_MODE_OFF
Definition: xlog.h:63
@ ARCHIVE_MODE_ON
Definition: xlog.h:64

Definition at line 166 of file xlog.c.

◆ bootstrap_data_checksum_version

uint32 bootstrap_data_checksum_version
extern

Definition at line 44 of file bootstrap.c.

Referenced by BootstrapModeMain(), and InitControlFile().

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 166 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 159 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 122 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 217 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 224 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 236 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 135 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 115 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 631 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 632 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 160 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

bool track_wal_io_timing = false

Definition at line 137 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 643 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 124 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 126 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 125 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 136 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 127 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 116 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 123 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 128 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 143 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 166 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 118 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 117 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl