PostgreSQL Source Code  git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_iovec.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
bool check_max_slot_wal_keep_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
void CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 109 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 596 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 573 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 577 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 148 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
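do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)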
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:460
pg_atomic_uint64 logWriteResult
Definition: xlog.c:470
pg_atomic_uint64 logFlushResult
Definition: xlog.c:471
static XLogCtlData * XLogCtl
Definition: xlog.c:559

Definition at line 613 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 590 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 584 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 552 of file xlog.c.

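552 typedef enum
553 {
554  WALINSERT_NORMAL,
555  WALINSERT_SPECIAL_SWITCH,
556  WALINSERT_SPECIAL_CHECKPOINT
557 } WalInsertClass;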
WalInsertClass
Definition: xlog.c:553
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:555
@ WALINSERT_NORMAL
Definition: xlog.c:554
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:556

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1978 of file xlog.c.

1979 {
1980  XLogCtlInsert *Insert = &XLogCtl->Insert;
1981  int nextidx;
1982  XLogRecPtr OldPageRqstPtr;
1983  XLogwrtRqst WriteRqst;
1984  XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1985  XLogRecPtr NewPageBeginPtr;
1986  XLogPageHeader NewPage;
1987  int npages pg_attribute_unused() = 0;
1988 
1989  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
1990 
1991  /*
1992  * Now that we have the lock, check if someone initialized the page
1993  * already.
1994  */
1995  while (upto >= XLogCtl->InitializedUpTo || opportunistic)
1996  {
1997  nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
1998 
1999  /*
2000  * Get ending-offset of the buffer page we need to replace (this may
2001  * be zero if the buffer hasn't been used yet). Fall through if it's
2002  * already written out.
2003  */
2004  OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
2005  if (LogwrtResult.Write < OldPageRqstPtr)
2006  {
2007  /*
2008  * Nope, got work to do. If we just want to pre-initialize as much
2009  * as we can without flushing, give up now.
2010  */
2011  if (opportunistic)
2012  break;
2013 
2014  /* Advance shared memory write request position */
2015  SpinLockAcquire(&XLogCtl->info_lck);
2016  if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2017  XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2018  SpinLockRelease(&XLogCtl->info_lck);
2019 
2020  /*
2021  * Acquire an up-to-date LogwrtResult value and see if we still
2022  * need to write it or if someone else already did.
2023  */
2024  RefreshXLogWriteResult(LogwrtResult);
2025  if (LogwrtResult.Write < OldPageRqstPtr)
2026  {
2027  /*
2028  * Must acquire write lock. Release WALBufMappingLock first,
2029  * to make sure that all insertions that we need to wait for
2030  * can finish (up to this same position). Otherwise we risk
2031  * deadlock.
2032  */
2033  LWLockRelease(WALBufMappingLock);
2034 
2035  WaitXLogInsertionsToFinish(OldPageRqstPtr);
2036 
2037  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2038 
2039  RefreshXLogWriteResult(LogwrtResult);
2040  if (LogwrtResult.Write >= OldPageRqstPtr)
2041  {
2042  /* OK, someone wrote it already */
2043  LWLockRelease(WALWriteLock);
2044  }
2045  else
2046  {
2047  /* Have to write it ourselves */
2048  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2049  WriteRqst.Write = OldPageRqstPtr;
2050  WriteRqst.Flush = 0;
2051  XLogWrite(WriteRqst, tli, false);
2052  LWLockRelease(WALWriteLock);
2053  PendingWalStats.wal_buffers_full++;
2054  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2055  }
2056  /* Re-acquire WALBufMappingLock and retry */
2057  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2058  continue;
2059  }
2060  }
2061 
2062  /*
2063  * Now the next buffer slot is free and we can set it up to be the
2064  * next output page.
2065  */
2066  NewPageBeginPtr = XLogCtl->InitializedUpTo;
2067  NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2068 
2069  Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2070 
2071  NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2072 
2073  /*
2074  * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2075  * before initializing. Otherwise, the old page may be partially
2076  * zeroed but look valid.
2077  */
2078  pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], InvalidXLogRecPtr);
2079  pg_write_barrier();
2080 
2081  /*
2082  * Be sure to re-zero the buffer so that bytes beyond what we've
2083  * written will look like zeroes and not valid XLOG records...
2084  */
2085  MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
2086 
2087  /*
2088  * Fill the new page's header
2089  */
2090  NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2091 
2092  /* NewPage->xlp_info = 0; */ /* done by memset */
2093  NewPage->xlp_tli = tli;
2094  NewPage->xlp_pageaddr = NewPageBeginPtr;
2095 
2096  /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2097 
2098  /*
2099  * If online backup is not in progress, mark the header to indicate
2100  * that WAL records beginning in this page have removable backup
2101  * blocks. This allows the WAL archiver to know whether it is safe to
2102  * compress archived WAL data by transforming full-block records into
2103  * the non-full-block format. It is sufficient to record this at the
2104  * page level because we force a page switch (in fact a segment
2105  * switch) when starting a backup, so the flag will be off before any
2106  * records can be written during the backup. At the end of a backup,
2107  * the last page will be marked as all unsafe when perhaps only part
2108  * is unsafe, but at worst the archiver would miss the opportunity to
2109  * compress a few records.
2110  */
2111  if (Insert->runningBackups == 0)
2112  NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2113 
2114  /*
2115  * If first page of an XLOG segment file, make it a long header.
2116  */
2117  if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2118  {
2119  XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2120 
2121  NewLongPage->xlp_sysid = ControlFile->system_identifier;
2122  NewLongPage->xlp_seg_size = wal_segment_size;
2123  NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2124  NewPage->xlp_info |= XLP_LONG_HEADER;
2125  }
2126 
2127  /*
2128  * Make sure the initialization of the page becomes visible to others
2129  * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2130  * holding a lock.
2131  */
2132  pg_write_barrier();
2133 
2134  pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2135  XLogCtl->InitializedUpTo = NewPageEndPtr;
2136 
2137  npages++;
2138  }
2139  LWLockRelease(WALBufMappingLock);
2140 
2141 #ifdef WAL_DEBUG
2142  if (XLOG_DEBUG && npages > 0)
2143  {
2144  elog(DEBUG1, "initialized %d pages, up to %X/%X",
2145  npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2146  }
2147 #endif
2148 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:478
#define pg_write_barrier()
Definition: atomics.h:150
#define pg_attribute_unused()
Definition: c.h:123
#define Assert(condition)
Definition: c.h:858
#define MemSet(start, val, len)
Definition: c.h:1020
size_t Size
Definition: c.h:605
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
static void Insert(File file)
Definition: fd.c:1313
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_EXCLUSIVE
Definition: lwlock.h:114
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint64 system_identifier
Definition: pg_control.h:110
PgStat_Counter wal_buffers_full
Definition: pgstat.h:452
XLogwrtRqst LogwrtRqst
Definition: xlog.c:453
slock_t info_lck
Definition: xlog.c:546
XLogRecPtr InitializedUpTo
Definition: xlog.c:483
char * pages
Definition: xlog.c:490
pg_atomic_uint64 * xlblocks
Definition: xlog.c:491
XLogCtlInsert Insert
Definition: xlog.c:450
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:325
XLogRecPtr Flush
Definition: xlog.c:320
XLogRecPtr Write
Definition: xlog.c:319
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1497
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:613
int wal_segment_size
Definition: xlog.c:141
static XLogwrtResult LogwrtResult
Definition: xlog.c:605
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:584
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2307
static ControlFileData * ControlFile
Definition: xlog.c:567
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), PgStat_PendingWalStats::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2191 of file xlog.c.

2192 {
2193  CheckPointCompletionTarget = newval;
2194  CalculateCheckpointSegments();
2195 }
double CheckPointCompletionTarget
Definition: checkpointer.c:138
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2155

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2184 of file xlog.c.

2185 {
2186  max_wal_size_mb = newval;
2187  CalculateCheckpointSegments();
2188 }
int max_wal_size_mb
Definition: xlog.c:112

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4722 of file xlog.c.

4723 {
4724  /*
4725  * If some checks were deferred, it's possible that the checks will fail
4726  * later during InitializeWalConsistencyChecking(). But in that case, the
4727  * postmaster will exit anyway, so it's safe to proceed with the
4728  * assignment.
4729  *
4730  * Any built-in resource managers specified are assigned immediately,
4731  * which affects WAL created before shared_preload_libraries are
4732  * processed. Any custom resource managers specified won't be assigned
4733  * until after shared_preload_libraries are processed, but that's OK
4734  * because WAL for a custom resource manager can't be written before the
4735  * module is loaded anyway.
4736  */
4737  wal_consistency_checking = extra;
4738 }
bool * wal_consistency_checking
Definition: xlog.c:124

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8571 of file xlog.c.

8572 {
8573  if (wal_sync_method != new_wal_sync_method)
8574  {
8575  /*
8576  * To ensure that no blocks escape unsynced, force an fsync on the
8577  * currently open log segment (if any). Also, if the open flag is
8578  * changing, close the log file so it will be reopened (with new flag
8579  * bit) at next use.
8580  */
8581  if (openLogFile >= 0)
8582  {
8583  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8584  if (pg_fsync(openLogFile) != 0)
8585  {
8586  char xlogfname[MAXFNAMELEN];
8587  int save_errno;
8588 
8589  save_errno = errno;
8590  XLogFileName(xlogfname, openLogTLI, openLogSegNo,
8591  wal_segment_size);
8592  errno = save_errno;
8593  ereport(PANIC,
8594  (errcode_for_file_access(),
8595  errmsg("could not fsync file \"%s\": %m", xlogfname)));
8596  }
8597 
8598  pgstat_report_wait_end();
8599  if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
8600  XLogFileClose();
8601  }
8602  }
8603 }
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
static int openLogFile
Definition: xlog.c:628
static int get_sync_bit(int method)
Definition: xlog.c:8523
int wal_sync_method
Definition: xlog.c:128
static TimeLineID openLogTLI
Definition: xlog.c:630
static void XLogFileClose(void)
Definition: xlog.c:3626
static XLogSegNo openLogSegNo
Definition: xlog.c:629
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 4998 of file xlog.c.

4999 {
5000  CheckPoint checkPoint;
5001  char *buffer;
5002  XLogPageHeader page;
5003  XLogLongPageHeader longpage;
5004  XLogRecord *record;
5005  char *recptr;
5006  uint64 sysidentifier;
5007  struct timeval tv;
5008  pg_crc32c crc;
5009 
5010  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5012 
5013  /*
5014  * Select a hopefully-unique system identifier code for this installation.
5015  * We use the result of gettimeofday(), including the fractional seconds
5016  * field, as being about as unique as we can easily get. (Think not to
5017  * use random(), since it hasn't been seeded and there's no portable way
5018  * to seed it other than the system clock value...) The upper half of the
5019  * uint64 value is just the tv_sec part, while the lower half contains the
5020  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5021  * PID for a little extra uniqueness. A person knowing this encoding can
5022  * determine the initialization time of the installation, which could
5023  * perhaps be useful sometimes.
5024  */
5025  gettimeofday(&tv, NULL);
5026  sysidentifier = ((uint64) tv.tv_sec) << 32;
5027  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5028  sysidentifier |= getpid() & 0xFFF;
5029 
5030  /* page buffer must be aligned suitably for O_DIRECT */
5031  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5032  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5033  memset(page, 0, XLOG_BLCKSZ);
5034 
5035  /*
5036  * Set up information for the initial checkpoint record
5037  *
5038  * The initial checkpoint record is written to the beginning of the WAL
5039  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5040  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5041  */
5042  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5043  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
5044  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
5045  checkPoint.fullPageWrites = fullPageWrites;
5046  checkPoint.wal_level = wal_level;
5047  checkPoint.nextXid =
5049  checkPoint.nextOid = FirstGenbkiObjectId;
5050  checkPoint.nextMulti = FirstMultiXactId;
5051  checkPoint.nextMultiOffset = 0;
5052  checkPoint.oldestXid = FirstNormalTransactionId;
5053  checkPoint.oldestXidDB = Template1DbOid;
5054  checkPoint.oldestMulti = FirstMultiXactId;
5055  checkPoint.oldestMultiDB = Template1DbOid;
5058  checkPoint.time = (pg_time_t) time(NULL);
5060 
5061  TransamVariables->nextXid = checkPoint.nextXid;
5062  TransamVariables->nextOid = checkPoint.nextOid;
5064  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5065  AdvanceOldestClogXid(checkPoint.oldestXid);
5066  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5067  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5069 
5070  /* Set up the XLOG page header */
5071  page->xlp_magic = XLOG_PAGE_MAGIC;
5072  page->xlp_info = XLP_LONG_HEADER;
5073  page->xlp_tli = BootstrapTimeLineID;
5075  longpage = (XLogLongPageHeader) page;
5076  longpage->xlp_sysid = sysidentifier;
5077  longpage->xlp_seg_size = wal_segment_size;
5078  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5079 
5080  /* Insert the initial checkpoint record */
5081  recptr = ((char *) page + SizeOfXLogLongPHD);
5082  record = (XLogRecord *) recptr;
5083  record->xl_prev = 0;
5084  record->xl_xid = InvalidTransactionId;
5085  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5087  record->xl_rmid = RM_XLOG_ID;
5088  recptr += SizeOfXLogRecord;
5089  /* fill the XLogRecordDataHeaderShort struct */
5090  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5091  *(recptr++) = sizeof(checkPoint);
5092  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5093  recptr += sizeof(checkPoint);
5094  Assert(recptr - (char *) record == record->xl_tot_len);
5095 
5096  INIT_CRC32C(crc);
5097  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5098  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5099  FIN_CRC32C(crc);
5100  record->xl_crc = crc;
5101 
5102  /* Create first XLOG segment file */
5105 
5106  /*
5107  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5108  * close the file again in a moment.
5109  */
5110 
5111  /* Write the first page with the initial record */
5112  errno = 0;
5113  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5114  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5115  {
5116  /* if write didn't set errno, assume problem is no disk space */
5117  if (errno == 0)
5118  errno = ENOSPC;
5119  ereport(PANIC,
5121  errmsg("could not write bootstrap write-ahead log file: %m")));
5122  }
5124 
5125  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5126  if (pg_fsync(openLogFile) != 0)
5127  ereport(PANIC,
5129  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5131 
5132  if (close(openLogFile) != 0)
5133  ereport(PANIC,
5135  errmsg("could not close bootstrap write-ahead log file: %m")));
5136 
5137  openLogFile = -1;
5138 
5139  /* Now create pg_control */
5140  InitControlFile(sysidentifier, data_checksum_version);
5141  ControlFile->time = checkPoint.time;
5142  ControlFile->checkPoint = checkPoint.redo;
5143  ControlFile->checkPointCopy = checkPoint;
5144 
5145  /* some additional ControlFile fields are set in WriteControlFile() */
5146  WriteControlFile();
5147 
5148  /* Bootstrap the commit log, too */
5149  BootStrapCLOG();
5153 
5154  pfree(buffer);
5155 
5156  /*
5157  * Force control file to be read - in contrast to normal processing we'd
5158  * otherwise never run the checks and GUC related initializations therein.
5159  */
5160  ReadControlFile();
5161 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:804
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2320
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2354
void BootStrapMultiXact(void)
Definition: multixact.c:2026
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3367
bool fullPageWrites
Definition: xlog.c:120
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4191
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9414
int wal_level
Definition: xlog.c:129
static void WriteControlFile(void)
Definition: xlog.c:4226
#define BootstrapTimeLineID
Definition: xlog.c:109
static void ReadControlFile(void)
Definition: xlog.c:4308
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2155 of file xlog.c.

2156 {
      /*
       * Recompute CheckPointSegments from max_wal_size_mb, wal_segment_size
       * and CheckPointCompletionTarget.
       */
2157  double target;
2158 
2159  /*-------
2160  * Calculate the distance at which to trigger a checkpoint, to avoid
2161  * exceeding max_wal_size_mb. This is based on two assumptions:
2162  *
2163  * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2164  * WAL for two checkpoint cycles to allow us to recover from the
2165  * secondary checkpoint if the first checkpoint failed, though we
2166  * only did this on the primary anyway, not on standby. Keeping just
2167  * one checkpoint simplifies processing and reduces disk space in
2168  * many smaller databases.)
2169  * b) during checkpoint, we consume checkpoint_completion_target *
2170  * number of segments consumed between checkpoints.
2171  *-------
2172  */
2173  target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2174  (1.0 + CheckPointCompletionTarget);
2175 
2176  /* round down */
2177  CheckPointSegments = (int) target;
2178 
      /* never let the trigger distance drop below one segment */
2179  if (CheckPointSegments < 1)
2180  CheckPointSegments = 1;
2181 }
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:596
int CheckPointSegments
Definition: xlog.c:154

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_max_slot_wal_keep_size()

bool check_max_slot_wal_keep_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2216 of file xlog.c.

2217 {
      /*
       * GUC check hook: while IsBinaryUpgrade is set, the only accepted
       * value is -1; anything else is rejected with an error detail.
       * Returns true when the value is acceptable, false otherwise.
       */
2218  if (IsBinaryUpgrade && *newval != -1)
2219  {
2220  GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
2221  "max_slot_wal_keep_size");
2222  return false;
2223  }
2224 
2225  return true;
2226 }
bool IsBinaryUpgrade
Definition: globals.c:119
#define GUC_check_errdetail
Definition: guc.h:472

References GUC_check_errdetail, IsBinaryUpgrade, and newval.

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4602 of file xlog.c.

4603 {
      /*
       * GUC check hook for the WAL buffer count: may rewrite *newval in
       * place (auto-tune substitution, clamp to a minimum of 4) and always
       * returns true.
       */
4604  /*
4605  * -1 indicates a request for auto-tune.
4606  */
4607  if (*newval == -1)
4608  {
4609  /*
4610  * If we haven't yet changed the boot_val default of -1, just let it
4611  * be. We'll fix it when XLOGShmemSize is called.
4612  */
4613  if (XLOGbuffers == -1)
4614  return true;
4615 
4616  /* Otherwise, substitute the auto-tune value */
4617  *newval = XLOGChooseNumBuffers();
4618  }
4619 
4620  /*
4621  * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4622  * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4623  * the case, we just silently treat such values as a request for the
4624  * minimum. (We could throw an error instead, but that doesn't seem very
4625  * helpful.)
4626  */
4627  if (*newval < 4)
4628  *newval = 4;
4629 
4630  return true;
4631 }
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4586
int XLOGbuffers
Definition: xlog.c:115

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4637 of file xlog.c.

4638 {
      /*
       * GUC check hook: parses *newval as a comma-separated identifier list
       * of resource manager names (or "all") and, on success, stores a
       * guc_malloc'd bool array indexed by rmgr id (RM_MAX_ID + 1 entries)
       * in *extra. Returns false with an error detail on a syntax error or
       * an unrecognized name.
       */
4639  char *rawstring;
4640  List *elemlist;
4641  ListCell *l;
4642  bool newwalconsistency[RM_MAX_ID + 1];
4643 
4644  /* Initialize the array */
4645  MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4646 
4647  /* Need a modifiable copy of string */
4648  rawstring = pstrdup(*newval);
4649 
4650  /* Parse string into list of identifiers */
4651  if (!SplitIdentifierString(rawstring, ',', &elemlist))
4652  {
4653  /* syntax error in list */
4654  GUC_check_errdetail("List syntax is invalid.");
4655  pfree(rawstring);
4656  list_free(elemlist);
4657  return false;
4658  }
4659 
4660  foreach(l, elemlist)
4661  {
4662  char *tok = (char *) lfirst(l);
4663  int rmid;
4664 
4665  /* Check for 'all'. */
4666  if (pg_strcasecmp(tok, "all") == 0)
4667  {
      /* only resource managers that provide an rm_mask callback are enabled */
4668  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4669  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4670  newwalconsistency[rmid] = true;
4671  }
4672  else
4673  {
4674  /* Check if the token matches any known resource manager. */
4675  bool found = false;
4676 
4677  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4678  {
4679  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4680  pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4681  {
4682  newwalconsistency[rmid] = true;
4683  found = true;
4684  break;
4685  }
4686  }
4687  if (!found)
4688  {
4689  /*
4690  * During startup, it might be a not-yet-loaded custom
4691  * resource manager. Defer checking until
4692  * InitializeWalConsistencyChecking().
4693  */
4694  if (!process_shared_preload_libraries_done)
4695  {
4696  check_wal_consistency_checking_deferred = true;
4697  }
4698  else
4699  {
4700  GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4701  pfree(rawstring);
4702  list_free(elemlist);
4703  return false;
4704  }
4705  }
4706  }
4707  }
4708 
4709  pfree(rawstring);
4710  list_free(elemlist);
4711 
4712  /* assign new value */
4713  *extra = guc_malloc(ERROR, (RM_MAX_ID + 1) * sizeof(bool));
4714  memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4715  return true;
4716 }
#define ERROR
Definition: elog.h:39
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:637
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1696
bool process_shared_preload_libraries_done
Definition: miscinit.c:1779
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3457
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:164
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, ERROR, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2198 of file xlog.c.

2199 {
      /*
       * GUC check hook: accept *newval only if IsValidWalSegSize() approves
       * it; otherwise set an error detail and return false.
       */
2200  if (!IsValidWalSegSize(*newval))
2201  {
2202  GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2203  return false;
2204  }
2205 
2206  return true;
2207 }
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7440 of file xlog.c.

7441 {
      /*
       * Run the per-subsystem checkpoint routines, write out dirty buffers,
       * process queued sync requests, and record the write/sync timestamps
       * in CheckpointStats. Two-phase state is checkpointed last.
       */
7442  CheckPointRelationMap();
7443  CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
7444  CheckPointSnapBuild();
7445  CheckPointLogicalRewriteHeap();
7446  CheckPointReplicationOrigin();
7447 
7448  /* Write out all dirty data in SLRUs and the main buffer pool */
7449  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7450  CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
7451  CheckPointCLOG();
7452  CheckPointCommitTs();
7453  CheckPointSUBTRANS();
7454  CheckPointMultiXact();
7455  CheckPointPredicate();
7456  CheckPointBuffers(flags);
7457 
7458  /* Perform all queued up fsyncs */
7459  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7460  CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
7461  ProcessSyncRequests();
7462  CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
7463  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
7464 
7465  /* We deliberately delay 2PC checkpointing as long as possible */
7466  CheckPointTwoPhase(checkPointRedo);
7467 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1655
void CheckPointBuffers(int flags)
Definition: bufmgr.c:3722
void CheckPointCLOG(void)
Definition: clog.c:937
void CheckPointCommitTs(void)
Definition: commit_ts.c:820
void CheckPointMultiXact(void)
Definition: multixact.c:2296
void CheckPointReplicationOrigin(void)
Definition: origin.c:573
void CheckPointPredicate(void)
Definition: predicate.c:1036
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1860
void CheckPointSnapBuild(void)
Definition: snapbuild.c:2071
TimestampTz ckpt_write_t
Definition: xlog.h:162
TimestampTz ckpt_sync_end_t
Definition: xlog.h:164
TimestampTz ckpt_sync_t
Definition: xlog.h:163
void CheckPointSUBTRANS(void)
Definition: subtrans.c:355
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1816
CheckpointStatsData CheckpointStats
Definition: xlog.c:207
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5343 of file xlog.c.

5344 {
      /*
       * Verify that the settings recorded in ControlFile are compatible
       * with the requested recovery mode; reports FATAL if the WAL was
       * generated with wal_level=minimal during archive recovery.
       */
5345  /*
5346  * For archive recovery, the WAL must be generated with at least 'replica'
5347  * wal_level.
5348  */
5349  if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
5350  {
5351  ereport(FATAL,
5352  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5353  errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5354  errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5355  errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5356  }
5357 
5358  /*
5359  * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5360  * must have at least as many backend slots as the primary.
5361  */
5362  if (ArchiveRecoveryRequested && EnableHotStandby)
5363  {
      /* each call passes (name, local setting, value recorded in ControlFile) */
5364  /* We ignore autovacuum_max_workers when we make this test. */
5365  RecoveryRequiresIntParameter("max_connections",
5366  MaxConnections,
5367  ControlFile->MaxConnections);
5368  RecoveryRequiresIntParameter("max_worker_processes",
5369  max_worker_processes,
5370  ControlFile->max_worker_processes);
5371  RecoveryRequiresIntParameter("max_wal_senders",
5372  max_wal_senders,
5373  ControlFile->max_wal_senders);
5374  RecoveryRequiresIntParameter("max_prepared_transactions",
5375  max_prepared_xacts,
5376  ControlFile->max_prepared_xacts);
5377  RecoveryRequiresIntParameter("max_locks_per_transaction",
5378  max_locks_per_xact,
5379  ControlFile->max_locks_per_xact);
5380  }
5381 }
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:141
int max_worker_processes
Definition: globals.c:142
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
int max_prepared_xacts
Definition: twophase.c:115
int max_wal_senders
Definition: walsender.c:121
bool EnableHotStandby
Definition: xlog.c:119
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:137
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3714 of file xlog.c.

3715 {
      /*
       * Raise ERROR if segment 'segno' is at or below the last removed
       * segment recorded in XLogCtl. The caller's errno is saved on entry
       * and restored before reporting and before returning.
       */
3716  int save_errno = errno;
3717  XLogSegNo lastRemovedSegNo;
3718 
      /* read the shared value under info_lck */
3719  SpinLockAcquire(&XLogCtl->info_lck);
3720  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3721  SpinLockRelease(&XLogCtl->info_lck);
3722 
3723  if (segno <= lastRemovedSegNo)
3724  {
3725  char filename[MAXFNAMELEN];
3726 
3727  XLogFileName(filename, tli, segno, wal_segment_size);
3728  errno = save_errno;
3729  ereport(ERROR,
3730  (errcode_for_file_access(),
3731  errmsg("requested WAL segment %s has already been removed",
3732  filename)));
3733  }
3734  errno = save_errno;
3735 }
static char * filename
Definition: pg_dumpall.c:119
XLogSegNo lastRemovedSegNo
Definition: xlog.c:459
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5254 of file xlog.c.

5256 {
      /*
       * Post-recovery cleanup: run recovery_end_command if one is set,
       * remove segments that do not belong to the new timeline, and, when
       * the switch point falls mid-segment and archiving is active, rename
       * the old timeline's last segment to ".partial" and queue it for
       * archiving.
       */
5257  /*
5258  * Execute the recovery_end_command, if any.
5259  */
5260  if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5261  ExecuteRecoveryCommand(recoveryEndCommand,
5262  "recovery_end_command",
5263  true,
5264  WAIT_EVENT_RECOVERY_END_COMMAND);
5265 
5266  /*
5267  * We switched to a new timeline. Clean up segments on the old timeline.
5268  *
5269  * If there are any higher-numbered segments on the old timeline, remove
5270  * them. They might contain valid WAL, but they might also be
5271  * pre-allocated files containing garbage. In any case, they are not part
5272  * of the new timeline's history so we don't need them.
5273  */
5274  RemoveNonParentXlogFiles(EndOfLog, newTLI);
5275 
5276  /*
5277  * If the switch happened in the middle of a segment, what to do with the
5278  * last, partial segment on the old timeline? If we don't archive it, and
5279  * the server that created the WAL never archives it either (e.g. because
5280  * it was hit by a meteor), it will never make it to the archive. That's
5281  * OK from our point of view, because the new segment that we created with
5282  * the new TLI contains all the WAL from the old timeline up to the switch
5283  * point. But if you later try to do PITR to the "missing" WAL on the old
5284  * timeline, recovery won't find it in the archive. It's physically
5285  * present in the new file with new TLI, but recovery won't look there
5286  * when it's recovering to the older timeline. On the other hand, if we
5287  * archive the partial segment, and the original server on that timeline
5288  * is still running and archives the completed version of the same segment
5289  * later, it will fail. (We used to do that in 9.4 and below, and it
5290  * caused such problems).
5291  *
5292  * As a compromise, we rename the last segment with the .partial suffix,
5293  * and archive it. Archive recovery will never try to read .partial
5294  * segments, so they will normally go unused. But in the odd PITR case,
5295  * the administrator can copy them manually to the pg_wal directory
5296  * (removing the suffix). They can be useful in debugging, too.
5297  *
5298  * If a .done or .ready file already exists for the old timeline, however,
5299  * we had already determined that the segment is complete, so we can let
5300  * it be archived normally. (In particular, if it was restored from the
5301  * archive to begin with, it's expected to have a .done file).
5302  */
5303  if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5304  XLogArchivingActive())
5305  {
5306  char origfname[MAXFNAMELEN];
5307  XLogSegNo endLogSegNo;
5308 
5309  XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5310  XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5311 
5312  if (!XLogArchiveIsReadyOrDone(origfname))
5313  {
5314  char origpath[MAXPGPATH];
5315  char partialfname[MAXFNAMELEN];
5316  char partialpath[MAXPGPATH];
5317 
5318  XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5319  snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5320  snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5321 
5322  /*
5323  * Make sure there's no .done or .ready file for the .partial
5324  * file.
5325  */
5326  XLogArchiveCleanup(partialfname);
5327 
5328  durable_rename(origpath, partialpath, ERROR);
5329  XLogArchiveNotify(partialfname);
5330  }
5331  }
5332 }
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
#define MAXPGPATH
#define snprintf
Definition: port.h:238
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3927
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:83

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4148 of file xlog.c.

4149 {
      /*
       * Scan XLOGDIR and, for every backup history file that
       * XLogArchiveCheckDone() approves, unlink the file and clean up its
       * archive status via XLogArchiveCleanup().
       */
4150  DIR *xldir;
4151  struct dirent *xlde;
4152  char path[MAXPGPATH + sizeof(XLOGDIR)];
4153 
4154  xldir = AllocateDir(XLOGDIR);
4155 
4156  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4157  {
4158  if (IsBackupHistoryFileName(xlde->d_name))
4159  {
4160  if (XLogArchiveCheckDone(xlde->d_name))
4161  {
4162  elog(DEBUG2, "removing WAL backup history file \"%s\"",
4163  xlde->d_name);
4164  snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
      /* the unlink() result is not checked here */
4165  unlink(path);
4166  XLogArchiveCleanup(xlde->d_name);
4167  }
4168  }
4169  }
4170 
4171  FreeDir(xldir);
4172 }
#define DEBUG2
Definition: elog.h:29
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1218 of file xlog.c.

1220 {
      /*
       * Copy the record data chain 'rdata' (write_len bytes in total) into
       * the WAL buffers starting at StartPos, setting continuation
       * information in each page header crossed. For an xlog-switch record
       * the rest of the segment is consumed as well. PANICs if the final
       * position does not equal EndPos.
       */
1221  char *currpos;
1222  int freespace;
1223  int written;
1224  XLogRecPtr CurrPos;
1225  XLogPageHeader pagehdr;
1226 
1227  /*
1228  * Get a pointer to the right place in the right WAL buffer to start
1229  * inserting to.
1230  */
1231  CurrPos = StartPos;
1232  currpos = GetXLogBuffer(CurrPos, tli);
1233  freespace = INSERT_FREESPACE(CurrPos);
1234 
1235  /*
1236  * there should be enough space for at least the first field (xl_tot_len)
1237  * on this page.
1238  */
1239  Assert(freespace >= sizeof(uint32));
1240 
1241  /* Copy record data */
1242  written = 0;
1243  while (rdata != NULL)
1244  {
1245  char *rdata_data = rdata->data;
1246  int rdata_len = rdata->len;
1247 
1248  while (rdata_len > freespace)
1249  {
1250  /*
1251  * Write what fits on this page, and continue on the next page.
1252  */
1253  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1254  memcpy(currpos, rdata_data, freespace);
1255  rdata_data += freespace;
1256  rdata_len -= freespace;
1257  written += freespace;
1258  CurrPos += freespace;
1259 
1260  /*
1261  * Get pointer to beginning of next page, and set the xlp_rem_len
1262  * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1263  *
1264  * It's safe to set the contrecord flag and xlp_rem_len without a
1265  * lock on the page. All the other flags were already set when the
1266  * page was initialized, in AdvanceXLInsertBuffer, and we're the
1267  * only backend that needs to set the contrecord flag.
1268  */
1269  currpos = GetXLogBuffer(CurrPos, tli);
1270  pagehdr = (XLogPageHeader) currpos;
1271  pagehdr->xlp_rem_len = write_len - written;
1272  pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1273 
1274  /* skip over the page header */
1275  if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1276  {
1277  CurrPos += SizeOfXLogLongPHD;
1278  currpos += SizeOfXLogLongPHD;
1279  }
1280  else
1281  {
1282  CurrPos += SizeOfXLogShortPHD;
1283  currpos += SizeOfXLogShortPHD;
1284  }
1285  freespace = INSERT_FREESPACE(CurrPos);
1286  }
1287 
1288  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1289  memcpy(currpos, rdata_data, rdata_len);
1290  currpos += rdata_len;
1291  CurrPos += rdata_len;
1292  freespace -= rdata_len;
1293  written += rdata_len;
1294 
1295  rdata = rdata->next;
1296  }
1297  Assert(written == write_len);
1298 
1299  /*
1300  * If this was an xlog-switch, it's not enough to write the switch record,
1301  * we also have to consume all the remaining space in the WAL segment. We
1302  * have already reserved that space, but we need to actually fill it.
1303  */
1304  if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1305  {
1306  /* An xlog-switch record doesn't contain any data besides the header */
1307  Assert(write_len == SizeOfXLogRecord);
1308 
1309  /* Assert that we did reserve the right amount of space */
1310  Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1311 
1312  /* Use up all the remaining space on the current page */
1313  CurrPos += freespace;
1314 
1315  /*
1316  * Cause all remaining pages in the segment to be flushed, leaving the
1317  * XLog position where it should be, at the start of the next segment.
1318  * We do this one page at a time, to make sure we don't deadlock
1319  * against ourselves if wal_buffers < wal_segment_size.
1320  */
1321  while (CurrPos < EndPos)
1322  {
1323  /*
1324  * The minimal action to flush the page would be to call
1325  * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1326  * AdvanceXLInsertBuffer(...). The page would be left initialized
1327  * mostly to zeros, except for the page header (always the short
1328  * variant, as this is never a segment's first page).
1329  *
1330  * The large vistas of zeros are good for compressibility, but the
1331  * headers interrupting them every XLOG_BLCKSZ (with values that
1332  * differ from page to page) are not. The effect varies with
1333  * compression tool, but bzip2 for instance compresses about an
1334  * order of magnitude worse if those headers are left in place.
1335  *
1336  * Rather than complicating AdvanceXLInsertBuffer itself (which is
1337  * called in heavily-loaded circumstances as well as this lightly-
1338  * loaded one) with variant behavior, we just use GetXLogBuffer
1339  * (which itself calls the two methods we need) to get the pointer
1340  * and zero most of the page. Then we just zero the page header.
1341  */
1342  currpos = GetXLogBuffer(CurrPos, tli);
1343  MemSet(currpos, 0, SizeOfXLogShortPHD);
1344 
1345  CurrPos += XLOG_BLCKSZ;
1346  }
1347  }
1348  else
1349  {
1350  /* Align the end position, so that the next record starts aligned */
1351  CurrPos = MAXALIGN64(CurrPos);
1352  }
1353 
      /* sanity check: what was written must match the space reserved by the caller */
1354  if (CurrPos != EndPos)
1355  ereport(PANIC,
1356  errcode(ERRCODE_DATA_CORRUPTED),
1357  errmsg_internal("space reserved for WAL record does not match what was written"));
1358 }
unsigned int uint32
Definition: c.h:506
#define MAXALIGN64(LEN)
Definition: c.h:836
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:573
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1625
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert, XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 6821 of file xlog.c.

6822 {
6823  bool shutdown; /* true for shutdown and end-of-recovery checkpoints */
6824  CheckPoint checkPoint;
6825  XLogRecPtr recptr; /* end of the checkpoint record once inserted */
6826  XLogSegNo _logSegNo;
6827  XLogCtlInsert *Insert = &XLogCtl->Insert;
6828  uint32 freespace;
6829  XLogRecPtr PriorRedoPtr; /* redo pointer of the prior checkpoint */
6830  XLogRecPtr last_important_lsn;
6831  VirtualTransactionId *vxids;
6832  int nvxids;
6833  int oldXLogAllowed = 0; /* saved state, restored after end-of-recovery checkpoint */
6834 
6835  /*
6836  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6837  * issued at a different time.
6838  */
6839  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
6840  shutdown = true;
6841  else
6842  shutdown = false;
6843 
6844  /* sanity check */
6845  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6846  elog(ERROR, "can't create a checkpoint during recovery");
6847 
6848  /*
6849  * Prepare to accumulate statistics.
6850  *
6851  * Note: because it is possible for log_checkpoints to change while a
6852  * checkpoint proceeds, we always accumulate stats, even if
6853  * log_checkpoints is currently off.
6854  */
6855  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6856  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
6857 
6858  /*
6859  * Let smgr prepare for checkpoint; this has to happen outside the
6860  * critical section and before we determine the REDO pointer. Note that
6861  * smgr must not do anything that'd have to be undone if we decide no
6862  * checkpoint is needed.
6863  */
6864  SyncPreCheckpoint();
6865 
6866  /*
6867  * Use a critical section to force system panic if we have trouble.
6868  */
6869  START_CRIT_SECTION();
6870 
6871  if (shutdown)
6872  {
6873  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6874  ControlFile->state = DB_SHUTDOWNING;
6875  UpdateControlFile();
6876  LWLockRelease(ControlFileLock);
6877  }
6878 
6879  /* Begin filling in the checkpoint WAL record */
6880  MemSet(&checkPoint, 0, sizeof(checkPoint));
6881  checkPoint.time = (pg_time_t) time(NULL);
6882 
6883  /*
6884  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6885  * pointer. This allows us to begin accumulating changes to assemble our
6886  * starting snapshot of locks and transactions.
6887  */
6888  if (!shutdown && XLogStandbyInfoActive())
6889  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
6890  else
6891  checkPoint.oldestActiveXid = InvalidTransactionId;
6892 
6893  /*
6894  * Get location of last important record before acquiring insert locks (as
6895  * GetLastImportantRecPtr() also locks WAL locks).
6896  */
6897  last_important_lsn = GetLastImportantRecPtr();
6898 
6899  /*
6900  * If this isn't a shutdown or forced checkpoint, and if there has been no
6901  * WAL activity requiring a checkpoint, skip it. The idea here is to
6902  * avoid inserting duplicate checkpoints when the system is idle.
6903  */
6904  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6905  CHECKPOINT_FORCE)) == 0)
6906  {
6907  if (last_important_lsn == ControlFile->checkPoint)
6908  {
6909  END_CRIT_SECTION();
6910  ereport(DEBUG1,
6911  (errmsg_internal("checkpoint skipped because system is idle")));
6912  return;
6913  }
6914  }
6915 
6916  /*
6917  * An end-of-recovery checkpoint is created before anyone is allowed to
6918  * write WAL. To allow us to write the checkpoint record, temporarily
6919  * enable XLogInsertAllowed.
6920  */
6921  if (flags & CHECKPOINT_END_OF_RECOVERY)
6922  oldXLogAllowed = LocalSetXLogInsertAllowed();
6923 
6924  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6925  if (flags & CHECKPOINT_END_OF_RECOVERY)
6926  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6927  else
6928  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6929 
6930  /*
6931  * We must block concurrent insertions while examining insert state.
6932  */
6933  WALInsertLockAcquireExclusive();
6934 
6935  checkPoint.fullPageWrites = Insert->fullPageWrites;
6936  checkPoint.wal_level = wal_level;
6937 
6938  if (shutdown)
6939  {
6940  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
6941 
6942  /*
6943  * Compute new REDO record ptr = location of next XLOG record.
6944  *
6945  * Since this is a shutdown checkpoint, there can't be any concurrent
6946  * WAL insertion.
6947  */
6948  freespace = INSERT_FREESPACE(curInsert);
6949  if (freespace == 0)
6950  {
6951  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
6952  curInsert += SizeOfXLogLongPHD;
6953  else
6954  curInsert += SizeOfXLogShortPHD;
6955  }
6956  checkPoint.redo = curInsert;
6957 
6958  /*
6959  * Here we update the shared RedoRecPtr for future XLogInsert calls;
6960  * this must be done while holding all the insertion locks.
6961  *
6962  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
6963  * left pointing past where it really needs to point. This is okay;
6964  * the only consequence is that XLogInsert might back up whole buffers
6965  * that it didn't really need to. We can't postpone advancing
6966  * RedoRecPtr because XLogInserts that happen while we are dumping
6967  * buffers must assume that their buffer changes are not included in
6968  * the checkpoint.
6969  */
6970  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
6971  }
6972 
6973  /*
6974  * Now we can release the WAL insertion locks, allowing other xacts to
6975  * proceed while we are flushing disk buffers.
6976  */
6977  WALInsertLockRelease();
6978 
6979  /*
6980  * If this is an online checkpoint, we have not yet determined the redo
6981  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
6982  * record; the LSN at which it starts becomes the new redo pointer. We
6983  * don't do this for a shutdown checkpoint, because in that case no WAL
6984  * can be written between the redo point and the insertion of the
6985  * checkpoint record itself, so the checkpoint record itself serves to
6986  * mark the redo point.
6987  */
6988  if (!shutdown)
6989  {
6990  /* Include WAL level in record for WAL summarizer's benefit. */
6991  XLogBeginInsert();
6992  XLogRegisterData((char *) &wal_level, sizeof(wal_level));
6993  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
6994 
6995  /*
6996  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
6997  * shared memory and RedoRecPtr in backend-local memory, but we need
6998  * to copy that into the record that will be inserted when the
6999  * checkpoint is complete.
7000  */
7001  checkPoint.redo = RedoRecPtr;
7002  }
7003 
7004  /* Update the info_lck-protected copy of RedoRecPtr as well */
7005  SpinLockAcquire(&XLogCtl->info_lck);
7006  XLogCtl->RedoRecPtr = checkPoint.redo;
7007  SpinLockRelease(&XLogCtl->info_lck);
7008 
7009  /*
7010  * If enabled, log checkpoint start. We postpone this until now so as not
7011  * to log anything if we decided to skip the checkpoint.
7012  */
7013  if (log_checkpoints)
7014  LogCheckpointStart(flags, false);
7015 
7016  /* Update the process title */
7017  update_checkpoint_display(flags, false, false);
7018 
7019  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7020 
7021  /*
7022  * Get the other info we need for the checkpoint record.
7023  *
7024  * We don't need to save oldestClogXid in the checkpoint, it only matters
7025  * for the short period in which clog is being truncated, and if we crash
7026  * during that we'll redo the clog truncation and fix up oldestClogXid
7027  * there.
7028  */
7029  LWLockAcquire(XidGenLock, LW_SHARED);
7030  checkPoint.nextXid = TransamVariables->nextXid;
7031  checkPoint.oldestXid = TransamVariables->oldestXid;
7032  checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7033  LWLockRelease(XidGenLock);
7034 
7035  LWLockAcquire(CommitTsLock, LW_SHARED);
7036  checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7037  checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;
7038  LWLockRelease(CommitTsLock);
7039 
7040  LWLockAcquire(OidGenLock, LW_SHARED);
7041  checkPoint.nextOid = TransamVariables->nextOid;
7042  if (!shutdown)
7043  checkPoint.nextOid += TransamVariables->oidCount;
7044  LWLockRelease(OidGenLock);
7045 
7046  MultiXactGetCheckptMulti(shutdown,
7047  &checkPoint.nextMulti,
7048  &checkPoint.nextMultiOffset,
7049  &checkPoint.oldestMulti,
7050  &checkPoint.oldestMultiDB);
7051 
7052  /*
7053  * Having constructed the checkpoint record, ensure all shmem disk buffers
7054  * and commit-log buffers are flushed to disk.
7055  *
7056  * This I/O could fail for various reasons. If so, we will fail to
7057  * complete the checkpoint, but there is no reason to force a system
7058  * panic. Accordingly, exit critical section while doing it.
7059  */
7060  END_CRIT_SECTION();
7061 
7062  /*
7063  * In some cases there are groups of actions that must all occur on one
7064  * side or the other of a checkpoint record. Before flushing the
7065  * checkpoint record we must explicitly wait for any backend currently
7066  * performing those groups of actions.
7067  *
7068  * One example is end of transaction, so we must wait for any transactions
7069  * that are currently in commit critical sections. If an xact inserted
7070  * its commit record into XLOG just before the REDO point, then a crash
7071  * restart from the REDO point would not replay that record, which means
7072  * that our flushing had better include the xact's update of pg_xact. So
7073  * we wait till he's out of his commit critical section before proceeding.
7074  * See notes in RecordTransactionCommit().
7075  *
7076  * Because we've already released the insertion locks, this test is a bit
7077  * fuzzy: it is possible that we will wait for xacts we didn't really need
7078  * to wait for. But the delay should be short and it seems better to make
7079  * checkpoint take a bit longer than to hold off insertions longer than
7080  * necessary. (In fact, the whole reason we have this issue is that xact.c
7081  * does commit record XLOG insertion and clog update as two separate steps
7082  * protected by different locks, but again that seems best on grounds of
7083  * minimizing lock contention.)
7084  *
7085  * A transaction that has not yet set delayChkptFlags when we look cannot
7086  * be at risk, since it has not inserted its commit record yet; and one
7087  * that's already cleared it is not at risk either, since it's done fixing
7088  * clog and we will correctly flush the update below. So we cannot miss
7089  * any xacts we need to wait for.
7090  */
7091  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7092  if (nvxids > 0)
7093  {
7094  do
7095  {
7096  /*
7097  * Keep absorbing fsync requests while we wait. There could even
7098  * be a deadlock if we don't, if the process that prevents the
7099  * checkpoint is trying to add a request to the queue.
7100  */
7101  AbsorbSyncRequests();
7102 
7103  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7104  pg_usleep(10000L); /* wait for 10 msec */
7105  pgstat_report_wait_end();
7106  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7107  DELAY_CHKPT_START));
7108  }
7109  pfree(vxids);
7110 
7111  CheckPointGuts(checkPoint.redo, flags);
7112 
7113  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7114  if (nvxids > 0)
7115  {
7116  do
7117  {
7118  AbsorbSyncRequests();
7119 
7120  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7121  pg_usleep(10000L); /* wait for 10 msec */
7122  pgstat_report_wait_end();
7123  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7124  DELAY_CHKPT_COMPLETE));
7125  }
7126  pfree(vxids);
7127 
7128  /*
7129  * Take a snapshot of running transactions and write this to WAL. This
7130  * allows us to reconstruct the state of running transactions during
7131  * archive recovery, if required. Skip, if this info disabled.
7132  *
7133  * If we are shutting down, or Startup process is completing crash
7134  * recovery we don't need to write running xact data.
7135  */
7136  if (!shutdown && XLogStandbyInfoActive())
7137  LogStandbySnapshot();
7138 
7139  START_CRIT_SECTION();
7140 
7141  /*
7142  * Now insert the checkpoint record into XLOG.
7143  */
7144  XLogBeginInsert();
7145  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7146  recptr = XLogInsert(RM_XLOG_ID,
7147  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7148  XLOG_CHECKPOINT_ONLINE);
7149 
7150  XLogFlush(recptr);
7151 
7152  /*
7153  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7154  * overwritten at next startup. No-one should even try, this just allows
7155  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7156  * to just temporarily disable writing until the system has exited
7157  * recovery.
7158  */
7159  if (shutdown)
7160  {
7161  if (flags & CHECKPOINT_END_OF_RECOVERY)
7162  LocalXLogInsertAllowed = oldXLogAllowed;
7163  else
7164  LocalXLogInsertAllowed = 0; /* never again write WAL */
7165  }
7166 
7167  /*
7168  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7169  * = end of actual checkpoint record.
7170  */
7171  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7172  ereport(PANIC,
7173  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7174 
7175  /*
7176  * Remember the prior checkpoint's redo ptr for
7177  * UpdateCheckPointDistanceEstimate()
7178  */
7179  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7180 
7181  /*
7182  * Update the control file.
7183  */
7184  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7185  if (shutdown)
7186  ControlFile->state = DB_SHUTDOWNED;
7187  ControlFile->checkPoint = ProcLastRecPtr;
7188  ControlFile->checkPointCopy = checkPoint;
7189  /* crash recovery should always recover to the end of WAL */
7190  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7191  ControlFile->minRecoveryPointTLI = 0;
7192 
7193  /*
7194  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7195  * unused on non-shutdown checkpoints, but seems useful to store it always
7196  * for debugging purposes.
7197  */
7198  ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7199 
7200  UpdateControlFile();
7201  LWLockRelease(ControlFileLock);
7202 
7203  /* Update shared-memory copy of checkpoint XID/epoch */
7204  SpinLockAcquire(&XLogCtl->info_lck);
7205  XLogCtl->ckptFullXid = checkPoint.nextXid;
7206  SpinLockRelease(&XLogCtl->info_lck);
7207 
7208  /*
7209  * We are now done with critical updates; no need for system panic if we
7210  * have trouble while fooling with old log segments.
7211  */
7212  END_CRIT_SECTION();
7213 
7214  /*
7215  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7216  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7217  * where (a) we're not inside of a critical section and (b) we can be
7218  * certain that the relevant record has been flushed to disk, which must
7219  * happen before it can be summarized.
7220  *
7221  * If this is a shutdown checkpoint, then this happens reasonably
7222  * promptly: we've only just inserted and flushed the
7223  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7224  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7225  * record was written before we began flushing data to disk, and that
7226  * could be many minutes ago at this point. However, we don't XLogFlush()
7227  * after inserting that record, so we're not guaranteed that it's on disk
7228  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7229  * record.
7230  */
7231  SetWalSummarizerLatch();
7232 
7233  /*
7234  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7235  */
7236  SyncPostCheckpoint();
7237 
7238  /*
7239  * Update the average distance between checkpoints if the prior checkpoint
7240  * exists.
7241  */
7242  if (PriorRedoPtr != InvalidXLogRecPtr)
7243  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7244 
7245  /*
7246  * Delete old log files, those no longer needed for last checkpoint to
7247  * prevent the disk holding the xlog from growing full.
7248  */
7249  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7250  KeepLogSeg(recptr, &_logSegNo);
7251  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7252  _logSegNo, InvalidOid,
7253  InvalidTransactionId))
7254  {
7255  /*
7256  * Some slots have been invalidated; recalculate the old-segment
7257  * horizon, starting again from RedoRecPtr.
7258  */
7259  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7260  KeepLogSeg(recptr, &_logSegNo);
7261  }
7262  _logSegNo--;
7263  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7264  checkPoint.ThisTimeLineID);
7265 
7266  /*
7267  * Make more log segments if needed. (Do this after recycling old log
7268  * segments, since that may supply some of the needed files.)
7269  */
7270  if (!shutdown)
7271  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7272 
7273  /*
7274  * Truncate pg_subtrans if possible. We can throw away all data before
7275  * the oldest XMIN of any running transaction. No future transaction will
7276  * attempt to reference any pg_subtrans entry older than that (see Asserts
7277  * in subtrans.c). During recovery, though, we mustn't do this because
7278  * StartupSUBTRANS hasn't been called yet.
7279  */
7280  if (!RecoveryInProgress())
7281  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7282 
7283  /* Real work is done; log and update stats. */
7284  LogCheckpointEnd(false);
7285 
7286  /* Reset the process title */
7287  update_checkpoint_display(flags, false, true);
7288 
7289  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7290  NBuffers,
7291  CheckpointStats.ckpt_segs_added,
7292  CheckpointStats.ckpt_segs_removed,
7293  CheckpointStats.ckpt_segs_recycled);
7294 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:469
void AbsorbSyncRequests(void)
int NBuffers
Definition: globals.c:140
@ LW_SHARED
Definition: lwlock.h:115
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2274
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:114
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:115
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3047
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2884
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3093
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1804
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:170
int ckpt_segs_added
Definition: xlog.h:169
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:171
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:455
TimeLineID InsertTimeLineID
Definition: xlog.c:502
XLogRecPtr RedoRecPtr
Definition: xlog.c:454
TimeLineID PrevTimeLineID
Definition: xlog.c:503
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:462
XLogRecPtr RedoRecPtr
Definition: xlog.c:428
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void SetWalSummarizerLatch(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:251
bool RecoveryInProgress(void)
Definition: xlog.c:6290
static void WALInsertLockRelease(void)
Definition: xlog.c:1438
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1851
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1409
static void UpdateControlFile(void)
Definition: xlog.c:4524
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3852
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6586
static XLogRecPtr RedoRecPtr
Definition: xlog.c:271
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6618
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3677
bool log_checkpoints
Definition: xlog.c:127
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7883
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6378
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6493
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6721
static int LocalXLogInsertAllowed
Definition: xlog.c:234
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2789
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7440
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6759
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SetWalSummarizerLatch(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7305 of file xlog.c.

7306 {
7307  xl_end_of_recovery xlrec; /* payload of the XLOG_END_OF_RECOVERY record */
7308  XLogRecPtr recptr; /* LSN returned by XLogInsert; flushed and stored as minRecoveryPoint */
7309 
7310  /* sanity check */
7311  if (!RecoveryInProgress())
7312  elog(ERROR, "can only be used to end recovery");
7313 
7314  xlrec.end_time = GetCurrentTimestamp();
7315  xlrec.wal_level = wal_level;
7316 
7317  WALInsertLockAcquireExclusive();
7318  xlrec.ThisTimeLineID = XLogCtl->InsertTimeLineID;
7319  xlrec.PrevTimeLineID = XLogCtl->PrevTimeLineID;
7320  WALInsertLockRelease();
7321 
7322  START_CRIT_SECTION();
7323 
7324  XLogBeginInsert();
7325  XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
7326  recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7327 
7328  XLogFlush(recptr);
7329 
7330  /*
7331  * Update the control file so that crash recovery can follow the timeline
7332  * changes to this point.
7333  */
7334  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7335  ControlFile->minRecoveryPoint = recptr;
7336  ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7337  UpdateControlFile();
7338  LWLockRelease(ControlFileLock);
7339 
7340  END_CRIT_SECTION();
7341 }
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), wal_level, xl_end_of_recovery::wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7370 of file xlog.c.

7372 {
7373  xl_overwrite_contrecord xlrec;
7374  XLogRecPtr recptr;
7375  XLogPageHeader pagehdr;
7376  XLogRecPtr startPos; /* expected insert position: just past the page header at pagePtr */
7377 
7378  /* sanity checks */
7379  if (!RecoveryInProgress())
7380  elog(ERROR, "can only be used at end of recovery");
7381  if (pagePtr % XLOG_BLCKSZ != 0)
7382  elog(ERROR, "invalid position for missing continuation record %X/%X",
7383  LSN_FORMAT_ARGS(pagePtr));
7384 
7385  /* The current WAL insert position should be right after the page header */
7386  startPos = pagePtr;
7387  if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7388  startPos += SizeOfXLogLongPHD;
7389  else
7390  startPos += SizeOfXLogShortPHD;
7391  recptr = GetXLogInsertRecPtr();
7392  if (recptr != startPos)
7393  elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
7394  LSN_FORMAT_ARGS(recptr));
7395 
7396  START_CRIT_SECTION();
7397 
7398  /*
7399  * Initialize the XLOG page header (by GetXLogBuffer), and set the
7400  * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7401  *
7402  * No other backend is allowed to write WAL yet, so acquiring the WAL
7403  * insertion lock is just pro forma.
7404  */
7405  WALInsertLockAcquire();
7406  pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7407  pagehdr->xlp_info |= XLP_FIRST_IS_OVERWRITE_CONTRECORD;
7408  WALInsertLockRelease();
7409 
7410  /*
7411  * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7412  * page. We know it becomes the first record, because no other backend is
7413  * allowed to write WAL yet.
7414  */
7415  XLogBeginInsert();
7416  xlrec.overwritten_lsn = aborted_lsn;
7417  xlrec.overwrite_time = GetCurrentTimestamp();
7418  XLogRegisterData((char *) &xlrec, sizeof(xl_overwrite_contrecord));
7419  recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7420 
7421  /* check that the record was inserted to the right place */
7422  if (ProcLastRecPtr != startPos)
7423  elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
7424  LSN_FORMAT_ARGS(ProcLastRecPtr));
7425 
7426  XLogFlush(recptr);
7427 
7428  END_CRIT_SECTION();
7429 
7430  return recptr;
7431 }
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
static void WALInsertLockAcquire(void)
Definition: xlog.c:1364
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9365
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7521 of file xlog.c.

/*
 * CreateRestartPoint -- establish a restartpoint while in recovery.
 *
 * flags: checkpoint request flags; CHECKPOINT_IS_SHUTDOWN is tested below,
 * and the value is passed through to LogCheckpointStart(),
 * update_checkpoint_display() and CheckPointGuts().
 *
 * Returns false when the restartpoint is skipped (recovery has already
 * ended, or the last replayed checkpoint record is not newer than the one
 * already recorded in pg_control); returns true once the restartpoint work
 * has been carried out.
 *
 * NOTE(review): this listing is an extract.  Wherever the embedded source
 * line numbers skip (e.g. 7535, 7538, 7542, 7576, 7580-7581, 7596, ...) the
 * corresponding statements are absent from this page; consult xlog.c for
 * the complete body.
 */
7522 {
7523  XLogRecPtr lastCheckPointRecPtr;
7524  XLogRecPtr lastCheckPointEndPtr;
7525  CheckPoint lastCheckPoint;
7526  XLogRecPtr PriorRedoPtr;
7527  XLogRecPtr receivePtr;
7528  XLogRecPtr replayPtr;
7529  TimeLineID replayTLI;
7530  XLogRecPtr endptr;
7531  XLogSegNo _logSegNo;
7532  TimestampTz xtime;
7533 
7534  /* Concurrent checkpoint/restartpoint cannot happen */
/*
 * NOTE(review): listing line 7535 is missing here.  The References list for
 * this function names Assert, IsUnderPostmaster, MyBackendType and
 * B_CHECKPOINTER, so it is presumably the assertion the comment above
 * describes -- confirm against xlog.c.
 */
7536 
7537  /* Get a local copy of the last safe checkpoint record. */
7539  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7540  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7541  lastCheckPoint = XLogCtl->lastCheckPoint;
7543 
7544  /*
7545  * Check that we're still in recovery mode. It's ok if we exit recovery
7546  * mode after this check, the restart point is valid anyway.
7547  */
7548  if (!RecoveryInProgress())
7549  {
7550  ereport(DEBUG2,
7551  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7552  return false;
7553  }
7554 
7555  /*
7556  * If the last checkpoint record we've replayed is already our last
7557  * restartpoint, we can't perform a new restart point. We still update
7558  * minRecoveryPoint in that case, so that if this is a shutdown restart
7559  * point, we won't start up earlier than before. That's not strictly
7560  * necessary, but when hot standby is enabled, it would be rather weird if
7561  * the database opened up for read-only connections at a point-in-time
7562  * before the last shutdown. Such time travel is still possible in case of
7563  * immediate shutdown, though.
7564  *
7565  * We don't explicitly advance minRecoveryPoint when we do create a
7566  * restartpoint. It's assumed that flushing the buffers will do that as a
7567  * side-effect.
7568  */
7569  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7570  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7571  {
7572  ereport(DEBUG2,
7573  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7574  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7575 
7577  if (flags & CHECKPOINT_IS_SHUTDOWN)
7578  {
7579  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7582  LWLockRelease(ControlFileLock);
7583  }
7584  return false;
7585  }
7586 
7587  /*
7588  * Update the shared RedoRecPtr so that the startup process can calculate
7589  * the number of segments replayed since last restartpoint, and request a
7590  * restartpoint if it exceeds CheckPointSegments.
7591  *
7592  * Like in CreateCheckPoint(), hold off insertions to update it, although
7593  * during recovery this is just pro forma, because no WAL insertions are
7594  * happening.
7595  */
7597  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7599 
7600  /* Also update the info_lck-protected copy */
7602  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7604 
7605  /*
7606  * Prepare to accumulate statistics.
7607  *
7608  * Note: because it is possible for log_checkpoints to change while a
7609  * checkpoint proceeds, we always accumulate stats, even if
7610  * log_checkpoints is currently off.
7611  */
7612  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7614 
7615  if (log_checkpoints)
7616  LogCheckpointStart(flags, true);
7617 
7618  /* Update the process title */
7619  update_checkpoint_display(flags, true, false);
7620 
7621  CheckPointGuts(lastCheckPoint.redo, flags);
7622 
7623  /*
7624  * This location needs to be after CheckPointGuts() to ensure that some
7625  * work has already happened during this checkpoint.
7626  */
7627  INJECTION_POINT("create-restart-point");
7628 
7629  /*
7630  * Remember the prior checkpoint's redo ptr for
7631  * UpdateCheckPointDistanceEstimate()
7632  */
7633  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7634 
7635  /*
7636  * Update pg_control, using current time. Check that it still shows an
7637  * older checkpoint, else do nothing; this is a quick hack to make sure
7638  * nothing really bad happens if somehow we get here after the
7639  * end-of-recovery checkpoint.
7640  */
7641  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7642  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7643  {
7644  /*
7645  * Update the checkpoint information. We do this even if the cluster
7646  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7647  * segments recycled below.
7648  */
7649  ControlFile->checkPoint = lastCheckPointRecPtr;
7650  ControlFile->checkPointCopy = lastCheckPoint;
7651 
7652  /*
7653  * Ensure minRecoveryPoint is past the checkpoint record and update it
7654  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7655  * this will have happened already while writing out dirty buffers,
7656  * but not necessarily - e.g. because no buffers were dirtied. We do
7657  * this because a backup performed in recovery uses minRecoveryPoint
7658  * to determine which WAL files must be included in the backup, and
7659  * the file (or files) containing the checkpoint record must be
7660  * included, at a minimum. Note that for an ordinary restart of
7661  * recovery there's no value in having the minimum recovery point any
7662  * earlier than this anyway, because redo will begin just after the
7663  * checkpoint record.
7664  */
/*
 * NOTE(review): listing line 7665 (the condition guarding the block below)
 * is missing.  Going by the comment above, it presumably tests whether the
 * control file state is DB_IN_ARCHIVE_RECOVERY -- confirm against xlog.c.
 */
7666  {
7667  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7668  {
7669  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7671 
7672  /* update local copy */
7675  }
7676  if (flags & CHECKPOINT_IS_SHUTDOWN)
7678  }
7680  }
7681  LWLockRelease(ControlFileLock);
7682 
7683  /*
7684  * Update the average distance between checkpoints/restartpoints if the
7685  * prior checkpoint exists.
7686  */
7687  if (PriorRedoPtr != InvalidXLogRecPtr)
/*
 * NOTE(review): listing line 7688 (the body of the "if" above) is missing;
 * presumably the UpdateCheckPointDistanceEstimate() call that the earlier
 * PriorRedoPtr comment refers to -- confirm against xlog.c.
 */
7689 
7690  /*
7691  * Delete old log files, those no longer needed for last restartpoint to
7692  * prevent the disk holding the xlog from growing full.
7693  */
7695 
7696  /*
7697  * Retreat _logSegNo using the current end of xlog replayed or received,
7698  * whichever is later.
7699  */
7700  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7701  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7702  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7703  KeepLogSeg(endptr, &_logSegNo);
7705  _logSegNo, InvalidOid,
7707  {
7708  /*
7709  * Some slots have been invalidated; recalculate the old-segment
7710  * horizon, starting again from RedoRecPtr.
7711  */
7713  KeepLogSeg(endptr, &_logSegNo);
7714  }
7715  _logSegNo--;
7716 
7717  /*
7718  * Try to recycle segments on a useful timeline. If we've been promoted
7719  * since the beginning of this restartpoint, use the new timeline chosen
7720  * at end of recovery. If we're still in recovery, use the timeline we're
7721  * currently replaying.
7722  *
7723  * There is no guarantee that the WAL segments will be useful on the
7724  * current timeline; if recovery proceeds to a new timeline right after
7725  * this, the pre-allocated WAL segments on this timeline will not be used,
7726  * and will go wasted until recycled on the next restartpoint. We'll live
7727  * with that.
7728  */
7729  if (!RecoveryInProgress())
7730  replayTLI = XLogCtl->InsertTimeLineID;
7731 
7732  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7733 
7734  /*
7735  * Make more log segments if needed. (Do this after recycling old log
7736  * segments, since that may supply some of the needed files.)
7737  */
7738  PreallocXlogFiles(endptr, replayTLI);
7739 
7740  /*
7741  * Truncate pg_subtrans if possible. We can throw away all data before
7742  * the oldest XMIN of any running transaction. No future transaction will
7743  * attempt to reference any pg_subtrans entry older than that (see Asserts
7744  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7745  * this because StartupSUBTRANS hasn't been called yet.
7746  */
7747  if (EnableHotStandby)
7749 
7750  /* Real work is done; log and update stats. */
7751  LogCheckpointEnd(true);
7752 
7753  /* Reset the process title */
7754  update_checkpoint_display(flags, true, true);
7755 
7756  xtime = GetLatestXTime();
7758  (errmsg("recovery restart point at %X/%X",
7759  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7760  xtime ? errdetail("Last completed transaction was at log time %s.",
7761  timestamptz_to_str(xtime)) : 0));
7762 
7763  /*
7764  * Finally, execute archive_cleanup_command, if any.
7765  */
7766  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7768  "archive_cleanup_command",
7769  false,
7770  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7771 
7772  return true;
7773 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1854
int64 TimestampTz
Definition: timestamp.h:39
#define LOG
Definition: elog.h:31
bool IsUnderPostmaster
Definition: globals.c:118
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:357
BackendType MyBackendType
Definition: miscinit.c:63
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:538
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:536
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:537
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2709
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:639
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:640
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:84
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9324 of file xlog.c.

/*
 * do_pg_abort_backup -- cleanup callback for an interrupted backup.
 *
 * code: not referenced in the visible body.
 * arg:  boolean Datum; true when invoked while a backup start is still in
 *       progress, in which case no session backup may be marked as running
 *       yet (asserted below) and no WARNING is emitted.
 *
 * The cleanup runs when called during backup start or when this session
 * has a backup in progress (sessionBackupState != SESSION_BACKUP_NONE).
 *
 * NOTE(review): listing lines 9333-9335 and 9337-9338 are missing from
 * this extract.  The References list names WALInsertLockAcquireExclusive(),
 * WALInsertLockRelease() and XLogCtlInsert::runningBackups, so the missing
 * statements presumably adjust the shared backup counter and reset
 * sessionBackupState under the WAL insertion locks -- confirm in xlog.c.
 */
9325 {
9326  bool during_backup_start = DatumGetBool(arg);
9327 
9328  /* If called during backup start, there shouldn't be one already running */
9329  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9330 
9331  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9332  {
9336 
9339 
9340  if (!during_backup_start)
9341  ereport(WARNING,
9342  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9343  }
9344 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:436
static SessionBackupState sessionBackupState
Definition: xlog.c:389
@ SESSION_BACKUP_NONE
Definition: xlog.h:285

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8722 of file xlog.c.

/*
 * do_pg_backup_start -- begin an online base backup.
 *
 * backupidstr:   backup label; copied into state->name.  An ERROR is raised
 *                if it is longer than MAXPGPATH bytes.
 * fast:          when true, the forced checkpoint is requested with
 *                CHECKPOINT_IMMEDIATE.
 * tablespaces:   if non-NULL, a tablespaceinfo entry is appended to this
 *                list for every tablespace found under pg_tblspc.
 * state:         must be non-NULL; receives name, checkpointloc,
 *                startpoint, starttime and started_in_recovery.
 * tblspcmapfile: receives one "<oid> <escaped link target>" line per
 *                symlinked tablespace (newline, carriage return and
 *                backslash are backslash-escaped).  Directories created
 *                in place under pg_tblspc are not written to it.
 *
 * NOTE(review): this listing is an extract.  Wherever the embedded source
 * line numbers skip (e.g. 8725, 8728, 8734, 8769-8771, 8778, 8807, 8832,
 * 9017, 9024) the corresponding statements are absent from this page;
 * consult xlog.c for the complete body.
 */
8724 {
8726 
8727  Assert(state != NULL);
8729 
8730  /*
8731  * During recovery, we don't need to check WAL level. Because, if WAL
8732  * level is not sufficient, it's impossible to get here during recovery.
8733  */
8735  ereport(ERROR,
8736  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8737  errmsg("WAL level not sufficient for making an online backup"),
8738  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8739 
8740  if (strlen(backupidstr) > MAXPGPATH)
8741  ereport(ERROR,
8742  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8743  errmsg("backup label too long (max %d bytes)",
8744  MAXPGPATH)));
8745 
8746  strlcpy(state->name, backupidstr, sizeof(state->name));
8747 
8748  /*
8749  * Mark backup active in shared memory. We must do full-page WAL writes
8750  * during an on-line backup even if not doing so at other times, because
8751  * it's quite possible for the backup dump to obtain a "torn" (partially
8752  * written) copy of a database page if it reads the page concurrently with
8753  * our write to the same page. This can be fixed as long as the first
8754  * write to the page in the WAL sequence is a full-page write. Hence, we
8755  * increment runningBackups then force a CHECKPOINT, to ensure there are
8756  * no dirty pages in shared memory that might get dumped while the backup
8757  * is in progress without having a corresponding WAL record. (Once the
8758  * backup is complete, we need not force full-page writes anymore, since
8759  * we expect that any pages not modified during the backup interval must
8760  * have been correctly captured by the backup.)
8761  *
8762  * Note that forcing full-page writes has no effect during an online
8763  * backup from the standby.
8764  *
8765  * We must hold all the insertion locks to change the value of
8766  * runningBackups, to ensure adequate interlocking against
8767  * XLogInsertRecord().
8768  */
8772 
8773  /*
8774  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8775  * work correctly, it is critical that sessionBackupState is only updated
8776  * after this block is over.
8777  */
8779  {
8780  bool gotUniqueStartpoint = false;
8781  DIR *tblspcdir;
8782  struct dirent *de;
8783  tablespaceinfo *ti;
8784  int datadirpathlen;
8785 
8786  /*
8787  * Force an XLOG file switch before the checkpoint, to ensure that the
8788  * WAL segment the checkpoint is written to doesn't contain pages with
8789  * old timeline IDs. That would otherwise happen if you called
8790  * pg_backup_start() right after restoring from a PITR archive: the
8791  * first WAL segment containing the startup checkpoint has pages in
8792  * the beginning with the old timeline ID. That can cause trouble at
8793  * recovery: we won't have a history file covering the old timeline if
8794  * pg_wal directory was not included in the base backup and the WAL
8795  * archive was cleared too before starting the backup.
8796  *
8797  * This also ensures that we have emitted a WAL page header that has
8798  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8799  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8800  * compress out removable backup blocks, it won't remove any that
8801  * occur after this point.
8802  *
8803  * During recovery, we skip forcing XLOG file switch, which means that
8804  * the backup taken during recovery is not available for the special
8805  * recovery case described above.
8806  */
8808  RequestXLogSwitch(false);
8809 
8810  do
8811  {
8812  bool checkpointfpw;
8813 
8814  /*
8815  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8816  * page problems, this guarantees that two successive backup runs
8817  * will have different checkpoint positions and hence different
8818  * history file names, even if nothing happened in between.
8819  *
8820  * During recovery, establish a restartpoint if possible. We use
8821  * the last restartpoint as the backup starting checkpoint. This
8822  * means that two successive backup runs can have same checkpoint
8823  * positions.
8824  *
8825  * Since the fact that we are executing do_pg_backup_start()
8826  * during recovery means that checkpointer is running, we can use
8827  * RequestCheckpoint() to establish a restartpoint.
8828  *
8829  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8830  * passing fast = true). Otherwise this can take awhile.
8831  */
8833  (fast ? CHECKPOINT_IMMEDIATE : 0));
8834 
8835  /*
8836  * Now we need to fetch the checkpoint record location, and also
8837  * its REDO pointer. The oldest point in WAL that would be needed
8838  * to restore starting from the checkpoint is precisely the REDO
8839  * pointer.
8840  */
8841  LWLockAcquire(ControlFileLock, LW_SHARED);
8842  state->checkpointloc = ControlFile->checkPoint;
8843  state->startpoint = ControlFile->checkPointCopy.redo;
8845  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8846  LWLockRelease(ControlFileLock);
8847 
8849  {
8850  XLogRecPtr recptr;
8851 
8852  /*
8853  * Check to see if all WAL replayed during online backup
8854  * (i.e., since last restartpoint used as backup starting
8855  * checkpoint) contain full-page writes.
8856  */
8858  recptr = XLogCtl->lastFpwDisableRecPtr;
8860 
8861  if (!checkpointfpw || state->startpoint <= recptr)
8862  ereport(ERROR,
8863  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8864  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8865  "since last restartpoint"),
8866  errhint("This means that the backup being taken on the standby "
8867  "is corrupt and should not be used. "
8868  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
8869  "and then try an online backup again.")));
8870 
8871  /*
8872  * During recovery, since we don't use the end-of-backup WAL
8873  * record and don't write the backup history file, the
8874  * starting WAL location doesn't need to be unique. This means
8875  * that two base backups started at the same time might use
8876  * the same checkpoint as starting locations.
8877  */
8878  gotUniqueStartpoint = true;
8879  }
8880 
8881  /*
8882  * If two base backups are started at the same time (in WAL sender
8883  * processes), we need to make sure that they use different
8884  * checkpoints as starting locations, because we use the starting
8885  * WAL location as a unique identifier for the base backup in the
8886  * end-of-backup WAL record and when we write the backup history
8887  * file. Perhaps it would be better to generate a separate unique ID
8888  * for each backup instead of forcing another checkpoint, but
8889  * taking a checkpoint right after another is not that expensive
8890  * either because only few buffers have been dirtied yet.
8891  */
8893  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8894  {
8895  XLogCtl->Insert.lastBackupStart = state->startpoint;
8896  gotUniqueStartpoint = true;
8897  }
8899  } while (!gotUniqueStartpoint);
8900 
8901  /*
8902  * Construct tablespace_map file.
8903  */
8904  datadirpathlen = strlen(DataDir);
8905 
8906  /* Collect information about all tablespaces */
8907  tblspcdir = AllocateDir("pg_tblspc");
8908  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
8909  {
8910  char fullpath[MAXPGPATH + 10];
8911  char linkpath[MAXPGPATH];
8912  char *relpath = NULL;
8913  char *s;
8914  PGFileType de_type;
8915  char *badp;
8916  Oid tsoid;
8917 
8918  /*
8919  * Try to parse the directory name as an unsigned integer.
8920  *
8921  * Tablespace directories should be positive integers that can be
8922  * represented in 32 bits, with no leading zeroes or trailing
8923  * garbage. If we come across a name that doesn't meet those
8924  * criteria, skip it.
8925  *
8926  * The first character must therefore be a digit in '1'..'9'; both
8927  * bounds are checked on d_name[0].  Trailing garbage is caught by
8928  * the strtoul() end-pointer test below.
8929  */
8926  if (de->d_name[0] < '1' || de->d_name[0] > '9')
8927  continue;
8928  errno = 0;
8929  tsoid = strtoul(de->d_name, &badp, 10);
8930  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
8931  continue;
8932 
8933  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
8934 
8935  de_type = get_dirent_type(fullpath, de, false, ERROR);
8936 
8937  if (de_type == PGFILETYPE_LNK)
8938  {
8939  StringInfoData escapedpath;
8940  int rllen;
8941 
8942  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
8943  if (rllen < 0)
8944  {
8945  ereport(WARNING,
8946  (errmsg("could not read symbolic link \"%s\": %m",
8947  fullpath)));
8948  continue;
8949  }
8950  else if (rllen >= sizeof(linkpath))
8951  {
8952  ereport(WARNING,
8953  (errmsg("symbolic link \"%s\" target is too long",
8954  fullpath)));
8955  continue;
8956  }
8957  linkpath[rllen] = '\0';
8958 
8959  /*
8960  * Relpath holds the relative path of the tablespace directory
8961  * when it's located within PGDATA, or NULL if it's located
8962  * elsewhere.
8963  */
8964  if (rllen > datadirpathlen &&
8965  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
8966  IS_DIR_SEP(linkpath[datadirpathlen]))
8967  relpath = pstrdup(linkpath + datadirpathlen + 1);
8968 
8969  /*
8970  * Add a backslash-escaped version of the link path to the
8971  * tablespace map file.
8972  */
8973  initStringInfo(&escapedpath);
8974  for (s = linkpath; *s; s++)
8975  {
8976  if (*s == '\n' || *s == '\r' || *s == '\\')
8977  appendStringInfoChar(&escapedpath, '\\');
8978  appendStringInfoChar(&escapedpath, *s);
8979  }
8980  appendStringInfo(tblspcmapfile, "%s %s\n",
8981  de->d_name, escapedpath.data);
8982  pfree(escapedpath.data);
8983  }
8984  else if (de_type == PGFILETYPE_DIR)
8985  {
8986  /*
8987  * It's possible to use allow_in_place_tablespaces to create
8988  * directories directly under pg_tblspc, for testing purposes
8989  * only.
8990  *
8991  * In this case, we store a relative path rather than an
8992  * absolute path into the tablespaceinfo.
8993  */
8994  snprintf(linkpath, sizeof(linkpath), "pg_tblspc/%s",
8995  de->d_name);
8996  relpath = pstrdup(linkpath);
8997  }
8998  else
8999  {
9000  /* Skip any other file type that appears here. */
9001  continue;
9002  }
9003 
9004  ti = palloc(sizeof(tablespaceinfo));
9005  ti->oid = tsoid;
9006  ti->path = pstrdup(linkpath);
9007  ti->rpath = relpath;
9008  ti->size = -1;
9009 
9010  if (tablespaces)
9011  *tablespaces = lappend(*tablespaces, ti);
9012  }
9013  FreeDir(tblspcdir);
9014 
9015  state->starttime = (pg_time_t) time(NULL);
9016  }
9018 
9019  state->started_in_recovery = backup_started_in_recovery;
9020 
9021  /*
9022  * Mark that the start phase has correctly finished for the backup.
9023  */
9025 }
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:941
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:69
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:102
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:94
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:544
XLogRecPtr lastBackupStart
Definition: xlog.c:437
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:7990
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9324
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:286
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9050 of file xlog.c.

/*
 * do_pg_backup_stop -- finish an online base backup.
 *
 * state:          must be non-NULL.  startpoint and started_in_recovery are
 *                 read; stoppoint is always set, stoptli and stoptime are
 *                 set on the not-in-recovery path shown below.
 * waitforarchive: when true and WAL archiving applies (XLogArchivingActive()
 *                 outside recovery, XLogArchivingAlways() during recovery),
 *                 wait until the last WAL segment of the backup and the
 *                 backup history file are no longer reported busy by
 *                 XLogArchiveIsBusy(); otherwise a NOTICE tells the caller
 *                 to obtain the required WAL by other means.
 *
 * Raises ERROR if the WAL level is insufficient, if a backup started on a
 * standby finds the server promoted, if WAL written with full_page_writes
 * off was replayed during the backup, or if the backup history file cannot
 * be created or written.
 *
 * NOTE(review): this listing is an extract.  Wherever the embedded source
 * line numbers skip (e.g. 9083, 9089-9090, 9101, 9103, 9153, 9155, 9170,
 * 9208, 9219, 9229, 9259, 9271, 9281, 9284) the corresponding statements
 * are absent from this page; consult xlog.c for the complete body.
 */
9051 {
9052  bool backup_stopped_in_recovery = false;
9053  char histfilepath[MAXPGPATH];
9054  char lastxlogfilename[MAXFNAMELEN];
9055  char histfilename[MAXFNAMELEN];
9056  XLogSegNo _logSegNo;
9057  FILE *fp;
9058  int seconds_before_warning;
9059  int waits = 0;
9060  bool reported_waiting = false;
9061 
9062  Assert(state != NULL);
9063 
9064  backup_stopped_in_recovery = RecoveryInProgress();
9065 
9066  /*
9067  * During recovery, we don't need to check WAL level. Because, if WAL
9068  * level is not sufficient, it's impossible to get here during recovery.
9069  */
9070  if (!backup_stopped_in_recovery && !XLogIsNeeded())
9071  ereport(ERROR,
9072  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9073  errmsg("WAL level not sufficient for making an online backup"),
9074  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9075 
9076  /*
9077  * OK to update backup counter and session-level lock.
9078  *
9079  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9080  * otherwise they can be updated inconsistently, which might cause
9081  * do_pg_abort_backup() to fail.
9082  */
9084 
9085  /*
9086  * It is expected that each do_pg_backup_start() call is matched by
9087  * exactly one do_pg_backup_stop() call.
9088  */
9091 
9092  /*
9093  * Clean up session-level lock.
9094  *
9095  * You might think that WALInsertLockRelease() can be called before
9096  * cleaning up session-level lock because session-level lock doesn't need
9097  * to be protected with WAL insertion lock. But since
9098  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9099  * cleaned up before it.
9100  */
9102 
9104 
9105  /*
9106  * If we are taking an online backup from the standby, we confirm that the
9107  * standby has not been promoted during the backup.
9108  */
9109  if (state->started_in_recovery && !backup_stopped_in_recovery)
9110  ereport(ERROR,
9111  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9112  errmsg("the standby was promoted during online backup"),
9113  errhint("This means that the backup being taken is corrupt "
9114  "and should not be used. "
9115  "Try taking another online backup.")));
9116 
9117  /*
9118  * During recovery, we don't write an end-of-backup record. We assume that
9119  * pg_control was backed up last and its minimum recovery point can be
9120  * available as the backup end location. Since we don't have an
9121  * end-of-backup record, we use the pg_control value to check whether
9122  * we've reached the end of backup when starting recovery from this
9123  * backup. We have no way of checking if pg_control wasn't backed up last
9124  * however.
9125  *
9126  * We don't force a switch to new WAL file but it is still possible to
9127  * wait for all the required files to be archived if waitforarchive is
9128  * true. This is okay if we use the backup to start a standby and fetch
9129  * the missing WAL using streaming replication. But in the case of an
9130  * archive recovery, a user should set waitforarchive to true and wait for
9131  * them to be archived to ensure that all the required files are
9132  * available.
9133  *
9134  * We return the current minimum recovery point as the backup end
9135  * location. Note that it can be greater than the exact backup end
9136  * location if the minimum recovery point is updated after the backup of
9137  * pg_control. This is harmless for current uses.
9138  *
9139  * XXX currently a backup history file is for informational and debug
9140  * purposes only. It's not essential for an online backup. Furthermore,
9141  * even if it's created, it will not be archived during recovery because
9142  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9143  * backup history file during recovery.
9144  */
9145  if (backup_stopped_in_recovery)
9146  {
9147  XLogRecPtr recptr;
9148 
9149  /*
9150  * Check to see if all WAL replayed during online backup contain
9151  * full-page writes.
9152  */
9154  recptr = XLogCtl->lastFpwDisableRecPtr;
9156 
9157  if (state->startpoint <= recptr)
9158  ereport(ERROR,
9159  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9160  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9161  "during online backup"),
9162  errhint("This means that the backup being taken on the standby "
9163  "is corrupt and should not be used. "
9164  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9165  "and then try an online backup again.")));
9166 
9167 
9168  LWLockAcquire(ControlFileLock, LW_SHARED);
9169  state->stoppoint = ControlFile->minRecoveryPoint;
9171  LWLockRelease(ControlFileLock);
9172  }
9173  else
9174  {
9175  char *history_file;
9176 
9177  /*
9178  * Write the backup-end xlog record
9179  */
9180  XLogBeginInsert();
9181  XLogRegisterData((char *) (&state->startpoint),
9182  sizeof(state->startpoint));
9183  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9184 
9185  /*
9186  * Given that we're not in recovery, InsertTimeLineID is set and can't
9187  * change, so we can read it without a lock.
9188  */
9189  state->stoptli = XLogCtl->InsertTimeLineID;
9190 
9191  /*
9192  * Force a switch to a new xlog segment file, so that the backup is
9193  * valid as soon as archiver moves out the current segment file.
9194  */
9195  RequestXLogSwitch(false);
9196 
9197  state->stoptime = (pg_time_t) time(NULL);
9198 
9199  /*
9200  * Write the backup history file
9201  */
9202  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9203  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9204  state->startpoint, wal_segment_size);
9205  fp = AllocateFile(histfilepath, "w");
9206  if (!fp)
9207  ereport(ERROR,
9209  errmsg("could not create file \"%s\": %m",
9210  histfilepath)));
9211 
9212  /* Build and save the contents of the backup history file */
9213  history_file = build_backup_content(state, true);
9214  fprintf(fp, "%s", history_file);
9215  pfree(history_file);
9216 
9217  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9218  ereport(ERROR,
9220  errmsg("could not write file \"%s\": %m",
9221  histfilepath)));
9222 
9223  /*
9224  * Clean out any no-longer-needed history files. As a side effect,
9225  * this will post a .ready file for the newly created history file,
9226  * notifying the archiver that history file may be archived
9227  * immediately.
9228  */
9230  }
9231 
9232  /*
9233  * If archiving is enabled, wait for all the required WAL files to be
9234  * archived before returning. If archiving isn't enabled, the required WAL
9235  * needs to be transported via streaming replication (hopefully with
9236  * wal_keep_size set high enough), or some more exotic mechanism like
9237  * polling and copying files from pg_wal with script. We have no knowledge
9238  * of those mechanisms, so it's up to the user to ensure that he gets all
9239  * the required WAL.
9240  *
9241  * We wait until both the last WAL file filled during backup and the
9242  * history file have been archived, and assume that the alphabetic sorting
9243  * property of the WAL files ensures any earlier WAL files are safely
9244  * archived as well.
9245  *
9246  * We wait forever, since archive_command is supposed to work and we
9247  * assume the admin wanted his backup to work completely. If you don't
9248  * wish to wait, then either waitforarchive should be passed in as false,
9249  * or you can set statement_timeout. Also, some notices are issued to
9250  * clue in anyone who might be doing this interactively.
9251  */
9252 
9253  if (waitforarchive &&
9254  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9255  (backup_stopped_in_recovery && XLogArchivingAlways())))
9256  {
9257  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9258  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9260 
9261  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9262  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9263  state->startpoint, wal_segment_size);
9264 
9265  seconds_before_warning = 60;
9266  waits = 0;
9267 
9268  while (XLogArchiveIsBusy(lastxlogfilename) ||
9269  XLogArchiveIsBusy(histfilename))
9270  {
9272 
9273  if (!reported_waiting && waits > 5)
9274  {
9275  ereport(NOTICE,
9276  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9277  reported_waiting = true;
9278  }
9279 
9280  (void) WaitLatch(MyLatch,
9282  1000L,
9283  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9285 
9286  if (++waits >= seconds_before_warning)
9287  {
9288  seconds_before_warning *= 2; /* This wraps in >10 years... */
9289  ereport(WARNING,
9290  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9291  waits),
9292  errhint("Check that your \"archive_command\" is executing properly. "
9293  "You can safely cancel this backup, "
9294  "but the database backup will not be usable without all the WAL segments.")));
9295  }
9296  }
9297 
9298  ereport(NOTICE,
9299  (errmsg("all required WAL segments have been archived")));
9300  }
9301  else if (waitforarchive)
9302  ereport(NOTICE,
9303  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9304 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2583
int FreeFile(FILE *file)
Definition: fd.c:2781
struct Latch * MyLatch
Definition: globals.c:61
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4148
#define XLogArchivingAlways()
Definition: xlog.h:102
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9031 of file xlog.c.

9032 {
9033  return sessionBackupState;
9034 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8523 of file xlog.c.

8524 {
8525  int o_direct_flag = 0;
8526 
8527  /*
8528  * Use O_DIRECT if requested, except in walreceiver process. The WAL
8529  * written by walreceiver is normally read by the startup process soon
8530  * after it's written. Also, walreceiver performs unaligned writes, which
8531  * don't work with O_DIRECT, so it is required for correctness too.
8532  */
8533  if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
8534  o_direct_flag = PG_O_DIRECT;
8535 
8536  /* If fsync is disabled, never open in sync mode */
8537  if (!enableFsync)
8538  return o_direct_flag;
8539 
8540  switch (method)
8541  {
8542  /*
8543  * enum values for all sync options are defined even if they are
8544  * not supported on the current platform. But if not, they are
8545  * not included in the enum option array, and therefore will never
8546  * be seen here.
8547  */
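8548  case WAL_SYNC_METHOD_FSYNC:
8549  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8550  case WAL_SYNC_METHOD_FDATASYNC:
8551  return o_direct_flag;
8552 #ifdef O_SYNC
8553  case WAL_SYNC_METHOD_OPEN:
8554  return O_SYNC | o_direct_flag;
8555 #endif
8556 #ifdef O_DSYNC
8557  case WAL_SYNC_METHOD_OPEN_DSYNC:
8558  return O_DSYNC | o_direct_flag;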
8559 #endif
8560  default:
8561  /* can't happen (unless we are out of sync with option array) */
8562  elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8563  return 0; /* silence warning */
8564  }
8565 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:127
#define AmWalReceiverProcess()
Definition: miscadmin.h:383
#define O_DSYNC
Definition: win32_port.h:352
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4824 of file xlog.c.

4825 {
4826  return ControlFile->wal_level;
4827 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4569 of file xlog.c.

4570 {
4571  return pg_atomic_fetch_add_u64(&XLogCtl->unloggedLSN, 1);
4572 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:515

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

Definition at line 6455 of file xlog.c.

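6456 {
6457  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6458 
6459  RefreshXLogWriteResult(LogwrtResult);
6460 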
6461  /*
6462  * If we're writing and flushing WAL, the time line can't be changing, so
6463  * no lock is required.
6464  */
6465  if (insertTLI)
6466  *insertTLI = XLogCtl->InsertTimeLineID;
6467 
6468  return LogwrtResult.Flush;
6469 }
RecoveryState SharedRecoveryState
Definition: xlog.c:509
XLogRecPtr Flush
Definition: xlog.c:326
@ RECOVERY_STATE_DONE
Definition: xlog.h:93

References Assert, XLogwrtResult::Flush, XLogCtlData::InsertTimeLineID, LogwrtResult, RECOVERY_STATE_DONE, RefreshXLogWriteResult, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by binary_upgrade_logical_slot_has_caught_up(), get_flush_position(), GetCurrentLSN(), GetLatestLSN(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), pg_replication_slot_advance(), read_local_xlog_page_guts(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6423 of file xlog.c.

6424 {
6425  *RedoRecPtr_p = RedoRecPtr;
6426  *doPageWrites_p = doPageWrites;
6427 }
static bool doPageWrites
Definition: xlog.c:284

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6438 of file xlog.c.

6439 {
6440  XLogRecPtr recptr;
6441 
6442  SpinLockAcquire(&XLogCtl->info_lck);
6443  recptr = XLogCtl->LogwrtRqst.Write;
6444  SpinLockRelease(&XLogCtl->info_lck);
6445 
6446  return recptr;
6447 }

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6493 of file xlog.c.

6494 {
6495  XLogRecPtr res = InvalidXLogRecPtr;
6496  int i;
6497 
6498  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6499  {
6500  XLogRecPtr last_important;
6501 
6502  /*
6503  * Need to take a lock to prevent torn reads of the LSN, which are
6504  * possible on some of the supported platforms. WAL insert locks only
6505  * support exclusive mode, so we have to use that.
6506  */
6507  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6508  last_important = WALInsertLocks[i].l.lastImportantAt;
6509  LWLockRelease(&WALInsertLocks[i].l.lock);
6510 
6511  if (res < last_important)
6512  res = last_important;
6513  }
6514 
6515  return res;
6516 }
int i
Definition: isn.c:73
XLogRecPtr lastImportantAt
Definition: xlog.c:369
WALInsertLock l
Definition: xlog.c:381
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:562
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:148

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6522 of file xlog.c.

6523 {
6524  pg_time_t result;
6525 
6526  /* Need WALWriteLock, but shared lock is sufficient */
6527  LWLockAcquire(WALWriteLock, LW_SHARED);
6528  result = XLogCtl->lastSegSwitchTime;
6529  *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6530  LWLockRelease(WALWriteLock);
6531 
6532  return result;
6533 }
pg_time_t lastSegSwitchTime
Definition: xlog.c:465
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:466

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4543 of file xlog.c.

4544 {
4545  Assert(ControlFile != NULL);
4546  return ControlFile->mock_authentication_nonce;
4547 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

Definition at line 9393 of file xlog.c.

9394 {
9395  LWLockAcquire(ControlFileLock, LW_SHARED);
9396  *oldrecptr = ControlFile->checkPointCopy.redo;
9397  *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
9398  LWLockRelease(ControlFileLock);
9399 }

References ControlFileData::checkPointCopy, ControlFile, LW_SHARED, LWLockAcquire(), LWLockRelease(), CheckPoint::redo, and CheckPoint::ThisTimeLineID.

Referenced by ExecuteRecoveryCommand(), and RestoreArchivedFile().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6326 of file xlog.c.

6327 {
6328  RecoveryState retval;
6329 
6330  SpinLockAcquire(&XLogCtl->info_lck);
6331  retval = XLogCtl->SharedRecoveryState;
6332  SpinLockRelease(&XLogCtl->info_lck);
6333 
6334  return retval;
6335 }
RecoveryState
Definition: xlog.h:90

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6393 of file xlog.c.

6394 {
6395  XLogRecPtr ptr;
6396 
6397  /*
6398  * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6399  * grabbed a WAL insertion lock to read the authoritative value in
6400  * Insert->RedoRecPtr, someone might update it just after we've released
6401  * the lock.
6402  */
6403  SpinLockAcquire(&XLogCtl->info_lck);
6404  ptr = XLogCtl->RedoRecPtr;
6405  SpinLockRelease(&XLogCtl->info_lck);
6406 
6407  if (RedoRecPtr < ptr)
6408  RedoRecPtr = ptr;
6409 
6410  return RedoRecPtr;
6411 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7799 of file xlog.c.

7800 {
7801  XLogRecPtr currpos; /* current write LSN */
7802  XLogSegNo currSeg; /* segid of currpos */
7803  XLogSegNo targetSeg; /* segid of targetLSN */
7804  XLogSegNo oldestSeg; /* actual oldest segid */
7805  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7806  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7807  uint64 keepSegs;
7808 
7809  /*
7810  * slot does not reserve WAL. Either deactivated, or has never been active
7811  */
7812  if (XLogRecPtrIsInvalid(targetLSN))
7813  return WALAVAIL_INVALID_LSN;
7814 
7815  /*
7816  * Calculate the oldest segment currently reserved by all slots,
7817  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7818  * oldestSlotSeg to the current segment.
7819  */
7820  currpos = GetXLogWriteRecPtr();
7821  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7822  KeepLogSeg(currpos, &oldestSlotSeg);
7823 
7824  /*
7825  * Find the oldest extant segment file. We get 1 until checkpoint removes
7826  * the first WAL segment file since startup, which causes the status being
7827  * wrong under certain abnormal conditions but that doesn't actually harm.
7828  */
7829  oldestSeg = XLogGetLastRemovedSegno() + 1;
7830 
7831  /* calculate oldest segment by max_wal_size */
7832  XLByteToSeg(currpos, currSeg, wal_segment_size);
7833  keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7834 
7835  if (currSeg > keepSegs)
7836  oldestSegMaxWalSize = currSeg - keepSegs;
7837  else
7838  oldestSegMaxWalSize = 1;
7839 
7840  /* the segment we care about */
7841  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7842 
7843  /*
7844  * No point in returning reserved or extended status values if the
7845  * targetSeg is known to be lost.
7846  */
7847  if (targetSeg >= oldestSlotSeg)
7848  {
7849  /* show "reserved" when targetSeg is within max_wal_size */
7850  if (targetSeg >= oldestSegMaxWalSize)
7851  return WALAVAIL_RESERVED;
7852 
7853  /* being retained by slots exceeding max_wal_size */
7854  return WALAVAIL_EXTENDED;
7855  }
7856 
7857  /* WAL segments are no longer retained but haven't been removed yet */
7858  if (targetSeg >= oldestSeg)
7859  return WALAVAIL_UNRESERVED;
7860 
7861  /* Definitely lost */
7862  return WALAVAIL_REMOVED;
7863 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3745
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9381
@ WALAVAIL_REMOVED
Definition: xlog.h:193
@ WALAVAIL_RESERVED
Definition: xlog.h:189
@ WALAVAIL_UNRESERVED
Definition: xlog.h:192
@ WALAVAIL_EXTENDED
Definition: xlog.h:190
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:188

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6476 of file xlog.c.

6477 {
6478  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6479 
6480  /* Since the value can't be changing, no lock is required. */
6481  return XLogCtl->InsertTimeLineID;
6482 }

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1625 of file xlog.c.

1626 {
1627  int idx;
1628  XLogRecPtr endptr;
1629  static uint64 cachedPage = 0;
1630  static char *cachedPos = NULL;
1631  XLogRecPtr expectedEndPtr;
1632 
1633  /*
1634  * Fast path for the common case that we need to access again the same
1635  * page as last time.
1636  */
1637  if (ptr / XLOG_BLCKSZ == cachedPage)
1638  {
1639  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1640  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1641  return cachedPos + ptr % XLOG_BLCKSZ;
1642  }
1643 
1644  /*
1645  * The XLog buffer cache is organized so that a page is always loaded to a
1646  * particular buffer. That way we can easily calculate the buffer a given
1647  * page must be loaded into, from the XLogRecPtr alone.
1648  */
1649  idx = XLogRecPtrToBufIdx(ptr);
1650 
1651  /*
1652  * See what page is loaded in the buffer at the moment. It could be the
1653  * page we're looking for, or something older. It can't be anything newer
1654  * - that would imply the page we're looking for has already been written
1655  * out to disk and evicted, and the caller is responsible for making sure
1656  * that doesn't happen.
1657  *
1658  * We don't hold a lock while we read the value. If someone is just about
1659  * to initialize or has just initialized the page, it's possible that we
1660  * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1661  * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1662  * we're looking for.
1663  */
1664  expectedEndPtr = ptr;
1665  expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1666 
1667  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1668  if (expectedEndPtr != endptr)
1669  {
1670  XLogRecPtr initializedUpto;
1671 
1672  /*
1673  * Before calling AdvanceXLInsertBuffer(), which can block, let others
1674  * know how far we're finished with inserting the record.
1675  *
1676  * NB: If 'ptr' points to just after the page header, advertise a
1677  * position at the beginning of the page rather than 'ptr' itself. If
1678  * there are no other insertions running, someone might try to flush
1679  * up to our advertised location. If we advertised a position after
1680  * the page header, someone might try to flush the page header, even
1681  * though page might actually not be initialized yet. As the first
1682  * inserter on the page, we are effectively responsible for making
1683  * sure that it's initialized, before we let insertingAt to move past
1684  * the page header.
1685  */
1686  if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1687  XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1688  initializedUpto = ptr - SizeOfXLogShortPHD;
1689  else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1690  XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1691  initializedUpto = ptr - SizeOfXLogLongPHD;
1692  else
1693  initializedUpto = ptr;
1694 
1695  WALInsertLockUpdateInsertingAt(initializedUpto);
1696 
1697  AdvanceXLInsertBuffer(ptr, tli, false);
1698  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1699 
1700  if (expectedEndPtr != endptr)
1701  elog(PANIC, "could not find WAL buffer for %X/%X",
1702  LSN_FORMAT_ARGS(ptr));
1703  }
1704  else
1705  {
1706  /*
1707  * Make sure the initialization of the page is visible to us, and
1708  * won't arrive later to overwrite the WAL data we write on the page.
1709  */
1710  pg_memory_barrier();
1711  }
1712 
1713  /*
1714  * Found the buffer holding this page. Return a pointer to the right
1715  * offset within the page.
1716  */
1717  cachedPage = ptr / XLOG_BLCKSZ;
1718  cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1719 
1720  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1721  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1722 
1723  return cachedPos + ptr % XLOG_BLCKSZ;
1724 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_memory_barrier()
Definition: atomics.h:136
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1464
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1978

References AdvanceXLInsertBuffer(), Assert, elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9365 of file xlog.c.

9366 {
9367  XLogCtlInsert *Insert = &XLogCtl->Insert;
9368  uint64 current_bytepos;
9369 
9370  SpinLockAcquire(&Insert->insertpos_lck);
9371  current_bytepos = Insert->CurrBytePos;
9372  SpinLockRelease(&Insert->insertpos_lck);
9373 
9374  return XLogBytePosToRecPtr(current_bytepos);
9375 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4191 of file xlog.c.

4192 {
4193  char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4194 
4195  /*
4196  * Generate a random nonce. This is used for authentication requests that
4197  * will fail because the user does not exist. The nonce is used to create
4198  * a genuine-looking password challenge for the non-existent user, in lieu
4199  * of an actual stored password.
4200  */
4201  if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4202  ereport(PANIC,
4203  (errcode(ERRCODE_INTERNAL_ERROR),
4204  errmsg("could not generate secret authorization token")));
4205 
4206  memset(ControlFile, 0, sizeof(ControlFileData));
4207  /* Initialize pg_control status fields */
4208  ControlFile->system_identifier = sysidentifier;
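4209  memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4210  ControlFile->state = DB_SHUTDOWNED;
4211  ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4212 
4213  /* Set important parameter values for use when replaying WAL */
4214  ControlFile->MaxConnections = MaxConnections;
4215  ControlFile->max_worker_processes = max_worker_processes;
4216  ControlFile->max_wal_senders = max_wal_senders;
4217  ControlFile->max_prepared_xacts = max_prepared_xacts;
4218  ControlFile->max_locks_per_xact = max_locks_per_xact;
4219  ControlFile->wal_level = wal_level;
4220  ControlFile->wal_log_hints = wal_log_hints;
4221  ControlFile->track_commit_timestamp = track_commit_timestamp;
4222  ControlFile->data_checksum_version = data_checksum_version;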
4223 }
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:185
bool wal_log_hints
Definition: xlog.c:121
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4749 of file xlog.c.

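4750 {
4751  Assert(process_shared_preload_libraries_done);
4752 
4753  if (check_wal_consistency_checking_deferred)
4754  {
4755  struct config_generic *guc;
4756 
4757  guc = find_option("wal_consistency_checking", false, false, ERROR);
4758 
4759  check_wal_consistency_checking_deferred = false;
4760 
4761  set_config_option_ext("wal_consistency_checking",
4762  wal_consistency_checking_string,
4763  guc->scontext, guc->source, guc->srole,
4764  GUC_ACTION_SET, true, ERROR, false);
4765 
4766  /* checking should not be deferred again */
4767  Assert(!check_wal_consistency_checking_deferred);
4768  }
4769 }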
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1234
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3380
@ GUC_ACTION_SET
Definition: guc.h:199
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:123

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3550 of file xlog.c.

3552 {
3553  char path[MAXPGPATH];
3554  struct stat stat_buf;
3555 
3556  Assert(tli != 0);
3557 
3558  XLogFilePath(path, tli, *segno, wal_segment_size);
3559 
3560  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3561  if (!XLogCtl->InstallXLogFileSegmentActive)
3562  {
3563  LWLockRelease(ControlFileLock);
3564  return false;
3565  }
3566 
3567  if (!find_free)
3568  {
3569  /* Force installation: get rid of any pre-existing segment file */
3570  durable_unlink(path, DEBUG1);
3571  }
3572  else
3573  {
3574  /* Find a free slot to put it in */
3575  while (stat(path, &stat_buf) == 0)
3576  {
3577  if ((*segno) >= max_segno)
3578  {
3579  /* Failed to find a free slot within specified range */
3580  LWLockRelease(ControlFileLock);
3581  return false;
3582  }
3583  (*segno)++;
3584  XLogFilePath(path, tli, *segno, wal_segment_size);
3585  }
3586  }
3587 
3588  Assert(access(path, F_OK) != 0 && errno == ENOENT);
3589  if (durable_rename(tmppath, path, LOG) != 0)
3590  {
3591  LWLockRelease(ControlFileLock);
3592  /* durable_rename already emitted log message */
3593  return false;
3594  }
3595 
3596  LWLockRelease(ControlFileLock);
3597 
3598  return true;
3599 }
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:519
#define stat
Definition: win32_port.h:284

References Assert, DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9422 of file xlog.c.

9423 {
9424  bool result;
9425 
9426  LWLockAcquire(ControlFileLock, LW_SHARED);
9427  result = XLogCtl->InstallXLogFileSegmentActive;
9428  LWLockRelease(ControlFileLock);
9429 
9430  return result;
9431 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8613 of file xlog.c.

8614 {
8615  char *msg = NULL;
8616  instr_time start;
8617 
8618  Assert(tli != 0);
8619 
8620  /*
8621  * Quick exit if fsync is disabled or write() has already synced the WAL
8622  * file.
8623  */
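8624  if (!enableFsync ||
8625  wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8626  wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8627  return;
8628 
8629  /* Measure I/O timing to sync the WAL file */
8630  if (track_wal_io_timing)
8631  INSTR_TIME_SET_CURRENT(start);
8632  else
8633  INSTR_TIME_SET_ZERO(start);
8634 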
8635  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8636  switch (wal_sync_method)
8637  {
8638  case WAL_SYNC_METHOD_FSYNC:
8639  if (pg_fsync_no_writethrough(fd) != 0)
8640  msg = _("could not fsync file \"%s\": %m");
8641  break;
8642 #ifdef HAVE_FSYNC_WRITETHROUGH
8643  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8644  if (pg_fsync_writethrough(fd) != 0)
8645  msg = _("could not fsync write-through file \"%s\": %m");
8646  break;
8647 #endif
8648  case WAL_SYNC_METHOD_FDATASYNC:
8649  if (pg_fdatasync(fd) != 0)
8650  msg = _("could not fdatasync file \"%s\": %m");
8651  break;
8652  case WAL_SYNC_METHOD_OPEN:
8653  case WAL_SYNC_METHOD_OPEN_DSYNC:
8654  /* not reachable */
8655  Assert(false);
8656  break;
8657  default:
8658  ereport(PANIC,
8659  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8660  errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8661  break;
8662  }
8663 
8664  /* PANIC if failed to fsync */
8665  if (msg)
8666  {
8667  char xlogfname[MAXFNAMELEN];
8668  int save_errno = errno;
8669 
8670  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8671  errno = save_errno;
8672  ereport(PANIC,
8673  (errcode_for_file_access(),
8674  errmsg(msg, xlogfname)));
8675  }
8676 
8677  pgstat_report_wait_end();
8678 
8679  /*
8680  * Increment the I/O timing and the number of times WAL files were synced.
8681  */
8682  if (track_wal_io_timing)
8683  {
8684  instr_time end;
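8685 
8686  INSTR_TIME_SET_CURRENT(end);
8687  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
8688  }
8689 
8690  PendingWalStats.wal_sync++;
8691 }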
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:456
PgStat_Counter wal_sync
Definition: pgstat.h:454
bool track_wal_io_timing
Definition: xlog.c:135

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 7883 of file xlog.c.

7884 {
7885  XLogSegNo currSegNo;
7886  XLogSegNo segno;
7887  XLogRecPtr keep;
7888 
7889  XLByteToSeg(recptr, currSegNo, wal_segment_size);
7890  segno = currSegNo;
7891 
7892  /*
7893  * Calculate how many segments are kept by slots first, adjusting for
7894  * max_slot_wal_keep_size.
7895  */
7896  keep = XLogGetReplicationSlotMinimumLSN();
7897  if (keep != InvalidXLogRecPtr && keep < recptr)
7898  {
7899  XLByteToSeg(keep, segno, wal_segment_size);
7900 
7901  /* Cap by max_slot_wal_keep_size ... */
7902  if (max_slot_wal_keep_size_mb >= 0)
7903  {
7904  uint64 slot_keep_segs;
7905 
7906  slot_keep_segs =
7907  ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
7908 
7909  if (currSegNo - segno > slot_keep_segs)
7910  segno = currSegNo - slot_keep_segs;
7911  }
7912  }
7913 
7914  /*
7915  * If WAL summarization is in use, don't remove WAL that has yet to be
7916  * summarized.
7917  */
7918  keep = GetOldestUnsummarizedLSN(NULL, NULL);
7919  if (keep != InvalidXLogRecPtr)
7920  {
7921  XLogSegNo unsummarized_segno;
7922 
7923  XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
7924  if (unsummarized_segno < segno)
7925  segno = unsummarized_segno;
7926  }
7927 
7928  /* but, keep at least wal_keep_size if that's set */
7929  if (wal_keep_size_mb > 0)
7930  {
7931  uint64 keep_segs;
7932 
7933  keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
7934  if (currSegNo - segno < keep_segs)
7935  {
7936  /* avoid underflow, don't go below 1 */
7937  if (currSegNo <= keep_segs)
7938  segno = 1;
7939  else
7940  segno = currSegNo - keep_segs;
7941  }
7942  }
7943 
7944  /* don't delete WAL segments newer than the calculated segment */
7945  if (segno < *logSegNo)
7946  *logSegNo = segno;
7947 }
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition: xlog.c:114
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2688
int max_slot_wal_keep_size_mb
Definition: xlog.c:133

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4811 of file xlog.c.

4812 {
4813  Assert(reset || ControlFile == NULL);
4814  ControlFile = palloc(sizeof(ControlFileData));
4815  ReadControlFile();
4816 }
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6378 of file xlog.c.

/*
 * LocalSetXLogInsertAllowed: returns the value LocalXLogInsertAllowed had on
 * entry, so the caller can keep it.
 *
 * NOTE(review): source line 6382 is absent from this listing.  Going by the
 * function name it presumably assigns a new value to LocalXLogInsertAllowed
 * between the save and the return -- confirm against xlog.c.
 */
6379 {
6380  int oldXLogAllowed = LocalXLogInsertAllowed;
6381 
6383 
6384  return oldXLogAllowed;
6385 }

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6618 of file xlog.c.

/*
 * LogCheckpointEnd: add the finished checkpoint's write and sync times to
 * PendingCheckpointerStats and, if log_checkpoints is enabled, emit the
 * "checkpoint complete" / "restartpoint complete" LOG message.
 *
 * restartpoint: selects which of the two message texts is used.
 *
 * NOTE(review): this listing is missing a number of source lines (among
 * them 6627, 6629-6630, 6632-6633, 6646-6647, 6656, 6658 and several
 * arguments of each ereport call).  The assignments to write_msecs,
 * sync_msecs and total_msecs are among the missing lines; presumably they
 * are computed with TimestampDifferenceMilliseconds() from the
 * CheckpointStats timestamps named in the References list -- confirm
 * against xlog.c before relying on the details.
 */
6619 {
6620  long write_msecs,
6621  sync_msecs,
6622  total_msecs,
6623  longest_msecs,
6624  average_msecs;
6625  uint64 average_sync_time;
6626 
6628 
6631 
6634 
6635  /* Accumulate checkpoint timing summary data, in milliseconds. */
6636  PendingCheckpointerStats.write_time += write_msecs;
6637  PendingCheckpointerStats.sync_time += sync_msecs;
6638 
6639  /*
6640  * All of the published timing statistics are accounted for. Only
6641  * continue if a log message is to be written.
6642  */
6643  if (!log_checkpoints)
6644  return;
6645 
6648 
6649  /*
6650  * Timing values returned from CheckpointStats are in microseconds.
6651  * Convert to milliseconds for consistent printing.
6652  */
      /* Adding 999 before dividing rounds the microsecond value up. */
6653  longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6654 
      /*
       * NOTE(review): the guard (line 6656) and the divisor (line 6658) of
       * the division below are not shown in this listing; ckpt_sync_rels
       * appears in the References list and is presumably the divisor.
       */
6655  average_sync_time = 0;
6657  average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6659  average_msecs = (long) ((average_sync_time + 999) / 1000);
6660 
6661  /*
6662  * ControlFileLock is not required to see ControlFile->checkPoint and
6663  * ->checkPointCopy here as we are the only updater of those variables at
6664  * this moment.
6665  */
6666  if (restartpoint)
6667  ereport(LOG,
6668  (errmsg("restartpoint complete: wrote %d buffers (%.1f%%); "
6669  "%d WAL file(s) added, %d removed, %d recycled; "
6670  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6671  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6672  "distance=%d kB, estimate=%d kB; "
6673  "lsn=%X/%X, redo lsn=%X/%X",
6675  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6679  write_msecs / 1000, (int) (write_msecs % 1000),
6680  sync_msecs / 1000, (int) (sync_msecs % 1000),
6681  total_msecs / 1000, (int) (total_msecs % 1000),
6683  longest_msecs / 1000, (int) (longest_msecs % 1000),
6684  average_msecs / 1000, (int) (average_msecs % 1000),
6685  (int) (PrevCheckPointDistance / 1024.0),
6686  (int) (CheckPointDistanceEstimate / 1024.0),
6689  else
6690  ereport(LOG,
6691  (errmsg("checkpoint complete: wrote %d buffers (%.1f%%); "
6692  "%d WAL file(s) added, %d removed, %d recycled; "
6693  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6694  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6695  "distance=%d kB, estimate=%d kB; "
6696  "lsn=%X/%X, redo lsn=%X/%X",
6698  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6702  write_msecs / 1000, (int) (write_msecs % 1000),
6703  sync_msecs / 1000, (int) (sync_msecs % 1000),
6704  total_msecs / 1000, (int) (total_msecs % 1000),
6706  longest_msecs / 1000, (int) (longest_msecs % 1000),
6707  average_msecs / 1000, (int) (average_msecs % 1000),
6708  (int) (PrevCheckPointDistance / 1024.0),
6709  (int) (CheckPointDistanceEstimate / 1024.0),
6712 }
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1767
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:175
uint64 ckpt_longest_sync
Definition: xlog.h:174
TimestampTz ckpt_end_t
Definition: xlog.h:165
int ckpt_sync_rels
Definition: xlog.h:173
PgStat_Counter sync_time
Definition: pgstat.h:269
PgStat_Counter write_time
Definition: pgstat.h:268
static double CheckPointDistanceEstimate
Definition: xlog.c:157
static double PrevCheckPointDistance
Definition: xlog.c:158

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6586 of file xlog.c.

/*
 * LogCheckpointStart: emit a LOG-level message announcing that a checkpoint
 * or a restartpoint is starting.
 *
 * flags: CHECKPOINT_* bit mask.  Each bit that is set appends its keyword
 *        (" shutdown", " end-of-recovery", " immediate", " force", " wait",
 *        " wal", " time", " flush-all") to the message; a clear bit
 *        contributes an empty string.
 * restartpoint: true selects the "restartpoint starting:" text, false the
 *        "checkpoint starting:" text.
 *
 * The two branches differ only in the leading word of the format string.
 * NOTE(review): they are presumably kept as two full literals so that each
 * is a complete translatable message -- confirm.
 */
6587 {
6588  if (restartpoint)
6589  ereport(LOG,
6590  /* translator: the placeholders show checkpoint options */
6591  (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6592  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6593  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6594  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6595  (flags & CHECKPOINT_FORCE) ? " force" : "",
6596  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6597  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6598  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6599  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6600  else
6601  ereport(LOG,
6602  /* translator: the placeholders show checkpoint options */
6603  (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6604  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6605  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6606  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6607  (flags & CHECKPOINT_FORCE) ? " force" : "",
6608  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6609  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6610  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6611  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6612 }
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:148
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:143
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:149

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FLUSH_ALL, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6240 of file xlog.c.

/*
 * PerformRecoveryXLogAction: run the end-of-recovery WAL action and return
 * whether the promotion path was taken (the local "promoted" flag, which is
 * false unless the first branch below runs).
 *
 * NOTE(review): the branch condition (source lines 6257-6258) and the calls
 * made in each branch (lines 6271 and 6275-6276) are absent from this
 * listing.  Going by the surrounding comments and the References list,
 * the first branch presumably calls CreateEndOfRecoveryRecord() and the
 * second RequestCheckpoint() with CHECKPOINT_END_OF_RECOVERY,
 * CHECKPOINT_IMMEDIATE and CHECKPOINT_WAIT -- confirm against xlog.c.
 */
6241 {
6242  bool promoted = false;
6243 
6244  /*
6245  * Perform a checkpoint to update all our recovery activity to disk.
6246  *
6247  * Note that we write a shutdown checkpoint rather than an on-line one.
6248  * This is not particularly critical, but since we may be assigning a new
6249  * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6250  * only changes in shutdown checkpoints, which allows some extra error
6251  * checking in xlog_redo.
6252  *
6253  * In promotion, only create a lightweight end-of-recovery record instead
6254  * of a full checkpoint. A checkpoint is requested later, after we're
6255  * fully out of recovery mode and already accepting queries.
6256  */
6259  {
6260  promoted = true;
6261 
6262  /*
6263  * Insert a special WAL record to mark the end of recovery, since we
6264  * aren't doing a checkpoint. That means that the checkpointer process
6265  * may likely be in the middle of a time-smoothed restartpoint and
6266  * could continue to be for minutes after this. That sounds strange,
6267  * but the effect is roughly the same and it would be stranger to try
6268  * to come out of the restartpoint and then checkpoint. We request a
6269  * checkpoint later anyway, just for safety.
6270  */
6272  }
6273  else
6274  {
6277  CHECKPOINT_WAIT);
6278  }
6279 
6280  return promoted;
6281 }
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7305
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3677 of file xlog.c.

/*
 * PreallocXlogFiles: if endptr lies in the last quarter of its WAL segment,
 * create the following segment ahead of time.
 *
 * endptr: WAL position used to decide whether preallocation is due.
 * tli:    timeline passed through to XLogFileInitInternal().
 *
 * NOTE(review): source lines 3685 (the condition guarding the early return)
 * and 3697 (the statement run when "added" is true) are absent from this
 * listing.  The References list names XLogCtl->InstallXLogFileSegmentActive
 * and CheckpointStats.ckpt_segs_added, which presumably are what those
 * lines test and update -- confirm against xlog.c.
 */
3678 {
3679  XLogSegNo _logSegNo;
3680  int lf;
3681  bool added;
3682  char path[MAXPGPATH];
3683  uint64 offset;
3684 
3686  return; /* unlocked check says no */
3687 
      /* Segment number and in-segment offset of the byte just before endptr. */
3688  XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3689  offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
      /* Only act once at least 75% of the current segment has been used. */
3690  if (offset >= (uint32) (0.75 * wal_segment_size))
3691  {
3692  _logSegNo++;
3693  lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
      /* The descriptor is not needed here; close it if one was returned. */
3694  if (lf >= 0)
3695  close(lf);
3696  if (added)
3698  }
3699 }
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3197

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6203 of file xlog.c.

/*
 * ReachedEndOfBackup: update the in-memory control data once recovery has
 * reached the end of a base backup.  All changes are made while holding
 * ControlFileLock in exclusive mode.
 *
 * EndRecPtr: minRecoveryPoint is raised to this value if it is currently
 *            lower.
 * tli:       not used by any line visible in this listing.
 *
 * NOTE(review): source lines 6218, 6221-6222 and 6224 are absent from this
 * listing.  Going by the comment below and the References list they
 * presumably set minRecoveryPointTLI from tli, reset backupStartPoint and
 * backupEndPoint to InvalidXLogRecPtr, and call UpdateControlFile() --
 * confirm against xlog.c.
 */
6204 {
6205  /*
6206  * We have reached the end of base backup, as indicated by pg_control. The
6207  * data on disk is now consistent (unless minRecoveryPoint is further
6208  * ahead, which can happen if we crashed during previous recovery). Reset
6209  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6210  * make sure we don't allow starting up at an earlier point even if
6211  * recovery is stopped and restarted soon after this.
6212  */
6213  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6214 
      /* Only ever move minRecoveryPoint forward. */
6215  if (ControlFile->minRecoveryPoint < EndRecPtr)
6216  {
6217  ControlFile->minRecoveryPoint = EndRecPtr;
6219  }
6220 
6223  ControlFile->backupEndRequired = false;
6225 
6226  LWLockRelease(ControlFileLock);
6227 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4308 of file xlog.c.

/*
 * ReadControlFile: read the control file (XLOG_CONTROL_FILE) into the memory
 * that ControlFile points at, then check that its contents are usable by
 * this server binary.
 *
 * ControlFile must already point at a buffer of at least
 * sizeof(ControlFileData) bytes, because read() stores directly into it.
 *
 * Error levels used below:
 *   PANIC - the file cannot be opened or cannot be read in full.
 *   FATAL - a format-version, CRC or compile-time-constant mismatch.
 *   ERROR - an invalid WAL segment size, or the "must be at least twice
 *           wal_segment_size" checks.
 *
 * At the end, the "wal_segment_size" and "data_checksums" settings are
 * published through SetConfigOption().
 *
 * NOTE(review): many source lines are absent from this listing (for example
 * 4318, 4322, 4332, 4337, 4341, 4352, 4358-4359, 4362, 4388, 4485, 4487,
 * 4500, 4504, 4508-4512).  They include the call that opens the file, most
 * of the "if" conditions in front of the ereport calls, and the assignment
 * of wal_segment_size.  Confirm details against xlog.c.
 */
4309 {
4310  pg_crc32c crc;
4311  int fd;
4312  static char wal_segsz_str[20];
4313  int r;
4314 
4315  /*
4316  * Read data...
4317  */
4319  O_RDWR | PG_BINARY);
4320  if (fd < 0)
4321  ereport(PANIC,
4323  errmsg("could not open file \"%s\": %m",
4324  XLOG_CONTROL_FILE)));
4325 
4326  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4327  r = read(fd, ControlFile, sizeof(ControlFileData));
      /* A short read is as fatal as a failed one; only the message differs. */
4328  if (r != sizeof(ControlFileData))
4329  {
4330  if (r < 0)
4331  ereport(PANIC,
4333  errmsg("could not read file \"%s\": %m",
4334  XLOG_CONTROL_FILE)));
4335  else
4336  ereport(PANIC,
4338  errmsg("could not read file \"%s\": read %d of %zu",
4339  XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4340  }
4342 
4343  close(fd);
4344 
4345  /*
4346  * Check for expected pg_control format version. If this is wrong, the
4347  * CRC check will likely fail because we'll be checking the wrong number
4348  * of bytes. Complaining about wrong version will probably be more
4349  * enlightening than complaining about wrong CRC.
4350  */
4351 
4353  ereport(FATAL,
4354  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4355  errmsg("database files are incompatible with server"),
4356  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4357  " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4360  errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4361 
4363  ereport(FATAL,
4364  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4365  errmsg("database files are incompatible with server"),
4366  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4367  " but the server was compiled with PG_CONTROL_VERSION %d.",
4369  errhint("It looks like you need to initdb.")));
4370 
4371  /* Now check the CRC. */
      /* The CRC covers the bytes of the struct that precede its crc field. */
4372  INIT_CRC32C(crc);
4373  COMP_CRC32C(crc,
4374  (char *) ControlFile,
4375  offsetof(ControlFileData, crc));
4376  FIN_CRC32C(crc);
4377 
4378  if (!EQ_CRC32C(crc, ControlFile->crc))
4379  ereport(FATAL,
4380  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4381  errmsg("incorrect checksum in control file")));
4382 
4383  /*
4384  * Do compatibility checking immediately. If the database isn't
4385  * compatible with the backend executable, we want to abort before we can
4386  * possibly do any damage.
4387  */
4389  ereport(FATAL,
4390  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4391  errmsg("database files are incompatible with server"),
4392  errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d,"
4393  " but the server was compiled with CATALOG_VERSION_NO %d.",
4395  errhint("It looks like you need to initdb.")));
4396  if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4397  ereport(FATAL,
4398  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4399  errmsg("database files are incompatible with server"),
4400  errdetail("The database cluster was initialized with MAXALIGN %d,"
4401  " but the server was compiled with MAXALIGN %d.",
4402  ControlFile->maxAlign, MAXIMUM_ALIGNOF),
4403  errhint("It looks like you need to initdb.")));
4405  ereport(FATAL,
4406  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4407  errmsg("database files are incompatible with server"),
4408  errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4409  errhint("It looks like you need to initdb.")));
4410  if (ControlFile->blcksz != BLCKSZ)
4411  ereport(FATAL,
4412  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4413  errmsg("database files are incompatible with server"),
4414  errdetail("The database cluster was initialized with BLCKSZ %d,"
4415  " but the server was compiled with BLCKSZ %d.",
4416  ControlFile->blcksz, BLCKSZ),
4417  errhint("It looks like you need to recompile or initdb.")));
4418  if (ControlFile->relseg_size != RELSEG_SIZE)
4419  ereport(FATAL,
4420  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4421  errmsg("database files are incompatible with server"),
4422  errdetail("The database cluster was initialized with RELSEG_SIZE %d,"
4423  " but the server was compiled with RELSEG_SIZE %d.",
4424  ControlFile->relseg_size, RELSEG_SIZE),
4425  errhint("It looks like you need to recompile or initdb.")));
4426  if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4427  ereport(FATAL,
4428  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4429  errmsg("database files are incompatible with server"),
4430  errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
4431  " but the server was compiled with XLOG_BLCKSZ %d.",
4432  ControlFile->xlog_blcksz, XLOG_BLCKSZ),
4433  errhint("It looks like you need to recompile or initdb.")));
4435  ereport(FATAL,
4436  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4437  errmsg("database files are incompatible with server"),
4438  errdetail("The database cluster was initialized with NAMEDATALEN %d,"
4439  " but the server was compiled with NAMEDATALEN %d.",
4441  errhint("It looks like you need to recompile or initdb.")));
4443  ereport(FATAL,
4444  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4445  errmsg("database files are incompatible with server"),
4446  errdetail("The database cluster was initialized with INDEX_MAX_KEYS %d,"
4447  " but the server was compiled with INDEX_MAX_KEYS %d.",
4449  errhint("It looks like you need to recompile or initdb.")));
4451  ereport(FATAL,
4452  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4453  errmsg("database files are incompatible with server"),
4454  errdetail("The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
4455  " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
4457  errhint("It looks like you need to recompile or initdb.")));
4459  ereport(FATAL,
4460  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4461  errmsg("database files are incompatible with server"),
4462  errdetail("The database cluster was initialized with LOBLKSIZE %d,"
4463  " but the server was compiled with LOBLKSIZE %d.",
4464  ControlFile->loblksize, (int) LOBLKSIZE),
4465  errhint("It looks like you need to recompile or initdb.")));
4466 
      /* float8ByVal in the file must agree with this build's USE_FLOAT8_BYVAL. */
4467 #ifdef USE_FLOAT8_BYVAL
4468  if (ControlFile->float8ByVal != true)
4469  ereport(FATAL,
4470  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4471  errmsg("database files are incompatible with server"),
4472  errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
4473  " but the server was compiled with USE_FLOAT8_BYVAL."),
4474  errhint("It looks like you need to recompile or initdb.")));
4475 #else
4476  if (ControlFile->float8ByVal != false)
4477  ereport(FATAL,
4478  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4479  errmsg("database files are incompatible with server"),
4480  errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
4481  " but the server was compiled without USE_FLOAT8_BYVAL."),
4482  errhint("It looks like you need to recompile or initdb.")));
4483 #endif
4484 
4486 
4488  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4489  errmsg_plural("invalid WAL segment size in control file (%d byte)",
4490  "invalid WAL segment size in control file (%d bytes)",
4493  errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4494 
      /* Publish the segment size as the wal_segment_size setting. */
4495  snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4496  SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4498 
4499  /* check and update variables dependent on wal_segment_size */
4501  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4502  errmsg("\"min_wal_size\" must be at least twice \"wal_segment_size\"")));
4503 
4505  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4506  errmsg("\"max_wal_size\" must be at least twice \"wal_segment_size\"")));
4507 
4509  (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4511 
4513 
4514  /* Make the initdb settings visible as GUC variables, too */
4515  SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4517 }
#define PG_BINARY
Definition: c.h:1273
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1180
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1087
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4282
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_INTERNAL
Definition: guc.h:69
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:201
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
uint32 nameDataLen
Definition: pg_control.h:213
uint32 indexMaxKeys
Definition: pg_control.h:214
uint32 relseg_size
Definition: pg_control.h:208
uint32 catalog_version_no
Definition: pg_control.h:126
double floatFormat
Definition: pg_control.h:200
uint32 xlog_blcksz
Definition: pg_control.h:210
uint32 loblksize
Definition: pg_control.h:217
pg_crc32c crc
Definition: pg_control.h:232
uint32 toast_max_chunk_size
Definition: pg_control.h:216
#define UsableBytesInPage
Definition: xlog.c:590
bool DataChecksumsEnabled(void)
Definition: xlog.c:4553
static int UsableBytesInSegment
Definition: xlog.c:599
int min_wal_size_mb
Definition: xlog.c:113
#define XLOG_CONTROL_FILE

References BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6290 of file xlog.c.

/*
 * RecoveryInProgress: report whether this process should consider the
 * system to be in recovery.
 *
 * Returns false on the fast path, otherwise the value of the process-local
 * LocalRecoveryInProgress flag.
 *
 * NOTE(review): source lines 6297 (the fast-path condition) and 6307 (the
 * statement that uses xlogctl) are absent from this listing.  Going by the
 * comments and the References list, the fast path presumably tests
 * LocalRecoveryInProgress, and line 6307 presumably refreshes it by
 * comparing xlogctl->SharedRecoveryState with RECOVERY_STATE_DONE --
 * confirm against xlog.c.
 */
6291 {
6292  /*
6293  * We check shared state each time only until we leave recovery mode. We
6294  * can't re-enter recovery, so there's no need to keep checking after the
6295  * shared variable has once been seen false.
6296  */
6298  return false;
6299  else
6300  {
6301  /*
6302  * use volatile pointer to make sure we make a fresh read of the
6303  * shared variable.
6304  */
6305  volatile XLogCtlData *xlogctl = XLogCtl;
6306 
6308 
6309  /*
6310  * Note: We don't need a memory barrier when we're still in recovery.
6311  * We might exit recovery immediately after return, so the caller
6312  * can't rely on 'true' meaning that we're still in recovery anyway.
6313  */
6314 
6315  return LocalRecoveryInProgress;
6316  }
6317 }
static bool LocalRecoveryInProgress
Definition: xlog.c:222

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), 
SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7480 of file xlog.c.

/*
 * RecoveryRestartPoint: remember a replayed checkpoint record in shared
 * memory (XLogCtl->lastCheckPoint) unless unresolved invalid-page
 * references are outstanding.
 *
 * checkPoint: the checkpoint data to store; its redo field is also used in
 *             the debug message.
 * record:     not used by any line visible in this listing.
 *
 * NOTE(review): source lines 7502-7504 and 7506 are absent from this
 * listing.  The References list names SpinLockAcquire/SpinLockRelease,
 * XLogCtl->info_lck, lastCheckPointRecPtr, lastCheckPointEndPtr and
 * record->ReadRecPtr / EndRecPtr, so the store below is presumably done
 * under the info_lck spinlock together with those two pointers -- confirm
 * against xlog.c.
 */
7481 {
7482  /*
7483  * Also refrain from creating a restartpoint if we have seen any
7484  * references to non-existent pages. Restarting recovery from the
7485  * restartpoint would not see the references, so we would lose the
7486  * cross-check that the pages belonged to a relation that was dropped
7487  * later.
7488  */
7489  if (XLogHaveInvalidPages())
7490  {
7491  elog(DEBUG2,
7492  "could not record restart point at %X/%X because there "
7493  "are unresolved references to invalid pages",
7494  LSN_FORMAT_ARGS(checkPoint->redo));
7495  return;
7496  }
7497 
7498  /*
7499  * Copy the checkpoint record to shared memory, so that checkpointer can
7500  * work out the next time it wants to perform a restartpoint.
7501  */
7505  XLogCtl->lastCheckPoint = *checkPoint;
7507 }
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:235

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9351 of file xlog.c.

/*
 * register_persistent_abort_backup_handler: perform a one-time registration.
 * The function-local static flag already_done makes every call after the
 * first return immediately.
 *
 * NOTE(review): source line 9357, the registration itself, is absent from
 * this listing.  The References list names before_shmem_exit(),
 * do_pg_abort_backup() and DatumGetBool(), so it presumably registers
 * do_pg_abort_backup as a before_shmem_exit callback -- confirm against
 * xlog.c.
 */
9352 {
9353  static bool already_done = false;
9354 
9355  if (already_done)
9356  return;
9358  already_done = true;
9359 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3927 of file xlog.c.

/*
 * RemoveNonParentXlogFiles: scan XLOGDIR and hand to RemoveXlogFile() every
 * WAL segment file whose timeline part sorts before newTLI's and whose
 * segment part sorts after the segment that precedes switchpoint, skipping
 * files for which XLogArchiveIsReady() is true.
 *
 * switchpoint: WAL position of the timeline switch.
 * newTLI:      timeline used to build the reference file name and passed to
 *              RemoveXlogFile().
 */
3928 {
3929  DIR *xldir;
3930  struct dirent *xlde;
3931  char switchseg[MAXFNAMELEN];
3932  XLogSegNo endLogSegNo;
3933  XLogSegNo switchLogSegNo;
3934  XLogSegNo recycleSegNo;
3935 
3936  /*
3937  * Initialize info about where to begin the work. This will recycle,
3938  * somewhat arbitrarily, 10 future segments.
3939  */
3940  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3941  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3942  recycleSegNo = endLogSegNo + 10;
3943 
3944  /*
3945  * Construct a filename of the last segment to be kept.
3946  */
3947  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3948 
3949  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3950  switchseg);
3951 
3952  xldir = AllocateDir(XLOGDIR);
3953 
3954  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3955  {
3956  /* Ignore files that are not XLOG segments */
3957  if (!IsXLogFileName(xlde->d_name))
3958  continue;
3959 
3960  /*
3961  * Remove files that are on a timeline older than the new one we're
3962  * switching to, but with a segment number >= the first segment on the
3963  * new timeline.
3964  */
      /*
       * The first 8 characters of the name are compared as the timeline
       * part, the rest as the segment part; both are plain string compares.
       */
3965  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3966  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3967  {
3968  /*
3969  * If the file has already been marked as .ready, however, don't
3970  * remove it yet. It should be OK to remove it - files that are
3971  * not part of our timeline history are not required for recovery
3972  * - but seems safer to let them be archived and removed later.
3973  */
3974  if (!XLogArchiveIsReady(xlde->d_name))
3975  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3976  }
3977  }
3978 
3979  FreeDir(xldir);
3980 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:3996
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3852 of file xlog.c.

/*
 * RemoveOldXlogFiles: scan XLOGDIR and hand to RemoveXlogFile() every WAL
 * segment file (complete or partial) whose segment part sorts at or before
 * that of segment "segno", provided XLogArchiveCheckDone() returns true for
 * it.
 *
 * segno:       last segment to be treated as removable.
 * lastredoptr: passed to XLOGfileslop() to compute the recycle limit.
 * endptr:      WAL position converted to endlogSegNo, which is passed by
 *              address to RemoveXlogFile().
 * insertTLI:   passed through to RemoveXlogFile().
 *
 * NOTE(review): source line 3900 is absent from this listing.  Going by the
 * comment on line 3899 and the References list it presumably calls
 * UpdateLastRemovedPtr(xlde->d_name) -- confirm against xlog.c.
 */
3854 {
3855  DIR *xldir;
3856  struct dirent *xlde;
3857  char lastoff[MAXFNAMELEN];
3858  XLogSegNo endlogSegNo;
3859  XLogSegNo recycleSegNo;
3860 
3861  /* Initialize info about where to try to recycle to */
3862  XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3863  recycleSegNo = XLOGfileslop(lastredoptr);
3864 
3865  /*
3866  * Construct a filename of the last segment to be kept. The timeline ID
3867  * doesn't matter, we ignore that in the comparison. (During recovery,
3868  * InsertTimeLineID isn't set, so we can't use that.)
3869  */
3870  XLogFileName(lastoff, 0, segno, wal_segment_size);
3871 
3872  elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3873  lastoff);
3874 
3875  xldir = AllocateDir(XLOGDIR);
3876 
3877  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3878  {
3879  /* Ignore files that are not XLOG segments */
3880  if (!IsXLogFileName(xlde->d_name) &&
3881  !IsPartialXLogFileName(xlde->d_name))
3882  continue;
3883 
3884  /*
3885  * We ignore the timeline part of the XLOG segment identifiers in
3886  * deciding whether a segment is still needed. This ensures that we
3887  * won't prematurely remove a segment from a parent timeline. We could
3888  * probably be a little more proactive about removing segments of
3889  * non-parent timelines, but that would be a whole lot more
3890  * complicated.
3891  *
3892  * We use the alphanumeric sorting property of the filenames to decide
3893  * which ones are earlier than the lastoff segment.
3894  */
      /* Skipping the first 8 characters leaves out the timeline part. */
3895  if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3896  {
3897  if (XLogArchiveCheckDone(xlde->d_name))
3898  {
3899  /* Update the last removed location in shared memory first */
3901 
3902  RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3903  }
3904  }
3905  }
3906 
3907  FreeDir(xldir);
3908 }
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2233
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3799
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3819 of file xlog.c.

3820 {
3821  DIR *xldir;
3822  struct dirent *xlde;
3823 
3824  elog(DEBUG2, "removing all temporary WAL segments");
3825 
3826  xldir = AllocateDir(XLOGDIR);
3827  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3828  {
3829  char path[MAXPGPATH];
3830 
3831  if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3832  continue;
3833 
3834  snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3835  unlink(path);
3836  elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3837  }
3838  FreeDir(xldir);
3839 }

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 3996 of file xlog.c.

3999 {
4000  char path[MAXPGPATH];
4001 #ifdef WIN32
4002  char newpath[MAXPGPATH];
4003 #endif
4004  const char *segname = segment_de->d_name;
4005 
4006  snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4007 
4008  /*
4009  * Before deleting the file, see if it can be recycled as a future log
4010  * segment. Only recycle normal files, because we don't want to recycle
4011  * symbolic links pointing to a separate archive directory.
4012  */
4013  if (wal_recycle &&
4014  *endlogSegNo <= recycleSegNo &&
4015  XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4016  get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4017  InstallXLogFileSegment(endlogSegNo, path,
4018  true, recycleSegNo, insertTLI))
4019  {
4020  ereport(DEBUG2,
4021  (errmsg_internal("recycled write-ahead log file \"%s\"",
4022  segname)));
4023  CheckpointStats.ckpt_segs_recycled++;
4024  /* Needn't recheck that slot on future iterations */
4025  (*endlogSegNo)++;
4026  }
4027  else
4028  {
4029  /* No need for any more future segments, or recycling failed ... */
4030  int rc;
4031 
4032  ereport(DEBUG2,
4033  (errmsg_internal("removing write-ahead log file \"%s\"",
4034  segname)));
4035 
4036 #ifdef WIN32
4037 
4038  /*
4039  * On Windows, if another process (e.g another backend) holds the file
4040  * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4041  * will still show up in directory listing until the last handle is
4042  * closed. To avoid confusing the lingering deleted file for a live
4043  * WAL file that needs to be archived, rename it before deleting it.
4044  *
4045  * If another process holds the file open without FILE_SHARE_DELETE
4046  * flag, rename will fail. We'll try again at the next checkpoint.
4047  */
4048  snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4049  if (rename(path, newpath) != 0)
4050  {
4051  ereport(LOG,
4052  (errcode_for_file_access(),
4053  errmsg("could not rename file \"%s\": %m",
4054  path)));
4055  return;
4056  }
4057  rc = durable_unlink(newpath, LOG);
4058 #else
4059  rc = durable_unlink(path, LOG);
4060 #endif
4061  if (rc != 0)
4062  {
4063  /* Message already logged by durable_unlink() */
4064  return;
4065  }
4066  CheckpointStats.ckpt_segs_removed++;
4067  }
4068 
4069  XLogArchiveCleanup(segname);
4070 }
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3550
bool wal_recycle
Definition: xlog.c:126

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 7990 of file xlog.c.

7991 {
7992  XLogRecPtr RecPtr;
7993 
7994  /* XLOG SWITCH has no data */
7995  XLogBeginInsert();
7996 
7997  if (mark_unimportant)
7998  XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
7999  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
8000 
8001  return RecPtr;
8002 }
#define XLOG_SWITCH
Definition: pg_control.h:72
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1101 of file xlog.c.

1103 {
1104  XLogCtlInsert *Insert = &XLogCtl->Insert;
1105  uint64 startbytepos;
1106  uint64 endbytepos;
1107  uint64 prevbytepos;
1108 
1109  size = MAXALIGN(size);
1110 
1111  /* All (non xlog-switch) records should contain data. */
1112  Assert(size > SizeOfXLogRecord);
1113 
1114  /*
1115  * The duration the spinlock needs to be held is minimized by minimizing
1116  * the calculations that have to be done while holding the lock. The
1117  * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1118  * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1119  * page headers. The mapping between "usable" byte positions and physical
1120  * positions (XLogRecPtrs) can be done outside the locked region, and
1121  * because the usable byte position doesn't include any headers, reserving
1122  * X bytes from WAL is almost as simple as "CurrBytePos += X".
1123  */
1124  SpinLockAcquire(&Insert->insertpos_lck);
1125 
1126  startbytepos = Insert->CurrBytePos;
1127  endbytepos = startbytepos + size;
1128  prevbytepos = Insert->PrevBytePos;
1129  Insert->CurrBytePos = endbytepos;
1130  Insert->PrevBytePos = startbytepos;
1131 
1132  SpinLockRelease(&Insert->insertpos_lck);
1133 
1134  *StartPos = XLogBytePosToRecPtr(startbytepos);
1135  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1136  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1137 
1138  /*
1139  * Check that the conversions between "usable byte positions" and
1140  * XLogRecPtrs work consistently in both directions.
1141  */
1142  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1143  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1144  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1145 }
#define MAXALIGN(LEN)
Definition: c.h:811
static pg_noinline void Size size
Definition: slab.c:607
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1891
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1934

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1157 of file xlog.c.

1158 {
1159  XLogCtlInsert *Insert = &XLogCtl->Insert;
1160  uint64 startbytepos;
1161  uint64 endbytepos;
1162  uint64 prevbytepos;
1163  uint32 size = MAXALIGN(SizeOfXLogRecord);
1164  XLogRecPtr ptr;
1165  uint32 segleft;
1166 
1167  /*
1168  * These calculations are a bit heavy-weight to be done while holding a
1169  * spinlock, but since we're holding all the WAL insertion locks, there
1170  * are no other inserters competing for it. GetXLogInsertRecPtr() does
1171  * compete for it, but that's not called very frequently.
1172  */
1173  SpinLockAcquire(&Insert->insertpos_lck);
1174 
1175  startbytepos = Insert->CurrBytePos;
1176 
1177  ptr = XLogBytePosToEndRecPtr(startbytepos);
1178  if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1179  {
1180  SpinLockRelease(&Insert->insertpos_lck);
1181  *EndPos = *StartPos = ptr;
1182  return false;
1183  }
1184 
1185  endbytepos = startbytepos + size;
1186  prevbytepos = Insert->PrevBytePos;
1187 
1188  *StartPos = XLogBytePosToRecPtr(startbytepos);
1189  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1190 
1191  segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1192  if (segleft != wal_segment_size)
1193  {
1194  /* consume the rest of the segment */
1195  *EndPos += segleft;
1196  endbytepos = XLogRecPtrToBytePos(*EndPos);
1197  }
1198  Insert->CurrBytePos = endbytepos;
1199  Insert->PrevBytePos = startbytepos;
1200 
1201  SpinLockRelease(&Insert->insertpos_lck);
1202 
1203  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1204 
1205  Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1206  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1207  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1208  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1209 
1210  return true;
1211 }

References Assert, XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9414 of file xlog.c.

9415 {
9416  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9417  XLogCtl->InstallXLogFileSegmentActive = true;
9418  LWLockRelease(ControlFileLock);
9419 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9437 of file xlog.c.

9438 {
9439  SpinLockAcquire(&XLogCtl->info_lck);
9440  XLogCtl->WalWriterSleeping = sleeping;
9441  SpinLockRelease(&XLogCtl->info_lck);
9442 }
bool WalWriterSleeping
Definition: xlog.c:526

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char* show_archive_command ( void  )

Definition at line 4775 of file xlog.c.

4776 {
4777  if (XLogArchivingActive())
4778  return XLogArchiveCommand;
4779  else
4780  return "(disabled)";
4781 }
char * XLogArchiveCommand
Definition: xlog.c:118

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char* show_in_hot_standby ( void  )

Definition at line 4787 of file xlog.c.

4788 {
4789  /*
4790  * We display the actual state based on shared memory, so that this GUC
4791  * reports up-to-date state if examined intra-query. The underlying
4792  * variable (in_hot_standby_guc) changes only when we transmit a new value
4793  * to the client.
4794  */
4795  return RecoveryInProgress() ? "on" : "off";
4796 }

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6539 of file xlog.c.

6540 {
6541  /*
6542  * We should have an aux process resource owner to use, and we should not
6543  * be in a transaction that's installed some other resowner.
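6544  */
6545  Assert(AuxProcessResourceOwner != NULL);
6546  Assert(CurrentResourceOwner == NULL ||
6547  CurrentResourceOwner == AuxProcessResourceOwner);
6548  CurrentResourceOwner = AuxProcessResourceOwner;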
6549 
6550  /* Don't be chatty in standalone mode */
6551  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6552  (errmsg("shutting down")));
6553 
6554  /*
6555  * Signal walsenders to move to stopping state.
6556  */
6557  WalSndInitStopping();
6558 
6559  /*
6560  * Wait for WAL senders to be in stopping state. This prevents commands
6561  * from writing new WAL.
6562  */
6563  WalSndWaitStopping();
6564 
6565  if (RecoveryInProgress())
6566  CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6567  else
6568  {
6569  /*
6570  * If archiving is enabled, rotate the last XLOG file so that all the
6571  * remaining records are archived (postmaster wakes up the archiver
6572  * process one more time at the end of shutdown). The checkpoint
6573  * record will go to the next XLOG file and won't be archived (yet).
6574  */
6575  if (XLogArchivingActive())
6576  RequestXLogSwitch(false);
6577 
6578  CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6579  }
6580 }
bool IsPostmasterEnvironment
Definition: globals.c:117
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3748
void WalSndWaitStopping(void)
Definition: walsender.c:3774
bool CreateRestartPoint(int flags)
Definition: xlog.c:7521
void CreateCheckPoint(int flags)
Definition: xlog.c:6821

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5387 of file xlog.c.

5388 {
5390  CheckPoint checkPoint;
5391  bool wasShutdown;
5392  bool didCrash;
5393  bool haveTblspcMap;
5394  bool haveBackupLabel;
5395  XLogRecPtr EndOfLog;
5396  TimeLineID EndOfLogTLI;
5397  TimeLineID newTLI;
5398  bool performedWalRecovery;
5399  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5402  TransactionId oldestActiveXID;
5403  bool promoted = false;
5404 
5405  /*
5406  * We should have an aux process resource owner to use, and we should not
5407  * be in a transaction that's installed some other resowner.
5408  */
5410  Assert(CurrentResourceOwner == NULL ||
5413 
5414  /*
5415  * Check that contents look valid.
5416  */
5418  ereport(FATAL,
5420  errmsg("control file contains invalid checkpoint location")));
5421 
5422  switch (ControlFile->state)
5423  {
5424  case DB_SHUTDOWNED:
5425 
5426  /*
5427  * This is the expected case, so don't be chatty in standalone
5428  * mode
5429  */
5431  (errmsg("database system was shut down at %s",
5432  str_time(ControlFile->time))));
5433  break;
5434 
5436  ereport(LOG,
5437  (errmsg("database system was shut down in recovery at %s",
5438  str_time(ControlFile->time))));
5439  break;
5440 
5441  case DB_SHUTDOWNING:
5442  ereport(LOG,
5443  (errmsg("database system shutdown was interrupted; last known up at %s",
5444  str_time(ControlFile->time))));
5445  break;
5446 
5447  case DB_IN_CRASH_RECOVERY:
5448  ereport(LOG,
5449  (errmsg("database system was interrupted while in recovery at %s",
5451  errhint("This probably means that some data is corrupted and"
5452  " you will have to use the last backup for recovery.")));
5453  break;
5454 
5456  ereport(LOG,
5457  (errmsg("database system was interrupted while in recovery at log time %s",
5459  errhint("If this has occurred more than once some data might be corrupted"
5460  " and you might need to choose an earlier recovery target.")));
5461  break;
5462 
5463  case DB_IN_PRODUCTION:
5464  ereport(LOG,
5465  (errmsg("database system was interrupted; last known up at %s",
5466  str_time(ControlFile->time))));
5467  break;
5468 
5469  default:
5470  ereport(FATAL,
5472  errmsg("control file contains invalid database cluster state")));
5473  }
5474 
5475  /* This is just to allow attaching to startup process with a debugger */
5476 #ifdef XLOG_REPLAY_DELAY
5478  pg_usleep(60000000L);
5479 #endif
5480 
5481  /*
5482  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5483  * In cases where someone has performed a copy for PITR, these directories
5484  * may have been excluded and need to be re-created.
5485  */
5487 
5488  /* Set up timeout handler needed to report startup progress. */
5492 
5493  /*----------
5494  * If we previously crashed, perform a couple of actions:
5495  *
5496  * - The pg_wal directory may still include some temporary WAL segments
5497  * used when creating a new segment, so perform some clean up to not
5498  * bloat this path. This is done first as there is no point to sync
5499  * this temporary data.
5500  *
5501  * - There might be data which we had written, intending to fsync it, but
5502  * which we had not actually fsync'd yet. Therefore, a power failure in
5503  * the near future might cause earlier unflushed writes to be lost, even
5504  * though more recent data written to disk from here on would be
5505  * persisted. To avoid that, fsync the entire data directory.
5506  */
5507  if (ControlFile->state != DB_SHUTDOWNED &&
5509  {
5512  didCrash = true;
5513  }
5514  else
5515  didCrash = false;
5516 
5517  /*
5518  * Prepare for WAL recovery if needed.
5519  *
5520  * InitWalRecovery analyzes the control file and the backup label file, if
5521  * any. It updates the in-memory ControlFile buffer according to the
5522  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5523  * It also applies the tablespace map file, if any.
5524  */
5525  InitWalRecovery(ControlFile, &wasShutdown,
5526  &haveBackupLabel, &haveTblspcMap);
5527  checkPoint = ControlFile->checkPointCopy;
5528 
5529  /* initialize shared memory variables from the checkpoint record */
5530  TransamVariables->nextXid = checkPoint.nextXid;
5531  TransamVariables->nextOid = checkPoint.nextOid;
5533  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5534  AdvanceOldestClogXid(checkPoint.oldestXid);
5535  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5536  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5538  checkPoint.newestCommitTsXid);
5539  XLogCtl->ckptFullXid = checkPoint.nextXid;
5540 
5541  /*
5542  * Clear out any old relcache cache files. This is *necessary* if we do
5543  * any WAL replay, since that would probably result in the cache files
5544  * being out of sync with database reality. In theory we could leave them
5545  * in place if the database had been cleanly shut down, but it seems
5546  * safest to just remove them always and let them be rebuilt during the
5547  * first backend startup. These files needs to be removed from all
5548  * directories including pg_tblspc, however the symlinks are created only
5549  * after reading tablespace_map file in case of archive recovery from
5550  * backup, so needs to clear old relcache files here after creating
5551  * symlinks.
5552  */
5554 
5555  /*
5556  * Initialize replication slots, before there's a chance to remove
5557  * required resources.
5558  */
5560 
5561  /*
5562  * Startup logical state, needs to be setup now so we have proper data
5563  * during crash recovery.
5564  */
5566 
5567  /*
5568  * Startup CLOG. This must be done after TransamVariables->nextXid has
5569  * been initialized and before we accept connections or begin WAL replay.
5570  */
5571  StartupCLOG();
5572 
5573  /*
5574  * Startup MultiXact. We need to do this early to be able to replay
5575  * truncations.
5576  */
5577  StartupMultiXact();
5578 
5579  /*
5580  * Ditto for commit timestamps. Activate the facility if the setting is
5581  * enabled in the control file, as there should be no tracking of commit
5582  * timestamps done when the setting was disabled. This facility can be
5583  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5584  */
5586  StartupCommitTs();
5587 
5588  /*
5589  * Recover knowledge about replay progress of known replication partners.
5590  */
5592 
5593  /*
5594  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5595  * control file. On recovery, all unlogged relations are blown away, so
5596  * the unlogged LSN counter can be reset too.
5597  */
5601  else
5604 
5605  /*
5606  * Copy any missing timeline history files between 'now' and the recovery
5607  * target timeline from archive to pg_wal. While we don't need those files
5608  * ourselves - the history file of the recovery target timeline covers all
5609  * the previous timelines in the history too - a cascading standby server
5610  * might be interested in them. Or, if you archive the WAL from this
5611  * server to a different archive than the primary, it'd be good for all
5612  * the history files to get archived there after failover, so that you can
5613  * use one of the old timelines as a PITR target. Timeline history files
5614  * are small, so it's better to copy them unnecessarily than not copy them
5615  * and regret later.
5616  */
5618 
5619  /*
5620  * Before running in recovery, scan pg_twophase and fill in its status to
5621  * be able to work on entries generated by redo. Doing a scan before
5622  * taking any recovery action has the merit to discard any 2PC files that
5623  * are newer than the first record to replay, saving from any conflicts at
5624  * replay. This avoids as well any subsequent scans when doing recovery
5625  * of the on-disk two-phase data.
5626  */
5628 
5629  /*
5630  * When starting with crash recovery, reset pgstat data - it might not be
5631  * valid. Otherwise restore pgstat data. It's safe to do this here,
5632  * because postmaster will not yet have started any other processes.
5633  *
5634  * NB: Restoring replication slot stats relies on slot state to have
5635  * already been restored from disk.
5636  *
5637  * TODO: With a bit of extra work we could just start with a pgstat file
5638  * associated with the checkpoint redo location we're starting from.
5639  */
5640  if (didCrash)
5642  else
5644 
5645  lastFullPageWrites = checkPoint.fullPageWrites;
5646 
5649 
5650  /* REDO */
5651  if (InRecovery)
5652  {
5653  /* Initialize state for RecoveryInProgress() */
5655  if (InArchiveRecovery)
5657  else
5660 
5661  /*
5662  * Update pg_control to show that we are recovering and to show the
5663  * selected checkpoint as the place we are starting from. We also mark
5664  * pg_control with any minimum recovery stop point obtained from a
5665  * backup history file.
5666  *
5667  * No need to hold ControlFileLock yet, we aren't up far enough.
5668  */
5670 
5671  /*
5672  * If there was a backup label file, it's done its job and the info
5673  * has now been propagated into pg_control. We must get rid of the
5674  * label file so that if we crash during recovery, we'll pick up at
5675  * the latest recovery restartpoint instead of going all the way back
5676  * to the backup start point. It seems prudent though to just rename
5677  * the file out of the way rather than delete it completely.
5678  */
5679  if (haveBackupLabel)
5680  {
5681  unlink(BACKUP_LABEL_OLD);
5683  }
5684 
5685  /*
5686  * If there was a tablespace_map file, it's done its job and the
5687  * symlinks have been created. We must get rid of the map file so
5688  * that if we crash during recovery, we don't create symlinks again.
5689  * It seems prudent though to just rename the file out of the way
5690  * rather than delete it completely.
5691  */
5692  if (haveTblspcMap)
5693  {
5694  unlink(TABLESPACE_MAP_OLD);
5696  }
5697 
5698  /*
5699  * Initialize our local copy of minRecoveryPoint. When doing crash
5700  * recovery we want to replay up to the end of WAL. Particularly, in
5701  * the case of a promoted standby minRecoveryPoint value in the
5702  * control file is only updated after the first checkpoint. However,
5703  * if the instance crashes before the first post-recovery checkpoint
5704  * is completed then recovery will use a stale location causing the
5705  * startup process to think that there are still invalid page
5706  * references when checking for data consistency.
5707  */
5708  if (InArchiveRecovery)
5709  {
5712  }
5713  else
5714  {
5717  }
5718 
5719  /* Check that the GUCs used to generate the WAL allow recovery */
5721 
5722  /*
5723  * We're in recovery, so unlogged relations may be trashed and must be
5724  * reset. This should be done BEFORE allowing Hot Standby
5725  * connections, so that read-only backends don't try to read whatever
5726  * garbage is left over from before.
5727  */
5729 
5730  /*
5731  * Likewise, delete any saved transaction snapshot files that got left
5732  * behind by crashed backends.
5733  */
5735 
5736  /*
5737  * Initialize for Hot Standby, if enabled. We won't let backends in
5738  * yet, not until we've reached the min recovery point specified in
5739  * control file and we've established a recovery snapshot from a
5740  * running-xacts WAL record.
5741  */
5743  {
5744  TransactionId *xids;
5745  int nxids;
5746 
5747  ereport(DEBUG1,
5748  (errmsg_internal("initializing for hot standby")));
5749 
5751 
5752  if (wasShutdown)
5753  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5754  else
5755  oldestActiveXID = checkPoint.oldestActiveXid;
5756  Assert(TransactionIdIsValid(oldestActiveXID));
5757 
5758  /* Tell procarray about the range of xids it has to deal with */
5760 
5761  /*
5762  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5763  * have already been started up and other SLRUs are not maintained
5764  * during recovery and need not be started yet.
5765  */
5766  StartupSUBTRANS(oldestActiveXID);
5767 
5768  /*
5769  * If we're beginning at a shutdown checkpoint, we know that
5770  * nothing was running on the primary at this point. So fake-up an
5771  * empty running-xacts record and use that here and now. Recover
5772  * additional standby state for prepared transactions.
5773  */
5774  if (wasShutdown)
5775  {
5776  RunningTransactionsData running;
5777  TransactionId latestCompletedXid;
5778 
5779  /* Update pg_subtrans entries for any prepared transactions */
5781 
5782  /*
5783  * Construct a RunningTransactions snapshot representing a
5784  * shut down server, with only prepared transactions still
5785  * alive. We're never overflowed at this point because all
5786  * subxids are listed with their parent prepared transactions.
5787  */
5788  running.xcnt = nxids;
5789  running.subxcnt = 0;
5791  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5792  running.oldestRunningXid = oldestActiveXID;
5793  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5794  TransactionIdRetreat(latestCompletedXid);
5795  Assert(TransactionIdIsNormal(latestCompletedXid));
5796  running.latestCompletedXid = latestCompletedXid;
5797  running.xids = xids;
5798 
5799  ProcArrayApplyRecoveryInfo(&running);
5800  }
5801  }
5802 
5803  /*
5804  * We're all set for replaying the WAL now. Do it.
5805  */
5807  performedWalRecovery = true;
5808  }
5809  else
5810  performedWalRecovery = false;
5811 
5812  /*
5813  * Finish WAL recovery.
5814  */
5815  endOfRecoveryInfo = FinishWalRecovery();
5816  EndOfLog = endOfRecoveryInfo->endOfLog;
5817  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5818  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5819  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5820 
5821  /*
5822  * Reset ps status display, so as no information related to recovery shows
5823  * up.
5824  */
5825  set_ps_display("");
5826 
5827  /*
5828  * When recovering from a backup (we are in recovery, and archive recovery
5829  * was requested), complain if we did not roll forward far enough to reach
5830  * the point where the database is consistent. For regular online
5831  * backup-from-primary, that means reaching the end-of-backup WAL record
5832  * (at which point we reset backupStartPoint to be Invalid), for
5833  * backup-from-replica (which can't inject records into the WAL stream),
5834  * that point is when we reach the minRecoveryPoint in pg_control (which
5835  * we purposefully copy last when backing up from a replica). For
5836  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5837  * or snapshot-style backups (which don't), backupEndRequired will be set
5838  * to false.
5839  *
5840  * Note: it is indeed okay to look at the local variable
5841  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5842  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5843  * been advanced beyond the WAL we processed.
5844  */
5845  if (InRecovery &&
5846  (EndOfLog < LocalMinRecoveryPoint ||
5848  {
5849  /*
5850  * Ran off end of WAL before reaching end-of-backup WAL record, or
5851  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5852  * recover from an online backup but never called pg_backup_stop(), or
5853  * you didn't archive all the WAL needed.
5854  */
5856  {
5858  ereport(FATAL,
5859  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5860  errmsg("WAL ends before end of online backup"),
5861  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5862  else
5863  ereport(FATAL,
5864  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5865  errmsg("WAL ends before consistent recovery point")));
5866  }
5867  }
5868 
5869  /*
5870  * Reset unlogged relations to the contents of their INIT fork. This is
5871  * done AFTER recovery is complete so as to include any unlogged relations
5872  * created during recovery, but BEFORE recovery is marked as having
5873  * completed successfully. Otherwise we'd not retry if any of the post
5874  * end-of-recovery steps fail.
5875  */
5876  if (InRecovery)
5878 
5879  /*
5880  * Pre-scan prepared transactions to find out the range of XIDs present.
5881  * This information is not quite needed yet, but it is positioned here so
5882  * as potential problems are detected before any on-disk change is done.
5883  */
5884  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5885 
5886  /*
5887  * Allow ordinary WAL segment creation before possibly switching to a new
5888  * timeline, which creates a new segment, and after the last ReadRecord().
5889  */
5891 
5892  /*
5893  * Consider whether we need to assign a new timeline ID.
5894  *
5895  * If we did archive recovery, we always assign a new ID. This handles a
5896  * couple of issues. If we stopped short of the end of WAL during
5897  * recovery, then we are clearly generating a new timeline and must assign
5898  * it a unique new ID. Even if we ran to the end, modifying the current
5899  * last segment is problematic because it may result in trying to
5900  * overwrite an already-archived copy of that segment, and we encourage
5901  * DBAs to make their archive_commands reject that. We can dodge the
5902  * problem by making the new active segment have a new timeline ID.
5903  *
5904  * In a normal crash recovery, we can just extend the timeline we were in.
5905  */
5906  newTLI = endOfRecoveryInfo->lastRecTLI;
5908  {
5909  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5910  ereport(LOG,
5911  (errmsg("selected new timeline ID: %u", newTLI)));
5912 
5913  /*
5914  * Make a writable copy of the last WAL segment. (Note that we also
5915  * have a copy of the last block of the old WAL in
5916  * endOfRecovery->lastPage; we will use that below.)
5917  */
5918  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5919 
5920  /*
5921  * Remove the signal files out of the way, so that we don't
5922  * accidentally re-enter archive recovery mode in a subsequent crash.
5923  */
5924  if (endOfRecoveryInfo->standby_signal_file_found)
5926 
5927  if (endOfRecoveryInfo->recovery_signal_file_found)
5929 
5930  /*
5931  * Write the timeline history file, and have it archived. After this
5932  * point (or rather, as soon as the file is archived), the timeline
5933  * will appear as "taken" in the WAL archive and to any standby
5934  * servers. If we crash before actually switching to the new
5935  * timeline, standby servers will nevertheless think that we switched
5936  * to the new timeline, and will try to connect to the new timeline.
5937  * To minimize the window for that, try to do as little as possible
5938  * between here and writing the end-of-recovery record.
5939  */
5941  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5942 
5943  ereport(LOG,
5944  (errmsg("archive recovery complete")));
5945  }
5946 
5947  /* Save the selected TimeLineID in shared memory, too */
5948  XLogCtl->InsertTimeLineID = newTLI;
5949  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5950 
5951  /*
5952  * Actually, if WAL ended in an incomplete record, skip the parts that
5953  * made it through and start writing after the portion that persisted.
5954  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5955  * we'll do as soon as we're open for writing new WAL.)
5956  */
5958  {
5959  /*
5960  * We should only have a missingContrecPtr if we're not switching to a
5961  * new timeline. When a timeline switch occurs, WAL is copied from the
5962  * old timeline to the new only up to the end of the last complete
5963  * record, so there can't be an incomplete WAL record that we need to
5964  * disregard.
5965  */
5966  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
5968  EndOfLog = missingContrecPtr;
5969  }
5970 
5971  /*
5972  * Prepare to write WAL starting at EndOfLog location, and init xlog
5973  * buffer cache using the block containing the last record from the
5974  * previous incarnation.
5975  */
5976  Insert = &XLogCtl->Insert;
5977  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
5978  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
5979 
5980  /*
5981  * Tricky point here: lastPage contains the *last* block that the LastRec
5982  * record spans, not the one it starts in. The last block is indeed the
5983  * one we want to use.
5984  */
5985  if (EndOfLog % XLOG_BLCKSZ != 0)
5986  {
5987  char *page;
5988  int len;
5989  int firstIdx;
5990 
5991  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
5992  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
5993  Assert(len < XLOG_BLCKSZ);
5994 
5995  /* Copy the valid part of the last block, and zero the rest */
5996  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
5997  memcpy(page, endOfRecoveryInfo->lastPage, len);
5998  memset(page + len, 0, XLOG_BLCKSZ - len);
5999 
6000  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6001  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6002  }
6003  else
6004  {
6005  /*
6006  * There is no partial block to copy. Just set InitializedUpTo, and
6007  * let the first attempt to insert a log record to initialize the next
6008  * buffer.
6009  */
6010  XLogCtl->InitializedUpTo = EndOfLog;
6011  }
6012 
6013  /*
6014  * Update local and shared status. This is OK to do without any locks
6015  * because no other process can be reading or writing WAL yet.
6016  */
6017  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6021  XLogCtl->LogwrtRqst.Write = EndOfLog;
6022  XLogCtl->LogwrtRqst.Flush = EndOfLog;
6023 
6024  /*
6025  * Preallocate additional log files, if wanted.
6026  */
6027  PreallocXlogFiles(EndOfLog, newTLI);
6028 
6029  /*
6030  * Okay, we're officially UP.
6031  */
6032  InRecovery = false;
6033 
6034  /* start the archive_timeout timer and LSN running */
6035  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6036  XLogCtl->lastSegSwitchLSN = EndOfLog;
6037 
6038  /* also initialize latestCompletedXid, to nextXid - 1 */
6039  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6042  LWLockRelease(ProcArrayLock);
6043 
6044  /*
6045  * Start up subtrans, if not already done for hot standby. (commit
6046  * timestamps are started below, if necessary.)
6047  */
6049  StartupSUBTRANS(oldestActiveXID);
6050 
6051  /*
6052  * Perform end of recovery actions for any SLRUs that need it.
6053  */
6054  TrimCLOG();
6055  TrimMultiXact();
6056 
6057  /*
6058  * Reload shared-memory state for prepared transactions. This needs to
6059  * happen before renaming the last partial segment of the old timeline as
6060  * it may be possible that we have to recover some transactions from it.
6061  */
6063 
6064  /* Shut down xlogreader */
6066 
6067  /* Enable WAL writes for this backend only. */
6069 
6070  /* If necessary, write overwrite-contrecord before doing anything else */
6072  {
6075  }
6076 
6077  /*
6078  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6079  * record before resource manager writes cleanup WAL records or checkpoint
6080  * record is written.
6081  */
6082  Insert->fullPageWrites = lastFullPageWrites;
6084 
6085  /*
6086  * Emit checkpoint or end-of-recovery record in XLOG, if required.
6087  */
6088  if (performedWalRecovery)
6089  promoted = PerformRecoveryXLogAction();
6090 
6091  /*
6092  * If any of the critical GUCs have changed, log them before we allow
6093  * backends to write WAL.
6094  */
6096 
6097  /* If this is archive recovery, perform post-recovery cleanup actions. */
6099  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6100 
6101  /*
6102  * Local WAL inserts enabled, so it's time to finish initialization of
6103  * commit timestamp.
6104  */
6106 
6107  /*
6108  * All done with end-of-recovery actions.
6109  *
6110  * Now allow backends to write WAL and update the control file status in
6111  * consequence. SharedRecoveryState, that controls if backends can write
6112  * WAL, is updated while holding ControlFileLock to prevent other backends
6113  * from looking at an inconsistent state of the control file in shared memory.
6114  * There is still a small window during which backends can write WAL and
6115  * the control file is still referring to a system not in DB_IN_PRODUCTION
6116  * state while looking at the on-disk control file.
6117  *
6118  * Also, we use info_lck to update SharedRecoveryState to ensure that
6119  * there are no race conditions concerning visibility of other recent
6120  * updates to shared memory.
6121  */
6122  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6124 
6128 
6130  LWLockRelease(ControlFileLock);
6131 
6132  /*
6133  * Shutdown the recovery environment. This must occur after
6134  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6135  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6136  * any session building a snapshot will not rely on KnownAssignedXids as
6137  * RecoveryInProgress() would return false at this stage. This is
6138  * particularly critical for prepared 2PC transactions, that would still
6139  * need to be included in snapshots once recovery has ended.
6140  */
6143 
6144  /*
6145  * If there were cascading standby servers connected to us, nudge any wal
6146  * sender processes to notice that we've been promoted.
6147  */
6148  WalSndWakeup(true, true);
6149 
6150  /*
6151  * If this was a promotion, request an (online) checkpoint now. This isn't
6152  * required for consistency, but the last restartpoint might be far back,
6153  * and in case of a crash, recovering from it might take longer than is
6154  * appropriate now that we're not in standby mode anymore.
6155  */
6156  if (promoted)
6158 }
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:487
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:652
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
void SyncDataDirectory(void)
Definition: fd.c:3544
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
void TrimMultiXact(void)
Definition: multixact.c:2170
void StartupMultiXact(void)
Definition: multixact.c:2145
void StartupReplicationOrigin(void)
Definition: origin.c:699
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:450
void pgstat_discard_stats(void)
Definition: pgstat.c:462
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6801
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1917
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:469
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2083
void restoreTwoPhaseData(void)
Definition: twophase.c:1898
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1962
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2042
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3669
void UpdateFullPageWrites(void)
Definition: xlog.c:8096
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5164
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4086
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7370
static void XLogReportParameters(void)
Definition: xlog.c:8033
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6240
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5254
static bool lastFullPageWrites
Definition: xlog.c:215
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5179
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5343
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3819
#define TABLESPACE_MAP_OLD
Definition: xlog.h:304
#define TABLESPACE_MAP
Definition: xlog.h:303
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:299
#define BACKUP_LABEL_OLD
Definition: xlog.h:301
#define BACKUP_LABEL_FILE
Definition: xlog.h:300
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:298
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:138
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:373
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:372
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:512
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:122
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow)
static

Definition at line 5164 of file xlog.c.

/*
 * str_time: render tnow as "YYYY-MM-DD HH:MM:SS TZ" using log_timezone.
 *
 * The text is written into a function-local static buffer and a pointer to
 * that buffer is returned, so every call overwrites the result of the
 * previous one and the caller must not free the returned pointer.
 */
5165 {
5166  static char buf[128];
5167 
5168  pg_strftime(buf, sizeof(buf),
5169  "%Y-%m-%d %H:%M:%S %Z",
5170  pg_localtime(&tnow, log_timezone));
5171 
5172  return buf;
5173 }
static char * buf
Definition: pg_test_fsync.c:73
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6165 of file xlog.c.

/*
 * Move the control file and shared recovery state into archive recovery.
 *
 * While holding ControlFileLock: mark the control file as
 * DB_IN_ARCHIVE_RECOVERY, raise minRecoveryPoint to EndRecPtr (on timeline
 * replayTLI) if it is currently lower, refresh the local copies, allow
 * later updates of minRecoveryPoint, write the control file out, and set
 * SharedRecoveryState to RECOVERY_STATE_ARCHIVE under info_lck.
 */
6166 {
6167  /* initialize minRecoveryPoint to this record */
6168  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6169  ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
6170  if (ControlFile->minRecoveryPoint < EndRecPtr)
6171  {
6172  ControlFile->minRecoveryPoint = EndRecPtr;
6173  ControlFile->minRecoveryPointTLI = replayTLI;
6174  }
6175  /* update local copy */
6176  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
6177  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
6178 
6179  /*
6180  * The startup process can update its local copy of minRecoveryPoint from
6181  * this point.
6182  */
6183  updateMinRecoveryPoint = true;
6184 
6185  UpdateControlFile();
6186 
6187  /*
6188  * We update SharedRecoveryState while holding the lock on ControlFileLock
6189  * so both states are consistent in shared memory.
6190  */
6191  SpinLockAcquire(&XLogCtl->info_lck);
6192  XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
6193  SpinLockRelease(&XLogCtl->info_lck);
6194 
6195  LWLockRelease(ControlFileLock);
6196 }
static bool updateMinRecoveryPoint
Definition: xlog.c:641

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6759 of file xlog.c.

/*
 * Update the ps display while a checkpoint or restartpoint is running.
 *
 * flags:        checkpoint flag bits; nothing is done unless
 *               CHECKPOINT_END_OF_RECOVERY or CHECKPOINT_IS_SHUTDOWN is set.
 * restartpoint: selects the word "restartpoint" instead of "checkpoint".
 * reset:        when true, the display is cleared to "" instead of being
 *               set to "performing [end-of-recovery ][shutdown ]<kind>".
 */
6760 {
6761  /*
6762  * The status is reported only for end-of-recovery and shutdown
6763  * checkpoints or shutdown restartpoints. Updating the ps display is
6764  * useful in those situations as it may not be possible to rely on
6765  * pg_stat_activity to see the status of the checkpointer or the startup
6766  * process.
6767  */
6768  if ((flags & (CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IS_SHUTDOWN)) == 0)
6769  return;
6770 
6771  if (reset)
6772  set_ps_display("");
6773  else
6774  {
6775  char activitymsg[128];
6776 
6777  snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6778  (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6779  (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6780  restartpoint ? "restartpoint" : "checkpoint");
6781  set_ps_display(activitymsg);
6782  }
6783 }

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6721 of file xlog.c.

/*
 * Fold the WAL volume of the checkpoint cycle that just ended into the
 * running estimate.
 *
 * nbytes is stored as PrevCheckPointDistance.  CheckPointDistanceEstimate
 * jumps straight to nbytes when nbytes is larger; otherwise it decays
 * towards nbytes as 0.90 * old estimate + 0.10 * nbytes.
 */
6722 {
6723  /*
6724  * To estimate the number of segments consumed between checkpoints, keep a
6725  * moving average of the amount of WAL generated in previous checkpoint
6726  * cycles. However, if the load is bursty, with quiet periods and busy
6727  * periods, we want to cater for the peak load. So instead of a plain
6728  * moving average, let the average decline slowly if the previous cycle
6729  * used less WAL than estimated, but bump it up immediately if it used
6730  * more.
6731  *
6732  * When checkpoints are triggered by max_wal_size, this should converge to
6733  * CheckpointSegments * wal_segment_size.
6734  *
6735  * Note: This doesn't pay any attention to what caused the checkpoint.
6736  * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6737  * starting a base backup, are counted the same as those created
6738  * automatically. The slow-decline will largely mask them out, if they are
6739  * not frequent. If they are frequent, it seems reasonable to count them
6740  * in as any others; if you issue a manual checkpoint every 5 minutes and
6741  * never let a timed checkpoint happen, it makes sense to base the
6742  * preallocation on that 5 minute interval rather than whatever
6743  * checkpoint_timeout is set to.
6744  */
6745  PrevCheckPointDistance = nbytes;
6746  if (CheckPointDistanceEstimate < nbytes)
6747  CheckPointDistanceEstimate = nbytes;
6748  else
6749  CheckPointDistanceEstimate =
6750  (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6751 }

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4524 of file xlog.c.

/*
 * Write the in-memory ControlFile contents out for the data directory
 * DataDir by delegating to update_controlfile(), requesting a sync.
 */
4525 {
4526  update_controlfile(DataDir, ControlFile, true);
4527 }
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8096 of file xlog.c.

/*
 * Propagate the current fullPageWrites setting into shared memory
 * (XLogCtl->Insert.fullPageWrites) and, when standby info is being logged
 * and recovery is not in progress, emit an XLOG_FPW_CHANGE record.
 *
 * Returns immediately if the shared value already matches.  The shared
 * flag is changed under the exclusive WAL insert lock, inside a critical
 * section.
 */
8097 {
8098  XLogCtlInsert *Insert = &XLogCtl->Insert;
8099  bool recoveryInProgress;
8100 
8101  /*
8102  * Do nothing if full_page_writes has not been changed.
8103  *
8104  * It's safe to check the shared full_page_writes without the lock,
8105  * because we assume that there is no concurrently running process which
8106  * can update it.
8107  */
8108  if (fullPageWrites == Insert->fullPageWrites)
8109  return;
8110 
8111  /*
8112  * Perform this outside critical section so that the WAL insert
8113  * initialization done by RecoveryInProgress() doesn't trigger an
8114  * assertion failure.
8115  */
8116  recoveryInProgress = RecoveryInProgress();
8117 
8118  START_CRIT_SECTION();
8119 
8120  /*
8121  * It's always safe to take full page images, even when not strictly
8122  * required, but not the other way round. So if we're setting full_page_writes
8123  * to true, first set it true and then write the WAL record. If we're
8124  * setting it to false, first write the WAL record and then set the global
8125  * flag.
8126  */
8127  if (fullPageWrites)
8128  {
8129  WALInsertLockAcquireExclusive();
8130  Insert->fullPageWrites = true;
8131  WALInsertLockRelease();
8132  }
8133 
8134  /*
8135  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8136  * full_page_writes during archive recovery, if required.
8137  */
8138  if (XLogStandbyInfoActive() && !recoveryInProgress)
8139  {
8140  XLogBeginInsert();
8141  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8142 
8143  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8144  }
8145 
8146  if (!fullPageWrites)
8147  {
8148  WALInsertLockAcquireExclusive();
8149  Insert->fullPageWrites = false;
8150  WALInsertLockRelease();
8151  }
8152  END_CRIT_SECTION();
8153 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3799 of file xlog.c.

/*
 * Record the segment number parsed from the WAL file name 'filename' as
 * XLogCtl->lastRemovedSegNo if it is greater than the value stored there.
 * The shared field is read and written under info_lck.
 */
3800 {
3801  uint32 tli;
3802  XLogSegNo segno;
3803 
3804  XLogFromFileName(filename, &tli, &segno, wal_segment_size);
3805 
3806  SpinLockAcquire(&XLogCtl->info_lck);
3807  if (segno > XLogCtl->lastRemovedSegNo)
3808  XLogCtl->lastRemovedSegNo = segno;
3809  SpinLockRelease(&XLogCtl->info_lck);
3810 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2709 of file xlog.c.

/*
 * Advance the control file's minRecoveryPoint to the current replay
 * position when it lags behind.
 *
 * lsn:   position the caller needs covered; ignored for the cheap early
 *        exit when 'force' is set.
 * force: update even if lsn is not beyond the local copy.
 *
 * Does nothing when updateMinRecoveryPoint is false, and turns that flag
 * off when the minimum recovery point is invalid.  Otherwise the control
 * file and the local copies are updated while holding ControlFileLock.
 */
2710 {
2711  /* Quick check using our local copy of the variable */
2712  if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2713  return;
2714 
2715  /*
2716  * An invalid minRecoveryPoint means that we need to recover all the WAL,
2717  * i.e., we're doing crash recovery. We never modify the control file's
2718  * value in that case, so we can short-circuit future checks here too. The
2719  * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2720  * updated until crash recovery finishes. We only do this for the startup
2721  * process as it should not update its own reference of minRecoveryPoint
2722  * until it has finished crash recovery to make sure that all WAL
2723  * available is replayed in this case. This also saves from extra locks
2724  * taken on the control file from the startup process.
2725  */
2726  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
2727  {
2728  updateMinRecoveryPoint = false;
2729  return;
2730  }
2731 
2732  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2733 
2734  /* update local copy */
2735  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
2736  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
2737 
2738  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
2739  updateMinRecoveryPoint = false;
2740  else if (force || LocalMinRecoveryPoint < lsn)
2741  {
2742  XLogRecPtr newMinRecoveryPoint;
2743  TimeLineID newMinRecoveryPointTLI;
2744 
2745  /*
2746  * To avoid having to update the control file too often, we update it
2747  * all the way to the last record being replayed, even though 'lsn'
2748  * would suffice for correctness. This also allows the 'force' case
2749  * to not need a valid 'lsn' value.
2750  *
2751  * Another important reason for doing it this way is that the passed
2752  * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2753  * the caller got it from a corrupted heap page. Accepting such a
2754  * value as the min recovery point would prevent us from coming up at
2755  * all. Instead, we just log a warning and continue with recovery.
2756  * (See also the comments about corrupt LSNs in XLogFlush.)
2757  */
2758  newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2759  if (!force && newMinRecoveryPoint < lsn)
2760  elog(WARNING,
2761  "xlog min recovery request %X/%X is past current point %X/%X",
2762  LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2763 
2764  /* update control file */
2765  if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2766  {
2767  ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2768  ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2769  UpdateControlFile();
2770  LocalMinRecoveryPoint = newMinRecoveryPoint;
2771  LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2772 
2773  ereport(DEBUG2,
2774  (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
2775  LSN_FORMAT_ARGS(newMinRecoveryPoint),
2776  newMinRecoveryPointTLI)));
2777  }
2778  }
2779  LWLockRelease(ControlFileLock);
2780 }
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4086 of file xlog.c.

/*
 * Verify the on-disk layout of the WAL directory.
 *
 * XLOGDIR itself must exist and be a directory, otherwise FATAL is raised.
 * The "archive_status" and "summaries" subdirectories are created with
 * MakePGDirectory() when missing; FATAL is raised if either exists but is
 * not a directory, or if creation fails.
 */
4087 {
4088  char path[MAXPGPATH];
4089  struct stat stat_buf;
4090 
4091  /* Check for pg_wal; if it doesn't exist, error out */
4092  if (stat(XLOGDIR, &stat_buf) != 0 ||
4093  !S_ISDIR(stat_buf.st_mode))
4094  ereport(FATAL,
4095  (errcode_for_file_access(),
4096  errmsg("required WAL directory \"%s\" does not exist",
4097  XLOGDIR)));
4098 
4099  /* Check for archive_status */
4100  snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4101  if (stat(path, &stat_buf) == 0)
4102  {
4103  /* Check for weird cases where it exists but isn't a directory */
4104  if (!S_ISDIR(stat_buf.st_mode))
4105  ereport(FATAL,
4106  (errcode_for_file_access(),
4107  errmsg("required WAL directory \"%s\" does not exist",
4108  path)));
4109  }
4110  else
4111  {
4112  ereport(LOG,
4113  (errmsg("creating missing WAL directory \"%s\"", path)));
4114  if (MakePGDirectory(path) < 0)
4115  ereport(FATAL,
4116  (errcode_for_file_access(),
4117  errmsg("could not create missing directory \"%s\": %m",
4118  path)));
4119  }
4120 
4121  /* Check for summaries */
4122  snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4123  if (stat(path, &stat_buf) == 0)
4124  {
4125  /* Check for weird cases where it exists but isn't a directory */
4126  if (!S_ISDIR(stat_buf.st_mode))
4127  ereport(FATAL,
4128  (errmsg("required WAL directory \"%s\" does not exist",
4129  path)));
4130  }
4131  else
4132  {
4133  ereport(LOG,
4134  (errmsg("creating missing WAL directory \"%s\"", path)));
4135  if (MakePGDirectory(path) < 0)
4136  ereport(FATAL,
4137  (errmsg("could not create missing directory \"%s\": %m",
4138  path)));
4139  }
4140 }
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
#define S_ISDIR(m)
Definition: win32_port.h:325

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1497 of file xlog.c.

/*
 * WaitXLogInsertionsToFinish
 *
 * Wait until every in-progress WAL insertion positioned before 'upto' has
 * finished, by visiting each of the NUM_XLOGINSERT_LOCKS insertion locks.
 *
 * Returns the position up to which all insertions are known to be finished.
 * If 'upto' is not beyond the already-known inserted position, that position
 * is returned immediately.  If 'upto' lies past the reserved WAL, the request
 * is logged and clamped, so the result can be smaller than 'upto'; it can
 * also be larger than 'upto' (see the comment before the final statement).
 *
 * Raises PANIC when the caller has no PGPROC (MyProc == NULL).
 *
 * NOTE(review): listing lines 1503, 1513, 1579 and 1602 are absent from this
 * extract, so the declaration of 'Insert', the assignment to 'inserted', the
 * second LWLockWaitForVar() argument and the start of the final statement
 * are not shown here.  The References list (XLogCtlData::Insert,
 * pg_atomic_read_membarrier_u64(), WALInsertLock::insertingAt,
 * pg_atomic_monotonic_advance_u64()) hints at their content -- confirm
 * against xlog.c.
 */
1498 {
1499  uint64 bytepos;
1500  XLogRecPtr inserted;
1501  XLogRecPtr reservedUpto;
1502  XLogRecPtr finishedUpto;
1504  int i;
1505 
1506  if (MyProc == NULL)
1507  elog(PANIC, "cannot wait without a PGPROC structure");
1508 
1509  /*
1510  * Check if there's any work to do. Use a barrier to ensure we get the
1511  * freshest value.
1512  */
1514  if (upto <= inserted)
1515  return inserted;
1516 
1517  /* Read the current insert position */
1518  SpinLockAcquire(&Insert->insertpos_lck);
1519  bytepos = Insert->CurrBytePos;
1520  SpinLockRelease(&Insert->insertpos_lck);
1521  reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1522 
1523  /*
1524  * No-one should request to flush a piece of WAL that hasn't even been
1525  * reserved yet. However, it can happen if there is a block with a bogus
1526  * LSN on disk, for example. XLogFlush checks for that situation and
1527  * complains, but only after the flush. Here we just assume that to mean
1528  * that all WAL that has been reserved needs to be finished. In this
1529  * corner-case, the return value can be smaller than 'upto' argument.
1530  */
1531  if (upto > reservedUpto)
1532  {
1533  ereport(LOG,
1534  (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1535  LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
1536  upto = reservedUpto;
1537  }
1538 
1539  /*
1540  * Loop through all the locks, sleeping on any in-progress insert older
1541  * than 'upto'.
1542  *
1543  * finishedUpto is our return value, indicating the point upto which all
1544  * the WAL insertions have been finished. Initialize it to the head of
1545  * reserved WAL, and as we iterate through the insertion locks, back it
1546  * out for any insertion that's still in progress.
1547  */
1548  finishedUpto = reservedUpto;
1549  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1550  {
1551  XLogRecPtr insertingat = InvalidXLogRecPtr;
1552 
1553  do
1554  {
1555  /*
1556  * See if this insertion is in progress. LWLockWaitForVar will
1557  * wait for the lock to be released, or for the 'value' to be set
1558  * by a LWLockUpdateVar call. When a lock is initially acquired,
1559  * its value is 0 (InvalidXLogRecPtr), which means that we don't
1560  * know where it's inserting yet. We will have to wait for it. If
1561  * it's a small insertion, the record will most likely fit on the
1562  * same page and the inserter will release the lock without ever
1563  * calling LWLockUpdateVar. But if it has to sleep, it will
1564  * advertise the insertion point with LWLockUpdateVar before
1565  * sleeping.
1566  *
1567  * In this loop we are only waiting for insertions that started
1568  * before WaitXLogInsertionsToFinish was called. The lack of
1569  * memory barriers in the loop means that we might see locks as
1570  * "unused" that have since become used. This is fine because
1571  * they only can be used for later insertions that we would not
1572  * want to wait on anyway. Not taking a lock to acquire the
1573  * current insertingAt value means that we might see older
1574  * insertingAt values. This is also fine, because if we read a
1575  * value too old, we will add ourselves to the wait queue, which
1576  * contains atomic operations.
1577  */
1578  if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1580  insertingat, &insertingat))
1581  {
1582  /* the lock was free, so no insertion in progress */
1583  insertingat = InvalidXLogRecPtr;
1584  break;
1585  }
1586 
1587  /*
1588  * This insertion is still in progress. Have to wait, unless the
1589  * inserter has proceeded past 'upto'.
1590  */
1591  } while (insertingat < upto);
1592 
1593  if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1594  finishedUpto = insertingat;
1595  }
1596 
1597  /*
1598  * Advance the limit we know to have been inserted and return the freshest
1599  * value we know of, which might be beyond what we requested if somebody
1600  * is concurrently doing this with an 'upto' pointer ahead of us.
1601  */
1603  finishedUpto);
1604 
1605  return finishedUpto;
1606 }
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition: atomics.h:578
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1584
PGPROC * MyProc
Definition: proc.c:66
pg_atomic_uint64 insertingAt
Definition: xlog.c:368

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1364 of file xlog.c.

/*
 * WALInsertLockAcquire
 *
 * Choose one WAL insertion lock for this backend and record its index in
 * MyLockNo.  The lock used on the previous call is tried first; on the first
 * call the index is MyProcNumber modulo NUM_XLOGINSERT_LOCKS.  If 'immed' is
 * false (the lock was not obtained immediately), the function-static
 * 'lockToTry' advances to the following lock for the next call.
 *
 * NOTE(review): listing line 1389, which assigns 'immed', is absent from
 * this extract.  The References list names LWLockAcquire() and LW_EXCLUSIVE,
 * so presumably that is where the chosen lock is taken -- confirm against
 * xlog.c.
 */
1365 {
1366  bool immed;
1367 
1368  /*
1369  * It doesn't matter which of the WAL insertion locks we acquire, so try
1370  * the one we used last time. If the system isn't particularly busy, it's
1371  * a good bet that it's still available, and it's good to have some
1372  * affinity to a particular lock so that you don't unnecessarily bounce
1373  * cache lines between processes when there's no contention.
1374  *
1375  * If this is the first time through in this backend, pick a lock
1376  * (semi-)randomly. This allows the locks to be used evenly if you have a
1377  * lot of very short connections.
1378  */
1379  static int lockToTry = -1;
1380 
1381  if (lockToTry == -1)
1382  lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1383  MyLockNo = lockToTry;
1384 
1385  /*
1386  * The insertingAt value is initially set to 0, as we don't know our
1387  * insert location yet.
1388  */
1390  if (!immed)
1391  {
1392  /*
1393  * If we couldn't get the lock immediately, try another lock next
1394  * time. On a system with more insertion locks than concurrent
1395  * inserters, this causes all the inserters to eventually migrate to a
1396  * lock that no-one else is using. On a system with more inserters
1397  * than locks, it still helps to distribute the inserters evenly
1398  * across the locks.
1399  */
1400  lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1401  }
1402 }
ProcNumber MyProcNumber
Definition: globals.c:88
static int MyLockNo
Definition: xlog.c:644

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1409 of file xlog.c.

/*
 * WALInsertLockAcquireExclusive
 *
 * Take all of the WAL insertion locks.  For every lock except the last, the
 * insertingAt indicator is set to PG_UINT64_MAX through LWLockUpdateVar() so
 * that no-one blocks waiting on it.  On return holdingAllLocks is true.
 *
 * NOTE(review): listing lines 1420, 1422 and 1426 are absent from this
 * extract.  Judging by the References list (LWLockAcquire(), LW_EXCLUSIVE,
 * WALInsertLock::insertingAt) they presumably hold the lock acquisitions and
 * the value-pointer argument of LWLockUpdateVar() -- confirm against xlog.c.
 */
1410 {
1411  int i;
1412 
1413  /*
1414  * When holding all the locks, all but the last lock's insertingAt
1415  * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1416  * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1417  */
1418  for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1419  {
1421  LWLockUpdateVar(&WALInsertLocks[i].l.lock,
1423  PG_UINT64_MAX);
1424  }
1425  /* Variable value reset to 0 at release */
1427 
1428  holdingAllLocks = true;
1429 }
#define PG_UINT64_MAX
Definition: c.h:593
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1720
static bool holdingAllLocks
Definition: xlog.c:645

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1438 of file xlog.c.

/*
 * WALInsertLockRelease
 *
 * Counterpart of the two acquire routines.  When holdingAllLocks is set, the
 * loop visits all NUM_XLOGINSERT_LOCKS locks and the flag is cleared
 * afterwards; otherwise a single lock is handled.  Both paths pass 0 as the
 * final argument of the (partly missing) call.
 *
 * NOTE(review): listing lines 1445-1446 and 1453-1454 are absent from this
 * extract.  The References list names LWLockReleaseClearVar(), MyLockNo and
 * WALInsertLock::insertingAt, so presumably each call releases a lock while
 * resetting its insertingAt value to 0 -- confirm against xlog.c.
 */
1439 {
1440  if (holdingAllLocks)
1441  {
1442  int i;
1443 
1444  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1447  0);
1448 
1449  holdingAllLocks = false;
1450  }
1451  else
1452  {
1455  0);
1456  }
1457 }
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1854

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1464 of file xlog.c.

/*
 * WALInsertLockUpdateInsertingAt
 *
 * Publish 'insertingAt' as this backend's current insert position.  When
 * holdingAllLocks is set, the last insertion lock carries the position (see
 * the comment in the body); otherwise a single lock is updated.
 *
 * NOTE(review): listing lines 1472-1473 and 1477-1478 are absent from this
 * extract.  The References list names LWLockUpdateVar(), MyLockNo and
 * NUM_XLOGINSERT_LOCKS, so presumably each branch is one LWLockUpdateVar()
 * call on the relevant lock -- confirm against xlog.c.
 */
1465 {
1466  if (holdingAllLocks)
1467  {
1468  /*
1469  * We use the last lock to mark our actual position, see comments in
1470  * WALInsertLockAcquireExclusive.
1471  */
1474  insertingAt);
1475  }
1476  else
1479  insertingAt);
1480 }

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1741 of file xlog.c.

/*
 * WALReadFromBuffers
 *
 * Copy up to 'count' bytes of WAL, starting at 'startptr', from the WAL
 * buffer pages (XLogCtl->pages) into 'dstbuf' without taking a lock.
 *
 * Returns the number of bytes copied, which may be less than 'count':
 *   - 0 at once if recovery is in progress or 'tli' is not the current WAL
 *     insertion timeline;
 *   - a short count if, for some page, the end pointer in XLogCtl->xlblocks
 *     does not equal the expected value either before or after the memcpy().
 *
 * Raises ERROR if startptr + count is greater than 'inserted'.
 *
 * NOTE(review): listing line 1758, which assigns 'inserted', is absent from
 * this extract.  The References list names XLogCtlData::logInsertResult, so
 * presumably the value is read from there -- confirm against xlog.c.
 */
1743 {
1744  char *pdst = dstbuf;
1745  XLogRecPtr recptr = startptr;
1746  XLogRecPtr inserted;
1747  Size nbytes = count;
1748 
1749  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1750  return 0;
1751 
1752  Assert(!XLogRecPtrIsInvalid(startptr));
1753 
1754  /*
1755  * Caller should ensure that the requested data has been inserted into WAL
1756  * buffers before we try to read it.
1757  */
1759  if (startptr + count > inserted)
1760  ereport(ERROR,
1761  errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1762  LSN_FORMAT_ARGS(startptr + count),
1763  LSN_FORMAT_ARGS(inserted)));
1764 
1765  /*
1766  * Loop through the buffers without a lock. For each buffer, atomically
1767  * read and verify the end pointer, then copy the data out, and finally
1768  * re-read and re-verify the end pointer.
1769  *
1770  * Once a page is evicted, it never returns to the WAL buffers, so if the
1771  * end pointer matches the expected end pointer before and after we copy
1772  * the data, then the right page must have been present during the data
1773  * copy. Read barriers are necessary to ensure that the data copy actually
1774  * happens between the two verification steps.
1775  *
1776  * If either verification fails, we simply terminate the loop and return
1777  * with the data that had been already copied out successfully.
1778  */
1779  while (nbytes > 0)
1780  {
1781  uint32 offset = recptr % XLOG_BLCKSZ;
1782  int idx = XLogRecPtrToBufIdx(recptr);
1783  XLogRecPtr expectedEndPtr;
1784  XLogRecPtr endptr;
1785  const char *page;
1786  const char *psrc;
1787  Size npagebytes;
1788 
1789  /*
1790  * Calculate the end pointer we expect in the xlblocks array if the
1791  * correct page is present.
1792  */
1793  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1794 
1795  /*
1796  * First verification step: check that the correct page is present in
1797  * the WAL buffers.
1798  */
1799  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1800  if (expectedEndPtr != endptr)
1801  break;
1802 
1803  /*
1804  * The correct page is present (or was at the time the endptr was
1805  * read; must re-verify later). Calculate pointer to source data and
1806  * determine how much data to read from this page.
1807  */
1808  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1809  psrc = page + offset;
1810  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1811 
1812  /*
1813  * Ensure that the data copy and the first verification step are not
1814  * reordered.
1815  */
1816  pg_read_barrier();
1817 
1818  /* data copy */
1819  memcpy(pdst, psrc, npagebytes);
1820 
1821  /*
1822  * Ensure that the data copy and the second verification step are not
1823  * reordered.
1824  */
1825  pg_read_barrier();
1826 
1827  /*
1828  * Second verification step: check that the page we read from wasn't
1829  * evicted while we were copying the data.
1830  */
1831  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1832  if (expectedEndPtr != endptr)
1833  break;
1834 
1835  pdst += npagebytes;
1836  recptr += npagebytes;
1837  nbytes -= npagebytes;
1838  }
1839 
1840  Assert(pdst - dstbuf <= count);
1841 
1842  return pdst - dstbuf;
1843 }
#define pg_read_barrier()
Definition: atomics.h:149
#define Min(x, y)
Definition: c.h:1004
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6476

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4226 of file xlog.c.

/*
 * WriteControlFile
 *
 * Fill in the version and compatibility-check fields of ControlFile, protect
 * the contents with a CRC covering everything before the 'crc' member, and
 * write the result to a newly created XLOG_CONTROL_FILE, zero-padded to
 * PG_CONTROL_FILE_SIZE bytes.  The file is opened with O_CREAT | O_EXCL and
 * is fsync'ed before being closed.
 *
 * Any failure to create, write, fsync or close the file raises PANIC.  If
 * the write fails without setting errno, ENOSPC is assumed.
 *
 * NOTE(review): many listing lines are absent from this extract (for example
 * 4234-4235, 4238, 4243-4251, 4254-4255, 4258, 4270, 4280 and the first
 * argument line of each ereport()).  The References list (PG_CONTROL_VERSION,
 * CATALOG_VERSION_NO, INIT_CRC32C, COMP_CRC32C, FIN_CRC32C, BasicOpenFile(),
 * write, pgstat_report_wait_end(), errcode_for_file_access(), ...) hints at
 * their content -- confirm against xlog.c before relying on this listing.
 */
4227 {
4228  int fd;
4229  char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4230 
4231  /*
4232  * Initialize version and compatibility-check fields
4233  */
4236 
4237  ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4239 
4240  ControlFile->blcksz = BLCKSZ;
4241  ControlFile->relseg_size = RELSEG_SIZE;
4242  ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4244 
4247 
4250 
4252 
4253  /* Contents are protected with a CRC */
4256  (char *) ControlFile,
4257  offsetof(ControlFileData, crc));
4259 
4260  /*
4261  * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4262  * the excess over sizeof(ControlFileData). This reduces the odds of
4263  * premature-EOF errors when reading pg_control. We'll still fail when we
4264  * check the contents of the file, but hopefully with a more specific
4265  * error than "couldn't read pg_control".
4266  */
4267  memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4268  memcpy(buffer, ControlFile, sizeof(ControlFileData));
4269 
4271  O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4272  if (fd < 0)
4273  ereport(PANIC,
4275  errmsg("could not create file \"%s\": %m",
4276  XLOG_CONTROL_FILE)));
4277 
4278  errno = 0;
4279  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4281  {
4282  /* if write didn't set errno, assume problem is no disk space */
4283  if (errno == 0)
4284  errno = ENOSPC;
4285  ereport(PANIC,
4287  errmsg("could not write to file \"%s\": %m",
4288  XLOG_CONTROL_FILE)));
4289  }
4291 
4292  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4293  if (pg_fsync(fd) != 0)
4294  ereport(PANIC,
4296  errmsg("could not fsync file \"%s\": %m",
4297  XLOG_CONTROL_FILE)));
4299 
4300  if (close(fd) != 0)
4301  ereport(PANIC,
4303  errmsg("could not close file \"%s\": %m",
4304  XLOG_CONTROL_FILE)));
4305 }
#define FLOAT8PASSBYVAL
Definition: c.h:635
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, FLOAT8PASSBYVAL, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState record)

Definition at line 8165 of file xlog.c.

/*
 * xlog_redo
 *
 * Replay one WAL record of the XLOG resource manager.  The record type is
 * taken from XLogRecGetInfo(record) with the XLR_INFO_MASK bits removed, and
 * handled as follows:
 *
 *   XLOG_NEXTOID              set TransamVariables->nextOid from the record
 *   XLOG_CHECKPOINT_SHUTDOWN  adopt the checkpoint's counters exactly, check
 *                             the timeline, call RecoveryRestartPoint()
 *   XLOG_CHECKPOINT_ONLINE    treat the XID counter as a minimum, check the
 *                             timeline, call RecoveryRestartPoint()
 *   XLOG_END_OF_RECOVERY      check the timeline only
 *   XLOG_FPI,
 *   XLOG_FPI_FOR_HINT         restore every full-page image in the record
 *   XLOG_PARAMETER_CHANGE     copy the new settings into ControlFile
 *   XLOG_FPW_CHANGE           remember the new full_page_writes state
 *   XLOG_OVERWRITE_CONTRECORD, XLOG_NOOP, XLOG_SWITCH, XLOG_RESTORE_POINT,
 *   XLOG_BACKUP_END, XLOG_CHECKPOINT_REDO
 *                             nothing to do in this function
 *
 * A timeline mismatch raises PANIC; an XLOG_FPI record lacking a full-page
 * image, or an unexpected XLogReadBufferForRedo() result, raises ERROR.
 *
 * NOTE(review): numerous listing lines are absent from this extract (for
 * example 8191, 8206, 8211, 8225-8227, 8237, 8248, 8258, 8300, 8318, 8325,
 * 8327, 8444-8447, 8450-8456, 8468-8471, 8476, 8480-8484, 8488 and
 * 8503-8506), so several conditions and calls are shown only in part.
 * Confirm details against xlog.c before relying on this listing.
 */
8166 {
8167  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8168  XLogRecPtr lsn = record->EndRecPtr;
8169 
8170  /*
8171  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8172  * XLOG_FPI_FOR_HINT records.
8173  */
8174  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8175  !XLogRecHasAnyBlockRefs(record));
8176 
8177  if (info == XLOG_NEXTOID)
8178  {
8179  Oid nextOid;
8180 
8181  /*
8182  * We used to try to take the maximum of TransamVariables->nextOid and
8183  * the recorded nextOid, but that fails if the OID counter wraps
8184  * around. Since no OID allocation should be happening during replay
8185  * anyway, better to just believe the record exactly. We still take
8186  * OidGenLock while setting the variable, just in case.
8187  */
8188  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8189  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8190  TransamVariables->nextOid = nextOid;
8192  LWLockRelease(OidGenLock);
8193  }
8194  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8195  {
8196  CheckPoint checkPoint;
8197  TimeLineID replayTLI;
8198 
8199  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8200  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8201  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8202  TransamVariables->nextXid = checkPoint.nextXid;
8203  LWLockRelease(XidGenLock);
8204  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8205  TransamVariables->nextOid = checkPoint.nextOid;
8207  LWLockRelease(OidGenLock);
8208  MultiXactSetNextMXact(checkPoint.nextMulti,
8209  checkPoint.nextMultiOffset);
8210 
8212  checkPoint.oldestMultiDB);
8213 
8214  /*
8215  * No need to set oldestClogXid here as well; it'll be set when we
8216  * redo an xl_clog_truncate if it changed since initialization.
8217  */
8218  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8219 
8220  /*
8221  * If we see a shutdown checkpoint while waiting for an end-of-backup
8222  * record, the backup was canceled and the end-of-backup record will
8223  * never arrive.
8224  */
8228  ereport(PANIC,
8229  (errmsg("online backup was canceled, recovery cannot continue")));
8230 
8231  /*
8232  * If we see a shutdown checkpoint, we know that nothing was running
8233  * on the primary at this point. So fake-up an empty running-xacts
8234  * record and use that here and now. Recover additional standby state
8235  * for prepared transactions.
8236  */
8238  {
8239  TransactionId *xids;
8240  int nxids;
8241  TransactionId oldestActiveXID;
8242  TransactionId latestCompletedXid;
8243  RunningTransactionsData running;
8244 
8245  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8246 
8247  /* Update pg_subtrans entries for any prepared transactions */
8249 
8250  /*
8251  * Construct a RunningTransactions snapshot representing a shut
8252  * down server, with only prepared transactions still alive. We're
8253  * never overflowed at this point because all subxids are listed
8254  * with their parent prepared transactions.
8255  */
8256  running.xcnt = nxids;
8257  running.subxcnt = 0;
8259  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8260  running.oldestRunningXid = oldestActiveXID;
8261  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8262  TransactionIdRetreat(latestCompletedXid);
8263  Assert(TransactionIdIsNormal(latestCompletedXid));
8264  running.latestCompletedXid = latestCompletedXid;
8265  running.xids = xids;
8266 
8267  ProcArrayApplyRecoveryInfo(&running);
8268  }
8269 
8270  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8271  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8272  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8273  LWLockRelease(ControlFileLock);
8274 
8275  /* Update shared-memory copy of checkpoint XID/epoch */
8277  XLogCtl->ckptFullXid = checkPoint.nextXid;
8279 
8280  /*
8281  * We should've already switched to the new TLI before replaying this
8282  * record.
8283  */
8284  (void) GetCurrentReplayRecPtr(&replayTLI);
8285  if (checkPoint.ThisTimeLineID != replayTLI)
8286  ereport(PANIC,
8287  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8288  checkPoint.ThisTimeLineID, replayTLI)));
8289 
8290  RecoveryRestartPoint(&checkPoint, record);
8291  }
8292  else if (info == XLOG_CHECKPOINT_ONLINE)
8293  {
8294  CheckPoint checkPoint;
8295  TimeLineID replayTLI;
8296 
8297  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8298  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8299  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8301  checkPoint.nextXid))
8302  TransamVariables->nextXid = checkPoint.nextXid;
8303  LWLockRelease(XidGenLock);
8304 
8305  /*
8306  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8307  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8308  * counter is from the start of the checkpoint and might well be stale
8309  * compared to later XLOG_NEXTOID records. We could try to take the
8310  * maximum of the nextOid counter and our latest value, but since
8311  * there's no particular guarantee about the speed with which the OID
8312  * counter wraps around, that's a risky thing to do. In any case,
8313  * users of the nextOid counter are required to avoid assignment of
8314  * duplicates, so that a somewhat out-of-date value should be safe.
8315  */
8316 
8317  /* Handle multixact */
8319  checkPoint.nextMultiOffset);
8320 
8321  /*
8322  * NB: This may perform multixact truncation when replaying WAL
8323  * generated by an older primary.
8324  */
8326  checkPoint.oldestMultiDB);
8328  checkPoint.oldestXid))
8329  SetTransactionIdLimit(checkPoint.oldestXid,
8330  checkPoint.oldestXidDB);
8331  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8332  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8333  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8334  LWLockRelease(ControlFileLock);
8335 
8336  /* Update shared-memory copy of checkpoint XID/epoch */
8338  XLogCtl->ckptFullXid = checkPoint.nextXid;
8340 
8341  /* TLI should not change in an on-line checkpoint */
8342  (void) GetCurrentReplayRecPtr(&replayTLI);
8343  if (checkPoint.ThisTimeLineID != replayTLI)
8344  ereport(PANIC,
8345  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8346  checkPoint.ThisTimeLineID, replayTLI)));
8347 
8348  RecoveryRestartPoint(&checkPoint, record);
8349  }
8350  else if (info == XLOG_OVERWRITE_CONTRECORD)
8351  {
8352  /* nothing to do here, handled in xlogrecovery_redo() */
8353  }
8354  else if (info == XLOG_END_OF_RECOVERY)
8355  {
8356  xl_end_of_recovery xlrec;
8357  TimeLineID replayTLI;
8358 
8359  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8360 
8361  /*
8362  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8363  * but this case is rarer and harder to test, so the benefit doesn't
8364  * outweigh the potential extra cost of maintenance.
8365  */
8366 
8367  /*
8368  * We should've already switched to the new TLI before replaying this
8369  * record.
8370  */
8371  (void) GetCurrentReplayRecPtr(&replayTLI);
8372  if (xlrec.ThisTimeLineID != replayTLI)
8373  ereport(PANIC,
8374  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8375  xlrec.ThisTimeLineID, replayTLI)));
8376  }
8377  else if (info == XLOG_NOOP)
8378  {
8379  /* nothing to do here */
8380  }
8381  else if (info == XLOG_SWITCH)
8382  {
8383  /* nothing to do here */
8384  }
8385  else if (info == XLOG_RESTORE_POINT)
8386  {
8387  /* nothing to do here, handled in xlogrecovery.c */
8388  }
8389  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8390  {
8391  /*
8392  * XLOG_FPI records contain nothing else but one or more block
8393  * references. Every block reference must include a full-page image
8394  * even if full_page_writes was disabled when the record was generated
8395  * - otherwise there would be no point in this record.
8396  *
8397  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8398  * WAL-logged because of a hint bit update. They are only generated
8399  * when checksums and/or wal_log_hints are enabled. They may include
8400  * no full-page images if full_page_writes was disabled when they were
8401  * generated. In this case there is nothing to do here.
8402  *
8403  * No recovery conflicts are generated by these generic records - if a
8404  * resource manager needs to generate conflicts, it has to define a
8405  * separate WAL record type and redo routine.
8406  */
8407  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8408  {
8409  Buffer buffer;
8410 
8411  if (!XLogRecHasBlockImage(record, block_id))
8412  {
8413  if (info == XLOG_FPI)
8414  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8415  continue;
8416  }
8417 
8418  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8419  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8420  UnlockReleaseBuffer(buffer);
8421  }
8422  }
8423  else if (info == XLOG_BACKUP_END)
8424  {
8425  /* nothing to do here, handled in xlogrecovery_redo() */
8426  }
8427  else if (info == XLOG_PARAMETER_CHANGE)
8428  {
8429  xl_parameter_change xlrec;
8430 
8431  /* Update our copy of the parameters in pg_control */
8432  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8433 
8434  /*
8435  * Invalidate logical slots if we are in hot standby and the primary
8436  * does not have a WAL level sufficient for logical decoding. No need
8437  * to search for potentially conflicting logical slots if standby is
8438  * running with wal_level lower than logical, because in that case, we
8439  * would have either disallowed creation of logical slots or
8440  * invalidated existing ones.
8441  */
8442  if (InRecovery && InHotStandby &&
8443  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8446  0, InvalidOid,
8448 
8449  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8455  ControlFile->wal_level = xlrec.wal_level;
8457 
8458  /*
8459  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8460  * recover back up to this point before allowing hot standby again.
8461  * This is important if the max_* settings are decreased, to ensure
8462  * you don't run queries against the WAL preceding the change. The
8463  * local copies cannot be updated as long as crash recovery is
8464  * happening and we expect all the WAL to be replayed.
8465  */
8466  if (InArchiveRecovery)
8467  {
8470  }
8472  {
8473  TimeLineID replayTLI;
8474 
8475  (void) GetCurrentReplayRecPtr(&replayTLI);
8477  ControlFile->minRecoveryPointTLI = replayTLI;
8478  }
8479 
8483 
8485  LWLockRelease(ControlFileLock);
8486 
8487  /* Check to see if any parameter change gives a problem on recovery */
8489  }
8490  else if (info == XLOG_FPW_CHANGE)
8491  {
8492  bool fpw;
8493 
8494  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8495 
8496  /*
8497  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8498  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8499  * full_page_writes has been disabled during online backup.
8500  */
8501  if (!fpw)
8502  {
8504  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8507  }
8508 
8509  /* Keep track of full_page_writes */
8510  lastFullPageWrites = fpw;
8511  }
8512  else if (info == XLOG_CHECKPOINT_REDO)
8513  {
8514  /* nothing to do here, just for informational purposes */
8515  }
8516 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4953
unsigned char uint8
Definition: c.h:504
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2528
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2503
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7480
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2977 of file xlog.c.

2978 {
2979  XLogwrtRqst WriteRqst;
2980  bool flexible = true;
2981  static TimestampTz lastflush;
2982  TimestampTz now;
2983  int flushblocks;
2984  TimeLineID insertTLI;
2985 
2986  /* XLOG doesn't need flushing during recovery */
2987  if (RecoveryInProgress())
2988  return false;
2989 
2990  /*
2991  * Since we're not in recovery, InsertTimeLineID is set and can't change,
2992  * so we can read it without a lock.
2993  */
2994  insertTLI = XLogCtl->InsertTimeLineID;
2995 
2996  /* read updated LogwrtRqst */
2997  SpinLockAcquire(&XLogCtl->info_lck);
2998  WriteRqst = XLogCtl->LogwrtRqst;
2999  SpinLockRelease(&XLogCtl->info_lck);
3000 
3001  /* back off to last completed page boundary */
3002  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3003 
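3004  /* if we have already flushed that far, consider async commit records */
3005  RefreshXLogWriteResult(LogwrtResult);
3006  if (WriteRqst.Write <= LogwrtResult.Flush)
3007  {
3008  SpinLockAcquire(&XLogCtl->info_lck);
3009  WriteRqst.Write = XLogCtl->asyncXactLSN;
3010  SpinLockRelease(&XLogCtl->info_lck);
3011  flexible = false; /* ensure it all gets written */
3012  }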
3013 
3014  /*
3015  * If already known flushed, we're done. Just need to check if we are
3016  * holding an open file handle to a logfile that's no longer in use,
3017  * preventing the file from being deleted.
3018  */
3019  if (WriteRqst.Write <= LogwrtResult.Flush)
3020  {
3021  if (openLogFile >= 0)
3022  {
3023  if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
3024  wal_segment_size))
3025  {
3026  XLogFileClose();
3027  }
3028  }
3029  return false;
3030  }
3031 
3032  /*
3033  * Determine how far to flush WAL, based on the wal_writer_delay and
3034  * wal_writer_flush_after GUCs.
3035  *
3036  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3037  * wal_writer_flush_after, to decide when to wake us up. Make sure the
3038  * logic is the same in both places if you change this.
3039  */
3040  now = GetCurrentTimestamp();
3041  flushblocks =
3042  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3043 
3044  if (WalWriterFlushAfter == 0 || lastflush == 0)
3045  {
3046  /* first call, or block based limits disabled */
3047  WriteRqst.Flush = WriteRqst.Write;
3048  lastflush = now;
3049  }
3050  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3051  {
3052  /*
3053  * Flush the writes at least every WalWriterDelay ms. This is
3054  * important to bound the amount of time it takes for an asynchronous
3055  * commit to hit disk.
3056  */
3057  WriteRqst.Flush = WriteRqst.Write;
3058  lastflush = now;
3059  }
3060  else if (flushblocks >= WalWriterFlushAfter)
3061  {
3062  /* exceeded wal_writer_flush_after blocks, flush */
3063  WriteRqst.Flush = WriteRqst.Write;
3064  lastflush = now;
3065  }
3066  else
3067  {
3068  /* no flushing, this time round */
3069  WriteRqst.Flush = 0;
3070  }
3071 
3072 #ifdef WAL_DEBUG
3073  if (XLOG_DEBUG)
3074  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3075  LSN_FORMAT_ARGS(WriteRqst.Write),
3076  LSN_FORMAT_ARGS(WriteRqst.Flush),
3077  LSN_FORMAT_ARGS(LogwrtResult.Write),
3078  LSN_FORMAT_ARGS(LogwrtResult.Flush));
3079 #endif
3080 
3081  START_CRIT_SECTION();
3082 
3083  /* now wait for any in-progress insertions to finish and get write lock */
3084  WaitXLogInsertionsToFinish(WriteRqst.Write);
3085  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3086  RefreshXLogWriteResult(LogwrtResult);
3087  if (WriteRqst.Write > LogwrtResult.Write ||
3088  WriteRqst.Flush > LogwrtResult.Flush)
3089  {
3090  XLogWrite(WriteRqst, insertTLI, flexible);
3091  }
3092  LWLockRelease(WALWriteLock);
3093 
3094  END_CRIT_SECTION();
3095 
3096  /* wake up walsenders now that we've released heavily contended locks */
3097  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
3098 
3099  /*
3100  * Great, done. To take some work off the critical path, try to initialize
3101  * as many of the no-longer-needed WAL buffers for future use as we can.
3102  */
3103  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3104 
3105  /*
3106  * If we determined that we need to write data, but somebody else
3107  * wrote/flushed already, it should be considered as being active, to
3108  * avoid hibernating too early.
3109  */
3110  return true;
3111 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1791
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1619
XLogRecPtr asyncXactLSN
Definition: xlog.c:456
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:66
int WalWriterFlushAfter
Definition: walwriter.c:72
int WalWriterDelay
Definition: walwriter.c:71
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1891 of file xlog.c.

1892 {
1893  uint64 fullsegs;
1894  uint64 fullpages;
1895  uint64 bytesleft;
1896  uint32 seg_offset;
1897  XLogRecPtr result;
1898 
1899  fullsegs = bytepos / UsableBytesInSegment;
1900  bytesleft = bytepos % UsableBytesInSegment;
1901 
1902  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1903  {
1904  /* fits on first page of segment */
1905  if (bytesleft == 0)
1906  seg_offset = 0;
1907  else
1908  seg_offset = bytesleft + SizeOfXLogLongPHD;
1909  }
1910  else
1911  {
1912  /* account for the first page on segment with long header */
1913  seg_offset = XLOG_BLCKSZ;
1914  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1915 
1916  fullpages = bytesleft / UsableBytesInPage;
1917  bytesleft = bytesleft % UsableBytesInPage;
1918 
1919  if (bytesleft == 0)
1920  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1921  else
1922  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1923  }
1924 
1925  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1926 
1927  return result;
1928 }
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1851 of file xlog.c.

1852 {
1853  uint64 fullsegs;
1854  uint64 fullpages;
1855  uint64 bytesleft;
1856  uint32 seg_offset;
1857  XLogRecPtr result;
1858 
1859  fullsegs = bytepos / UsableBytesInSegment;
1860  bytesleft = bytepos % UsableBytesInSegment;
1861 
1862  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1863  {
1864  /* fits on first page of segment */
1865  seg_offset = bytesleft + SizeOfXLogLongPHD;
1866  }
1867  else
1868  {
1869  /* account for the first page on segment with long header */
1870  seg_offset = XLOG_BLCKSZ;
1871  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1872 
1873  fullpages = bytesleft / UsableBytesInPage;
1874  bytesleft = bytesleft % UsableBytesInPage;
1875 
1876  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1877  }
1878 
1879  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1880 
1881  return result;
1882 }

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2283 of file xlog.c.

2284 {
2285  XLogSegNo old_segno;
2286 
2287  XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2288 
2289  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2290  return true;
2291  return false;
2292 }

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4586 of file xlog.c.

4587 {
4588  int xbuffers;
4589 
4590  xbuffers = NBuffers / 32;
4591  if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4592  xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4593  if (xbuffers < 8)
4594  xbuffers = 8;
4595  return xbuffers;
4596 }

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3626 of file xlog.c.

3627 {
3628  Assert(openLogFile >= 0);
3629 
3630  /*
3631  * WAL segment files will not be re-read in normal operation, so we advise
3632  * the OS to release any cached pages. But do not do so if WAL archiving
3633  * or streaming is active, because archiver and walsender process could
3634  * use the cache to read the WAL segment.
3635  */
3636 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3637  if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3638  (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3639 #endif
3640 
3641  if (close(openLogFile) != 0)
3642  {
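3643  char xlogfname[MAXFNAMELEN];
3644  int save_errno = errno;
3645 
3646  XLogFileName(xlogfname, openLogTLI, openLogSegNo, wal_segment_size);
3647  errno = save_errno;
3648  ereport(PANIC,
3649  (errcode_for_file_access(),
3650  errmsg("could not close file \"%s\": %m", xlogfname)));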
3651  }
3652 
3653  openLogFile = -1;
3654  ReleaseExternalFD();
3655 }
void ReleaseExternalFD(void)
Definition: fd.c:1239

References Assert, close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3405 of file xlog.c.

3408 {
3409  char path[MAXPGPATH];
3410  char tmppath[MAXPGPATH];
3411  PGAlignedXLogBlock buffer;
3412  int srcfd;
3413  int fd;
3414  int nbytes;
3415 
3416  /*
3417  * Open the source file
3418  */
3419  XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3420  srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3421  if (srcfd < 0)
3422  ereport(ERROR,
3423  (errcode_for_file_access(),
3424  errmsg("could not open file \"%s\": %m", path)));
3425 
3426  /*
3427  * Copy into a temp file name.
3428  */
3429  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3430 
3431  unlink(tmppath);
3432 
3433  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3434  fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3435  if (fd < 0)
3436  ereport(ERROR,
3437  (errcode_for_file_access(),
3438  errmsg("could not create file \"%s\": %m", tmppath)));
3439 
3440  /*
3441  * Do the data copying.
3442  */
3443  for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3444  {
3445  int nread;
3446 
3447  nread = upto - nbytes;
3448 
3449  /*
3450  * The part that is not read from the source file is filled with
3451  * zeros.
3452  */
3453  if (nread < sizeof(buffer))
3454  memset(buffer.data, 0, sizeof(buffer));
3455 
3456  if (nread > 0)
3457  {
3458  int r;
3459 
3460  if (nread > sizeof(buffer))
3461  nread = sizeof(buffer);
3462  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3463  r = read(srcfd, buffer.data, nread);
3464  if (r != nread)
3465  {
3466  if (r < 0)
3467  ereport(ERROR,
3468  (errcode_for_file_access(),
3469  errmsg("could not read file \"%s\": %m",
3470  path)));
3471  else
3472  ereport(ERROR,
3473  (errcode(ERRCODE_DATA_CORRUPTED),
3474  errmsg("could not read file \"%s\": read %d of %zu",
3475  path, r, (Size) nread)));
3476  }
3477  pgstat_report_wait_end();
3478  }
3479  errno = 0;
3480  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3481  if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3482  {
3483  int save_errno = errno;
3484 
3485  /*
3486  * If we fail to make the file, delete it to release disk space
3487  */
3488  unlink(tmppath);
3489  /* if write didn't set errno, assume problem is no disk space */
3490  errno = save_errno ? save_errno : ENOSPC;
3491 
3492  ereport(ERROR,
3493  (errcode_for_file_access(),
3494  errmsg("could not write to file \"%s\": %m", tmppath)));
3495  }
3496  pgstat_report_wait_end();
3497  }
3498 
3499  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3500  if (pg_fsync(fd) != 0)
3501  ereport(data_sync_elevel(ERROR),
3502  (errcode_for_file_access(),
3503  errmsg("could not fsync file \"%s\": %m", tmppath)));
3504  pgstat_report_wait_end();
3505 
3506  if (CloseTransientFile(fd) != 0)
3507  ereport(ERROR,
3508  (errcode_for_file_access(),
3509  errmsg("could not close file \"%s\": %m", tmppath)));
3510 
3511  if (CloseTransientFile(srcfd) != 0)
3512  ereport(ERROR,
3513  (errcode_for_file_access(),
3514  errmsg("could not close file \"%s\": %m", path)));
3515 
3516  /*
3517  * Now move the segment into place with its final name.
3518  */
3519  if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3520  elog(ERROR, "InstallXLogFileSegment should not have failed");
3521 }
int CloseTransientFile(int fd)
Definition: fd.c:2809
int data_sync_elevel(int elevel)
Definition: fd.c:3936
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
char data[XLOG_BLCKSZ]
Definition: c.h:1148

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3367 of file xlog.c.

3368 {
3369  bool ignore_added;
3370  char path[MAXPGPATH];
3371  int fd;
3372 
3373  Assert(logtli != 0);
3374 
3375  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3376  if (fd >= 0)
3377  return fd;
3378 
3379  /* Now open original target segment (might not be file I just made) */
3380  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3381  get_sync_bit(wal_sync_method));
3382  if (fd < 0)
3383  ereport(ERROR,
3384  (errcode_for_file_access(),
3385  errmsg("could not open file \"%s\": %m", path)));
3386  return fd;
3387 }
#define O_CLOEXEC
Definition: win32_port.h:359

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool added,
char *  path 
)
static

Definition at line 3197 of file xlog.c.

3199 {
3200  char tmppath[MAXPGPATH];
3201  XLogSegNo installed_segno;
3202  XLogSegNo max_segno;
3203  int fd;
3204  int save_errno;
3205  int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3206 
3207  Assert(logtli != 0);
3208 
3209  XLogFilePath(path, logtli, logsegno, wal_segment_size);
3210 
3211  /*
3212  * Try to use existent file (checkpoint maker may have created it already)
3213  */
3214  *added = false;
3215  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3216  get_sync_bit(wal_sync_method));
3217  if (fd < 0)
3218  {
3219  if (errno != ENOENT)
3220  ereport(ERROR,
3221  (errcode_for_file_access(),
3222  errmsg("could not open file \"%s\": %m", path)));
3223  }
3224  else
3225  return fd;
3226 
3227  /*
3228  * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3229  * another process is doing the same thing. If so, we will end up
3230  * pre-creating an extra log segment. That seems OK, and better than
3231  * holding the lock throughout this lengthy process.
3232  */
3233  elog(DEBUG2, "creating and filling new WAL file");
3234 
3235  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3236 
3237  unlink(tmppath);
3238 
3239  if (io_direct_flags & IO_DIRECT_WAL_INIT)
3240  open_flags |= PG_O_DIRECT;
3241 
3242  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3243  fd = BasicOpenFile(tmppath, open_flags);
3244  if (fd < 0)
3245  ereport(ERROR,
3246  (errcode_for_file_access(),
3247  errmsg("could not create file \"%s\": %m", tmppath)));
3248 
3249  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3250  save_errno = 0;
3251  if (wal_init_zero)
3252  {
3253  ssize_t rc;
3254 
3255  /*
3256  * Zero-fill the file. With this setting, we do this the hard way to
3257  * ensure that all the file space has really been allocated. On
3258  * platforms that allow "holes" in files, just seeking to the end
3259  * doesn't allocate intermediate space. This way, we know that we
3260  * have all the space and (after the fsync below) that all the
3261  * indirect blocks are down on disk. Therefore, fdatasync(2) or
3262  * O_DSYNC will be sufficient to sync future writes to the log file.
3263  */
3264  rc = pg_pwrite_zeros(fd, wal_segment_size, 0);
3265 
3266  if (rc < 0)
3267  save_errno = errno;
3268  }
3269  else
3270  {
3271  /*
3272  * Otherwise, seeking to the end and writing a solitary byte is
3273  * enough.
3274  */
3275  errno = 0;
3276  if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3277  {
3278  /* if write didn't set errno, assume no disk space */
3279  save_errno = errno ? errno : ENOSPC;
3280  }
3281  }
3282  pgstat_report_wait_end();
3283 
3284  if (save_errno)
3285  {
3286  /*
3287  * If we fail to make the file, delete it to release disk space
3288  */
3289  unlink(tmppath);
3290 
3291  close(fd);
3292 
3293  errno = save_errno;
3294 
3295  ereport(ERROR,
3296  (errcode_for_file_access(),
3297  errmsg("could not write to file \"%s\": %m", tmppath)));
3298  }
3299 
3300  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3301  if (pg_fsync(fd) != 0)
3302  {
3303  save_errno = errno;
3304  close(fd);
3305  errno = save_errno;
3306  ereport(ERROR,
3307  (errcode_for_file_access(),
3308  errmsg("could not fsync file \"%s\": %m", tmppath)));
3309  }
3310  pgstat_report_wait_end();
3311 
3312  if (close(fd) != 0)
3313  ereport(ERROR,
3314  (errcode_for_file_access(),
3315  errmsg("could not close file \"%s\": %m", tmppath)));
3316 
3317  /*
3318  * Now move the segment into place with its final name. Cope with
3319  * possibility that someone else has created the file while we were
3320  * filling ours: if so, use ours to pre-create a future log segment.
3321  */
3322  installed_segno = logsegno;
3323 
3324  /*
3325  * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3326  * that was a constant, but that was always a bit dubious: normally, at a
3327  * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3328  * here, it was the offset from the insert location. We can't do the
3329  * normal XLOGfileslop calculation here because we don't have access to
3330  * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3331  * CheckPointSegments.
3332  */
3333  max_segno = logsegno + CheckPointSegments;
3334  if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3335  logtli))
3336  {
3337  *added = true;
3338  elog(DEBUG2, "done creating and filling new WAL file");
3339  }
3340  else
3341  {
3342  /*
3343  * No need for any more future segments, or InstallXLogFileSegment()
3344  * failed to rename the file into place. If the rename failed, a
3345  * caller opening the file may fail.
3346  */
3347  unlink(tmppath);
3348  elog(DEBUG2, "abandoned new WAL file");
3349  }
3350 
3351  return -1;
3352 }
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:687
#define pg_pwrite
Definition: port.h:226
bool wal_init_zero
Definition: xlog.c:125

References Assert, BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3605 of file xlog.c.

3606 {
3607  char path[MAXPGPATH];
3608  int fd;
3609 
3610  XLogFilePath(path, tli, segno, wal_segment_size);
3611 
3612  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3613  get_sync_bit(wal_sync_method));
3614  if (fd < 0)
3615  ereport(PANIC,
3616  (errcode_for_file_access(),
3617  errmsg("could not open file \"%s\": %m", path)));
3618 
3619  return fd;
3620 }

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2233 of file xlog.c.

2234 {
2235  XLogSegNo minSegNo;
2236  XLogSegNo maxSegNo;
2237  double distance;
2238  XLogSegNo recycleSegNo;
2239 
2240  /*
2241  * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2242  * correspond to. Always recycle enough segments to meet the minimum, and
2243  * remove enough segments to stay below the maximum.
2244  */
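2245  minSegNo = lastredoptr / wal_segment_size +
2246  ConvertToXSegs(min_wal_size_mb, wal_segment_size) - 1;
2247  maxSegNo = lastredoptr / wal_segment_size +
2248  ConvertToXSegs(max_wal_size_mb, wal_segment_size) - 1;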
2249 
2250  /*
2251  * Between those limits, recycle enough segments to get us through to the
2252  * estimated end of next checkpoint.
2253  *
2254  * To estimate where the next checkpoint will finish, assume that the
2255  * system runs steadily consuming CheckPointDistanceEstimate bytes between
2256  * every checkpoint.
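2257  */
2258  distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
2259  /* add 10% for good measure. */
2260  distance *= 1.10;
2261 
2262  recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2263  wal_segment_size);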
2264 
2265  if (recycleSegNo < minSegNo)
2266  recycleSegNo = minSegNo;
2267  if (recycleSegNo > maxSegNo)
2268  recycleSegNo = maxSegNo;
2269 
2270  return recycleSegNo;
2271 }

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2789 of file xlog.c.

2790 {
2791  XLogRecPtr WriteRqstPtr;
2792  XLogwrtRqst WriteRqst;
2793  TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2794 
2795  /*
2796  * During REDO, we are reading not writing WAL. Therefore, instead of
2797  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2798  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2799  * to act this way too, and because when it tries to write the
2800  * end-of-recovery checkpoint, it should indeed flush.
2801  */
2802  if (!XLogInsertAllowed())
2803  {
2804  UpdateMinRecoveryPoint(record, false);
2805  return;
2806  }
2807 
2808  /* Quick exit if already known flushed */
2809  if (record <= LogwrtResult.Flush)
2810  return;
2811 
2812 #ifdef WAL_DEBUG
2813  if (XLOG_DEBUG)
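2814  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2815  LSN_FORMAT_ARGS(record),
2816  LSN_FORMAT_ARGS(LogwrtResult.Write),
2817  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2818 #endif
2819 
2820  START_CRIT_SECTION();
2821 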
2822  /*
2823  * Since fsync is usually a horribly expensive operation, we try to
2824  * piggyback as much data as we can on each fsync: if we see any more data
2825  * entered into the xlog buffer, we'll write and fsync that too, so that
2826  * the final value of LogwrtResult.Flush is as large as possible. This
2827  * gives us some chance of avoiding another fsync immediately after.
2828  */
2829 
2830  /* initialize to given target; may increase below */
2831  WriteRqstPtr = record;
2832 
2833  /*
2834  * Now wait until we get the write lock, or someone else does the flush
2835  * for us.
2836  */
2837  for (;;)
2838  {
2839  XLogRecPtr insertpos;
2840 
2841  /* done already? */
2842  RefreshXLogWriteResult(LogwrtResult);
2843  if (record <= LogwrtResult.Flush)
2844  break;
2845 
2846  /*
2847  * Before actually performing the write, wait for all in-flight
2848  * insertions to the pages we're about to write to finish.
2849  */
2850  SpinLockAcquire(&XLogCtl->info_lck);
2851  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2852  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2853  SpinLockRelease(&XLogCtl->info_lck);
2854  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2855 
2856  /*
2857  * Try to get the write lock. If we can't get it immediately, wait
2858  * until it's released, and recheck if we still need to do the flush
2859  * or if the backend that held the lock did it for us already. This
2860  * helps to maintain a good rate of group committing when the system
2861  * is bottlenecked by the speed of fsyncing.
2862  */
2863  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2864  {
2865  /*
2866  * The lock is now free, but we didn't acquire it yet. Before we
2867  * do, loop back to check if someone else flushed the record for
2868  * us already.
2869  */
2870  continue;
2871  }
2872 
2873  /* Got the lock; recheck whether request is satisfied */
2874  RefreshXLogWriteResult(LogwrtResult);
2875  if (record <= LogwrtResult.Flush)
2876  {
2877  LWLockRelease(WALWriteLock);
2878  break;
2879  }
2880 
2881  /*
2882  * Sleep before flush! By adding a delay here, we may give further
2883  * backends the opportunity to join the backlog of group commit
2884  * followers; this can significantly improve transaction throughput,
2885  * at the risk of increasing transaction latency.
2886  *
2887  * We do not sleep if enableFsync is not turned on, nor if there are
2888  * fewer than CommitSiblings other backends with active transactions.
2889  */
2890  if (CommitDelay > 0 && enableFsync &&
2891  MinimumActiveBackends(CommitSiblings))
2892  {
2893  pg_usleep(CommitDelay);
2894 
2895  /*
2896  * Re-check how far we can now flush the WAL. It's generally not
2897  * safe to call WaitXLogInsertionsToFinish while holding
2898  * WALWriteLock, because an in-progress insertion might need to
2899  * also grab WALWriteLock to make progress. But we know that all
2900  * the insertions up to insertpos have already finished, because
2901  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2902  * We're only calling it again to allow insertpos to be moved
2903  * further forward, not to actually wait for anyone.
2904  */
2905  insertpos = WaitXLogInsertionsToFinish(insertpos);
2906  }
2907 
2908  /* try to write/flush later additions to XLOG as well */
2909  WriteRqst.Write = insertpos;
2910  WriteRqst.Flush = insertpos;
2911 
2912  XLogWrite(WriteRqst, insertTLI, false);
2913 
2914  LWLockRelease(WALWriteLock);
2915  /* done */
2916  break;
2917  }
2918 
2919  END_CRIT_SECTION();
2920 
2921  /* wake up walsenders now that we've released heavily contended locks */
2922  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2923 
2924  /*
2925  * If we still haven't flushed to the request point then we have a
2926  * problem; most likely, the requested flush point is past end of XLOG.
2927  * This has been seen to occur when a disk page has a corrupted LSN.
2928  *
2929  * Formerly we treated this as a PANIC condition, but that hurts the
2930  * system's robustness rather than helping it: we do not want to take down
2931  * the whole system due to corruption on one data page. In particular, if
2932  * the bad page is encountered again during recovery then we would be
2933  * unable to restart the database at all! (This scenario actually
2934  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2935  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2936  * the only time we can reach here during recovery is while flushing the
2937  * end-of-recovery checkpoint record, and we don't expect that to have a
2938  * bad LSN.
2939  *
2940  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2941  * since xact.c calls this routine inside a critical section. However,
2942  * calls from bufmgr.c are not within critical sections and so we will not
2943  * force a restart for a bad LSN on a data page.
2944  */
2945  if (LogwrtResult.Flush < record)
2946  elog(ERROR,
2947  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2948  LSN_FORMAT_ARGS(record),
2949  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2950 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1396
bool MinimumActiveBackends(int min)
Definition: procarray.c:3550
int CommitDelay
Definition: xlog.c:130
int CommitSiblings
Definition: xlog.c:131
bool XLogInsertAllowed(void)
Definition: xlog.c:6345

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3745 of file xlog.c.

3746 {
3747  XLogSegNo lastRemovedSegNo;
3748 
3749  SpinLockAcquire(&XLogCtl->info_lck);
3750  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3751  SpinLockRelease(&XLogCtl->info_lck);
3752 
3753  return lastRemovedSegNo;
3754 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3761 of file xlog.c.

3762 {
3763  DIR *xldir;
3764  struct dirent *xlde;
3765  XLogSegNo oldest_segno = 0;
3766 
3767  xldir = AllocateDir(XLOGDIR);
3768  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3769  {
3770  TimeLineID file_tli;
3771  XLogSegNo file_segno;
3772 
3773  /* Ignore files that are not XLOG segments. */
3774  if (!IsXLogFileName(xlde->d_name))
3775  continue;
3776 
3777  /* Parse filename to get TLI and segno. */
3778  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3779  wal_segment_size);
3780 
3781  /* Ignore anything that's not from the TLI of interest. */
3782  if (tli != file_tli)
3783  continue;
3784 
3785  /* If it's the oldest so far, update oldest_segno. */
3786  if (oldest_segno == 0 || file_segno < oldest_segno)
3787  oldest_segno = file_segno;
3788  }
3789 
3790  FreeDir(xldir);
3791  return oldest_segno;
3792 }

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2688 of file xlog.c.

2689 {
2690  XLogRecPtr retval;
2691 
2692  SpinLockAcquire(&XLogCtl->info_lck);
2693  retval = XLogCtl->replicationSlotMinLSN;
2694  SpinLockRelease(&XLogCtl->info_lck);
2695 
2696  return retval;
2697 }
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:457

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5179 of file xlog.c.

/*
 * XLogInitNewTimeline
 *
 * Prepare the first WAL segment of timeline 'newTLI', which begins at
 * 'endOfLog', the end of WAL on the old timeline 'endTLI'.
 *
 * If the switch point lies inside a segment, the old timeline's segment is
 * copied up to the switch offset; if it lies exactly on a segment boundary,
 * a fresh segment is created instead.  Finally, any archive status flags
 * for the new segment's file name are cleaned up.
 *
 * Raises ERROR if the newly created segment file cannot be closed.
 */
5180 {
5181  char xlogfname[MAXFNAMELEN];
5182  XLogSegNo endLogSegNo;
5183  XLogSegNo startLogSegNo;
5184 
5185  /* we always switch to a new timeline after archive recovery */
5186  Assert(endTLI != newTLI);
5187 
5188  /*
5189  * Update min recovery point one last time.
5190  */
5191  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
5192 
5193  /*
5194  * Calculate the last segment on the old timeline, and the first segment
5195  * on the new timeline. If the switch happens in the middle of a segment,
5196  * they are the same, but if the switch happens exactly at a segment
5197  * boundary, startLogSegNo will be endLogSegNo + 1.
5198  */
5199  XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5200  XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5201 
5202  /*
5203  * Initialize the starting WAL segment for the new timeline. If the switch
5204  * happens in the middle of a segment, copy data from the last WAL segment
5205  * of the old timeline up to the switch point, to the starting WAL segment
5206  * on the new timeline.
5207  */
5208  if (endLogSegNo == startLogSegNo)
5209  {
5210  /*
5211  * Make a copy of the file on the new timeline.
5212  *
5213  * Writing WAL isn't allowed yet, so there are no locking
5214  * considerations. But we should be just as tense as XLogFileInit to
5215  * avoid emplacing a bogus file.
5216  */
5217  XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5218  XLogSegmentOffset(endOfLog, wal_segment_size));
5219  }
5220  else
5221  {
5222  /*
5223  * The switch happened at a segment boundary, so just create the next
5224  * segment on the new timeline.
5225  */
5226  int fd;
5227 
5228  fd = XLogFileInit(startLogSegNo, newTLI);
5229 
5230  if (close(fd) != 0)
5231  {
5232  /* Building the file name may clobber errno; keep it for %m. */
5232  int save_errno = errno;
5233 
5234  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5235  errno = save_errno;
5236  ereport(ERROR,
5237  (errcode_for_file_access(),
5238  errmsg("could not close file \"%s\": %m", xlogfname)));
5239  }
5240  }
5241 
5242  /*
5243  * Let's just make real sure there are not .ready or .done flags posted
5244  * for the new segment.
5245  */
5246  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5247  XLogArchiveCleanup(xlogfname);
5248 }
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3405

References Assert, close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6345 of file xlog.c.

/*
 * XLogInsertAllowed
 *
 * Report whether this process may currently insert new WAL records.
 *
 * LocalXLogInsertAllowed caches the answer: a value >= 0 is returned as-is
 * (0 = false, nonzero = true); a negative value means "not known yet", in
 * which case the answer is derived from RecoveryInProgress().  Once
 * recovery is seen to be over, the cache is set to 1 so later calls take
 * the fast path.
 */
6346 {
6347  /*
6348  * If value is "unconditionally true" or "unconditionally false", just
6349  * return it. This provides the normal fast path once recovery is known
6350  * done.
6351  */
6352  if (LocalXLogInsertAllowed >= 0)
6353  return (bool) LocalXLogInsertAllowed;
6354 
6355  /*
6356  * Else, must check to see if we're still in recovery.
6357  */
6358  if (RecoveryInProgress())
6359  return false;
6360 
6361  /*
6362  * On exit from recovery, reset to "unconditionally true", since there is
6363  * no need to keep checking.
6364  */
6365  LocalXLogInsertAllowed = 1;
6366  return true;
6367 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 741 of file xlog.c.

/*
 * XLogInsertRecord
 *
 * Insert an already-assembled WAL record, given as the 'rdata' chain whose
 * first chunk starts with the XLogRecord header.
 *
 * Returns the end position of the inserted record.  Returns
 * InvalidXLogRecPtr, without inserting anything, when the full-page-write
 * decision the caller made is stale (see the check around 'fpw_lsn'), in
 * which case the caller has to rebuild the record and retry.
 *
 * 'flags' is consulted for XLOG_MARK_UNIMPORTANT; 'num_fpi' is added to the
 * WAL usage counters; 'topxid_included' controls the top-xid-logged marking
 * near the end.  On the way out ProcLastRecPtr and XactLastRecEnd are set
 * to the record's start and end.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (747,
 * 752, 764, 812, 816, 849-850, 878, 883, 893, 915, 943, 945, 947, 954, 961,
 * 965-966, 1005, 1027, 1075).  Judging by the References list for this
 * function, the missing lines presumably hold the declarations of 'Insert',
 * 'class' and 'buf', the critical-section and WAL-insert-lock calls, the
 * CopyXLogRecordToWAL() call head, and the info_lck spinlock calls --
 * confirm against the actual source before relying on this text.
 */
746 {
748  pg_crc32c rdata_crc;
749  bool inserted;
750  XLogRecord *rechdr = (XLogRecord *) rdata->data;
751  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
753  XLogRecPtr StartPos;
754  XLogRecPtr EndPos;
755  bool prevDoPageWrites = doPageWrites;
756  TimeLineID insertTLI;
757 
758  /* Does this record type require special handling? */
759  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
760  {
761  if (info == XLOG_SWITCH)
762  class = WALINSERT_SPECIAL_SWITCH;
763  else if (info == XLOG_CHECKPOINT_REDO)
765  }
766 
767  /* we assume that all of the record header is in the first chunk */
768  Assert(rdata->len >= SizeOfXLogRecord);
769 
770  /* cross-check on whether we should be here or not */
771  if (!XLogInsertAllowed())
772  elog(ERROR, "cannot make new WAL entries during recovery");
773 
774  /*
775  * Given that we're not in recovery, InsertTimeLineID is set and can't
776  * change, so we can read it without a lock.
777  */
778  insertTLI = XLogCtl->InsertTimeLineID;
779 
780  /*----------
781  *
782  * We have now done all the preparatory work we can without holding a
783  * lock or modifying shared state. From here on, inserting the new WAL
784  * record to the shared WAL buffer cache is a two-step process:
785  *
786  * 1. Reserve the right amount of space from the WAL. The current head of
787  * reserved space is kept in Insert->CurrBytePos, and is protected by
788  * insertpos_lck.
789  *
790  * 2. Copy the record to the reserved WAL space. This involves finding the
791  * correct WAL buffer containing the reserved space, and copying the
792  * record in place. This can be done concurrently in multiple processes.
793  *
794  * To keep track of which insertions are still in-progress, each concurrent
795  * inserter acquires an insertion lock. In addition to just indicating that
796  * an insertion is in progress, the lock tells others how far the inserter
797  * has progressed. There is a small fixed number of insertion locks,
798  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
799  * boundary, it updates the value stored in the lock to how far it has
800  * inserted, to allow the previous buffer to be flushed.
801  *
802  * Holding onto an insertion lock also protects RedoRecPtr and
803  * fullPageWrites from changing until the insertion is finished.
804  *
805  * Step 2 can usually be done completely in parallel. If the required WAL
806  * page is not initialized yet, you have to grab WALBufMappingLock to
807  * initialize it, but the WAL writer tries to do that ahead of insertions
808  * to avoid that from happening in the critical path.
809  *
810  *----------
811  */
813 
814  if (likely(class == WALINSERT_NORMAL))
815  {
817 
818  /*
819  * Check to see if my copy of RedoRecPtr is out of date. If so, may
820  * have to go back and have the caller recompute everything. This can
821  * only happen just after a checkpoint, so it's better to be slow in
822  * this case and fast otherwise.
823  *
824  * Also check to see if fullPageWrites was just turned on or there's a
825  * running backup (which forces full-page writes); if we weren't
826  * already doing full-page writes then go back and recompute.
827  *
828  * If we aren't doing full-page writes then RedoRecPtr doesn't
829  * actually affect the contents of the XLOG record, so we'll update
830  * our local copy but not force a recomputation. (If doPageWrites was
831  * just turned off, we could recompute the record without full pages,
832  * but we choose not to bother.)
833  */
834  if (RedoRecPtr != Insert->RedoRecPtr)
835  {
836  Assert(RedoRecPtr < Insert->RedoRecPtr);
837  RedoRecPtr = Insert->RedoRecPtr;
838  }
839  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
840 
841  if (doPageWrites &&
842  (!prevDoPageWrites ||
843  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
844  {
845  /*
846  * Oops, some buffer now needs to be backed up that the caller
847  * didn't back up. Start over.
848  */
851  return InvalidXLogRecPtr;
852  }
853 
854  /*
855  * Reserve space for the record in the WAL. This also sets the xl_prev
856  * pointer.
857  */
858  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
859  &rechdr->xl_prev);
860 
861  /* Normal records are always inserted. */
862  inserted = true;
863  }
864  else if (class == WALINSERT_SPECIAL_SWITCH)
865  {
866  /*
867  * In order to insert an XLOG_SWITCH record, we need to hold all of
868  * the WAL insertion locks, not just one, so that no one else can
869  * begin inserting a record until we've figured out how much space
870  * remains in the current WAL segment and claimed all of it.
871  *
872  * Nonetheless, this case is simpler than the normal cases handled
873  * below, which must check for changes in doPageWrites and RedoRecPtr.
874  * Those checks are only needed for records that can contain buffer
875  * references, and an XLOG_SWITCH record never does.
876  */
877  Assert(fpw_lsn == InvalidXLogRecPtr);
879  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
880  }
881  else
882  {
884 
885  /*
886  * We need to update both the local and shared copies of RedoRecPtr,
887  * which means that we need to hold all the WAL insertion locks.
888  * However, there can't be any buffer references, so as above, we need
889  * not check RedoRecPtr before inserting the record; we just need to
890  * update it afterwards.
891  */
892  Assert(fpw_lsn == InvalidXLogRecPtr);
894  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
895  &rechdr->xl_prev);
896  RedoRecPtr = Insert->RedoRecPtr = StartPos;
897  inserted = true;
898  }
899 
900  if (inserted)
901  {
902  /*
903  * Now that xl_prev has been filled in, calculate CRC of the record
904  * header.
905  */
906  rdata_crc = rechdr->xl_crc;
907  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
908  FIN_CRC32C(rdata_crc);
909  rechdr->xl_crc = rdata_crc;
910 
911  /*
912  * All the record data, including the header, is now ready to be
913  * inserted. Copy the record in the space reserved.
914  */
916  class == WALINSERT_SPECIAL_SWITCH, rdata,
917  StartPos, EndPos, insertTLI);
918 
919  /*
920  * Unless record is flagged as not important, update LSN of last
921  * important record in the current slot. When holding all locks, just
922  * update the first one.
923  */
924  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
925  {
926  int lockno = holdingAllLocks ? 0 : MyLockNo;
927 
928  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
929  }
930  }
931  else
932  {
933  /*
934  * This was an xlog-switch record, but the current insert location was
935  * already exactly at the beginning of a segment, so there was no need
936  * to do anything.
937  */
938  }
939 
940  /*
941  * Done! Let others know that we're finished.
942  */
944 
946 
948 
949  /*
950  * Mark the top transaction id as logged (if needed) so that we do not try
951  * to log it again with the next WAL record in the current subtransaction.
952  */
953  if (topxid_included)
955 
956  /*
957  * Update shared LogwrtRqst.Write, if we crossed page boundary.
958  */
959  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
960  {
962  /* advance global request to include new block(s) */
963  if (XLogCtl->LogwrtRqst.Write < EndPos)
964  XLogCtl->LogwrtRqst.Write = EndPos;
967  }
968 
969  /*
970  * If this was an XLOG_SWITCH record, flush the record and the empty
971  * padding space that fills the rest of the segment, and perform
972  * end-of-segment actions (eg, notifying archiver).
973  */
974  if (class == WALINSERT_SPECIAL_SWITCH)
975  {
976  TRACE_POSTGRESQL_WAL_SWITCH();
977  XLogFlush(EndPos);
978 
979  /*
980  * Even though we reserved the rest of the segment for us, which is
981  * reflected in EndPos, we return a pointer to just the end of the
982  * xlog-switch record.
983  */
984  if (inserted)
985  {
986  EndPos = StartPos + SizeOfXLogRecord;
987  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
988  {
989  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
990 
  /*
   * The record header spilled onto the next page, so skip that page's
   * header too: the long form when the page is the first one of a
   * segment (segment offset equals in-page offset), else the short form.
   */
991  if (offset == EndPos % XLOG_BLCKSZ)
992  EndPos += SizeOfXLogLongPHD;
993  else
994  EndPos += SizeOfXLogShortPHD;
995  }
996  }
997  }
998 
999 #ifdef WAL_DEBUG
1000  if (XLOG_DEBUG)
1001  {
1002  static XLogReaderState *debug_reader = NULL;
1003  XLogRecord *record;
1004  DecodedXLogRecord *decoded;
1006  StringInfoData recordBuf;
1007  char *errormsg = NULL;
1008  MemoryContext oldCxt;
1009 
1010  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1011 
1012  initStringInfo(&buf);
1013  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1014 
1015  /*
1016  * We have to piece together the WAL record data from the XLogRecData
1017  * entries, so that we can pass it to the rm_desc function as one
1018  * contiguous chunk.
1019  */
1020  initStringInfo(&recordBuf);
1021  for (; rdata != NULL; rdata = rdata->next)
1022  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1023 
1024  /* We also need temporary space to decode the record. */
1025  record = (XLogRecord *) recordBuf.data;
1026  decoded = (DecodedXLogRecord *)
1028 
  /* The reader is allocated once and kept in a function-static pointer. */
1029  if (!debug_reader)
1030  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1031  XL_ROUTINE(.page_read = NULL,
1032  .segment_open = NULL,
1033  .segment_close = NULL),
1034  NULL);
1035  if (!debug_reader)
1036  {
1037  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1038  }
1039  else if (!DecodeXLogRecord(debug_reader,
1040  decoded,
1041  record,
1042  EndPos,
1043  &errormsg))
1044  {
1045  appendStringInfo(&buf, "error decoding record: %s",
1046  errormsg ? errormsg : "no error message");
1047  }
1048  else
1049  {
1050  appendStringInfoString(&buf, " - ");
1051 
1052  debug_reader->record = decoded;
1053  xlog_outdesc(&buf, debug_reader);
1054  debug_reader->record = NULL;
1055  }
1056  elog(LOG, "%s", buf.data);
1057 
1058  pfree(decoded);
1059  pfree(buf.data);
1060  pfree(recordBuf.data);
1061  MemoryContextSwitchTo(oldCxt);
1062  }
1063 #endif
1064 
1065  /*
1066  * Update our global variables
1067  */
1068  ProcLastRecPtr = StartPos;
1069  XactLastRecEnd = EndPos;
1070 
1071  /* Report WAL traffic to the instrumentation. */
1072  if (inserted)
1073  {
1074  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1076  pgWalUsage.wal_fpi += num_fpi;
1077  }
1078 
1079  return EndPos;
1080 }
#define likely(x)
Definition: c.h:310
#define unlikely(x)
Definition: c.h:311
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
MemoryContextSwitchTo(old_ctx)
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:589
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:539
XLogRecPtr XactLastRecEnd
Definition: xlog.c:252
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1218
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1101
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1157
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3120 of file xlog.c.

/*
 * XLogNeedsFlush
 *
 * Report whether WAL up to 'record' still has to be flushed.
 *
 * Outside recovery the answer comes from comparing 'record' with
 * LogwrtResult.Flush, first using the local copy and then again after the
 * local copy has been refreshed.  During recovery the question is
 * reinterpreted as "would minRecoveryPoint need to be advanced?"; if
 * ControlFileLock cannot be taken immediately the function answers true
 * rather than waiting.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (3137,
 * 3141, 3150-3151, 3159, 3163, 3174).  The References list suggests the
 * missing lines are the conditions guarding the statements that follow
 * them, the copy of minRecoveryPoint/minRecoveryPointTLI out of
 * ControlFile, and a RefreshXLogWriteResult call -- confirm against the
 * actual source.
 */
3121 {
3122  /*
3123  * During recovery, we don't flush WAL but update minRecoveryPoint
3124  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3125  * would need to be updated.
3126  */
3127  if (RecoveryInProgress())
3128  {
3129  /*
3130  * An invalid minRecoveryPoint means that we need to recover all the
3131  * WAL, i.e., we're doing crash recovery. We never modify the control
3132  * file's value in that case, so we can short-circuit future checks
3133  * here too. This triggers a quick exit path for the startup process,
3134  * which cannot update its local copy of minRecoveryPoint as long as
3135  * it has not replayed all WAL available when doing crash recovery.
3136  */
3138  updateMinRecoveryPoint = false;
3139 
3140  /* Quick exit if already known to be updated or cannot be updated */
3142  return false;
3143 
3144  /*
3145  * Update local copy of minRecoveryPoint. But if the lock is busy,
3146  * just return a conservative guess.
3147  */
3148  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3149  return true;
3152  LWLockRelease(ControlFileLock);
3153 
3154  /*
3155  * Check minRecoveryPoint for any other process than the startup
3156  * process doing crash recovery, which should not update the control
3157  * file value if crash recovery is still running.
3158  */
3160  updateMinRecoveryPoint = false;
3161 
3162  /* check again */
3164  return false;
3165  else
3166  return true;
3167  }
3168 
3169  /* Quick exit if already known flushed */
3170  if (record <= LogwrtResult.Flush)
3171  return false;
3172 
3173  /* read LogwrtResult and update local state */
3175 
3176  /* check again */
3177  if (record <= LogwrtResult.Flush)
3178  return false;
3179 
3180  return true;
3181 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 7953 of file xlog.c.

/*
 * XLogPutNextOid
 *
 * Emit an XLOG_NEXTOID record (resource manager RM_XLOG_ID) whose payload
 * is the sizeof(Oid) bytes of 'nextOid'.  The record's LSN is deliberately
 * discarded and no flush is requested; the comment below explains why that
 * is safe.
 */
7954 {
7955  XLogBeginInsert();
7956  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
7957  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
7958 
7959  /*
7960  * We need not flush the NEXTOID record immediately, because any of the
7961  * just-allocated OIDs could only reach disk as part of a tuple insert or
7962  * update that would have its own XLOG record that must follow the NEXTOID
7963  * record. Therefore, the standard buffer LSN interlock applied to those
7964  * records will ensure no such OID reaches disk before the NEXTOID record
7965  * does.
7966  *
7967  * Note, however, that the above statement only covers state "within" the
7968  * database. When we use a generated OID as a file or directory name, we
7969  * are in a sense violating the basic WAL rule, because that filesystem
7970  * change may reach disk before the NEXTOID WAL record does. The impact
7971  * of this is that if a database crash occurs immediately afterward, we
7972  * might after restart re-generate the same OID and find that it conflicts
7973  * with the leftover file or directory. But since for safety's sake we
7974  * always loop until finding a nonconflicting filename, this poses no real
7975  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
7976  */
7977 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1934 of file xlog.c.

/*
 * XLogRecPtrToBytePos
 *
 * Convert the WAL location 'ptr' into a byte position that leaves page
 * headers out of the count: each whole segment before 'ptr' contributes
 * UsableBytesInSegment, the first page of the current segment contributes
 * XLOG_BLCKSZ minus the long page header, each further whole page
 * contributes UsableBytesInPage, and the partial page contributes its
 * in-page offset minus that page's header.
 *
 * NOTE(review): UsableBytesInPage / UsableBytesInSegment are defined
 * outside this listing; presumably they are the page and segment sizes net
 * of page headers -- confirm.
 */
1935 {
1936  uint64 fullsegs;
1937  uint32 fullpages;
1938  uint32 offset;
1939  uint64 result;
1940 
1941  XLByteToSeg(ptr, fullsegs, wal_segment_size);
1942 
  /* Whole pages preceding ptr within its segment, and ptr's in-page offset. */
1943  fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1944  offset = ptr % XLOG_BLCKSZ;
1945 
1946  if (fullpages == 0)
1947  {
  /* ptr is on the first page of its segment, which has the long header. */
1948  result = fullsegs * UsableBytesInSegment;
  /* offset == 0 means ptr sits exactly on the page start: add nothing. */
1949  if (offset > 0)
1950  {
1951  Assert(offset >= SizeOfXLogLongPHD);
1952  result += offset - SizeOfXLogLongPHD;
1953  }
1954  }
1955  else
1956  {
  /* ptr is on a later page, which has the short header. */
1957  result = fullsegs * UsableBytesInSegment +
1958  (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1959  (fullpages - 1) * UsableBytesInPage; /* full pages */
1960  if (offset > 0)
1961  {
1962  Assert(offset >= SizeOfXLogShortPHD);
1963  result += offset - SizeOfXLogShortPHD;
1964  }
1965  }
1966 
1967  return result;
1968 }

References Assert, SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8033 of file xlog.c.

/*
 * XLogReportParameters
 *
 * Compare current parameter settings with the values recorded in
 * ControlFile and, when they differ, optionally write and flush an
 * XLOG_PARAMETER_CHANGE record and then act on ControlFile under
 * ControlFileLock held in exclusive mode.
 *
 * NOTE(review): this listing has large gaps in its embedded line numbers
 * (8036-8042, 8051, 8056-8060, 8063, 8074-8082).  Only the wal_level
 * comparison and the wal_level / wal_log_hints assignments into 'xlrec'
 * are visible.  The References list suggests the missing lines compare and
 * copy MaxConnections, max_worker_processes, max_wal_senders,
 * max_prepared_xacts, max_locks_per_xact and track_commit_timestamp as
 * well, test XLogIsNeeded before writing the record, and store the values
 * into ControlFile followed by UpdateControlFile() -- confirm against the
 * actual source.
 */
8034 {
8035  if (wal_level != ControlFile->wal_level ||
8043  {
8044  /*
8045  * The change in number of backend slots doesn't need to be WAL-logged
8046  * if archiving is not enabled, as you can't start archive recovery
8047  * with wal_level=minimal anyway. We don't really care about the
8048  * values in pg_control either if wal_level=minimal, but seems better
8049  * to keep them up-to-date to avoid confusion.
8050  */
8052  {
8053  xl_parameter_change xlrec;
8054  XLogRecPtr recptr;
8055 
8061  xlrec.wal_level = wal_level;
8062  xlrec.wal_log_hints = wal_log_hints;
8064 
8065  XLogBeginInsert();
8066  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
8067 
  /* The record is flushed right away, before ControlFile is touched. */
8068  recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
8069  XLogFlush(recptr);
8070  }
8071 
8072  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8073 
8083 
8084  LWLockRelease(ControlFileLock);
8085  }
8086 }

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8008 of file xlog.c.

/*
 * XLogRestorePoint
 *
 * Write an XLOG_RESTORE_POINT record carrying the current timestamp and
 * the name 'rpName', emit a LOG message naming the restore point and its
 * location, and return the location that XLogInsert() reported.
 *
 * The name is copied with strlcpy() bounded by MAXFNAMELEN, so a longer
 * 'rpName' is truncated in the record, while the LOG message prints the
 * caller's string unchanged.
 */
8009 {
8010  XLogRecPtr RecPtr;
8011  xl_restore_point xlrec;
8012 
8013  xlrec.rp_time = GetCurrentTimestamp();
8014  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8015 
8016  XLogBeginInsert();
8017  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
8018 
8019  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8020 
8021  ereport(LOG,
8022  (errmsg("restore point \"%s\" created at %X/%X",
8023  rpName, LSN_FORMAT_ARGS(RecPtr))));
8024 
8025  return RecPtr;
8026 }
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2624 of file xlog.c.

/*
 * XLogSetAsyncXactLSN
 *
 * Advance XLogCtl->asyncXactLSN to 'asyncXactLSN' if that is further along
 * than the stored value, then decide whether the WAL writer should be
 * woken: always when it was recorded as sleeping, otherwise only when
 * WalWriterFlushAfter is 0 or the number of XLOG_BLCKSZ blocks between the
 * flushed position and the requested LSN reaches WalWriterFlushAfter.
 *
 * Returns early, doing nothing further, when the stored value was already
 * at or beyond the requested LSN.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (2631,
 * 2636, 2657, 2666-2667).  The References list suggests the missing lines
 * take and release XLogCtl->info_lck around the shared reads and update,
 * refresh LogwrtResult before 'flushblocks' is computed, and call
 * SetLatch() on the WAL writer's latch when 'wakeup' is set -- as shown
 * here 'wakeup' is computed but never consumed.  Confirm against the
 * actual source.
 */
2625 {
2626  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2627  bool sleeping;
2628  bool wakeup = false;
2629  XLogRecPtr prevAsyncXactLSN;
2630 
2632  sleeping = XLogCtl->WalWriterSleeping;
2633  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2634  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2635  XLogCtl->asyncXactLSN = asyncXactLSN;
2637 
2638  /*
2639  * If somebody else already called this function with a more aggressive
2640  * LSN, they will have done what we needed (and perhaps more).
2641  */
2642  if (asyncXactLSN <= prevAsyncXactLSN)
2643  return;
2644 
2645  /*
2646  * If the WALWriter is sleeping, kick it to make it come out of low-power
2647  * mode, so that this async commit will reach disk within the expected
2648  * amount of time. Otherwise, determine whether it has enough WAL
2649  * available to flush, the same way that XLogBackgroundFlush() does.
2650  */
2651  if (sleeping)
2652  wakeup = true;
2653  else
2654  {
2655  int flushblocks;
2656 
2658 
2659  flushblocks =
2660  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2661 
2662  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2663  wakeup = true;
2664  }
2665 
2668 }
void SetLatch(Latch *latch)
Definition: latch.c:632
PROC_HDR * ProcGlobal
Definition: proc.c:78
Latch * walwriterLatch
Definition: proc.h:411
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, XLogCtlData::info_lck, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterLatch, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4883 of file xlog.c.

/*
 * XLOGShmemInit
 *
 * Create or attach to the "XLOG Ctl" and "Control File" shared-memory
 * structures.
 *
 * If the structures already exist, only per-process state is set up, any
 * locally held control file copy is freed, and the function returns.
 * Otherwise XLogCtl is zeroed, the local control file copy (if any) is
 * moved into shared memory, and the space following XLogCtlData is carved
 * up, in order, into the xlblocks array, the WAL insertion locks and the
 * zero-filled WAL page buffers.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (4900,
 * 4902, 4912, 4921, 4950, 4956, 4962-4964, 4980-4982, 4985-4990).  The
 * References list suggests the missing lines create walDebugCxt, assign
 * ControlFile and WALInsertLocks, initialize each xlblocks entry and each
 * insertion lock, and initialize the remaining XLogCtl fields (spinlocks
 * and atomics) -- confirm against the actual source.
 */
4884 {
4885  bool foundCFile,
4886  foundXLog;
4887  char *allocptr;
4888  int i;
4889  ControlFileData *localControlFile;
4890 
4891 #ifdef WAL_DEBUG
4892 
4893  /*
4894  * Create a memory context for WAL debugging that's exempt from the normal
4895  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4896  * an allocation fails, but wal_debug is not for production use anyway.
4897  */
4898  if (walDebugCxt == NULL)
4899  {
4901  "WAL Debug",
4903  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4904  }
4905 #endif
4906 
4907 
4908  XLogCtl = (XLogCtlData *)
4909  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4910 
  /* Remember the process-local copy before ControlFile is repointed. */
4911  localControlFile = ControlFile;
4913  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4914 
4915  if (foundCFile || foundXLog)
4916  {
4917  /* both should be present or neither */
4918  Assert(foundCFile && foundXLog);
4919 
4920  /* Initialize local copy of WALInsertLocks */
4922 
4923  if (localControlFile)
4924  pfree(localControlFile);
4925  return;
4926  }
4927  memset(XLogCtl, 0, sizeof(XLogCtlData));
4928 
4929  /*
4930  * Already have read control file locally, unless in bootstrap mode. Move
4931  * contents into shared memory.
4932  */
4933  if (localControlFile)
4934  {
4935  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4936  pfree(localControlFile);
4937  }
4938 
4939  /*
4940  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4941  * multiple of the alignment for same, so no extra alignment padding is
4942  * needed here.
4943  */
4944  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4945  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4946  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4947 
4948  for (i = 0; i < XLOGbuffers; i++)
4949  {
4951  }
4952 
4953  /* WAL insertion locks. Ensure they're aligned to the full padded size */
  /*
   * Note that this always moves allocptr forward: when it is already a
   * multiple of the padded size the remainder is 0 and a whole
   * sizeof(WALInsertLockPadded) is skipped.  Presumably XLOGShmemSize()
   * reserves room for that -- confirm.
   */
4954  allocptr += sizeof(WALInsertLockPadded) -
4955  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4957  (WALInsertLockPadded *) allocptr;
4958  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4959 
4960  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
4961  {
4965  }
4966 
4967  /*
4968  * Align the start of the page buffers to a full xlog block size boundary.
4969  * This simplifies some calculations in XLOG insertion. It is also
4970  * required for O_DIRECT.
4971  */
4972  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
4973  XLogCtl->pages = allocptr;
4974  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
4975 
4976  /*
4977  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
4978  * in additional info.)
4979  */
4983  XLogCtl->WalWriterSleeping = false;
4984 
4991 }
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:446
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:60
int XLogCacheBlck
Definition: xlog.c:492
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:442
slock_t insertpos_lck
Definition: xlog.c:396
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4833
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4833 of file xlog.c.

/*
 * XLOGShmemSize: compute the number of bytes of shared memory requested for
 * the WAL subsystem and return it as a Size.
 *
 * The visible terms are sizeof(XLogCtlData), alignment padding for the WAL
 * I/O buffers, and XLOG_BLCKSZ bytes for each of the XLOGbuffers pages. As a
 * side effect, a wal_buffers value of -1 is replaced (via SetConfigOption)
 * with the value chosen by XLOGChooseNumBuffers() before any sizing is done.
 *
 * NOTE(review): this listing skips source lines 4854, 4865 and 4867 (see the
 * gaps in the embedded line numbers). The tail of the first SetConfigOption()
 * call and the size terms announced by the "WAL insertion locks" and
 * "xlblocks array" comments are therefore not shown; the reference list for
 * this function names PGC_S_DYNAMIC_DEFAULT and NUM_XLOGINSERT_LOCKS, which
 * presumably appear on those lines -- confirm against xlog.c.
 */
4834 {
4835  Size size;
4836 
4837  /*
4838  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4839  * This isn't an amazingly clean place to do this, but we must wait till
4840  * NBuffers has received its final value, and must do it before using the
4841  * value of XLOGbuffers to do anything important.
4842  *
4843  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4844  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4845  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4846  * the matter with PGC_S_OVERRIDE.
4847  */
4848  if (XLOGbuffers == -1)
4849  {
4850  char buf[32];
4851 
4852  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4853  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4855  if (XLOGbuffers == -1) /* failed to apply it? */
4856  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4857  PGC_S_OVERRIDE);
4858  }
 /* Everything below multiplies by XLOGbuffers, so it must be positive here. */
4859  Assert(XLOGbuffers > 0);
4860 
4861  /* XLogCtl */
4862  size = sizeof(XLogCtlData);
4863 
4864  /* WAL insertion locks, plus alignment */
4866  /* xlblocks array */
4868  /* extra alignment padding for XLOG I/O buffers */
4869  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4870  /* and the buffers themselves */
4871  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4872 
4873  /*
4874  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4875  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4876  * routine again below to compute the actual allocation size.
4877  */
4878 
4879  return size;
4880 }
#define Max(x, y)
Definition: c.h:998
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9403 of file xlog.c.

/*
 * XLogShutdownWalRcv: call ShutdownWalRcv(), then acquire ControlFileLock in
 * LW_EXCLUSIVE mode and release it again.
 *
 * NOTE(review): source line 9408, the statement executed while the lock is
 * held, is missing from this listing. The reference list for this function
 * names XLogCtlData::InstallXLogFileSegmentActive and XLogCtl, so that line
 * presumably updates that flag -- confirm against xlog.c.
 */
9404 {
9405  ShutdownWalRcv();
9406 
9407  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9409  LWLockRelease(ControlFileLock);
9410 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2307 of file xlog.c.

/*
 * XLogWrite: write WAL pages from the in-memory page cache (XLogCtl->pages)
 * to the open log file until LogwrtResult.Write reaches WriteRqst.Write,
 * then handle the flush request in WriteRqst.Flush.
 *
 * WriteRqst - requested write and flush positions
 * tli       - timeline; stored into openLogTLI when a segment is opened and
 *             used to build the file name for the write-failure message
 * flexible  - if true, leave the loop as soon as one batch of pages has
 *             been written
 *
 * Must be called inside a critical section (asserted on entry). A write
 * request lying beyond the end of the current buffer page, or a failing
 * pg_pwrite(), is reported at PANIC level.
 *
 * NOTE(review): this listing has many gaps in its embedded line numbers
 * (for example 2323, 2343, 2363-2364, 2378-2379, 2494, 2565, 2581-2586,
 * 2593, 2595), so several statements -- including some if-conditions and
 * the calls that open and fsync the segment file -- are not shown. Read it
 * together with xlog.c before relying on the details.
 */
2308 {
2309  bool ispartialpage;
2310  bool last_iteration;
2311  bool finishing_seg;
2312  int curridx;
2313  int npages;
2314  int startidx;
2315  uint32 startoffset;
2316 
2317  /* We should always be inside a critical section here */
2318  Assert(CritSectionCount > 0);
2319 
2320  /*
2321  * Update local LogwrtResult (caller probably did this already, but...)
2322  */
2324 
2325  /*
2326  * Since successive pages in the xlog cache are consecutively allocated,
2327  * we can usually gather multiple pages together and issue just one
2328  * write() call. npages is the number of pages we have determined can be
2329  * written together; startidx is the cache block index of the first one,
2330  * and startoffset is the file offset at which it should go. The latter
2331  * two variables are only valid when npages > 0, but we must initialize
2332  * all of them to keep the compiler quiet.
2333  */
2334  npages = 0;
2335  startidx = 0;
2336  startoffset = 0;
2337 
2338  /*
2339  * Within the loop, curridx is the cache block index of the page to
2340  * consider writing. Begin at the buffer containing the next unwritten
2341  * page, or last partially written page.
2342  */
2344 
2345  while (LogwrtResult.Write < WriteRqst.Write)
2346  {
2347  /*
2348  * Make sure we're not ahead of the insert process. This could happen
2349  * if we're passed a bogus WriteRqst.Write that is past the end of the
2350  * last page that's been initialized by AdvanceXLInsertBuffer.
2351  */
2352  XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2353 
2354  if (LogwrtResult.Write >= EndPtr)
2355  elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
2357  LSN_FORMAT_ARGS(EndPtr));
2358 
2359  /* Advance LogwrtResult.Write to end of current buffer page */
2360  LogwrtResult.Write = EndPtr;
 /* The request ends inside this page if it lies before the page's end. */
2361  ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2362 
2365  {
2366  /*
2367  * Switch to new logfile segment. We cannot have any pending
2368  * pages here (since we dump what we have at segment end).
2369  */
2370  Assert(npages == 0);
2371  if (openLogFile >= 0)
2372  XLogFileClose();
2375  openLogTLI = tli;
2376 
2377  /* create/use new log file */
2380  }
2381 
2382  /* Make sure we have the current logfile open */
2383  if (openLogFile < 0)
2384  {
2387  openLogTLI = tli;
2390  }
2391 
2392  /* Add current page to the set of pending pages-to-dump */
2393  if (npages == 0)
2394  {
2395  /* first of group */
2396  startidx = curridx;
2397  startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2399  }
2400  npages++;
2401 
2402  /*
2403  * Dump the set if this will be the last loop iteration, or if we are
2404  * at the last page of the cache area (since the next page won't be
2405  * contiguous in memory), or if we are at the end of the logfile
2406  * segment.
2407  */
2408  last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2409 
2410  finishing_seg = !ispartialpage &&
2411  (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2412 
2413  if (last_iteration ||
2414  curridx == XLogCtl->XLogCacheBlck ||
2415  finishing_seg)
2416  {
2417  char *from;
2418  Size nbytes;
2419  Size nleft;
2420  ssize_t written;
2421  instr_time start;
2422 
2423  /* OK to write the page(s) */
2424  from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2425  nbytes = npages * (Size) XLOG_BLCKSZ;
2426  nleft = nbytes;
 /* Loop until the whole batch is on its way; pg_pwrite() may write less than asked. */
2427  do
2428  {
2429  errno = 0;
2430 
2431  /* Measure I/O timing to write WAL data */
2432  if (track_wal_io_timing)
2434  else
2436 
2437  pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2438  written = pg_pwrite(openLogFile, from, nleft, startoffset);
2440 
2441  /*
2442  * Increment the I/O timing and the number of times WAL data
2443  * were written out to disk.
2444  */
2445  if (track_wal_io_timing)
2446  {
2447  instr_time end;
2448 
2451  }
2452 
2454 
2455  if (written <= 0)
2456  {
2457  char xlogfname[MAXFNAMELEN];
2458  int save_errno;
2459 
 /* Interrupted: nleft is unchanged, so the loop retries the same write. */
2460  if (errno == EINTR)
2461  continue;
2462 
 /* Keep errno across XLogFileName() so that %m reports the write failure. */
2463  save_errno = errno;
2464  XLogFileName(xlogfname, tli, openLogSegNo,
2466  errno = save_errno;
2467  ereport(PANIC,
2469  errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2470  xlogfname, startoffset, nleft)));
2471  }
 /* Step past the bytes just written; anything left is written next pass. */
2472  nleft -= written;
2473  from += written;
2474  startoffset += written;
2475  } while (nleft > 0);
2476 
 /* The pending batch is written; start collecting a new one. */
2477  npages = 0;
2478 
2479  /*
2480  * If we just wrote the whole last page of a logfile segment,
2481  * fsync the segment immediately. This avoids having to go back
2482  * and re-open prior segments when an fsync request comes along
2483  * later. Doing it here ensures that one and only one backend will
2484  * perform this fsync.
2485  *
2486  * This is also the right place to notify the Archiver that the
2487  * segment is ready to copy to archival storage, and to update the
2488  * timer for archive_timeout, and to signal for a checkpoint if
2489  * too many logfile segments have been used since the last
2490  * checkpoint.
2491  */
2492  if (finishing_seg)
2493  {
2495 
2496  /* signal that we need to wakeup walsenders later */
2498 
2499  LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2500 
2501  if (XLogArchivingActive())
2503 
2504  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2506 
2507  /*
2508  * Request a checkpoint if we've consumed too much xlog since
2509  * the last one. For speed, we first check using the local
2510  * copy of RedoRecPtr, which might be out of date; if it looks
2511  * like a checkpoint is needed, forcibly update RedoRecPtr and
2512  * recheck.
2513  */
2515  {
2516  (void) GetRedoRecPtr();
2519  }
2520  }
2521  }
2522 
2523  if (ispartialpage)
2524  {
2525  /* Only asked to write a partial page */
2526  LogwrtResult.Write = WriteRqst.Write;
2527  break;
2528  }
2529  curridx = NextBufIdx(curridx);
2530 
2531  /* If flexible, break out of loop as soon as we wrote something */
2532  if (flexible && npages == 0)
2533  break;
2534  }
2535 
 /* Every exit from the loop above follows a write-out, so nothing is pending. */
2536  Assert(npages == 0);
2537 
2538  /*
2539  * If asked to flush, do so
2540  */
2541  if (LogwrtResult.Flush < WriteRqst.Flush &&
2543  {
2544  /*
2545  * Could get here without iterating above loop, in which case we might
2546  * have no open file or the wrong one. However, we do not need to
2547  * fsync more than one file.
2548  */
2551  {
2552  if (openLogFile >= 0 &&
2555  XLogFileClose();
2556  if (openLogFile < 0)
2557  {
2560  openLogTLI = tli;
2563  }
2564 
2566  }
2567 
2568  /* signal that we need to wakeup walsenders later */
2570 
2572  }
2573 
2574  /*
2575  * Update shared-memory status
2576  *
2577  * We make sure that the shared 'request' values do not fall behind the
2578  * 'result' values. This is not absolutely essential, but it saves some
2579  * code in a couple of places.
2580  */
2587 
2588  /*
2589  * We write Write first, bar, then Flush. When reading, the opposite must
2590  * be done (with a matching barrier in between), so that we always see a
2591  * Flush value that trails behind the Write value seen.
2592  */
2594  pg_write_barrier();
2596 
2597 #ifdef USE_ASSERT_CHECKING
2598  {
2599  XLogRecPtr Flush;
2600  XLogRecPtr Write;
2602 
2604  pg_read_barrier();
2606  pg_read_barrier();
2608 
2609  /* WAL written to disk is always ahead of WAL flushed */
2610  Assert(Write >= Flush);
2611 
2612  /* WAL inserted to buffers is always ahead of WAL written */
2613  Assert(Insert >= Write);
2614  }
2615 #endif
2616 }
void ReserveExternalFD(void)
Definition: fd.c:1221
volatile uint32 CritSectionCount
Definition: globals.c:44
PgStat_Counter wal_write
Definition: pgstat.h:453
instr_time wal_write_time
Definition: pgstat.h:455
XLogRecPtr Flush
Definition: walreceiver.c:111
XLogRecPtr Write
Definition: walreceiver.c:110
#define WalSndWakeupRequest()
Definition: walsender.h:59
#define EINTR
Definition: win32_port.h:374
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6393
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3605
#define NextBufIdx(idx)
Definition: xlog.c:577
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8613
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2283
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert, CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_write, PgStat_PendingWalStats::wal_write_time, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

Definition at line 164 of file xlog.c.

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 164 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 157 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 130 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 131 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 120 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 215 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 222 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 234 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 133 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 113 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 629 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 630 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 158 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

bool track_wal_io_timing = false

Definition at line 135 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 641 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 122 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 124 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 123 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 134 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 125 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 114 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 121 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 126 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 141 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 164 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 116 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 115 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl